diff --git a/.gitignore b/.gitignore
index 485fb98d..9d711cb9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@
 
 # header files generated
 cbind/*.h
+util/psb_metis_int.h
 
 # Make.inc generated
 /Make.inc
diff --git a/Make.inc.in b/Make.inc.in
index b6ae650f..5741fca7 100755
--- a/Make.inc.in
+++ b/Make.inc.in
@@ -23,7 +23,7 @@ EXTRA_OPT=@EXTRA_OPT@
 MPFC=@MPIFC@
 MPCC=@MPICC@
 
-FLINK=$(MPFC)
+FLINK=@FLINK@
 
 LIBS=@LIBS@
 
diff --git a/base/comm/internals/psi_cswapdata.F90 b/base/comm/internals/psi_cswapdata.F90
index 5af5b79e..5f0b8bdc 100644
--- a/base/comm/internals/psi_cswapdata.F90
+++ b/base/comm/internals/psi_cswapdata.F90
@@ -175,7 +175,7 @@ end subroutine psi_cswapdata_vect
 !   
 !   
 ! 
-subroutine psi_cswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_cswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_cswap_vidx_vect
@@ -192,8 +192,8 @@ subroutine psi_cswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)  :: ictxt
-  integer(psb_mpk_), intent(in)    :: iicomm
+  type(psb_ctxt_type), intent(in)  :: ctxt
+  integer(psb_mpk_), intent(in)    :: icomm
   integer(psb_ipk_), intent(in)    :: flag
   integer(psb_ipk_), intent(out)   :: info
   class(psb_c_base_vect_type) :: y
@@ -203,9 +203,8 @@ subroutine psi_cswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)              :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -218,9 +217,6 @@ subroutine psi_cswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -517,7 +513,7 @@ end subroutine psi_cswapdata_multivect
 !   
 !   
 ! 
-subroutine psi_cswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_cswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_cswap_vidx_multivect
@@ -534,8 +530,8 @@ subroutine psi_cswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)    :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in)    :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_c_base_multivect_type) :: y
@@ -545,9 +541,8 @@ subroutine psi_cswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)              :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -560,9 +555,6 @@ subroutine psi_cswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_cswapdata_a.F90 b/base/comm/internals/psi_cswapdata_a.F90
index 43b91872..715b674e 100644
--- a/base/comm/internals/psi_cswapdata_a.F90
+++ b/base/comm/internals/psi_cswapdata_a.F90
@@ -179,8 +179,8 @@ subroutine psi_cswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
 
   ! locals
   
-  integer(psb_mpk_) :: np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -197,7 +197,6 @@ subroutine psi_cswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
   info=psb_success_
   name='psi_swap_data'
   call psb_erractionsave(err_act)
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -641,7 +640,7 @@ end subroutine psi_cswapdatav
 !   
 !   
 ! 
-subroutine psi_cswapidxv(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_cswapidxv(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_cswapidxv
@@ -656,8 +655,8 @@ subroutine psi_cswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)   :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)   :: icomm
   integer(psb_ipk_), intent(in)   :: flag
   integer(psb_ipk_), intent(out)  :: info
   complex(psb_spk_)         :: y(:), beta
@@ -665,9 +664,8 @@ subroutine psi_cswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -684,9 +682,6 @@ subroutine psi_cswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_cswaptran.F90 b/base/comm/internals/psi_cswaptran.F90
index 401d8435..aefb6b01 100644
--- a/base/comm/internals/psi_cswaptran.F90
+++ b/base/comm/internals/psi_cswaptran.F90
@@ -176,7 +176,7 @@ end subroutine psi_cswaptran_vect
 !   
 !   
 ! 
-subroutine psi_ctran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_ctran_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_ctran_vidx_vect
@@ -193,8 +193,8 @@ subroutine psi_ctran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)      :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in)    :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_c_base_vect_type) :: y
@@ -204,9 +204,8 @@ subroutine psi_ctran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -219,9 +218,6 @@ subroutine psi_ctran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -528,7 +524,7 @@ subroutine psi_cswaptran_multivect(flag,beta,y,desc_a,work,info,data)
 !   
 !   
 ! 
-subroutine psi_ctran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_ctran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_ctran_vidx_multivect
@@ -545,8 +541,8 @@ subroutine psi_ctran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_c_base_multivect_type) :: y
@@ -556,9 +552,8 @@ subroutine psi_ctran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -571,9 +566,6 @@ subroutine psi_ctran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_cswaptran_a.F90 b/base/comm/internals/psi_cswaptran_a.F90
index 508e445d..a7f2c687 100644
--- a/base/comm/internals/psi_cswaptran_a.F90
+++ b/base/comm/internals/psi_cswaptran_a.F90
@@ -159,7 +159,7 @@ subroutine psi_cswaptranm(flag,n,beta,y,desc_a,work,info,data)
     return
 end subroutine psi_cswaptranm
 
-subroutine psi_ctranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
+subroutine psi_ctranidxm(ctxt,icomm,flag,n,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_ctranidxm
@@ -174,8 +174,8 @@ subroutine psi_ctranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)   :: ictxt
-  integer(psb_mpk_), intent(in)     :: iicomm
+  type(psb_ctxt_type), intent(in)   :: ctxt
+  integer(psb_mpk_), intent(in)     :: icomm
   integer(psb_ipk_), intent(in)     :: flag,n
   integer(psb_ipk_), intent(out)    :: info
   complex(psb_spk_)         :: y(:,:), beta
@@ -183,9 +183,8 @@ subroutine psi_ctranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -202,9 +201,6 @@ subroutine psi_ctranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -656,7 +652,7 @@ end subroutine psi_cswaptranv
 !   
 !   
 ! 
-subroutine psi_ctranidxv(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_ctranidxv(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_ctranidxv
@@ -671,8 +667,8 @@ subroutine psi_ctranidxv(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)   :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)   :: icomm
   integer(psb_ipk_), intent(in)   :: flag
   integer(psb_ipk_), intent(out)  :: info
   complex(psb_spk_)         :: y(:), beta
@@ -680,9 +676,8 @@ subroutine psi_ctranidxv(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -699,9 +694,6 @@ subroutine psi_ctranidxv(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_dswapdata.F90 b/base/comm/internals/psi_dswapdata.F90
index f99f0254..fe529706 100644
--- a/base/comm/internals/psi_dswapdata.F90
+++ b/base/comm/internals/psi_dswapdata.F90
@@ -175,7 +175,7 @@ end subroutine psi_dswapdata_vect
 !   
 !   
 ! 
-subroutine psi_dswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_dswap_vidx_vect
@@ -192,8 +192,8 @@ subroutine psi_dswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)  :: ictxt
-  integer(psb_mpk_), intent(in)    :: iicomm
+  type(psb_ctxt_type), intent(in)  :: ctxt
+  integer(psb_mpk_), intent(in)    :: icomm
   integer(psb_ipk_), intent(in)    :: flag
   integer(psb_ipk_), intent(out)   :: info
   class(psb_d_base_vect_type) :: y
@@ -203,9 +203,8 @@ subroutine psi_dswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)              :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -218,9 +217,6 @@ subroutine psi_dswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -517,7 +513,7 @@ end subroutine psi_dswapdata_multivect
 !   
 !   
 ! 
-subroutine psi_dswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_dswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_dswap_vidx_multivect
@@ -534,8 +530,8 @@ subroutine psi_dswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)    :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in)    :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_d_base_multivect_type) :: y
@@ -545,9 +541,8 @@ subroutine psi_dswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)              :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -560,9 +555,6 @@ subroutine psi_dswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_dswapdata_a.F90 b/base/comm/internals/psi_dswapdata_a.F90
index b85b1c6d..aff32517 100644
--- a/base/comm/internals/psi_dswapdata_a.F90
+++ b/base/comm/internals/psi_dswapdata_a.F90
@@ -179,8 +179,8 @@ subroutine psi_dswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
 
   ! locals
   
-  integer(psb_mpk_) :: np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -197,7 +197,6 @@ subroutine psi_dswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
   info=psb_success_
   name='psi_swap_data'
   call psb_erractionsave(err_act)
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -641,7 +640,7 @@ end subroutine psi_dswapdatav
 !   
 !   
 ! 
-subroutine psi_dswapidxv(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_dswapidxv(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_dswapidxv
@@ -656,8 +655,8 @@ subroutine psi_dswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)   :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)   :: icomm
   integer(psb_ipk_), intent(in)   :: flag
   integer(psb_ipk_), intent(out)  :: info
   real(psb_dpk_)         :: y(:), beta
@@ -665,9 +664,8 @@ subroutine psi_dswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -684,9 +682,6 @@ subroutine psi_dswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_dswaptran.F90 b/base/comm/internals/psi_dswaptran.F90
index de46ad03..df98e1ae 100644
--- a/base/comm/internals/psi_dswaptran.F90
+++ b/base/comm/internals/psi_dswaptran.F90
@@ -176,7 +176,7 @@ end subroutine psi_dswaptran_vect
 !   
 !   
 ! 
-subroutine psi_dtran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_dtran_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_dtran_vidx_vect
@@ -193,8 +193,8 @@ subroutine psi_dtran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)      :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in)    :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_d_base_vect_type) :: y
@@ -204,9 +204,8 @@ subroutine psi_dtran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -219,9 +218,6 @@ subroutine psi_dtran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -528,7 +524,7 @@ subroutine psi_dswaptran_multivect(flag,beta,y,desc_a,work,info,data)
 !   
 !   
 ! 
-subroutine psi_dtran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_dtran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_dtran_vidx_multivect
@@ -545,8 +541,8 @@ subroutine psi_dtran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_d_base_multivect_type) :: y
@@ -556,9 +552,8 @@ subroutine psi_dtran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -571,9 +566,6 @@ subroutine psi_dtran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_dswaptran_a.F90 b/base/comm/internals/psi_dswaptran_a.F90
index 8bc7b82f..ed13df40 100644
--- a/base/comm/internals/psi_dswaptran_a.F90
+++ b/base/comm/internals/psi_dswaptran_a.F90
@@ -159,7 +159,7 @@ subroutine psi_dswaptranm(flag,n,beta,y,desc_a,work,info,data)
     return
 end subroutine psi_dswaptranm
 
-subroutine psi_dtranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
+subroutine psi_dtranidxm(ctxt,icomm,flag,n,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_dtranidxm
@@ -174,8 +174,8 @@ subroutine psi_dtranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)   :: ictxt
-  integer(psb_mpk_), intent(in)     :: iicomm
+  type(psb_ctxt_type), intent(in)   :: ctxt
+  integer(psb_mpk_), intent(in)     :: icomm
   integer(psb_ipk_), intent(in)     :: flag,n
   integer(psb_ipk_), intent(out)    :: info
   real(psb_dpk_)         :: y(:,:), beta
@@ -183,9 +183,8 @@ subroutine psi_dtranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -202,9 +201,6 @@ subroutine psi_dtranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -656,7 +652,7 @@ end subroutine psi_dswaptranv
 !   
 !   
 ! 
-subroutine psi_dtranidxv(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_dtranidxv(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_dtranidxv
@@ -671,8 +667,8 @@ subroutine psi_dtranidxv(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)   :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)   :: icomm
   integer(psb_ipk_), intent(in)   :: flag
   integer(psb_ipk_), intent(out)  :: info
   real(psb_dpk_)         :: y(:), beta
@@ -680,9 +676,8 @@ subroutine psi_dtranidxv(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -699,9 +694,6 @@ subroutine psi_dtranidxv(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_eswapdata_a.F90 b/base/comm/internals/psi_eswapdata_a.F90
index 3dc1786e..6a644563 100644
--- a/base/comm/internals/psi_eswapdata_a.F90
+++ b/base/comm/internals/psi_eswapdata_a.F90
@@ -179,8 +179,8 @@ subroutine psi_eswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
 
   ! locals
   
-  integer(psb_mpk_) :: np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -197,7 +197,6 @@ subroutine psi_eswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
   info=psb_success_
   name='psi_swap_data'
   call psb_erractionsave(err_act)
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -641,7 +640,7 @@ end subroutine psi_eswapdatav
 !   
 !   
 ! 
-subroutine psi_eswapidxv(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_eswapidxv(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_eswapidxv
@@ -656,8 +655,8 @@ subroutine psi_eswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)   :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)   :: icomm
   integer(psb_ipk_), intent(in)   :: flag
   integer(psb_ipk_), intent(out)  :: info
   integer(psb_epk_)         :: y(:), beta
@@ -665,9 +664,8 @@ subroutine psi_eswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -684,9 +682,6 @@ subroutine psi_eswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_eswaptran_a.F90 b/base/comm/internals/psi_eswaptran_a.F90
index 11419613..78ed7d8b 100644
--- a/base/comm/internals/psi_eswaptran_a.F90
+++ b/base/comm/internals/psi_eswaptran_a.F90
@@ -159,7 +159,7 @@ subroutine psi_eswaptranm(flag,n,beta,y,desc_a,work,info,data)
     return
 end subroutine psi_eswaptranm
 
-subroutine psi_etranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
+subroutine psi_etranidxm(ctxt,icomm,flag,n,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_etranidxm
@@ -174,8 +174,8 @@ subroutine psi_etranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)   :: ictxt
-  integer(psb_mpk_), intent(in)     :: iicomm
+  type(psb_ctxt_type), intent(in)   :: ctxt
+  integer(psb_mpk_), intent(in)     :: icomm
   integer(psb_ipk_), intent(in)     :: flag,n
   integer(psb_ipk_), intent(out)    :: info
   integer(psb_epk_)         :: y(:,:), beta
@@ -183,9 +183,8 @@ subroutine psi_etranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -202,9 +201,6 @@ subroutine psi_etranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -656,7 +652,7 @@ end subroutine psi_eswaptranv
 !   
 !   
 ! 
-subroutine psi_etranidxv(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_etranidxv(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_etranidxv
@@ -671,8 +667,8 @@ subroutine psi_etranidxv(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)   :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)   :: icomm
   integer(psb_ipk_), intent(in)   :: flag
   integer(psb_ipk_), intent(out)  :: info
   integer(psb_epk_)         :: y(:), beta
@@ -680,9 +676,8 @@ subroutine psi_etranidxv(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -699,9 +694,6 @@ subroutine psi_etranidxv(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_i2swapdata_a.F90 b/base/comm/internals/psi_i2swapdata_a.F90
index 6d1928d3..42b4498e 100644
--- a/base/comm/internals/psi_i2swapdata_a.F90
+++ b/base/comm/internals/psi_i2swapdata_a.F90
@@ -179,8 +179,8 @@ subroutine psi_i2swapidxm(ctxt,icomm,flag,n,beta,y,idx, &
 
   ! locals
   
-  integer(psb_mpk_) :: np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -197,7 +197,6 @@ subroutine psi_i2swapidxm(ctxt,icomm,flag,n,beta,y,idx, &
   info=psb_success_
   name='psi_swap_data'
   call psb_erractionsave(err_act)
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -641,7 +640,7 @@ end subroutine psi_i2swapdatav
 !   
 !   
 ! 
-subroutine psi_i2swapidxv(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_i2swapidxv(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_i2swapidxv
@@ -656,8 +655,8 @@ subroutine psi_i2swapidxv(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)   :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)   :: icomm
   integer(psb_ipk_), intent(in)   :: flag
   integer(psb_ipk_), intent(out)  :: info
   integer(psb_i2pk_)         :: y(:), beta
@@ -665,9 +664,8 @@ subroutine psi_i2swapidxv(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -684,9 +682,6 @@ subroutine psi_i2swapidxv(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_i2swaptran_a.F90 b/base/comm/internals/psi_i2swaptran_a.F90
index 26f3c820..f94bf29e 100644
--- a/base/comm/internals/psi_i2swaptran_a.F90
+++ b/base/comm/internals/psi_i2swaptran_a.F90
@@ -159,7 +159,7 @@ subroutine psi_i2swaptranm(flag,n,beta,y,desc_a,work,info,data)
     return
 end subroutine psi_i2swaptranm
 
-subroutine psi_i2tranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
+subroutine psi_i2tranidxm(ctxt,icomm,flag,n,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_i2tranidxm
@@ -174,8 +174,8 @@ subroutine psi_i2tranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)   :: ictxt
-  integer(psb_mpk_), intent(in)     :: iicomm
+  type(psb_ctxt_type), intent(in)   :: ctxt
+  integer(psb_mpk_), intent(in)     :: icomm
   integer(psb_ipk_), intent(in)     :: flag,n
   integer(psb_ipk_), intent(out)    :: info
   integer(psb_i2pk_)         :: y(:,:), beta
@@ -183,9 +183,8 @@ subroutine psi_i2tranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -202,9 +201,6 @@ subroutine psi_i2tranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -656,7 +652,7 @@ end subroutine psi_i2swaptranv
 !   
 !   
 ! 
-subroutine psi_i2tranidxv(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_i2tranidxv(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_i2tranidxv
@@ -671,8 +667,8 @@ subroutine psi_i2tranidxv(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)   :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)   :: icomm
   integer(psb_ipk_), intent(in)   :: flag
   integer(psb_ipk_), intent(out)  :: info
   integer(psb_i2pk_)         :: y(:), beta
@@ -680,9 +676,8 @@ subroutine psi_i2tranidxv(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -699,9 +694,6 @@ subroutine psi_i2tranidxv(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_iswapdata.F90 b/base/comm/internals/psi_iswapdata.F90
index e541ff6d..ff4bd074 100644
--- a/base/comm/internals/psi_iswapdata.F90
+++ b/base/comm/internals/psi_iswapdata.F90
@@ -175,7 +175,7 @@ end subroutine psi_iswapdata_vect
 !   
 !   
 ! 
-subroutine psi_iswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_iswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_iswap_vidx_vect
@@ -192,8 +192,8 @@ subroutine psi_iswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)  :: ictxt
-  integer(psb_mpk_), intent(in)    :: iicomm
+  type(psb_ctxt_type), intent(in)  :: ctxt
+  integer(psb_mpk_), intent(in)    :: icomm
   integer(psb_ipk_), intent(in)    :: flag
   integer(psb_ipk_), intent(out)   :: info
   class(psb_i_base_vect_type) :: y
@@ -203,9 +203,8 @@ subroutine psi_iswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)              :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -218,9 +217,6 @@ subroutine psi_iswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -517,7 +513,7 @@ end subroutine psi_iswapdata_multivect
 !   
 !   
 ! 
-subroutine psi_iswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_iswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_iswap_vidx_multivect
@@ -534,8 +530,8 @@ subroutine psi_iswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)    :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in)    :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_i_base_multivect_type) :: y
@@ -545,9 +541,8 @@ subroutine psi_iswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)              :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -560,9 +555,6 @@ subroutine psi_iswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_iswaptran.F90 b/base/comm/internals/psi_iswaptran.F90
index 46bb18b5..75a0a185 100644
--- a/base/comm/internals/psi_iswaptran.F90
+++ b/base/comm/internals/psi_iswaptran.F90
@@ -176,7 +176,7 @@ end subroutine psi_iswaptran_vect
 !   
 !   
 ! 
-subroutine psi_itran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_itran_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_itran_vidx_vect
@@ -193,8 +193,8 @@ subroutine psi_itran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)      :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in)    :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_i_base_vect_type) :: y
@@ -204,9 +204,8 @@ subroutine psi_itran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -219,9 +218,6 @@ subroutine psi_itran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -528,7 +524,7 @@ subroutine psi_iswaptran_multivect(flag,beta,y,desc_a,work,info,data)
 !   
 !   
 ! 
-subroutine psi_itran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_itran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_itran_vidx_multivect
@@ -545,8 +541,8 @@ subroutine psi_itran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_i_base_multivect_type) :: y
@@ -556,9 +552,8 @@ subroutine psi_itran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -571,9 +566,6 @@ subroutine psi_itran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_lswapdata.F90 b/base/comm/internals/psi_lswapdata.F90
index 088c6508..9201ebfa 100644
--- a/base/comm/internals/psi_lswapdata.F90
+++ b/base/comm/internals/psi_lswapdata.F90
@@ -175,7 +175,7 @@ end subroutine psi_lswapdata_vect
 !   
 !   
 ! 
-subroutine psi_lswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_lswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_lswap_vidx_vect
@@ -192,8 +192,8 @@ subroutine psi_lswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)  :: ictxt
-  integer(psb_mpk_), intent(in)    :: iicomm
+  type(psb_ctxt_type), intent(in)  :: ctxt
+  integer(psb_mpk_), intent(in)    :: icomm
   integer(psb_ipk_), intent(in)    :: flag
   integer(psb_ipk_), intent(out)   :: info
   class(psb_l_base_vect_type) :: y
@@ -203,9 +203,8 @@ subroutine psi_lswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)              :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -218,9 +217,6 @@ subroutine psi_lswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -517,7 +513,7 @@ end subroutine psi_lswapdata_multivect
 !   
 !   
 ! 
-subroutine psi_lswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_lswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_lswap_vidx_multivect
@@ -534,8 +530,8 @@ subroutine psi_lswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)    :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in)    :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_l_base_multivect_type) :: y
@@ -545,9 +541,8 @@ subroutine psi_lswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)              :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -560,9 +555,6 @@ subroutine psi_lswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_lswaptran.F90 b/base/comm/internals/psi_lswaptran.F90
index 60470169..b2b9536c 100644
--- a/base/comm/internals/psi_lswaptran.F90
+++ b/base/comm/internals/psi_lswaptran.F90
@@ -176,7 +176,7 @@ end subroutine psi_lswaptran_vect
 !   
 !   
 ! 
-subroutine psi_ltran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_ltran_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_ltran_vidx_vect
@@ -193,8 +193,8 @@ subroutine psi_ltran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)      :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in)    :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_l_base_vect_type) :: y
@@ -204,9 +204,8 @@ subroutine psi_ltran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -219,9 +218,6 @@ subroutine psi_ltran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -528,7 +524,7 @@ subroutine psi_lswaptran_multivect(flag,beta,y,desc_a,work,info,data)
 !   
 !   
 ! 
-subroutine psi_ltran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_ltran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_ltran_vidx_multivect
@@ -545,8 +541,8 @@ subroutine psi_ltran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_l_base_multivect_type) :: y
@@ -556,9 +552,8 @@ subroutine psi_ltran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -571,9 +566,6 @@ subroutine psi_ltran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_mswapdata_a.F90 b/base/comm/internals/psi_mswapdata_a.F90
index 8e86c515..e71f3a52 100644
--- a/base/comm/internals/psi_mswapdata_a.F90
+++ b/base/comm/internals/psi_mswapdata_a.F90
@@ -179,8 +179,8 @@ subroutine psi_mswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
 
   ! locals
   
-  integer(psb_mpk_) :: np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -197,7 +197,6 @@ subroutine psi_mswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
   info=psb_success_
   name='psi_swap_data'
   call psb_erractionsave(err_act)
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -641,7 +640,7 @@ end subroutine psi_mswapdatav
 !   
 !   
 ! 
-subroutine psi_mswapidxv(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_mswapidxv(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_mswapidxv
@@ -656,8 +655,8 @@ subroutine psi_mswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)   :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)   :: icomm
   integer(psb_ipk_), intent(in)   :: flag
   integer(psb_ipk_), intent(out)  :: info
   integer(psb_mpk_)         :: y(:), beta
@@ -665,9 +664,8 @@ subroutine psi_mswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -684,9 +682,6 @@ subroutine psi_mswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_mswaptran_a.F90 b/base/comm/internals/psi_mswaptran_a.F90
index 65b8e367..3a780142 100644
--- a/base/comm/internals/psi_mswaptran_a.F90
+++ b/base/comm/internals/psi_mswaptran_a.F90
@@ -159,7 +159,7 @@ subroutine psi_mswaptranm(flag,n,beta,y,desc_a,work,info,data)
     return
 end subroutine psi_mswaptranm
 
-subroutine psi_mtranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
+subroutine psi_mtranidxm(ctxt,icomm,flag,n,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_mtranidxm
@@ -174,8 +174,8 @@ subroutine psi_mtranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)   :: ictxt
-  integer(psb_mpk_), intent(in)     :: iicomm
+  type(psb_ctxt_type), intent(in)   :: ctxt
+  integer(psb_mpk_), intent(in)     :: icomm
   integer(psb_ipk_), intent(in)     :: flag,n
   integer(psb_ipk_), intent(out)    :: info
   integer(psb_mpk_)         :: y(:,:), beta
@@ -183,9 +183,8 @@ subroutine psi_mtranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -202,9 +201,6 @@ subroutine psi_mtranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -656,7 +652,7 @@ end subroutine psi_mswaptranv
 !   
 !   
 ! 
-subroutine psi_mtranidxv(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_mtranidxv(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_mtranidxv
@@ -671,8 +667,8 @@ subroutine psi_mtranidxv(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)   :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)   :: icomm
   integer(psb_ipk_), intent(in)   :: flag
   integer(psb_ipk_), intent(out)  :: info
   integer(psb_mpk_)         :: y(:), beta
@@ -680,9 +676,8 @@ subroutine psi_mtranidxv(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -699,9 +694,6 @@ subroutine psi_mtranidxv(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_sswapdata.F90 b/base/comm/internals/psi_sswapdata.F90
index 307195bb..e4f11bd0 100644
--- a/base/comm/internals/psi_sswapdata.F90
+++ b/base/comm/internals/psi_sswapdata.F90
@@ -175,7 +175,7 @@ end subroutine psi_sswapdata_vect
 !   
 !   
 ! 
-subroutine psi_sswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_sswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_sswap_vidx_vect
@@ -192,8 +192,8 @@ subroutine psi_sswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)  :: ictxt
-  integer(psb_mpk_), intent(in)    :: iicomm
+  type(psb_ctxt_type), intent(in)  :: ctxt
+  integer(psb_mpk_), intent(in)    :: icomm
   integer(psb_ipk_), intent(in)    :: flag
   integer(psb_ipk_), intent(out)   :: info
   class(psb_s_base_vect_type) :: y
@@ -203,9 +203,8 @@ subroutine psi_sswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)              :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -218,9 +217,6 @@ subroutine psi_sswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -517,7 +513,7 @@ end subroutine psi_sswapdata_multivect
 !   
 !   
 ! 
-subroutine psi_sswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_sswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_sswap_vidx_multivect
@@ -534,8 +530,8 @@ subroutine psi_sswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)    :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in)    :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_s_base_multivect_type) :: y
@@ -545,9 +541,8 @@ subroutine psi_sswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)              :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -560,9 +555,6 @@ subroutine psi_sswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_sswapdata_a.F90 b/base/comm/internals/psi_sswapdata_a.F90
index 6d74e1ad..044dc141 100644
--- a/base/comm/internals/psi_sswapdata_a.F90
+++ b/base/comm/internals/psi_sswapdata_a.F90
@@ -179,8 +179,8 @@ subroutine psi_sswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
 
   ! locals
   
-  integer(psb_mpk_) :: np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -197,7 +197,6 @@ subroutine psi_sswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
   info=psb_success_
   name='psi_swap_data'
   call psb_erractionsave(err_act)
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -641,7 +640,7 @@ end subroutine psi_sswapdatav
 !   
 !   
 ! 
-subroutine psi_sswapidxv(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_sswapidxv(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_sswapidxv
@@ -656,8 +655,8 @@ subroutine psi_sswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)   :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)   :: icomm
   integer(psb_ipk_), intent(in)   :: flag
   integer(psb_ipk_), intent(out)  :: info
   real(psb_spk_)         :: y(:), beta
@@ -665,9 +664,8 @@ subroutine psi_sswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -684,9 +682,6 @@ subroutine psi_sswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_sswaptran.F90 b/base/comm/internals/psi_sswaptran.F90
index 25aa5303..90c4b275 100644
--- a/base/comm/internals/psi_sswaptran.F90
+++ b/base/comm/internals/psi_sswaptran.F90
@@ -176,7 +176,7 @@ end subroutine psi_sswaptran_vect
 !   
 !   
 ! 
-subroutine psi_stran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_stran_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_stran_vidx_vect
@@ -193,8 +193,8 @@ subroutine psi_stran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)      :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in)    :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_s_base_vect_type) :: y
@@ -204,9 +204,8 @@ subroutine psi_stran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -219,9 +218,6 @@ subroutine psi_stran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -528,7 +524,7 @@ subroutine psi_sswaptran_multivect(flag,beta,y,desc_a,work,info,data)
 !   
 !   
 ! 
-subroutine psi_stran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_stran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_stran_vidx_multivect
@@ -545,8 +541,8 @@ subroutine psi_stran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_s_base_multivect_type) :: y
@@ -556,9 +552,8 @@ subroutine psi_stran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -571,9 +566,6 @@ subroutine psi_stran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_sswaptran_a.F90 b/base/comm/internals/psi_sswaptran_a.F90
index 866456d4..434cec4c 100644
--- a/base/comm/internals/psi_sswaptran_a.F90
+++ b/base/comm/internals/psi_sswaptran_a.F90
@@ -159,7 +159,7 @@ subroutine psi_sswaptranm(flag,n,beta,y,desc_a,work,info,data)
     return
 end subroutine psi_sswaptranm
 
-subroutine psi_stranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
+subroutine psi_stranidxm(ctxt,icomm,flag,n,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_stranidxm
@@ -174,8 +174,8 @@ subroutine psi_stranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)   :: ictxt
-  integer(psb_mpk_), intent(in)     :: iicomm
+  type(psb_ctxt_type), intent(in)   :: ctxt
+  integer(psb_mpk_), intent(in)     :: icomm
   integer(psb_ipk_), intent(in)     :: flag,n
   integer(psb_ipk_), intent(out)    :: info
   real(psb_spk_)         :: y(:,:), beta
@@ -183,9 +183,8 @@ subroutine psi_stranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -202,9 +201,6 @@ subroutine psi_stranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -656,7 +652,7 @@ end subroutine psi_sswaptranv
 !   
 !   
 ! 
-subroutine psi_stranidxv(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_stranidxv(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_stranidxv
@@ -671,8 +667,8 @@ subroutine psi_stranidxv(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)   :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)   :: icomm
   integer(psb_ipk_), intent(in)   :: flag
   integer(psb_ipk_), intent(out)  :: info
   real(psb_spk_)         :: y(:), beta
@@ -680,9 +676,8 @@ subroutine psi_stranidxv(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -699,9 +694,6 @@ subroutine psi_stranidxv(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_zswapdata.F90 b/base/comm/internals/psi_zswapdata.F90
index e892a795..991d6e40 100644
--- a/base/comm/internals/psi_zswapdata.F90
+++ b/base/comm/internals/psi_zswapdata.F90
@@ -175,7 +175,7 @@ end subroutine psi_zswapdata_vect
 !   
 !   
 ! 
-subroutine psi_zswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_zswap_vidx_vect(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_zswap_vidx_vect
@@ -192,8 +192,8 @@ subroutine psi_zswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)  :: ictxt
-  integer(psb_mpk_), intent(in)    :: iicomm
+  type(psb_ctxt_type), intent(in)  :: ctxt
+  integer(psb_mpk_), intent(in)    :: icomm
   integer(psb_ipk_), intent(in)    :: flag
   integer(psb_ipk_), intent(out)   :: info
   class(psb_z_base_vect_type) :: y
@@ -203,9 +203,8 @@ subroutine psi_zswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)              :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -218,9 +217,6 @@ subroutine psi_zswap_vidx_vect(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -517,7 +513,7 @@ end subroutine psi_zswapdata_multivect
 !   
 !   
 ! 
-subroutine psi_zswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_zswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_zswap_vidx_multivect
@@ -534,8 +530,8 @@ subroutine psi_zswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)    :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in)    :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_z_base_multivect_type) :: y
@@ -545,9 +541,8 @@ subroutine psi_zswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)              :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -560,9 +555,6 @@ subroutine psi_zswap_vidx_multivect(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_zswapdata_a.F90 b/base/comm/internals/psi_zswapdata_a.F90
index 25e1a991..2d265c76 100644
--- a/base/comm/internals/psi_zswapdata_a.F90
+++ b/base/comm/internals/psi_zswapdata_a.F90
@@ -179,8 +179,8 @@ subroutine psi_zswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
 
   ! locals
   
-  integer(psb_mpk_) :: np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -197,7 +197,6 @@ subroutine psi_zswapidxm(ctxt,icomm,flag,n,beta,y,idx, &
   info=psb_success_
   name='psi_swap_data'
   call psb_erractionsave(err_act)
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -641,7 +640,7 @@ end subroutine psi_zswapdatav
 !   
 !   
 ! 
-subroutine psi_zswapidxv(ictxt,iicomm,flag,beta,y,idx, &
+subroutine psi_zswapidxv(ctxt,icomm,flag,beta,y,idx, &
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_zswapidxv
@@ -656,8 +655,8 @@ subroutine psi_zswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)   :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)   :: icomm
   integer(psb_ipk_), intent(in)   :: flag
   integer(psb_ipk_), intent(out)  :: info
   complex(psb_dpk_)         :: y(:), beta
@@ -665,9 +664,8 @@ subroutine psi_zswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -684,9 +682,6 @@ subroutine psi_zswapidxv(ictxt,iicomm,flag,beta,y,idx, &
   info=psb_success_
   name='psi_swap_datav'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_zswaptran.F90 b/base/comm/internals/psi_zswaptran.F90
index 74fc4221..f027519f 100644
--- a/base/comm/internals/psi_zswaptran.F90
+++ b/base/comm/internals/psi_zswaptran.F90
@@ -176,7 +176,7 @@ end subroutine psi_zswaptran_vect
 !   
 !   
 ! 
-subroutine psi_ztran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_ztran_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_ztran_vidx_vect
@@ -193,8 +193,8 @@ subroutine psi_ztran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)      :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in)    :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_z_base_vect_type) :: y
@@ -204,9 +204,8 @@ subroutine psi_ztran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -219,9 +218,6 @@ subroutine psi_ztran_vidx_vect(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -528,7 +524,7 @@ subroutine psi_zswaptran_multivect(flag,beta,y,desc_a,work,info,data)
 !   
 !   
 ! 
-subroutine psi_ztran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_ztran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_ztran_vidx_multivect
@@ -545,8 +541,8 @@ subroutine psi_ztran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)      :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)      :: icomm
   integer(psb_ipk_), intent(in)      :: flag
   integer(psb_ipk_), intent(out)     :: info
   class(psb_z_base_multivect_type) :: y
@@ -556,9 +552,8 @@ subroutine psi_ztran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_)   :: np, me
+  integer(psb_mpk_)   :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable :: prcid(:)
   integer(psb_ipk_) :: nesd, nerv,&
        & err_act, i, idx_pt, totsnd_, totrcv_,&
@@ -571,9 +566,6 @@ subroutine psi_ztran_vidx_multivect(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/comm/internals/psi_zswaptran_a.F90 b/base/comm/internals/psi_zswaptran_a.F90
index 4984b51b..508d4045 100644
--- a/base/comm/internals/psi_zswaptran_a.F90
+++ b/base/comm/internals/psi_zswaptran_a.F90
@@ -159,7 +159,7 @@ subroutine psi_zswaptranm(flag,n,beta,y,desc_a,work,info,data)
     return
 end subroutine psi_zswaptranm
 
-subroutine psi_ztranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
+subroutine psi_ztranidxm(ctxt,icomm,flag,n,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_ztranidxm
@@ -174,8 +174,8 @@ subroutine psi_ztranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in)   :: ictxt
-  integer(psb_mpk_), intent(in)     :: iicomm
+  type(psb_ctxt_type), intent(in)   :: ctxt
+  integer(psb_mpk_), intent(in)     :: icomm
   integer(psb_ipk_), intent(in)     :: flag,n
   integer(psb_ipk_), intent(out)    :: info
   complex(psb_dpk_)         :: y(:,:), beta
@@ -183,9 +183,8 @@ subroutine psi_ztranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -202,9 +201,6 @@ subroutine psi_ztranidxm(ictxt,iicomm,flag,n,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
@@ -656,7 +652,7 @@ end subroutine psi_zswaptranv
 !   
 !   
 ! 
-subroutine psi_ztranidxv(ictxt,iicomm,flag,beta,y,idx,&
+subroutine psi_ztranidxv(ctxt,icomm,flag,beta,y,idx,&
      & totxch,totsnd,totrcv,work,info)
 
   use psi_mod, psb_protect_name => psi_ztranidxv
@@ -671,8 +667,8 @@ subroutine psi_ztranidxv(ictxt,iicomm,flag,beta,y,idx,&
   include 'mpif.h'
 #endif
 
-  type(psb_ctxt_type), intent(in) :: ictxt
-  integer(psb_mpk_), intent(in)   :: iicomm
+  type(psb_ctxt_type), intent(in) :: ctxt
+  integer(psb_mpk_), intent(in)   :: icomm
   integer(psb_ipk_), intent(in)   :: flag
   integer(psb_ipk_), intent(out)  :: info
   complex(psb_dpk_)         :: y(:), beta
@@ -680,9 +676,8 @@ subroutine psi_ztranidxv(ictxt,iicomm,flag,beta,y,idx,&
   integer(psb_ipk_), intent(in)      :: idx(:),totxch,totsnd, totrcv
 
   ! locals
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_mpk_)   :: icomm, np, me,&
-       & proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
+  integer(psb_ipk_) :: np, me
+  integer(psb_mpk_) :: proc_to_comm, p2ptag, p2pstat(mpi_status_size), iret
   integer(psb_mpk_), allocatable, dimension(:) :: bsdidx, brvidx,&
        & sdsz, rvsz, prcid, rvhd, sdhd
   integer(psb_ipk_) :: nesd, nerv,&
@@ -699,9 +694,6 @@ subroutine psi_ztranidxv(ictxt,iicomm,flag,beta,y,idx,&
   info=psb_success_
   name='psi_swap_tran'
   call psb_erractionsave(err_act)
-  ctxt = ictxt
-  icomm = iicomm
-
   call psb_info(ctxt,me,np) 
   if (np == -1) then
     info=psb_err_context_error_
diff --git a/base/internals/psi_bld_glb_dep_list.F90 b/base/internals/psi_bld_glb_dep_list.F90
index e415ffd2..f43e5f17 100644
--- a/base/internals/psi_bld_glb_dep_list.F90
+++ b/base/internals/psi_bld_glb_dep_list.F90
@@ -29,8 +29,8 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
 !    
-subroutine psi_i_bld_glb_csr_dep_list(ctxt,loc_dl,length_dl,c_dep_list,dl_ptr,info)
-  use psi_mod, psb_protect_name => psi_i_bld_glb_csr_dep_list
+subroutine psi_i_bld_glb_dep_list(ctxt,loc_dl,length_dl,c_dep_list,dl_ptr,info)
+  use psi_mod, psb_protect_name => psi_i_bld_glb_dep_list
 #ifdef MPI_MOD
   use mpi
 #endif
@@ -123,4 +123,4 @@ subroutine psi_i_bld_glb_csr_dep_list(ctxt,loc_dl,length_dl,c_dep_list,dl_ptr,in
 
   return
 
-end subroutine psi_i_bld_glb_csr_dep_list
+end subroutine psi_i_bld_glb_dep_list
diff --git a/base/modules/Makefile b/base/modules/Makefile
index 31c509ad..8d50011f 100644
--- a/base/modules/Makefile
+++ b/base/modules/Makefile
@@ -257,7 +257,7 @@ serial/psb_c_csc_mat_mod.o serial/psb_c_csr_mat_mod.o serial/psb_lc_csr_mat_mod.
 serial/psb_z_csc_mat_mod.o serial/psb_z_csr_mat_mod.o serial/psb_lz_csr_mat_mod.o: serial/psb_z_base_mat_mod.o
 
 serial/psb_mat_mod.o: serial/psb_vect_mod.o serial/psb_s_mat_mod.o serial/psb_d_mat_mod.o serial/psb_c_mat_mod.o serial/psb_z_mat_mod.o
-serial/psb_serial_mod.o: serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o serial/psb_c_serial_mod.o serial/psb_z_serial_mod.o 
+serial/psb_serial_mod.o: serial/psb_s_serial_mod.o serial/psb_d_serial_mod.o serial/psb_c_serial_mod.o serial/psb_z_serial_mod.o  auxil/psi_serial_mod.o
 serial/psb_i_vect_mod.o: serial/psb_i_base_vect_mod.o
 serial/psb_l_vect_mod.o: serial/psb_l_base_vect_mod.o serial/psb_i_vect_mod.o 
 serial/psb_s_vect_mod.o: serial/psb_s_base_vect_mod.o serial/psb_i_vect_mod.o
diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90
index 05145c1c..f017f350 100644
--- a/base/modules/auxil/psi_c_serial_mod.f90
+++ b/base/modules/auxil/psi_c_serial_mod.f90
@@ -32,24 +32,40 @@
 module psi_c_serial_mod
   use psb_const_mod, only :  psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_spk_
 
-  interface psb_gelp
+  interface psb_gelp 
     ! 2-D version
-    subroutine psb_cgelp(trans,iperm,x,info)
-      import :: psb_ipk_, psb_spk_
+    subroutine psb_m_cgelp(trans,iperm,x,info)
+      import :: psb_ipk_, psb_mpk_, psb_spk_
       implicit none
       complex(psb_spk_), intent(inout)     ::  x(:,:)
-      integer(psb_ipk_), intent(in)      ::  iperm(:)
+      integer(psb_mpk_), intent(in)      ::  iperm(:)
       integer(psb_ipk_), intent(out)     ::  info
       character, intent(in)              :: trans
-    end subroutine psb_cgelp
-    subroutine psb_cgelpv(trans,iperm,x,info)
-      import :: psb_ipk_, psb_spk_
+    end subroutine psb_m_cgelp
+    subroutine psb_m_cgelpv(trans,iperm,x,info)
+      import :: psb_ipk_, psb_mpk_,psb_spk_
+      implicit none
+      complex(psb_spk_), intent(inout)     ::  x(:)
+      integer(psb_mpk_), intent(in)      ::  iperm(:)
+      integer(psb_ipk_), intent(out)     ::  info
+      character, intent(in)              :: trans
+    end subroutine psb_m_cgelpv
+    subroutine psb_e_cgelp(trans,iperm,x,info)
+      import :: psb_ipk_, psb_epk_, psb_spk_
+      implicit none
+      complex(psb_spk_), intent(inout)     ::  x(:,:)
+      integer(psb_epk_), intent(in)      ::  iperm(:)
+      integer(psb_ipk_), intent(out)     ::  info
+      character, intent(in)              :: trans
+    end subroutine psb_e_cgelp
+    subroutine psb_e_cgelpv(trans,iperm,x,info)
+      import :: psb_ipk_, psb_epk_, psb_spk_
       implicit none
       complex(psb_spk_), intent(inout)     ::  x(:)
-      integer(psb_ipk_), intent(in)      ::  iperm(:)
+      integer(psb_epk_), intent(in)      ::  iperm(:)
       integer(psb_ipk_), intent(out)     ::  info
       character, intent(in)              :: trans
-    end subroutine psb_cgelpv
+    end subroutine psb_e_cgelpv
   end interface psb_gelp
 
   interface psb_geaxpby
diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90
index 0bea1bce..c27aa600 100644
--- a/base/modules/auxil/psi_d_serial_mod.f90
+++ b/base/modules/auxil/psi_d_serial_mod.f90
@@ -32,24 +32,40 @@
 module psi_d_serial_mod
   use psb_const_mod, only :  psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_dpk_
 
-  interface psb_gelp
+  interface psb_gelp 
     ! 2-D version
-    subroutine psb_dgelp(trans,iperm,x,info)
-      import :: psb_ipk_, psb_dpk_
+    subroutine psb_m_dgelp(trans,iperm,x,info)
+      import :: psb_ipk_, psb_mpk_, psb_dpk_
       implicit none
       real(psb_dpk_), intent(inout)     ::  x(:,:)
-      integer(psb_ipk_), intent(in)      ::  iperm(:)
+      integer(psb_mpk_), intent(in)      ::  iperm(:)
       integer(psb_ipk_), intent(out)     ::  info
       character, intent(in)              :: trans
-    end subroutine psb_dgelp
-    subroutine psb_dgelpv(trans,iperm,x,info)
-      import :: psb_ipk_, psb_dpk_
+    end subroutine psb_m_dgelp
+    subroutine psb_m_dgelpv(trans,iperm,x,info)
+      import :: psb_ipk_, psb_mpk_,psb_dpk_
+      implicit none
+      real(psb_dpk_), intent(inout)     ::  x(:)
+      integer(psb_mpk_), intent(in)      ::  iperm(:)
+      integer(psb_ipk_), intent(out)     ::  info
+      character, intent(in)              :: trans
+    end subroutine psb_m_dgelpv
+    subroutine psb_e_dgelp(trans,iperm,x,info)
+      import :: psb_ipk_, psb_epk_, psb_dpk_
+      implicit none
+      real(psb_dpk_), intent(inout)     ::  x(:,:)
+      integer(psb_epk_), intent(in)      ::  iperm(:)
+      integer(psb_ipk_), intent(out)     ::  info
+      character, intent(in)              :: trans
+    end subroutine psb_e_dgelp
+    subroutine psb_e_dgelpv(trans,iperm,x,info)
+      import :: psb_ipk_, psb_epk_, psb_dpk_
       implicit none
       real(psb_dpk_), intent(inout)     ::  x(:)
-      integer(psb_ipk_), intent(in)      ::  iperm(:)
+      integer(psb_epk_), intent(in)      ::  iperm(:)
       integer(psb_ipk_), intent(out)     ::  info
       character, intent(in)              :: trans
-    end subroutine psb_dgelpv
+    end subroutine psb_e_dgelpv
   end interface psb_gelp
 
   interface psb_geaxpby
diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90
index f8b9694d..99a91985 100644
--- a/base/modules/auxil/psi_e_serial_mod.f90
+++ b/base/modules/auxil/psi_e_serial_mod.f90
@@ -32,24 +32,40 @@
 module psi_e_serial_mod
   use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
 
-  interface psb_gelp
+  interface psb_gelp 
     ! 2-D version
-    subroutine psb_egelp(trans,iperm,x,info)
+    subroutine psb_m_egelp(trans,iperm,x,info)
       import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
       implicit none
       integer(psb_epk_), intent(inout)     ::  x(:,:)
-      integer(psb_ipk_), intent(in)      ::  iperm(:)
+      integer(psb_mpk_), intent(in)      ::  iperm(:)
       integer(psb_ipk_), intent(out)     ::  info
       character, intent(in)              :: trans
-    end subroutine psb_egelp
-    subroutine psb_egelpv(trans,iperm,x,info)
+    end subroutine psb_m_egelp
+    subroutine psb_m_egelpv(trans,iperm,x,info)
       import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
       implicit none
       integer(psb_epk_), intent(inout)     ::  x(:)
-      integer(psb_ipk_), intent(in)      ::  iperm(:)
+      integer(psb_mpk_), intent(in)      ::  iperm(:)
       integer(psb_ipk_), intent(out)     ::  info
       character, intent(in)              :: trans
-    end subroutine psb_egelpv
+    end subroutine psb_m_egelpv
+    subroutine psb_e_egelp(trans,iperm,x,info)
+      import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
+      implicit none
+      integer(psb_epk_), intent(inout)     ::  x(:,:)
+      integer(psb_epk_), intent(in)      ::  iperm(:)
+      integer(psb_ipk_), intent(out)     ::  info
+      character, intent(in)              :: trans
+    end subroutine psb_e_egelp
+    subroutine psb_e_egelpv(trans,iperm,x,info)
+      import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
+      implicit none
+      integer(psb_epk_), intent(inout)     ::  x(:)
+      integer(psb_epk_), intent(in)      ::  iperm(:)
+      integer(psb_ipk_), intent(out)     ::  info
+      character, intent(in)              :: trans
+    end subroutine psb_e_egelpv
   end interface psb_gelp
 
   interface psb_geaxpby
diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90
index bc0df7c5..565955e7 100644
--- a/base/modules/auxil/psi_i2_serial_mod.f90
+++ b/base/modules/auxil/psi_i2_serial_mod.f90
@@ -32,24 +32,40 @@
 module psi_i2_serial_mod
   use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
 
-  interface psb_gelp
+  interface psb_gelp 
     ! 2-D version
-    subroutine psb_i2gelp(trans,iperm,x,info)
+    subroutine psb_m_i2gelp(trans,iperm,x,info)
       import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
       implicit none
       integer(psb_i2pk_), intent(inout)     ::  x(:,:)
-      integer(psb_ipk_), intent(in)      ::  iperm(:)
+      integer(psb_mpk_), intent(in)      ::  iperm(:)
       integer(psb_ipk_), intent(out)     ::  info
       character, intent(in)              :: trans
-    end subroutine psb_i2gelp
-    subroutine psb_i2gelpv(trans,iperm,x,info)
+    end subroutine psb_m_i2gelp
+    subroutine psb_m_i2gelpv(trans,iperm,x,info)
       import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
       implicit none
       integer(psb_i2pk_), intent(inout)     ::  x(:)
-      integer(psb_ipk_), intent(in)      ::  iperm(:)
+      integer(psb_mpk_), intent(in)      ::  iperm(:)
       integer(psb_ipk_), intent(out)     ::  info
       character, intent(in)              :: trans
-    end subroutine psb_i2gelpv
+    end subroutine psb_m_i2gelpv
+    subroutine psb_e_i2gelp(trans,iperm,x,info)
+      import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
+      implicit none
+      integer(psb_i2pk_), intent(inout)     ::  x(:,:)
+      integer(psb_epk_), intent(in)      ::  iperm(:)
+      integer(psb_ipk_), intent(out)     ::  info
+      character, intent(in)              :: trans
+    end subroutine psb_e_i2gelp
+    subroutine psb_e_i2gelpv(trans,iperm,x,info)
+      import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
+      implicit none
+      integer(psb_i2pk_), intent(inout)     ::  x(:)
+      integer(psb_epk_), intent(in)      ::  iperm(:)
+      integer(psb_ipk_), intent(out)     ::  info
+      character, intent(in)              :: trans
+    end subroutine psb_e_i2gelpv
   end interface psb_gelp
 
   interface psb_geaxpby
diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90
index 2acb7482..17ea8dc4 100644
--- a/base/modules/auxil/psi_m_serial_mod.f90
+++ b/base/modules/auxil/psi_m_serial_mod.f90
@@ -32,24 +32,40 @@
 module psi_m_serial_mod
   use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
 
-  interface psb_gelp
+  interface psb_gelp 
     ! 2-D version
-    subroutine psb_mgelp(trans,iperm,x,info)
+    subroutine psb_m_mgelp(trans,iperm,x,info)
       import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
       implicit none
       integer(psb_mpk_), intent(inout)     ::  x(:,:)
-      integer(psb_ipk_), intent(in)      ::  iperm(:)
+      integer(psb_mpk_), intent(in)      ::  iperm(:)
       integer(psb_ipk_), intent(out)     ::  info
       character, intent(in)              :: trans
-    end subroutine psb_mgelp
-    subroutine psb_mgelpv(trans,iperm,x,info)
+    end subroutine psb_m_mgelp
+    subroutine psb_m_mgelpv(trans,iperm,x,info)
       import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
       implicit none
       integer(psb_mpk_), intent(inout)     ::  x(:)
-      integer(psb_ipk_), intent(in)      ::  iperm(:)
+      integer(psb_mpk_), intent(in)      ::  iperm(:)
       integer(psb_ipk_), intent(out)     ::  info
       character, intent(in)              :: trans
-    end subroutine psb_mgelpv
+    end subroutine psb_m_mgelpv
+    subroutine psb_e_mgelp(trans,iperm,x,info)
+      import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
+      implicit none
+      integer(psb_mpk_), intent(inout)     ::  x(:,:)
+      integer(psb_epk_), intent(in)      ::  iperm(:)
+      integer(psb_ipk_), intent(out)     ::  info
+      character, intent(in)              :: trans
+    end subroutine psb_e_mgelp
+    subroutine psb_e_mgelpv(trans,iperm,x,info)
+      import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
+      implicit none
+      integer(psb_mpk_), intent(inout)     ::  x(:)
+      integer(psb_epk_), intent(in)      ::  iperm(:)
+      integer(psb_ipk_), intent(out)     ::  info
+      character, intent(in)              :: trans
+    end subroutine psb_e_mgelpv
   end interface psb_gelp
 
   interface psb_geaxpby
diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90
index ac3dbb62..ed7b5d9f 100644
--- a/base/modules/auxil/psi_s_serial_mod.f90
+++ b/base/modules/auxil/psi_s_serial_mod.f90
@@ -32,24 +32,40 @@
 module psi_s_serial_mod
   use psb_const_mod, only :  psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_spk_
 
-  interface psb_gelp
+  interface psb_gelp 
     ! 2-D version
-    subroutine psb_sgelp(trans,iperm,x,info)
-      import :: psb_ipk_, psb_spk_
+    subroutine psb_m_sgelp(trans,iperm,x,info)
+      import :: psb_ipk_, psb_mpk_, psb_spk_
       implicit none
       real(psb_spk_), intent(inout)     ::  x(:,:)
-      integer(psb_ipk_), intent(in)      ::  iperm(:)
+      integer(psb_mpk_), intent(in)      ::  iperm(:)
       integer(psb_ipk_), intent(out)     ::  info
       character, intent(in)              :: trans
-    end subroutine psb_sgelp
-    subroutine psb_sgelpv(trans,iperm,x,info)
-      import :: psb_ipk_, psb_spk_
+    end subroutine psb_m_sgelp
+    subroutine psb_m_sgelpv(trans,iperm,x,info)
+      import :: psb_ipk_, psb_mpk_,psb_spk_
+      implicit none
+      real(psb_spk_), intent(inout)     ::  x(:)
+      integer(psb_mpk_), intent(in)      ::  iperm(:)
+      integer(psb_ipk_), intent(out)     ::  info
+      character, intent(in)              :: trans
+    end subroutine psb_m_sgelpv
+    subroutine psb_e_sgelp(trans,iperm,x,info)
+      import :: psb_ipk_, psb_epk_, psb_spk_
+      implicit none
+      real(psb_spk_), intent(inout)     ::  x(:,:)
+      integer(psb_epk_), intent(in)      ::  iperm(:)
+      integer(psb_ipk_), intent(out)     ::  info
+      character, intent(in)              :: trans
+    end subroutine psb_e_sgelp
+    subroutine psb_e_sgelpv(trans,iperm,x,info)
+      import :: psb_ipk_, psb_epk_, psb_spk_
       implicit none
       real(psb_spk_), intent(inout)     ::  x(:)
-      integer(psb_ipk_), intent(in)      ::  iperm(:)
+      integer(psb_epk_), intent(in)      ::  iperm(:)
       integer(psb_ipk_), intent(out)     ::  info
       character, intent(in)              :: trans
-    end subroutine psb_sgelpv
+    end subroutine psb_e_sgelpv
   end interface psb_gelp
 
   interface psb_geaxpby
diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90
index ee148ef2..9de8451b 100644
--- a/base/modules/auxil/psi_z_serial_mod.f90
+++ b/base/modules/auxil/psi_z_serial_mod.f90
@@ -32,24 +32,40 @@
 module psi_z_serial_mod
   use psb_const_mod, only :  psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_dpk_
 
-  interface psb_gelp
+  interface psb_gelp 
     ! 2-D version
-    subroutine psb_zgelp(trans,iperm,x,info)
-      import :: psb_ipk_, psb_dpk_
+    subroutine psb_m_zgelp(trans,iperm,x,info)
+      import :: psb_ipk_, psb_mpk_, psb_dpk_
       implicit none
       complex(psb_dpk_), intent(inout)     ::  x(:,:)
-      integer(psb_ipk_), intent(in)      ::  iperm(:)
+      integer(psb_mpk_), intent(in)      ::  iperm(:)
       integer(psb_ipk_), intent(out)     ::  info
       character, intent(in)              :: trans
-    end subroutine psb_zgelp
-    subroutine psb_zgelpv(trans,iperm,x,info)
-      import :: psb_ipk_, psb_dpk_
+    end subroutine psb_m_zgelp
+    subroutine psb_m_zgelpv(trans,iperm,x,info)
+      import :: psb_ipk_, psb_mpk_,psb_dpk_
+      implicit none
+      complex(psb_dpk_), intent(inout)     ::  x(:)
+      integer(psb_mpk_), intent(in)      ::  iperm(:)
+      integer(psb_ipk_), intent(out)     ::  info
+      character, intent(in)              :: trans
+    end subroutine psb_m_zgelpv
+    subroutine psb_e_zgelp(trans,iperm,x,info)
+      import :: psb_ipk_, psb_epk_, psb_dpk_
+      implicit none
+      complex(psb_dpk_), intent(inout)     ::  x(:,:)
+      integer(psb_epk_), intent(in)      ::  iperm(:)
+      integer(psb_ipk_), intent(out)     ::  info
+      character, intent(in)              :: trans
+    end subroutine psb_e_zgelp
+    subroutine psb_e_zgelpv(trans,iperm,x,info)
+      import :: psb_ipk_, psb_epk_, psb_dpk_
       implicit none
       complex(psb_dpk_), intent(inout)     ::  x(:)
-      integer(psb_ipk_), intent(in)      ::  iperm(:)
+      integer(psb_epk_), intent(in)      ::  iperm(:)
       integer(psb_ipk_), intent(out)     ::  info
       character, intent(in)              :: trans
-    end subroutine psb_zgelpv
+    end subroutine psb_e_zgelpv
   end interface psb_gelp
 
   interface psb_geaxpby
diff --git a/base/modules/comm/psi_c_comm_v_mod.f90 b/base/modules/comm/psi_c_comm_v_mod.f90
index 47eb7fdf..7d10a028 100644
--- a/base/modules/comm/psi_c_comm_v_mod.f90
+++ b/base/modules/comm/psi_c_comm_v_mod.f90
@@ -56,11 +56,11 @@ module psi_c_comm_v_mod
       type(psb_desc_type), target :: desc_a
       integer(psb_ipk_), optional :: data
     end subroutine psi_cswapdata_multivect
-    subroutine psi_cswap_vidx_vect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_cswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)         :: ctxt
-      integer(psb_mpk_), intent(in)           :: iicomm
+      integer(psb_mpk_), intent(in)           :: icomm
       integer(psb_ipk_), intent(in)           :: flag
       integer(psb_ipk_), intent(out)          :: info
       class(psb_c_base_vect_type)             :: y
@@ -69,11 +69,11 @@ module psi_c_comm_v_mod
       class(psb_i_base_vect_type), intent(inout) :: idx
       integer(psb_ipk_), intent(in)           :: totxch,totsnd, totrcv
     end subroutine psi_cswap_vidx_vect
-    subroutine psi_cswap_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_cswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_c_base_multivect_type)    :: y
@@ -106,11 +106,11 @@ module psi_c_comm_v_mod
       type(psb_desc_type), target :: desc_a
       integer(psb_ipk_), optional           :: data
     end subroutine psi_cswaptran_multivect
-    subroutine psi_ctran_vidx_vect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_ctran_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_c_base_vect_type)          :: y
@@ -119,11 +119,11 @@ module psi_c_comm_v_mod
       class(psb_i_base_vect_type), intent(inout) :: idx
       integer(psb_ipk_), intent(in)           :: totxch,totsnd, totrcv
     end subroutine psi_ctran_vidx_vect
-    subroutine psi_ctran_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_ctran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_c_base_multivect_type)      :: y
diff --git a/base/modules/comm/psi_d_comm_v_mod.f90 b/base/modules/comm/psi_d_comm_v_mod.f90
index 6b7cdfd6..b7a902da 100644
--- a/base/modules/comm/psi_d_comm_v_mod.f90
+++ b/base/modules/comm/psi_d_comm_v_mod.f90
@@ -56,11 +56,11 @@ module psi_d_comm_v_mod
       type(psb_desc_type), target :: desc_a
       integer(psb_ipk_), optional :: data
     end subroutine psi_dswapdata_multivect
-    subroutine psi_dswap_vidx_vect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_dswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)         :: ctxt
-      integer(psb_mpk_), intent(in)           :: iicomm
+      integer(psb_mpk_), intent(in)           :: icomm
       integer(psb_ipk_), intent(in)           :: flag
       integer(psb_ipk_), intent(out)          :: info
       class(psb_d_base_vect_type)             :: y
@@ -69,11 +69,11 @@ module psi_d_comm_v_mod
       class(psb_i_base_vect_type), intent(inout) :: idx
       integer(psb_ipk_), intent(in)           :: totxch,totsnd, totrcv
     end subroutine psi_dswap_vidx_vect
-    subroutine psi_dswap_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_dswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_d_base_multivect_type)    :: y
@@ -106,11 +106,11 @@ module psi_d_comm_v_mod
       type(psb_desc_type), target :: desc_a
       integer(psb_ipk_), optional           :: data
     end subroutine psi_dswaptran_multivect
-    subroutine psi_dtran_vidx_vect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_dtran_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_d_base_vect_type)          :: y
@@ -119,11 +119,11 @@ module psi_d_comm_v_mod
       class(psb_i_base_vect_type), intent(inout) :: idx
       integer(psb_ipk_), intent(in)           :: totxch,totsnd, totrcv
     end subroutine psi_dtran_vidx_vect
-    subroutine psi_dtran_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_dtran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_d_base_multivect_type)      :: y
diff --git a/base/modules/comm/psi_i_comm_v_mod.f90 b/base/modules/comm/psi_i_comm_v_mod.f90
index 4072a6c4..2fe3948c 100644
--- a/base/modules/comm/psi_i_comm_v_mod.f90
+++ b/base/modules/comm/psi_i_comm_v_mod.f90
@@ -57,11 +57,11 @@ module psi_i_comm_v_mod
       type(psb_desc_type), target :: desc_a
       integer(psb_ipk_), optional :: data
     end subroutine psi_iswapdata_multivect
-    subroutine psi_iswap_vidx_vect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_iswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)         :: ctxt
-      integer(psb_mpk_), intent(in)           :: iicomm
+      integer(psb_mpk_), intent(in)           :: icomm
       integer(psb_ipk_), intent(in)           :: flag
       integer(psb_ipk_), intent(out)          :: info
       class(psb_i_base_vect_type)             :: y
@@ -70,11 +70,11 @@ module psi_i_comm_v_mod
       class(psb_i_base_vect_type), intent(inout) :: idx
       integer(psb_ipk_), intent(in)           :: totxch,totsnd, totrcv
     end subroutine psi_iswap_vidx_vect
-    subroutine psi_iswap_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_iswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_i_base_multivect_type)    :: y
@@ -107,11 +107,11 @@ module psi_i_comm_v_mod
       type(psb_desc_type), target :: desc_a
       integer(psb_ipk_), optional           :: data
     end subroutine psi_iswaptran_multivect
-    subroutine psi_itran_vidx_vect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_itran_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_i_base_vect_type)          :: y
@@ -120,11 +120,11 @@ module psi_i_comm_v_mod
       class(psb_i_base_vect_type), intent(inout) :: idx
       integer(psb_ipk_), intent(in)           :: totxch,totsnd, totrcv
     end subroutine psi_itran_vidx_vect
-    subroutine psi_itran_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_itran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_i_base_multivect_type)      :: y
diff --git a/base/modules/comm/psi_l_comm_v_mod.f90 b/base/modules/comm/psi_l_comm_v_mod.f90
index b3b55a0d..b61a17b7 100644
--- a/base/modules/comm/psi_l_comm_v_mod.f90
+++ b/base/modules/comm/psi_l_comm_v_mod.f90
@@ -58,11 +58,11 @@ module psi_l_comm_v_mod
       type(psb_desc_type), target :: desc_a
       integer(psb_ipk_), optional :: data
     end subroutine psi_lswapdata_multivect
-    subroutine psi_lswap_vidx_vect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_lswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)         :: ctxt
-      integer(psb_mpk_), intent(in)           :: iicomm
+      integer(psb_mpk_), intent(in)           :: icomm
       integer(psb_ipk_), intent(in)           :: flag
       integer(psb_ipk_), intent(out)          :: info
       class(psb_l_base_vect_type)             :: y
@@ -71,11 +71,11 @@ module psi_l_comm_v_mod
       class(psb_i_base_vect_type), intent(inout) :: idx
       integer(psb_ipk_), intent(in)           :: totxch,totsnd, totrcv
     end subroutine psi_lswap_vidx_vect
-    subroutine psi_lswap_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_lswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_l_base_multivect_type)    :: y
@@ -108,11 +108,11 @@ module psi_l_comm_v_mod
       type(psb_desc_type), target :: desc_a
       integer(psb_ipk_), optional           :: data
     end subroutine psi_lswaptran_multivect
-    subroutine psi_ltran_vidx_vect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_ltran_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_l_base_vect_type)          :: y
@@ -121,11 +121,11 @@ module psi_l_comm_v_mod
       class(psb_i_base_vect_type), intent(inout) :: idx
       integer(psb_ipk_), intent(in)           :: totxch,totsnd, totrcv
     end subroutine psi_ltran_vidx_vect
-    subroutine psi_ltran_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_ltran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_l_base_multivect_type)      :: y
diff --git a/base/modules/comm/psi_s_comm_v_mod.f90 b/base/modules/comm/psi_s_comm_v_mod.f90
index a2eb0bcf..1cf4d53e 100644
--- a/base/modules/comm/psi_s_comm_v_mod.f90
+++ b/base/modules/comm/psi_s_comm_v_mod.f90
@@ -56,11 +56,11 @@ module psi_s_comm_v_mod
       type(psb_desc_type), target :: desc_a
       integer(psb_ipk_), optional :: data
     end subroutine psi_sswapdata_multivect
-    subroutine psi_sswap_vidx_vect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_sswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)         :: ctxt
-      integer(psb_mpk_), intent(in)           :: iicomm
+      integer(psb_mpk_), intent(in)           :: icomm
       integer(psb_ipk_), intent(in)           :: flag
       integer(psb_ipk_), intent(out)          :: info
       class(psb_s_base_vect_type)             :: y
@@ -69,11 +69,11 @@ module psi_s_comm_v_mod
       class(psb_i_base_vect_type), intent(inout) :: idx
       integer(psb_ipk_), intent(in)           :: totxch,totsnd, totrcv
     end subroutine psi_sswap_vidx_vect
-    subroutine psi_sswap_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_sswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_s_base_multivect_type)    :: y
@@ -106,11 +106,11 @@ module psi_s_comm_v_mod
       type(psb_desc_type), target :: desc_a
       integer(psb_ipk_), optional           :: data
     end subroutine psi_sswaptran_multivect
-    subroutine psi_stran_vidx_vect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_stran_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_s_base_vect_type)          :: y
@@ -119,11 +119,11 @@ module psi_s_comm_v_mod
       class(psb_i_base_vect_type), intent(inout) :: idx
       integer(psb_ipk_), intent(in)           :: totxch,totsnd, totrcv
     end subroutine psi_stran_vidx_vect
-    subroutine psi_stran_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_stran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_s_base_multivect_type)      :: y
diff --git a/base/modules/comm/psi_z_comm_v_mod.f90 b/base/modules/comm/psi_z_comm_v_mod.f90
index 02c1b8d8..de8e1117 100644
--- a/base/modules/comm/psi_z_comm_v_mod.f90
+++ b/base/modules/comm/psi_z_comm_v_mod.f90
@@ -56,11 +56,11 @@ module psi_z_comm_v_mod
       type(psb_desc_type), target :: desc_a
       integer(psb_ipk_), optional :: data
     end subroutine psi_zswapdata_multivect
-    subroutine psi_zswap_vidx_vect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_zswap_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)         :: ctxt
-      integer(psb_mpk_), intent(in)           :: iicomm
+      integer(psb_mpk_), intent(in)           :: icomm
       integer(psb_ipk_), intent(in)           :: flag
       integer(psb_ipk_), intent(out)          :: info
       class(psb_z_base_vect_type)             :: y
@@ -69,11 +69,11 @@ module psi_z_comm_v_mod
       class(psb_i_base_vect_type), intent(inout) :: idx
       integer(psb_ipk_), intent(in)           :: totxch,totsnd, totrcv
     end subroutine psi_zswap_vidx_vect
-    subroutine psi_zswap_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_zswap_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_z_base_multivect_type)    :: y
@@ -106,11 +106,11 @@ module psi_z_comm_v_mod
       type(psb_desc_type), target :: desc_a
       integer(psb_ipk_), optional           :: data
     end subroutine psi_zswaptran_multivect
-    subroutine psi_ztran_vidx_vect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_ztran_vidx_vect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_z_base_vect_type)          :: y
@@ -119,11 +119,11 @@ module psi_z_comm_v_mod
       class(psb_i_base_vect_type), intent(inout) :: idx
       integer(psb_ipk_), intent(in)           :: totxch,totsnd, totrcv
     end subroutine psi_ztran_vidx_vect
-    subroutine psi_ztran_vidx_multivect(ctxt,iicomm,flag,beta,y,idx,&
+    subroutine psi_ztran_vidx_multivect(ctxt,icomm,flag,beta,y,idx,&
          & totxch,totsnd,totrcv,work,info)
       import 
       type(psb_ctxt_type), intent(in)       :: ctxt
-      integer(psb_mpk_), intent(in)         :: iicomm
+      integer(psb_mpk_), intent(in)         :: icomm
       integer(psb_ipk_), intent(in)         :: flag
       integer(psb_ipk_), intent(out)        :: info
       class(psb_z_base_multivect_type)      :: y
diff --git a/base/modules/desc/psb_desc_mod.F90 b/base/modules/desc/psb_desc_mod.F90
index cc003759..c1c98d51 100644
--- a/base/modules/desc/psb_desc_mod.F90
+++ b/base/modules/desc/psb_desc_mod.F90
@@ -613,8 +613,10 @@ contains
     if (allocated(desc%indxmap)) then
       val = desc%indxmap%get_ctxt()    
     else
-      call psb_errpush(psb_err_invalid_cd_state_,'psb_cd_get_context')
-      call psb_error()
+      ! At this point, val should a non-ALLOCATED
+      ! ctxt component, which suits us just fine. 
+      !call psb_errpush(psb_err_invalid_cd_state_,'psb_cd_get_context')
+      !call psb_error()
     end if
   end function psb_cd_get_context
 
diff --git a/base/modules/desc/psb_indx_map_mod.f90 b/base/modules/desc/psb_indx_map_mod.f90
index d18458f0..0c0d8199 100644
--- a/base/modules/desc/psb_indx_map_mod.f90
+++ b/base/modules/desc/psb_indx_map_mod.f90
@@ -224,6 +224,7 @@ module psb_indx_map_mod
     generic, public          :: qry_halo_owner => qry_halo_owner_s, qry_halo_owner_v
     
     procedure, pass(idxmap)  :: fnd_owner => psi_indx_map_fnd_owner
+    procedure, pass(idxmap)  :: init_null => base_init_null 
     procedure, pass(idxmap)  :: init_vl   => base_init_vl
     generic, public          :: init      => init_vl
 
@@ -242,7 +243,7 @@ module psb_indx_map_mod
        & base_ll2gs1, base_ll2gs2, base_ll2gv1, base_ll2gv2,&
        & base_lg2ls1, base_lg2ls2, base_lg2lv1, base_lg2lv2,&
        & base_lg2ls1_ins, base_lg2ls2_ins, base_lg2lv1_ins,&
-       & base_lg2lv2_ins, base_init_vl, base_is_null,&
+       & base_lg2lv2_ins, base_init_vl, base_is_null, base_init_null, &
        & base_row_extendable, base_clone, base_cpy, base_reinit, &
        & base_set_halo_owner, base_get_halo_owner, &
        & base_qry_halo_owner_s, base_qry_halo_owner_v,&
@@ -1345,6 +1346,17 @@ contains
 
   end subroutine base_set_null
 
+  subroutine base_init_null(idxmap,ctxt,info)
+    class(psb_indx_map), intent(inout) :: idxmap
+    type(psb_ctxt_type), intent(in)  :: ctxt
+    integer(psb_ipk_), intent(out) :: info
+
+    call idxmap%set_null()
+    idxmap%ctxt = ctxt
+    info = 0
+    return
+  end subroutine base_init_null
+    
   subroutine base_init_vl(idxmap,ctxt,vl,info)
     use psb_penv_mod
     use psb_error_mod
@@ -1414,7 +1426,7 @@ contains
     call psb_get_erraction(err_act)
 
     outmap%state       = idxmap%state      
-    outmap%ctxt       = idxmap%ctxt      
+    outmap%ctxt        = idxmap%ctxt      
     outmap%mpic        = idxmap%mpic       
     outmap%global_rows = idxmap%global_rows
     outmap%global_cols = idxmap%global_cols
diff --git a/base/modules/penv/psi_d_collective_mod.F90 b/base/modules/penv/psi_d_collective_mod.F90
index eabe5b3f..12d5f38b 100644
--- a/base/modules/penv/psi_d_collective_mod.F90
+++ b/base/modules/penv/psi_d_collective_mod.F90
@@ -105,8 +105,6 @@ contains
     integer(psb_mpk_) :: root_
     real(psb_dpk_) :: dat_
     integer(psb_mpk_) :: iam, np, info, icomm
-    integer(psb_ipk_) :: iinfo
-
 
 #if !defined(SERIAL_MPI)
     call psb_info(ctxt,iam,np)
diff --git a/base/modules/penv/psi_e_collective_mod.F90 b/base/modules/penv/psi_e_collective_mod.F90
index 54d85347..215446c0 100644
--- a/base/modules/penv/psi_e_collective_mod.F90
+++ b/base/modules/penv/psi_e_collective_mod.F90
@@ -102,8 +102,6 @@ contains
     integer(psb_mpk_) :: root_
     integer(psb_epk_) :: dat_
     integer(psb_mpk_) :: iam, np, info, icomm
-    integer(psb_ipk_) :: iinfo
-
 
 #if !defined(SERIAL_MPI)
     call psb_info(ctxt,iam,np)
diff --git a/base/modules/penv/psi_i2_collective_mod.F90 b/base/modules/penv/psi_i2_collective_mod.F90
index 64a49ae3..781653d4 100644
--- a/base/modules/penv/psi_i2_collective_mod.F90
+++ b/base/modules/penv/psi_i2_collective_mod.F90
@@ -102,8 +102,6 @@ contains
     integer(psb_mpk_) :: root_
     integer(psb_i2pk_) :: dat_
     integer(psb_mpk_) :: iam, np, info, icomm
-    integer(psb_ipk_) :: iinfo
-
 
 #if !defined(SERIAL_MPI)
     call psb_info(ctxt,iam,np)
diff --git a/base/modules/penv/psi_m_collective_mod.F90 b/base/modules/penv/psi_m_collective_mod.F90
index 462a7221..8fdea824 100644
--- a/base/modules/penv/psi_m_collective_mod.F90
+++ b/base/modules/penv/psi_m_collective_mod.F90
@@ -102,8 +102,6 @@ contains
     integer(psb_mpk_) :: root_
     integer(psb_mpk_) :: dat_
     integer(psb_mpk_) :: iam, np, info, icomm
-    integer(psb_ipk_) :: iinfo
-
 
 #if !defined(SERIAL_MPI)
     call psb_info(ctxt,iam,np)
diff --git a/base/modules/penv/psi_p2p_mod.F90 b/base/modules/penv/psi_p2p_mod.F90
index 84438f96..f7262378 100644
--- a/base/modules/penv/psi_p2p_mod.F90
+++ b/base/modules/penv/psi_p2p_mod.F90
@@ -216,10 +216,11 @@ contains
     logical, intent(out)  :: dat(:)
     integer(psb_mpk_), intent(in)  :: src
     integer(psb_mpk_) :: info 
-    integer(psb_mpk_) :: status(mpi_status_size)
+    integer(psb_mpk_) :: status(mpi_status_size), icomm
 #if defined(SERIAL_MPI) 
 #else
-    call mpi_recv(dat,size(dat),mpi_logical,src,psb_logical_tag,ctxt,status,info)
+    icomm = psb_get_mpi_comm(ctxt)
+    call mpi_recv(dat,size(dat),mpi_logical,src,psb_logical_tag,icomm,status,info)
     call psb_test_nodes(psb_mesg_queue)
 #endif    
 
diff --git a/base/modules/penv/psi_s_collective_mod.F90 b/base/modules/penv/psi_s_collective_mod.F90
index 30a10524..82f96aac 100644
--- a/base/modules/penv/psi_s_collective_mod.F90
+++ b/base/modules/penv/psi_s_collective_mod.F90
@@ -105,8 +105,6 @@ contains
     integer(psb_mpk_) :: root_
     real(psb_spk_) :: dat_
     integer(psb_mpk_) :: iam, np, info, icomm
-    integer(psb_ipk_) :: iinfo
-
 
 #if !defined(SERIAL_MPI)
     call psb_info(ctxt,iam,np)
diff --git a/base/modules/psi_i_mod.F90 b/base/modules/psi_i_mod.F90
index 31e5d461..e8cf2706 100644
--- a/base/modules/psi_i_mod.F90
+++ b/base/modules/psi_i_mod.F90
@@ -110,13 +110,13 @@ module psi_i_mod
   end interface
 
   interface  psi_bld_glb_dep_list
-    subroutine psi_i_bld_glb_csr_dep_list(ctxt,loc_dl,length_dl,c_dep_list,dl_ptr,info)
+    subroutine psi_i_bld_glb_dep_list(ctxt,loc_dl,length_dl,c_dep_list,dl_ptr,info)
       import
       type(psb_ctxt_type), intent(in) :: ctxt
-      integer(psb_ipk_), intent(in)   :: loc_dl(:), length_dl(0:)
+      integer(psb_ipk_), intent(in)     :: loc_dl(:), length_dl(0:)
       integer(psb_ipk_), allocatable, intent(out) :: c_dep_list(:), dl_ptr(:)
-      integer(psb_ipk_), intent(out)  :: info
-    end subroutine psi_i_bld_glb_csr_dep_list
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psi_i_bld_glb_dep_list
   end interface
 
   interface  psi_extract_loc_dl
diff --git a/base/modules/serial/psb_c_base_vect_mod.f90 b/base/modules/serial/psb_c_base_vect_mod.F90
similarity index 97%
rename from base/modules/serial/psb_c_base_vect_mod.f90
rename to base/modules/serial/psb_c_base_vect_mod.F90
index f59e238f..e68fef6c 100644
--- a/base/modules/serial/psb_c_base_vect_mod.f90
+++ b/base/modules/serial/psb_c_base_vect_mod.F90
@@ -266,14 +266,21 @@ contains
     complex(psb_spk_), intent(in) :: this(:)
     class(psb_c_base_vect_type), intent(inout) :: x
     integer(psb_ipk_) :: info
-
+    integer(psb_ipk_) :: i
+    
     call psb_realloc(size(this),x%v,info)
     if (info /= 0) then
       call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld')
       return
     end if
+#if defined (OPENMP)
+    !$omp parallel do private(i)
+    do i = 1, size(this)
+      x%v(i) = this(i)
+    end do
+#else
     x%v(:)  = this(:)
-
+#endif
   end subroutine c_base_bld_x
 
   !
@@ -403,7 +410,6 @@ contains
       case(psb_dupl_ovwrt_)
         do i = 1, n
           !loop over all val's rows
-
           ! row actual block row
           if ((1 <= irl(i)).and.(irl(i) <= isz)) then
             ! this row belongs to me
@@ -783,7 +789,7 @@ contains
     integer(psb_ipk_) :: info
     integer(psb_ipk_), optional :: n
     ! Local variables
-    integer(psb_ipk_) :: isz
+    integer(psb_ipk_) :: isz, i
 
     if (.not.allocated(x%v)) return
     if (.not.x%is_host()) call x%sync()
@@ -794,7 +800,15 @@ contains
       call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect')
       return
     end if
-    res(1:isz) = x%v(1:isz)
+    if (.false.) then 
+      res(1:isz) = x%v(1:isz)
+    else
+      !$omp parallel do private(i)
+      do i=1, isz
+        res(i) = x%v(i)
+      end do
+    end if
+    
   end function c_base_get_vect
 
   !
@@ -812,7 +826,7 @@ contains
     complex(psb_spk_), intent(in) :: val
     integer(psb_ipk_), optional :: first, last
 
-    integer(psb_ipk_) :: first_, last_
+    integer(psb_ipk_) :: first_, last_, i
 
     first_=1
     last_=size(x%v)
@@ -820,7 +834,14 @@ contains
     if (present(last))  last_  = min(last,last_)
 
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    !$omp parallel do private(i)
+    do i = first_, last_        
+      x%v(i) = val
+    end do
+#else
     x%v(first_:last_) = val
+#endif
     call x%set_host()
 
   end subroutine c_base_set_scal
@@ -838,19 +859,27 @@ contains
     complex(psb_spk_), intent(in) :: val(:)
     integer(psb_ipk_), optional :: first, last
 
-    integer(psb_ipk_) :: first_, last_
+    integer(psb_ipk_) :: first_, last_, i,  info
 
+    if (.not.allocated(x%v)) then
+      call psb_realloc(size(val),x%v,info)
+    end if
+    
     first_                     = 1
     if (present(first)) first_ = max(1,first)
     last_                      = min(psb_size(x%v),first_+size(val)-1)
     if (present(last))  last_  = min(last,last_)
 
-    if (allocated(x%v)) then
-      if (x%is_dev()) call x%sync()
+    if (x%is_dev()) call x%sync()
+
+#if defined(OPENMP)
+      !$omp parallel do private(i)
+      do i  = first_, last_
+        x%v(i) = val(i-first_+1)
+      end do
+#else
       x%v(first_:last_) = val(1:last_-first_+1)
-    else
-      x%v = val
-    end if
+#endif
     call x%set_host()
 
   end subroutine c_base_set_vect
@@ -888,9 +917,18 @@ contains
     implicit none
     class(psb_c_base_vect_type), intent(inout)  :: x
 
+    integer(psb_ipk_) :: i
+    
     if (allocated(x%v)) then
       if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+      !$omp parallel do private(i)
+      do i=1, size(x%v)
+        x%v(i) =  abs(x%v(i))
+      end do
+#else
       x%v =  abs(x%v)
+#endif
       call x%set_host()
     end if
 
@@ -1132,6 +1170,7 @@ contains
     info = 0
     if (y%is_dev()) call y%sync()
     n = min(size(y%v), size(x))
+    !$omp parallel do private(i)    
     do i=1, n
       y%v(i) = y%v(i)*x(i)
     end do
@@ -1169,6 +1208,7 @@ contains
       if (beta == cone) then
         return
       else
+        !$omp parallel do private(i)    
         do i=1, n
           z%v(i) = beta*z%v(i)
         end do
@@ -1176,42 +1216,51 @@ contains
     else
       if (alpha == cone) then
         if (beta == czero) then
+          !$omp parallel do private(i)    
           do i=1, n
             z%v(i) = y(i)*x(i)
           end do
         else if (beta == cone) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = z%v(i) + y(i)*x(i)
           end do
         else
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = beta*z%v(i) + y(i)*x(i)
           end do
         end if
       else if (alpha == -cone) then
         if (beta == czero) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = -y(i)*x(i)
           end do
         else if (beta == cone) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = z%v(i) - y(i)*x(i)
           end do
         else
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = beta*z%v(i) - y(i)*x(i)
           end do
         end if
       else
         if (beta == czero) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = alpha*y(i)*x(i)
           end do
         else if (beta == cone) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = z%v(i) + alpha*y(i)*x(i)
           end do
         else
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
           end do
@@ -1314,7 +1363,6 @@ contains
     if (x%is_dev()) call x%sync()
     call x%div(x%v,y%v,info)
 
-
   end subroutine c_base_div_v
   !
   !> Function  base_div_v2
@@ -1358,7 +1406,6 @@ contains
     if (x%is_dev()) call x%sync()
     call x%div(x%v,y%v,info,flag)
 
-
   end subroutine c_base_div_v_check
   !
   !> Function  base_div_v2_check
@@ -1381,7 +1428,6 @@ contains
     if (z%is_dev()) call z%sync()
     call z%div(x%v,y%v,info,flag)
 
-
   end subroutine c_base_div_v2_check
   !
   !> Function  base_div_a2
@@ -1403,6 +1449,7 @@ contains
     if (z%is_dev()) call z%sync()
 
     n = min(size(y), size(x))
+    !$omp parallel do private(i)
     do i=1, n
       z%v(i) = x(i)/y(i)
     end do
@@ -1433,6 +1480,7 @@ contains
       if (z%is_dev()) call z%sync()
 
       n = min(size(y), size(x))
+      !  $omp parallel do private(i)
       do i=1, n
         if (y(i) /= 0) then
           z%v(i) = x(i)/y(i)
@@ -1443,7 +1491,6 @@ contains
       end do
     end if
 
-
   end subroutine c_base_div_a2_check
   !
   !> Function  base_inv_v
@@ -1487,7 +1534,6 @@ contains
     if (y%is_dev()) call y%sync()
     call y%inv(x%v,info,flag)
 
-
   end subroutine c_base_inv_v_check
   !
   !> Function  base_inv_a2
@@ -1509,6 +1555,7 @@ contains
     if (y%is_dev()) call y%sync()
 
     n = size(x)
+    !$omp parallel do private(i)
     do i=1, n
       y%v(i) = 1_psb_spk_/x(i)
     end do
@@ -1539,6 +1586,7 @@ contains
       if (y%is_dev()) call y%sync()
 
       n = size(x)
+      !$omp parallel do private(i)
       do i=1, n
         if (x(i) /= 0) then
           y%v(i) = 1_psb_spk_/x(i)
@@ -1573,6 +1621,7 @@ contains
     if (z%is_dev()) call z%sync()
 
     n = size(x)
+    !$omp parallel do private(i)
     do i = 1, n, 1
       if ( abs(x(i)).ge.c ) then
         z%v(i) = 1_psb_spk_
@@ -1618,14 +1667,21 @@ contains
     implicit none
     class(psb_c_base_vect_type), intent(inout)  :: x
     complex(psb_spk_), intent (in)       :: alpha
+    integer(psb_ipk_) :: i
 
     if (allocated(x%v)) then
+#if defined(OPENMP)
+      !$omp parallel do private(i)
+      do i=1,size(x%v)
+        x%v(i) = alpha*x%v(i)
+      end do
+#else
       x%v = alpha*x%v
-      call x%set_host()
+#endif
     end if
-
+    call x%set_host()
   end subroutine c_base_scal
-
+  
   !
   ! Norms 1, 2 and infinity
   !
@@ -1655,10 +1711,18 @@ contains
     class(psb_c_base_vect_type), intent(inout) :: x
     integer(psb_ipk_), intent(in)           :: n
     real(psb_spk_)                :: res
+    integer(psb_ipk_) :: i
 
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    res = szero
+    !$omp parallel do private(i) reduction(max: res)
+    do i=1, n
+      res = max(res,abs(x%v(i)))
+    end do
+#else
     res =  maxval(abs(x%v(1:n)))
-
+#endif
   end function c_base_amax
 
 
@@ -1672,10 +1736,18 @@ contains
     class(psb_c_base_vect_type), intent(inout) :: x
     integer(psb_ipk_), intent(in)           :: n
     real(psb_spk_)                :: res
-
+    integer(psb_ipk_) :: i
+    
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    res=szero
+    !$omp parallel do private(i) reduction(+: res)
+    do i= 1, size(x%v)
+      res = res + abs(x%v(i))
+    end do
+#else
     res =  sum(abs(x%v(1:n)))
-
+#endif
   end function c_base_asum
 
 
@@ -1882,13 +1954,17 @@ contains
     integer(psb_ipk_) :: i, n
 
     if (z%is_dev()) call z%sync()
-
+#if defined(OPENMP)
     n = size(x)
-    do i = 1, n, 1
-        z%v(i) = x(i) + b
+    !$omp parallel do private(i)
+    do i = 1, n
+      z%v(i) = x(i) + b
     end do
+#else
+    z%v = x + b
+#endif
     info = 0
-
+    
   end subroutine c_base_addconst_a2
   !
   !> Function  _base_addconst_v2
@@ -1914,9 +1990,6 @@ contains
 end module psb_c_base_vect_mod
 
 
-
-
-
 module psb_c_base_multivect_mod
 
   use psb_const_mod
diff --git a/base/modules/serial/psb_c_mat_mod.F90 b/base/modules/serial/psb_c_mat_mod.F90
index 76225758..5e889da2 100644
--- a/base/modules/serial/psb_c_mat_mod.F90
+++ b/base/modules/serial/psb_c_mat_mod.F90
@@ -128,6 +128,7 @@ module psb_c_mat_mod
 
     ! Memory/data management
     procedure, pass(a) :: csall       => psb_c_csall
+    generic, public    :: allocate    => csall
     procedure, pass(a) :: free        => psb_c_free
     procedure, pass(a) :: trim        => psb_c_trim
     procedure, pass(a) :: csput_a     => psb_c_csput_a
@@ -326,6 +327,7 @@ module psb_c_mat_mod
 
     ! Memory/data management
     procedure, pass(a) :: csall       => psb_lc_csall
+    generic, public    :: allocate    => csall
     procedure, pass(a) :: free        => psb_lc_free
     procedure, pass(a) :: trim        => psb_lc_trim
     procedure, pass(a) :: csput_a     => psb_lc_csput_a
@@ -604,12 +606,14 @@ module psb_c_mat_mod
   end interface
 
   interface
-    subroutine psb_c_csall(nr,nc,a,info,nz)
-      import :: psb_ipk_, psb_lpk_, psb_cspmat_type
+    subroutine psb_c_csall(nr,nc,a,info,nz,type,mold)
+      import :: psb_ipk_, psb_lpk_, psb_cspmat_type, psb_c_base_sparse_mat
       class(psb_cspmat_type), intent(inout) :: a
       integer(psb_ipk_), intent(in)             :: nr,nc
       integer(psb_ipk_), intent(out)            :: info
       integer(psb_ipk_), intent(in), optional   :: nz
+      character(len=*), intent(in), optional    :: type
+      class(psb_c_base_sparse_mat), optional, intent(in) :: mold
     end subroutine psb_c_csall
   end interface
 
@@ -1384,12 +1388,14 @@ module psb_c_mat_mod
   end interface
 
   interface
-    subroutine psb_lc_csall(nr,nc,a,info,nz)
-      import :: psb_ipk_, psb_lpk_, psb_lcspmat_type
+    subroutine psb_lc_csall(nr,nc,a,info,nz,type,mold)
+      import :: psb_ipk_, psb_lpk_, psb_lcspmat_type, psb_lc_base_sparse_mat
       class(psb_lcspmat_type), intent(inout) :: a
       integer(psb_lpk_), intent(in)             :: nr,nc
       integer(psb_ipk_), intent(out)            :: info
       integer(psb_lpk_), intent(in), optional   :: nz
+      character(len=*), intent(in), optional    :: type
+      class(psb_lc_base_sparse_mat), optional, intent(in) :: mold
     end subroutine psb_lc_csall
   end interface
 
diff --git a/base/modules/serial/psb_d_base_vect_mod.f90 b/base/modules/serial/psb_d_base_vect_mod.F90
similarity index 97%
rename from base/modules/serial/psb_d_base_vect_mod.f90
rename to base/modules/serial/psb_d_base_vect_mod.F90
index daf12cbf..09fd187b 100644
--- a/base/modules/serial/psb_d_base_vect_mod.f90
+++ b/base/modules/serial/psb_d_base_vect_mod.F90
@@ -273,14 +273,21 @@ contains
     real(psb_dpk_), intent(in) :: this(:)
     class(psb_d_base_vect_type), intent(inout) :: x
     integer(psb_ipk_) :: info
-
+    integer(psb_ipk_) :: i
+    
     call psb_realloc(size(this),x%v,info)
     if (info /= 0) then
       call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld')
       return
     end if
+#if defined (OPENMP)
+    !$omp parallel do private(i)
+    do i = 1, size(this)
+      x%v(i) = this(i)
+    end do
+#else
     x%v(:)  = this(:)
-
+#endif
   end subroutine d_base_bld_x
 
   !
@@ -410,7 +417,6 @@ contains
       case(psb_dupl_ovwrt_)
         do i = 1, n
           !loop over all val's rows
-
           ! row actual block row
           if ((1 <= irl(i)).and.(irl(i) <= isz)) then
             ! this row belongs to me
@@ -790,7 +796,7 @@ contains
     integer(psb_ipk_) :: info
     integer(psb_ipk_), optional :: n
     ! Local variables
-    integer(psb_ipk_) :: isz
+    integer(psb_ipk_) :: isz, i
 
     if (.not.allocated(x%v)) return
     if (.not.x%is_host()) call x%sync()
@@ -801,7 +807,15 @@ contains
       call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect')
       return
     end if
-    res(1:isz) = x%v(1:isz)
+    if (.false.) then 
+      res(1:isz) = x%v(1:isz)
+    else
+      !$omp parallel do private(i)
+      do i=1, isz
+        res(i) = x%v(i)
+      end do
+    end if
+    
   end function d_base_get_vect
 
   !
@@ -819,7 +833,7 @@ contains
     real(psb_dpk_), intent(in) :: val
     integer(psb_ipk_), optional :: first, last
 
-    integer(psb_ipk_) :: first_, last_
+    integer(psb_ipk_) :: first_, last_, i
 
     first_=1
     last_=size(x%v)
@@ -827,7 +841,14 @@ contains
     if (present(last))  last_  = min(last,last_)
 
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    !$omp parallel do private(i)
+    do i = first_, last_        
+      x%v(i) = val
+    end do
+#else
     x%v(first_:last_) = val
+#endif
     call x%set_host()
 
   end subroutine d_base_set_scal
@@ -845,19 +866,27 @@ contains
     real(psb_dpk_), intent(in) :: val(:)
     integer(psb_ipk_), optional :: first, last
 
-    integer(psb_ipk_) :: first_, last_
+    integer(psb_ipk_) :: first_, last_, i,  info
 
+    if (.not.allocated(x%v)) then
+      call psb_realloc(size(val),x%v,info)
+    end if
+    
     first_                     = 1
     if (present(first)) first_ = max(1,first)
     last_                      = min(psb_size(x%v),first_+size(val)-1)
     if (present(last))  last_  = min(last,last_)
 
-    if (allocated(x%v)) then
-      if (x%is_dev()) call x%sync()
+    if (x%is_dev()) call x%sync()
+
+#if defined(OPENMP)
+      !$omp parallel do private(i)
+      do i  = first_, last_
+        x%v(i) = val(i-first_+1)
+      end do
+#else
       x%v(first_:last_) = val(1:last_-first_+1)
-    else
-      x%v = val
-    end if
+#endif
     call x%set_host()
 
   end subroutine d_base_set_vect
@@ -895,9 +924,18 @@ contains
     implicit none
     class(psb_d_base_vect_type), intent(inout)  :: x
 
+    integer(psb_ipk_) :: i
+    
     if (allocated(x%v)) then
       if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+      !$omp parallel do private(i)
+      do i=1, size(x%v)
+        x%v(i) =  abs(x%v(i))
+      end do
+#else
       x%v =  abs(x%v)
+#endif
       call x%set_host()
     end if
 
@@ -1139,6 +1177,7 @@ contains
     info = 0
     if (y%is_dev()) call y%sync()
     n = min(size(y%v), size(x))
+    !$omp parallel do private(i)    
     do i=1, n
       y%v(i) = y%v(i)*x(i)
     end do
@@ -1176,6 +1215,7 @@ contains
       if (beta == done) then
         return
       else
+        !$omp parallel do private(i)    
         do i=1, n
           z%v(i) = beta*z%v(i)
         end do
@@ -1183,42 +1223,51 @@ contains
     else
       if (alpha == done) then
         if (beta == dzero) then
+          !$omp parallel do private(i)    
           do i=1, n
             z%v(i) = y(i)*x(i)
           end do
         else if (beta == done) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = z%v(i) + y(i)*x(i)
           end do
         else
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = beta*z%v(i) + y(i)*x(i)
           end do
         end if
       else if (alpha == -done) then
         if (beta == dzero) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = -y(i)*x(i)
           end do
         else if (beta == done) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = z%v(i) - y(i)*x(i)
           end do
         else
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = beta*z%v(i) - y(i)*x(i)
           end do
         end if
       else
         if (beta == dzero) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = alpha*y(i)*x(i)
           end do
         else if (beta == done) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = z%v(i) + alpha*y(i)*x(i)
           end do
         else
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
           end do
@@ -1321,7 +1370,6 @@ contains
     if (x%is_dev()) call x%sync()
     call x%div(x%v,y%v,info)
 
-
   end subroutine d_base_div_v
   !
   !> Function  base_div_v2
@@ -1365,7 +1413,6 @@ contains
     if (x%is_dev()) call x%sync()
     call x%div(x%v,y%v,info,flag)
 
-
   end subroutine d_base_div_v_check
   !
   !> Function  base_div_v2_check
@@ -1388,7 +1435,6 @@ contains
     if (z%is_dev()) call z%sync()
     call z%div(x%v,y%v,info,flag)
 
-
   end subroutine d_base_div_v2_check
   !
   !> Function  base_div_a2
@@ -1410,6 +1456,7 @@ contains
     if (z%is_dev()) call z%sync()
 
     n = min(size(y), size(x))
+    !$omp parallel do private(i)
     do i=1, n
       z%v(i) = x(i)/y(i)
     end do
@@ -1440,6 +1487,7 @@ contains
       if (z%is_dev()) call z%sync()
 
       n = min(size(y), size(x))
+      !  $omp parallel do private(i)
       do i=1, n
         if (y(i) /= 0) then
           z%v(i) = x(i)/y(i)
@@ -1450,7 +1498,6 @@ contains
       end do
     end if
 
-
   end subroutine d_base_div_a2_check
   !
   !> Function  base_inv_v
@@ -1494,7 +1541,6 @@ contains
     if (y%is_dev()) call y%sync()
     call y%inv(x%v,info,flag)
 
-
   end subroutine d_base_inv_v_check
   !
   !> Function  base_inv_a2
@@ -1516,6 +1562,7 @@ contains
     if (y%is_dev()) call y%sync()
 
     n = size(x)
+    !$omp parallel do private(i)
     do i=1, n
       y%v(i) = 1_psb_dpk_/x(i)
     end do
@@ -1546,6 +1593,7 @@ contains
       if (y%is_dev()) call y%sync()
 
       n = size(x)
+      !$omp parallel do private(i)
       do i=1, n
         if (x(i) /= 0) then
           y%v(i) = 1_psb_dpk_/x(i)
@@ -1580,6 +1628,7 @@ contains
     if (z%is_dev()) call z%sync()
 
     n = size(x)
+    !$omp parallel do private(i)
     do i = 1, n, 1
       if ( abs(x(i)).ge.c ) then
         z%v(i) = 1_psb_dpk_
@@ -1625,14 +1674,21 @@ contains
     implicit none
     class(psb_d_base_vect_type), intent(inout)  :: x
     real(psb_dpk_), intent (in)       :: alpha
+    integer(psb_ipk_) :: i
 
     if (allocated(x%v)) then
+#if defined(OPENMP)
+      !$omp parallel do private(i)
+      do i=1,size(x%v)
+        x%v(i) = alpha*x%v(i)
+      end do
+#else
       x%v = alpha*x%v
-      call x%set_host()
+#endif
     end if
-
+    call x%set_host()
   end subroutine d_base_scal
-
+  
   !
   ! Norms 1, 2 and infinity
   !
@@ -1662,10 +1718,18 @@ contains
     class(psb_d_base_vect_type), intent(inout) :: x
     integer(psb_ipk_), intent(in)           :: n
     real(psb_dpk_)                :: res
+    integer(psb_ipk_) :: i
 
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    res = dzero
+    !$omp parallel do private(i) reduction(max: res)
+    do i=1, n
+      res = max(res,abs(x%v(i)))
+    end do
+#else
     res =  maxval(abs(x%v(1:n)))
-
+#endif
   end function d_base_amax
 
   !
@@ -1678,10 +1742,18 @@ contains
     class(psb_d_base_vect_type), intent(inout) :: x
     integer(psb_ipk_), intent(in)           :: n
     real(psb_dpk_)                :: res
+    integer(psb_ipk_) :: i
 
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    res = HUGE(done)
+    !$omp parallel do private(i) reduction(min: res)
+    do i=1, n
+      res = min(res,abs(x%v(i)))
+    end do
+#else
     res =  minval(x%v(1:n))
-
+#endif
   end function d_base_min
 
   !
@@ -1730,10 +1802,11 @@ contains
 
     z = huge(z)
     n = min(size(y), size(x%v))
+    !$omp parallel do private(i,temp) reduction(min: z)
     do i=1, n
       if ( y(i) /= dzero ) then
         temp = x%v(i)/y(i)
-        if (temp <= z)  z = temp
+        z = min(z,temp)
       end if
     end do
 
@@ -1750,10 +1823,18 @@ contains
     class(psb_d_base_vect_type), intent(inout) :: x
     integer(psb_ipk_), intent(in)           :: n
     real(psb_dpk_)                :: res
-
+    integer(psb_ipk_) :: i
+    
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    res=dzero
+    !$omp parallel do private(i) reduction(+: res)
+    do i= 1, size(x%v)
+      res = res + abs(x%v(i))
+    end do
+#else
     res =  sum(abs(x%v(1:n)))
-
+#endif
   end function d_base_asum
 
 
@@ -2052,13 +2133,17 @@ contains
     integer(psb_ipk_) :: i, n
 
     if (z%is_dev()) call z%sync()
-
+#if defined(OPENMP)
     n = size(x)
-    do i = 1, n, 1
-        z%v(i) = x(i) + b
+    !$omp parallel do private(i)
+    do i = 1, n
+      z%v(i) = x(i) + b
     end do
+#else
+    z%v = x + b
+#endif
     info = 0
-
+    
   end subroutine d_base_addconst_a2
   !
   !> Function  _base_addconst_v2
@@ -2084,9 +2169,6 @@ contains
 end module psb_d_base_vect_mod
 
 
-
-
-
 module psb_d_base_multivect_mod
 
   use psb_const_mod
diff --git a/base/modules/serial/psb_d_mat_mod.F90 b/base/modules/serial/psb_d_mat_mod.F90
index 878d099f..caf03994 100644
--- a/base/modules/serial/psb_d_mat_mod.F90
+++ b/base/modules/serial/psb_d_mat_mod.F90
@@ -128,6 +128,7 @@ module psb_d_mat_mod
 
     ! Memory/data management
     procedure, pass(a) :: csall       => psb_d_csall
+    generic, public    :: allocate    => csall
     procedure, pass(a) :: free        => psb_d_free
     procedure, pass(a) :: trim        => psb_d_trim
     procedure, pass(a) :: csput_a     => psb_d_csput_a
@@ -326,6 +327,7 @@ module psb_d_mat_mod
 
     ! Memory/data management
     procedure, pass(a) :: csall       => psb_ld_csall
+    generic, public    :: allocate    => csall
     procedure, pass(a) :: free        => psb_ld_free
     procedure, pass(a) :: trim        => psb_ld_trim
     procedure, pass(a) :: csput_a     => psb_ld_csput_a
@@ -604,12 +606,14 @@ module psb_d_mat_mod
   end interface
 
   interface
-    subroutine psb_d_csall(nr,nc,a,info,nz)
-      import :: psb_ipk_, psb_lpk_, psb_dspmat_type
+    subroutine psb_d_csall(nr,nc,a,info,nz,type,mold)
+      import :: psb_ipk_, psb_lpk_, psb_dspmat_type, psb_d_base_sparse_mat
       class(psb_dspmat_type), intent(inout) :: a
       integer(psb_ipk_), intent(in)             :: nr,nc
       integer(psb_ipk_), intent(out)            :: info
       integer(psb_ipk_), intent(in), optional   :: nz
+      character(len=*), intent(in), optional    :: type
+      class(psb_d_base_sparse_mat), optional, intent(in) :: mold
     end subroutine psb_d_csall
   end interface
 
@@ -1384,12 +1388,14 @@ module psb_d_mat_mod
   end interface
 
   interface
-    subroutine psb_ld_csall(nr,nc,a,info,nz)
-      import :: psb_ipk_, psb_lpk_, psb_ldspmat_type
+    subroutine psb_ld_csall(nr,nc,a,info,nz,type,mold)
+      import :: psb_ipk_, psb_lpk_, psb_ldspmat_type, psb_ld_base_sparse_mat
       class(psb_ldspmat_type), intent(inout) :: a
       integer(psb_lpk_), intent(in)             :: nr,nc
       integer(psb_ipk_), intent(out)            :: info
       integer(psb_lpk_), intent(in), optional   :: nz
+      character(len=*), intent(in), optional    :: type
+      class(psb_ld_base_sparse_mat), optional, intent(in) :: mold
     end subroutine psb_ld_csall
   end interface
 
diff --git a/base/modules/serial/psb_i_base_vect_mod.f90 b/base/modules/serial/psb_i_base_vect_mod.F90
similarity index 98%
rename from base/modules/serial/psb_i_base_vect_mod.f90
rename to base/modules/serial/psb_i_base_vect_mod.F90
index 55d7b47e..0289ecd0 100644
--- a/base/modules/serial/psb_i_base_vect_mod.f90
+++ b/base/modules/serial/psb_i_base_vect_mod.F90
@@ -202,14 +202,21 @@ contains
     integer(psb_ipk_), intent(in) :: this(:)
     class(psb_i_base_vect_type), intent(inout) :: x
     integer(psb_ipk_) :: info
-
+    integer(psb_ipk_) :: i
+    
     call psb_realloc(size(this),x%v,info)
     if (info /= 0) then
       call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld')
       return
     end if
+#if defined (OPENMP)
+    !$omp parallel do private(i)
+    do i = 1, size(this)
+      x%v(i) = this(i)
+    end do
+#else
     x%v(:)  = this(:)
-
+#endif
   end subroutine i_base_bld_x
 
   !
@@ -339,7 +346,6 @@ contains
       case(psb_dupl_ovwrt_)
         do i = 1, n
           !loop over all val's rows
-
           ! row actual block row
           if ((1 <= irl(i)).and.(irl(i) <= isz)) then
             ! this row belongs to me
@@ -719,7 +725,7 @@ contains
     integer(psb_ipk_) :: info
     integer(psb_ipk_), optional :: n
     ! Local variables
-    integer(psb_ipk_) :: isz
+    integer(psb_ipk_) :: isz, i
 
     if (.not.allocated(x%v)) return
     if (.not.x%is_host()) call x%sync()
@@ -730,7 +736,15 @@ contains
       call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect')
       return
     end if
-    res(1:isz) = x%v(1:isz)
+    if (.false.) then 
+      res(1:isz) = x%v(1:isz)
+    else
+      !$omp parallel do private(i)
+      do i=1, isz
+        res(i) = x%v(i)
+      end do
+    end if
+    
   end function i_base_get_vect
 
   !
@@ -748,7 +762,7 @@ contains
     integer(psb_ipk_), intent(in) :: val
     integer(psb_ipk_), optional :: first, last
 
-    integer(psb_ipk_) :: first_, last_
+    integer(psb_ipk_) :: first_, last_, i
 
     first_=1
     last_=size(x%v)
@@ -756,7 +770,14 @@ contains
     if (present(last))  last_  = min(last,last_)
 
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    !$omp parallel do private(i)
+    do i = first_, last_        
+      x%v(i) = val
+    end do
+#else
     x%v(first_:last_) = val
+#endif
     call x%set_host()
 
   end subroutine i_base_set_scal
@@ -774,19 +795,27 @@ contains
     integer(psb_ipk_), intent(in) :: val(:)
     integer(psb_ipk_), optional :: first, last
 
-    integer(psb_ipk_) :: first_, last_
+    integer(psb_ipk_) :: first_, last_, i,  info
 
+    if (.not.allocated(x%v)) then
+      call psb_realloc(size(val),x%v,info)
+    end if
+    
     first_                     = 1
     if (present(first)) first_ = max(1,first)
     last_                      = min(psb_size(x%v),first_+size(val)-1)
     if (present(last))  last_  = min(last,last_)
 
-    if (allocated(x%v)) then
-      if (x%is_dev()) call x%sync()
+    if (x%is_dev()) call x%sync()
+
+#if defined(OPENMP)
+      !$omp parallel do private(i)
+      do i  = first_, last_
+        x%v(i) = val(i-first_+1)
+      end do
+#else
       x%v(first_:last_) = val(1:last_-first_+1)
-    else
-      x%v = val
-    end if
+#endif
     call x%set_host()
 
   end subroutine i_base_set_vect
@@ -980,9 +1009,6 @@ contains
 end module psb_i_base_vect_mod
 
 
-
-
-
 module psb_i_base_multivect_mod
 
   use psb_const_mod
diff --git a/base/modules/serial/psb_l_base_vect_mod.f90 b/base/modules/serial/psb_l_base_vect_mod.F90
similarity index 98%
rename from base/modules/serial/psb_l_base_vect_mod.f90
rename to base/modules/serial/psb_l_base_vect_mod.F90
index 53b45f2a..d8654f63 100644
--- a/base/modules/serial/psb_l_base_vect_mod.f90
+++ b/base/modules/serial/psb_l_base_vect_mod.F90
@@ -203,14 +203,21 @@ contains
     integer(psb_lpk_), intent(in) :: this(:)
     class(psb_l_base_vect_type), intent(inout) :: x
     integer(psb_ipk_) :: info
-
+    integer(psb_ipk_) :: i
+    
     call psb_realloc(size(this),x%v,info)
     if (info /= 0) then
       call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld')
       return
     end if
+#if defined (OPENMP)
+    !$omp parallel do private(i)
+    do i = 1, size(this)
+      x%v(i) = this(i)
+    end do
+#else
     x%v(:)  = this(:)
-
+#endif
   end subroutine l_base_bld_x
 
   !
@@ -340,7 +347,6 @@ contains
       case(psb_dupl_ovwrt_)
         do i = 1, n
           !loop over all val's rows
-
           ! row actual block row
           if ((1 <= irl(i)).and.(irl(i) <= isz)) then
             ! this row belongs to me
@@ -720,7 +726,7 @@ contains
     integer(psb_ipk_) :: info
     integer(psb_ipk_), optional :: n
     ! Local variables
-    integer(psb_ipk_) :: isz
+    integer(psb_ipk_) :: isz, i
 
     if (.not.allocated(x%v)) return
     if (.not.x%is_host()) call x%sync()
@@ -731,7 +737,15 @@ contains
       call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect')
       return
     end if
-    res(1:isz) = x%v(1:isz)
+    if (.false.) then 
+      res(1:isz) = x%v(1:isz)
+    else
+      !$omp parallel do private(i)
+      do i=1, isz
+        res(i) = x%v(i)
+      end do
+    end if
+    
   end function l_base_get_vect
 
   !
@@ -749,7 +763,7 @@ contains
     integer(psb_lpk_), intent(in) :: val
     integer(psb_ipk_), optional :: first, last
 
-    integer(psb_ipk_) :: first_, last_
+    integer(psb_ipk_) :: first_, last_, i
 
     first_=1
     last_=size(x%v)
@@ -757,7 +771,14 @@ contains
     if (present(last))  last_  = min(last,last_)
 
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    !$omp parallel do private(i)
+    do i = first_, last_        
+      x%v(i) = val
+    end do
+#else
     x%v(first_:last_) = val
+#endif
     call x%set_host()
 
   end subroutine l_base_set_scal
@@ -775,19 +796,27 @@ contains
     integer(psb_lpk_), intent(in) :: val(:)
     integer(psb_ipk_), optional :: first, last
 
-    integer(psb_ipk_) :: first_, last_
+    integer(psb_ipk_) :: first_, last_, i,  info
 
+    if (.not.allocated(x%v)) then
+      call psb_realloc(size(val),x%v,info)
+    end if
+    
     first_                     = 1
     if (present(first)) first_ = max(1,first)
     last_                      = min(psb_size(x%v),first_+size(val)-1)
     if (present(last))  last_  = min(last,last_)
 
-    if (allocated(x%v)) then
-      if (x%is_dev()) call x%sync()
+    if (x%is_dev()) call x%sync()
+
+#if defined(OPENMP)
+      !$omp parallel do private(i)
+      do i  = first_, last_
+        x%v(i) = val(i-first_+1)
+      end do
+#else
       x%v(first_:last_) = val(1:last_-first_+1)
-    else
-      x%v = val
-    end if
+#endif
     call x%set_host()
 
   end subroutine l_base_set_vect
@@ -981,9 +1010,6 @@ contains
 end module psb_l_base_vect_mod
 
 
-
-
-
 module psb_l_base_multivect_mod
 
   use psb_const_mod
diff --git a/base/modules/serial/psb_s_base_vect_mod.f90 b/base/modules/serial/psb_s_base_vect_mod.F90
similarity index 97%
rename from base/modules/serial/psb_s_base_vect_mod.f90
rename to base/modules/serial/psb_s_base_vect_mod.F90
index c185e341..231b1dc7 100644
--- a/base/modules/serial/psb_s_base_vect_mod.f90
+++ b/base/modules/serial/psb_s_base_vect_mod.F90
@@ -273,14 +273,21 @@ contains
     real(psb_spk_), intent(in) :: this(:)
     class(psb_s_base_vect_type), intent(inout) :: x
     integer(psb_ipk_) :: info
-
+    integer(psb_ipk_) :: i
+    
     call psb_realloc(size(this),x%v,info)
     if (info /= 0) then
       call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld')
       return
     end if
+#if defined (OPENMP)
+    !$omp parallel do private(i)
+    do i = 1, size(this)
+      x%v(i) = this(i)
+    end do
+#else
     x%v(:)  = this(:)
-
+#endif
   end subroutine s_base_bld_x
 
   !
@@ -410,7 +417,6 @@ contains
       case(psb_dupl_ovwrt_)
         do i = 1, n
           !loop over all val's rows
-
           ! row actual block row
           if ((1 <= irl(i)).and.(irl(i) <= isz)) then
             ! this row belongs to me
@@ -790,7 +796,7 @@ contains
     integer(psb_ipk_) :: info
     integer(psb_ipk_), optional :: n
     ! Local variables
-    integer(psb_ipk_) :: isz
+    integer(psb_ipk_) :: isz, i
 
     if (.not.allocated(x%v)) return
     if (.not.x%is_host()) call x%sync()
@@ -801,7 +807,15 @@ contains
       call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect')
       return
     end if
-    res(1:isz) = x%v(1:isz)
+    if (.false.) then 
+      res(1:isz) = x%v(1:isz)
+    else
+      !$omp parallel do private(i)
+      do i=1, isz
+        res(i) = x%v(i)
+      end do
+    end if
+    
   end function s_base_get_vect
 
   !
@@ -819,7 +833,7 @@ contains
     real(psb_spk_), intent(in) :: val
     integer(psb_ipk_), optional :: first, last
 
-    integer(psb_ipk_) :: first_, last_
+    integer(psb_ipk_) :: first_, last_, i
 
     first_=1
     last_=size(x%v)
@@ -827,7 +841,14 @@ contains
     if (present(last))  last_  = min(last,last_)
 
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    !$omp parallel do private(i)
+    do i = first_, last_        
+      x%v(i) = val
+    end do
+#else
     x%v(first_:last_) = val
+#endif
     call x%set_host()
 
   end subroutine s_base_set_scal
@@ -845,19 +866,27 @@ contains
     real(psb_spk_), intent(in) :: val(:)
     integer(psb_ipk_), optional :: first, last
 
-    integer(psb_ipk_) :: first_, last_
+    integer(psb_ipk_) :: first_, last_, i,  info
 
+    if (.not.allocated(x%v)) then
+      call psb_realloc(size(val),x%v,info)
+    end if
+    
     first_                     = 1
     if (present(first)) first_ = max(1,first)
     last_                      = min(psb_size(x%v),first_+size(val)-1)
     if (present(last))  last_  = min(last,last_)
 
-    if (allocated(x%v)) then
-      if (x%is_dev()) call x%sync()
+    if (x%is_dev()) call x%sync()
+
+#if defined(OPENMP)
+      !$omp parallel do private(i)
+      do i  = first_, last_
+        x%v(i) = val(i-first_+1)
+      end do
+#else
       x%v(first_:last_) = val(1:last_-first_+1)
-    else
-      x%v = val
-    end if
+#endif
     call x%set_host()
 
   end subroutine s_base_set_vect
@@ -895,9 +924,18 @@ contains
     implicit none
     class(psb_s_base_vect_type), intent(inout)  :: x
 
+    integer(psb_ipk_) :: i
+    
     if (allocated(x%v)) then
       if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+      !$omp parallel do private(i)
+      do i=1, size(x%v)
+        x%v(i) =  abs(x%v(i))
+      end do
+#else
       x%v =  abs(x%v)
+#endif
       call x%set_host()
     end if
 
@@ -1139,6 +1177,7 @@ contains
     info = 0
     if (y%is_dev()) call y%sync()
     n = min(size(y%v), size(x))
+    !$omp parallel do private(i)    
     do i=1, n
       y%v(i) = y%v(i)*x(i)
     end do
@@ -1176,6 +1215,7 @@ contains
       if (beta == sone) then
         return
       else
+        !$omp parallel do private(i)    
         do i=1, n
           z%v(i) = beta*z%v(i)
         end do
@@ -1183,42 +1223,51 @@ contains
     else
       if (alpha == sone) then
         if (beta == szero) then
+          !$omp parallel do private(i)    
           do i=1, n
             z%v(i) = y(i)*x(i)
           end do
         else if (beta == sone) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = z%v(i) + y(i)*x(i)
           end do
         else
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = beta*z%v(i) + y(i)*x(i)
           end do
         end if
       else if (alpha == -sone) then
         if (beta == szero) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = -y(i)*x(i)
           end do
         else if (beta == sone) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = z%v(i) - y(i)*x(i)
           end do
         else
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = beta*z%v(i) - y(i)*x(i)
           end do
         end if
       else
         if (beta == szero) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = alpha*y(i)*x(i)
           end do
         else if (beta == sone) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = z%v(i) + alpha*y(i)*x(i)
           end do
         else
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
           end do
@@ -1321,7 +1370,6 @@ contains
     if (x%is_dev()) call x%sync()
     call x%div(x%v,y%v,info)
 
-
   end subroutine s_base_div_v
   !
   !> Function  base_div_v2
@@ -1365,7 +1413,6 @@ contains
     if (x%is_dev()) call x%sync()
     call x%div(x%v,y%v,info,flag)
 
-
   end subroutine s_base_div_v_check
   !
   !> Function  base_div_v2_check
@@ -1388,7 +1435,6 @@ contains
     if (z%is_dev()) call z%sync()
     call z%div(x%v,y%v,info,flag)
 
-
   end subroutine s_base_div_v2_check
   !
   !> Function  base_div_a2
@@ -1410,6 +1456,7 @@ contains
     if (z%is_dev()) call z%sync()
 
     n = min(size(y), size(x))
+    !$omp parallel do private(i)
     do i=1, n
       z%v(i) = x(i)/y(i)
     end do
@@ -1440,6 +1487,7 @@ contains
       if (z%is_dev()) call z%sync()
 
       n = min(size(y), size(x))
+      !  $omp parallel do private(i)
       do i=1, n
         if (y(i) /= 0) then
           z%v(i) = x(i)/y(i)
@@ -1450,7 +1498,6 @@ contains
       end do
     end if
 
-
   end subroutine s_base_div_a2_check
   !
   !> Function  base_inv_v
@@ -1494,7 +1541,6 @@ contains
     if (y%is_dev()) call y%sync()
     call y%inv(x%v,info,flag)
 
-
   end subroutine s_base_inv_v_check
   !
   !> Function  base_inv_a2
@@ -1516,6 +1562,7 @@ contains
     if (y%is_dev()) call y%sync()
 
     n = size(x)
+    !$omp parallel do private(i)
     do i=1, n
       y%v(i) = 1_psb_spk_/x(i)
     end do
@@ -1546,6 +1593,7 @@ contains
       if (y%is_dev()) call y%sync()
 
       n = size(x)
+      !$omp parallel do private(i)
       do i=1, n
         if (x(i) /= 0) then
           y%v(i) = 1_psb_spk_/x(i)
@@ -1580,6 +1628,7 @@ contains
     if (z%is_dev()) call z%sync()
 
     n = size(x)
+    !$omp parallel do private(i)
     do i = 1, n, 1
       if ( abs(x(i)).ge.c ) then
         z%v(i) = 1_psb_spk_
@@ -1625,14 +1674,21 @@ contains
     implicit none
     class(psb_s_base_vect_type), intent(inout)  :: x
     real(psb_spk_), intent (in)       :: alpha
+    integer(psb_ipk_) :: i
 
     if (allocated(x%v)) then
+#if defined(OPENMP)
+      !$omp parallel do private(i)
+      do i=1,size(x%v)
+        x%v(i) = alpha*x%v(i)
+      end do
+#else
       x%v = alpha*x%v
-      call x%set_host()
+#endif
     end if
-
+    call x%set_host()
   end subroutine s_base_scal
-
+  
   !
   ! Norms 1, 2 and infinity
   !
@@ -1662,10 +1718,18 @@ contains
     class(psb_s_base_vect_type), intent(inout) :: x
     integer(psb_ipk_), intent(in)           :: n
     real(psb_spk_)                :: res
+    integer(psb_ipk_) :: i
 
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    res = szero
+    !$omp parallel do private(i) reduction(max: res)
+    do i=1, n
+      res = max(res,abs(x%v(i)))
+    end do
+#else
     res =  maxval(abs(x%v(1:n)))
-
+#endif
   end function s_base_amax
 
   !
@@ -1678,10 +1742,18 @@ contains
     class(psb_s_base_vect_type), intent(inout) :: x
     integer(psb_ipk_), intent(in)           :: n
     real(psb_spk_)                :: res
+    integer(psb_ipk_) :: i
 
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    res = HUGE(sone)
+    !$omp parallel do private(i) reduction(min: res)
+    do i=1, n
+      res = min(res,abs(x%v(i)))
+    end do
+#else
     res =  minval(x%v(1:n))
-
+#endif
   end function s_base_min
 
   !
@@ -1730,10 +1802,11 @@ contains
 
     z = huge(z)
     n = min(size(y), size(x%v))
+    !$omp parallel do private(i,temp) reduction(min: z)
     do i=1, n
       if ( y(i) /= szero ) then
         temp = x%v(i)/y(i)
-        if (temp <= z)  z = temp
+        z = min(z,temp)
       end if
     end do
 
@@ -1750,10 +1823,18 @@ contains
     class(psb_s_base_vect_type), intent(inout) :: x
     integer(psb_ipk_), intent(in)           :: n
     real(psb_spk_)                :: res
-
+    integer(psb_ipk_) :: i
+    
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    res=szero
+    !$omp parallel do private(i) reduction(+: res)
+    do i= 1, size(x%v)
+      res = res + abs(x%v(i))
+    end do
+#else
     res =  sum(abs(x%v(1:n)))
-
+#endif
   end function s_base_asum
 
 
@@ -2052,13 +2133,17 @@ contains
     integer(psb_ipk_) :: i, n
 
     if (z%is_dev()) call z%sync()
-
+#if defined(OPENMP)
     n = size(x)
-    do i = 1, n, 1
-        z%v(i) = x(i) + b
+    !$omp parallel do private(i)
+    do i = 1, n
+      z%v(i) = x(i) + b
     end do
+#else
+    z%v = x + b
+#endif
     info = 0
-
+    
   end subroutine s_base_addconst_a2
   !
   !> Function  _base_addconst_v2
@@ -2084,9 +2169,6 @@ contains
 end module psb_s_base_vect_mod
 
 
-
-
-
 module psb_s_base_multivect_mod
 
   use psb_const_mod
diff --git a/base/modules/serial/psb_s_mat_mod.F90 b/base/modules/serial/psb_s_mat_mod.F90
index 3553e96b..8e3934b8 100644
--- a/base/modules/serial/psb_s_mat_mod.F90
+++ b/base/modules/serial/psb_s_mat_mod.F90
@@ -128,6 +128,7 @@ module psb_s_mat_mod
 
     ! Memory/data management
     procedure, pass(a) :: csall       => psb_s_csall
+    generic, public    :: allocate    => csall
     procedure, pass(a) :: free        => psb_s_free
     procedure, pass(a) :: trim        => psb_s_trim
     procedure, pass(a) :: csput_a     => psb_s_csput_a
@@ -326,6 +327,7 @@ module psb_s_mat_mod
 
     ! Memory/data management
     procedure, pass(a) :: csall       => psb_ls_csall
+    generic, public    :: allocate    => csall
     procedure, pass(a) :: free        => psb_ls_free
     procedure, pass(a) :: trim        => psb_ls_trim
     procedure, pass(a) :: csput_a     => psb_ls_csput_a
@@ -604,12 +606,14 @@ module psb_s_mat_mod
   end interface
 
   interface
-    subroutine psb_s_csall(nr,nc,a,info,nz)
-      import :: psb_ipk_, psb_lpk_, psb_sspmat_type
+    subroutine psb_s_csall(nr,nc,a,info,nz,type,mold)
+      import :: psb_ipk_, psb_lpk_, psb_sspmat_type, psb_s_base_sparse_mat
       class(psb_sspmat_type), intent(inout) :: a
       integer(psb_ipk_), intent(in)             :: nr,nc
       integer(psb_ipk_), intent(out)            :: info
       integer(psb_ipk_), intent(in), optional   :: nz
+      character(len=*), intent(in), optional    :: type
+      class(psb_s_base_sparse_mat), optional, intent(in) :: mold
     end subroutine psb_s_csall
   end interface
 
@@ -1384,12 +1388,14 @@ module psb_s_mat_mod
   end interface
 
   interface
-    subroutine psb_ls_csall(nr,nc,a,info,nz)
-      import :: psb_ipk_, psb_lpk_, psb_lsspmat_type
+    subroutine psb_ls_csall(nr,nc,a,info,nz,type,mold)
+      import :: psb_ipk_, psb_lpk_, psb_lsspmat_type, psb_ls_base_sparse_mat
       class(psb_lsspmat_type), intent(inout) :: a
       integer(psb_lpk_), intent(in)             :: nr,nc
       integer(psb_ipk_), intent(out)            :: info
       integer(psb_lpk_), intent(in), optional   :: nz
+      character(len=*), intent(in), optional    :: type
+      class(psb_ls_base_sparse_mat), optional, intent(in) :: mold
     end subroutine psb_ls_csall
   end interface
 
diff --git a/base/modules/serial/psb_serial_mod.f90 b/base/modules/serial/psb_serial_mod.f90
index 2f2154e0..627b318e 100644
--- a/base/modules/serial/psb_serial_mod.f90
+++ b/base/modules/serial/psb_serial_mod.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,8 +27,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 module psb_serial_mod
   use psb_const_mod
   use psb_error_mod
@@ -66,9 +66,42 @@ module psb_serial_mod
       real(psb_dpk_), intent (in)       :: alpha, beta
       integer(psb_ipk_), intent(out)              :: info
     end subroutine psb_d_nspaxpby
+    subroutine psb_s_nspaxpby(nz,iz,z,alpha, nx, ix, x, beta, ny,iy,y,info)
+      use psb_const_mod
+      integer(psb_ipk_), intent(out)              :: nz
+      integer(psb_ipk_), intent(out)              :: iz(:)
+      real(psb_spk_), intent (out)      :: z(:)
+      integer(psb_ipk_), intent(in)               :: nx, ny
+      integer(psb_ipk_), intent(in)               :: ix(:), iy(:)
+      real(psb_spk_), intent (in)       :: x(:), y(:)
+      real(psb_spk_), intent (in)       :: alpha, beta
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_s_nspaxpby
+    subroutine psb_c_nspaxpby(nz,iz,z,alpha, nx, ix, x, beta, ny,iy,y,info)
+      use psb_const_mod
+      integer(psb_ipk_), intent(out)              :: nz
+      integer(psb_ipk_), intent(out)              :: iz(:)
+      complex(psb_spk_), intent (out)      :: z(:)
+      integer(psb_ipk_), intent(in)               :: nx, ny
+      integer(psb_ipk_), intent(in)               :: ix(:), iy(:)
+      complex(psb_spk_), intent (in)       :: x(:), y(:)
+      complex(psb_spk_), intent (in)       :: alpha, beta
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_c_nspaxpby
+    subroutine psb_z_nspaxpby(nz,iz,z,alpha, nx, ix, x, beta, ny,iy,y,info)
+      use psb_const_mod
+      integer(psb_ipk_), intent(out)              :: nz
+      integer(psb_ipk_), intent(out)              :: iz(:)
+      complex(psb_dpk_), intent (out)      :: z(:)
+      integer(psb_ipk_), intent(in)               :: nx, ny
+      integer(psb_ipk_), intent(in)               :: ix(:), iy(:)
+      complex(psb_dpk_), intent (in)       :: x(:), y(:)
+      complex(psb_dpk_), intent (in)       :: alpha, beta
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_z_nspaxpby
   end interface psb_nspaxpby
 
-  interface 
+  interface
     subroutine symbmm (n, m, l, ia, ja, diaga, &
          & ib, jb, diagb, ic, jc, diagc, index)
       import :: psb_ipk_
@@ -84,7 +117,7 @@ module psb_serial_mod
       integer(psb_lpk_), allocatable :: ic(:),jc(:)
     end subroutine lsymbmm
   end interface
-  
+
 
 contains
 
@@ -103,13 +136,13 @@ contains
   elemental function psb_cnrm1(x) result(res)
     complex(psb_spk_), intent(in)  :: x
     real(psb_spk_)                 :: res
-    res = abs( real( x ) ) + abs( aimag( x ) )  
+    res = abs( real( x ) ) + abs( aimag( x ) )
   end function psb_cnrm1
 
   elemental function psb_znrm1(x) result(res)
     complex(psb_dpk_), intent(in)  :: x
     real(psb_dpk_)                 :: res
-    res = abs( real( x ) ) + abs( aimag( x ) )  
+    res = abs( real( x ) ) + abs( aimag( x ) )
   end function psb_znrm1
 
   elemental function psb_sminreal(x) result(res)
@@ -127,13 +160,13 @@ contains
   elemental function psb_cminreal(x) result(res)
     complex(psb_spk_), intent(in)  :: x
     real(psb_spk_)                 :: res
-    res = min( real( x ) , aimag( x ) )  
+    res = min( real( x ) , aimag( x ) )
   end function psb_cminreal
 
   elemental function psb_zminreal(x) result(res)
     complex(psb_dpk_), intent(in)  :: x
     real(psb_dpk_)                 :: res
-    res = min( real( x ) , aimag( x ) )  
+    res = min( real( x ) , aimag( x ) )
   end function psb_zminreal
 
 
@@ -197,7 +230,7 @@ contains
     !     .. executable statements ..
     !
     if( n <= 0 ) return
-    if( incx == 1 .and. incy == 1 ) then 
+    if( incx == 1 .and. incy == 1 ) then
       !
       !     code for both increments equal to 1
       !
@@ -232,7 +265,7 @@ contains
     real(psb_spk_) norm,scale
     complex(psb_spk_) alpha
     !
-    if (cabs(ca) == 0.0) then 
+    if (cabs(ca) == 0.0) then
       !
       c = 0.0d0
       s = (1.0,0.0)
@@ -316,7 +349,7 @@ contains
     !     .. executable statements ..
     !
     if( n <= 0 ) return
-    if( incx == 1 .and. incy == 1 ) then 
+    if( incx == 1 .and. incy == 1 ) then
       !
       !     code for both increments equal to 1
       !
@@ -351,7 +384,7 @@ contains
     real(psb_dpk_) norm,scale
     complex(psb_dpk_) alpha
     !
-    if (cdabs(ca) == 0.0d0) then 
+    if (cdabs(ca) == 0.0d0) then
       !
       c = 0.0d0
       s = (1.0d0,0.0d0)
@@ -374,4 +407,3 @@ contains
 
 
 end module psb_serial_mod
-
diff --git a/base/modules/serial/psb_z_base_vect_mod.f90 b/base/modules/serial/psb_z_base_vect_mod.F90
similarity index 97%
rename from base/modules/serial/psb_z_base_vect_mod.f90
rename to base/modules/serial/psb_z_base_vect_mod.F90
index 1daed233..08cfb840 100644
--- a/base/modules/serial/psb_z_base_vect_mod.f90
+++ b/base/modules/serial/psb_z_base_vect_mod.F90
@@ -266,14 +266,21 @@ contains
     complex(psb_dpk_), intent(in) :: this(:)
     class(psb_z_base_vect_type), intent(inout) :: x
     integer(psb_ipk_) :: info
-
+    integer(psb_ipk_) :: i
+    
     call psb_realloc(size(this),x%v,info)
     if (info /= 0) then
       call psb_errpush(psb_err_alloc_dealloc_,'base_vect_bld')
       return
     end if
+#if defined (OPENMP)
+    !$omp parallel do private(i)
+    do i = 1, size(this)
+      x%v(i) = this(i)
+    end do
+#else
     x%v(:)  = this(:)
-
+#endif
   end subroutine z_base_bld_x
 
   !
@@ -403,7 +410,6 @@ contains
       case(psb_dupl_ovwrt_)
         do i = 1, n
           !loop over all val's rows
-
           ! row actual block row
           if ((1 <= irl(i)).and.(irl(i) <= isz)) then
             ! this row belongs to me
@@ -783,7 +789,7 @@ contains
     integer(psb_ipk_) :: info
     integer(psb_ipk_), optional :: n
     ! Local variables
-    integer(psb_ipk_) :: isz
+    integer(psb_ipk_) :: isz, i
 
     if (.not.allocated(x%v)) return
     if (.not.x%is_host()) call x%sync()
@@ -794,7 +800,15 @@ contains
       call psb_errpush(psb_err_alloc_dealloc_,'base_get_vect')
       return
     end if
-    res(1:isz) = x%v(1:isz)
+    if (.false.) then 
+      res(1:isz) = x%v(1:isz)
+    else
+      !$omp parallel do private(i)
+      do i=1, isz
+        res(i) = x%v(i)
+      end do
+    end if
+    
   end function z_base_get_vect
 
   !
@@ -812,7 +826,7 @@ contains
     complex(psb_dpk_), intent(in) :: val
     integer(psb_ipk_), optional :: first, last
 
-    integer(psb_ipk_) :: first_, last_
+    integer(psb_ipk_) :: first_, last_, i
 
     first_=1
     last_=size(x%v)
@@ -820,7 +834,14 @@ contains
     if (present(last))  last_  = min(last,last_)
 
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    !$omp parallel do private(i)
+    do i = first_, last_        
+      x%v(i) = val
+    end do
+#else
     x%v(first_:last_) = val
+#endif
     call x%set_host()
 
   end subroutine z_base_set_scal
@@ -838,19 +859,27 @@ contains
     complex(psb_dpk_), intent(in) :: val(:)
     integer(psb_ipk_), optional :: first, last
 
-    integer(psb_ipk_) :: first_, last_
+    integer(psb_ipk_) :: first_, last_, i,  info
 
+    if (.not.allocated(x%v)) then
+      call psb_realloc(size(val),x%v,info)
+    end if
+    
     first_                     = 1
     if (present(first)) first_ = max(1,first)
     last_                      = min(psb_size(x%v),first_+size(val)-1)
     if (present(last))  last_  = min(last,last_)
 
-    if (allocated(x%v)) then
-      if (x%is_dev()) call x%sync()
+    if (x%is_dev()) call x%sync()
+
+#if defined(OPENMP)
+      !$omp parallel do private(i)
+      do i  = first_, last_
+        x%v(i) = val(i-first_+1)
+      end do
+#else
       x%v(first_:last_) = val(1:last_-first_+1)
-    else
-      x%v = val
-    end if
+#endif
     call x%set_host()
 
   end subroutine z_base_set_vect
@@ -888,9 +917,18 @@ contains
     implicit none
     class(psb_z_base_vect_type), intent(inout)  :: x
 
+    integer(psb_ipk_) :: i
+    
     if (allocated(x%v)) then
       if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+      !$omp parallel do private(i)
+      do i=1, size(x%v)
+        x%v(i) =  abs(x%v(i))
+      end do
+#else
       x%v =  abs(x%v)
+#endif
       call x%set_host()
     end if
 
@@ -1132,6 +1170,7 @@ contains
     info = 0
     if (y%is_dev()) call y%sync()
     n = min(size(y%v), size(x))
+    !$omp parallel do private(i)    
     do i=1, n
       y%v(i) = y%v(i)*x(i)
     end do
@@ -1169,6 +1208,7 @@ contains
       if (beta == zone) then
         return
       else
+        !$omp parallel do private(i)    
         do i=1, n
           z%v(i) = beta*z%v(i)
         end do
@@ -1176,42 +1216,51 @@ contains
     else
       if (alpha == zone) then
         if (beta == zzero) then
+          !$omp parallel do private(i)    
           do i=1, n
             z%v(i) = y(i)*x(i)
           end do
         else if (beta == zone) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = z%v(i) + y(i)*x(i)
           end do
         else
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = beta*z%v(i) + y(i)*x(i)
           end do
         end if
       else if (alpha == -zone) then
         if (beta == zzero) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = -y(i)*x(i)
           end do
         else if (beta == zone) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = z%v(i) - y(i)*x(i)
           end do
         else
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = beta*z%v(i) - y(i)*x(i)
           end do
         end if
       else
         if (beta == zzero) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = alpha*y(i)*x(i)
           end do
         else if (beta == zone) then
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = z%v(i) + alpha*y(i)*x(i)
           end do
         else
+          !$omp parallel do private(i)
           do i=1, n
             z%v(i) = beta*z%v(i) + alpha*y(i)*x(i)
           end do
@@ -1314,7 +1363,6 @@ contains
     if (x%is_dev()) call x%sync()
     call x%div(x%v,y%v,info)
 
-
   end subroutine z_base_div_v
   !
   !> Function  base_div_v2
@@ -1358,7 +1406,6 @@ contains
     if (x%is_dev()) call x%sync()
     call x%div(x%v,y%v,info,flag)
 
-
   end subroutine z_base_div_v_check
   !
   !> Function  base_div_v2_check
@@ -1381,7 +1428,6 @@ contains
     if (z%is_dev()) call z%sync()
     call z%div(x%v,y%v,info,flag)
 
-
   end subroutine z_base_div_v2_check
   !
   !> Function  base_div_a2
@@ -1403,6 +1449,7 @@ contains
     if (z%is_dev()) call z%sync()
 
     n = min(size(y), size(x))
+    !$omp parallel do private(i)
     do i=1, n
       z%v(i) = x(i)/y(i)
     end do
@@ -1433,6 +1480,7 @@ contains
       if (z%is_dev()) call z%sync()
 
       n = min(size(y), size(x))
+      !  $omp parallel do private(i)
       do i=1, n
         if (y(i) /= 0) then
           z%v(i) = x(i)/y(i)
@@ -1443,7 +1491,6 @@ contains
       end do
     end if
 
-
   end subroutine z_base_div_a2_check
   !
   !> Function  base_inv_v
@@ -1487,7 +1534,6 @@ contains
     if (y%is_dev()) call y%sync()
     call y%inv(x%v,info,flag)
 
-
   end subroutine z_base_inv_v_check
   !
   !> Function  base_inv_a2
@@ -1509,6 +1555,7 @@ contains
     if (y%is_dev()) call y%sync()
 
     n = size(x)
+    !$omp parallel do private(i)
     do i=1, n
       y%v(i) = 1_psb_dpk_/x(i)
     end do
@@ -1539,6 +1586,7 @@ contains
       if (y%is_dev()) call y%sync()
 
       n = size(x)
+      !$omp parallel do private(i)
       do i=1, n
         if (x(i) /= 0) then
           y%v(i) = 1_psb_dpk_/x(i)
@@ -1573,6 +1621,7 @@ contains
     if (z%is_dev()) call z%sync()
 
     n = size(x)
+    !$omp parallel do private(i)
     do i = 1, n, 1
       if ( abs(x(i)).ge.c ) then
         z%v(i) = 1_psb_dpk_
@@ -1618,14 +1667,21 @@ contains
     implicit none
     class(psb_z_base_vect_type), intent(inout)  :: x
     complex(psb_dpk_), intent (in)       :: alpha
+    integer(psb_ipk_) :: i
 
     if (allocated(x%v)) then
+#if defined(OPENMP)
+      !$omp parallel do private(i)
+      do i=1,size(x%v)
+        x%v(i) = alpha*x%v(i)
+      end do
+#else
       x%v = alpha*x%v
-      call x%set_host()
+#endif
     end if
-
+    call x%set_host()
   end subroutine z_base_scal
-
+  
   !
   ! Norms 1, 2 and infinity
   !
@@ -1655,10 +1711,18 @@ contains
     class(psb_z_base_vect_type), intent(inout) :: x
     integer(psb_ipk_), intent(in)           :: n
     real(psb_dpk_)                :: res
+    integer(psb_ipk_) :: i
 
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    res = dzero
+    !$omp parallel do private(i) reduction(max: res)
+    do i=1, n
+      res = max(res,abs(x%v(i)))
+    end do
+#else
     res =  maxval(abs(x%v(1:n)))
-
+#endif
   end function z_base_amax
 
 
@@ -1672,10 +1736,18 @@ contains
     class(psb_z_base_vect_type), intent(inout) :: x
     integer(psb_ipk_), intent(in)           :: n
     real(psb_dpk_)                :: res
-
+    integer(psb_ipk_) :: i
+    
     if (x%is_dev()) call x%sync()
+#if defined(OPENMP)
+    res=dzero
+    !$omp parallel do private(i) reduction(+: res)
+    do i= 1, size(x%v)
+      res = res + abs(x%v(i))
+    end do
+#else
     res =  sum(abs(x%v(1:n)))
-
+#endif
   end function z_base_asum
 
 
@@ -1882,13 +1954,17 @@ contains
     integer(psb_ipk_) :: i, n
 
     if (z%is_dev()) call z%sync()
-
+#if defined(OPENMP)
     n = size(x)
-    do i = 1, n, 1
-        z%v(i) = x(i) + b
+    !$omp parallel do private(i)
+    do i = 1, n
+      z%v(i) = x(i) + b
     end do
+#else
+    z%v = x + b
+#endif
     info = 0
-
+    
   end subroutine z_base_addconst_a2
   !
   !> Function  _base_addconst_v2
@@ -1914,9 +1990,6 @@ contains
 end module psb_z_base_vect_mod
 
 
-
-
-
 module psb_z_base_multivect_mod
 
   use psb_const_mod
diff --git a/base/modules/serial/psb_z_mat_mod.F90 b/base/modules/serial/psb_z_mat_mod.F90
index 35586b3e..ed3338f9 100644
--- a/base/modules/serial/psb_z_mat_mod.F90
+++ b/base/modules/serial/psb_z_mat_mod.F90
@@ -128,6 +128,7 @@ module psb_z_mat_mod
 
     ! Memory/data management
     procedure, pass(a) :: csall       => psb_z_csall
+    generic, public    :: allocate    => csall
     procedure, pass(a) :: free        => psb_z_free
     procedure, pass(a) :: trim        => psb_z_trim
     procedure, pass(a) :: csput_a     => psb_z_csput_a
@@ -326,6 +327,7 @@ module psb_z_mat_mod
 
     ! Memory/data management
     procedure, pass(a) :: csall       => psb_lz_csall
+    generic, public    :: allocate    => csall
     procedure, pass(a) :: free        => psb_lz_free
     procedure, pass(a) :: trim        => psb_lz_trim
     procedure, pass(a) :: csput_a     => psb_lz_csput_a
@@ -604,12 +606,14 @@ module psb_z_mat_mod
   end interface
 
   interface
-    subroutine psb_z_csall(nr,nc,a,info,nz)
-      import :: psb_ipk_, psb_lpk_, psb_zspmat_type
+    subroutine psb_z_csall(nr,nc,a,info,nz,type,mold)
+      import :: psb_ipk_, psb_lpk_, psb_zspmat_type, psb_z_base_sparse_mat
       class(psb_zspmat_type), intent(inout) :: a
       integer(psb_ipk_), intent(in)             :: nr,nc
       integer(psb_ipk_), intent(out)            :: info
       integer(psb_ipk_), intent(in), optional   :: nz
+      character(len=*), intent(in), optional    :: type
+      class(psb_z_base_sparse_mat), optional, intent(in) :: mold
     end subroutine psb_z_csall
   end interface
 
@@ -1384,12 +1388,14 @@ module psb_z_mat_mod
   end interface
 
   interface
-    subroutine psb_lz_csall(nr,nc,a,info,nz)
-      import :: psb_ipk_, psb_lpk_, psb_lzspmat_type
+    subroutine psb_lz_csall(nr,nc,a,info,nz,type,mold)
+      import :: psb_ipk_, psb_lpk_, psb_lzspmat_type, psb_lz_base_sparse_mat
       class(psb_lzspmat_type), intent(inout) :: a
       integer(psb_lpk_), intent(in)             :: nr,nc
       integer(psb_ipk_), intent(out)            :: info
       integer(psb_lpk_), intent(in), optional   :: nz
+      character(len=*), intent(in), optional    :: type
+      class(psb_lz_base_sparse_mat), optional, intent(in) :: mold
     end subroutine psb_lz_csall
   end interface
 
diff --git a/base/modules/tools/psb_c_tools_mod.F90 b/base/modules/tools/psb_c_tools_mod.F90
index 81e78d3a..378e146b 100644
--- a/base/modules/tools/psb_c_tools_mod.F90
+++ b/base/modules/tools/psb_c_tools_mod.F90
@@ -431,5 +431,21 @@ Module psb_c_tools_mod
     end function
   end interface
 
+  interface psb_remap
+    subroutine psb_c_remap(np_remap, desc_in, a_in, &
+         & ipd, isrc, nrsrc, naggr, desc_out, a_out, info)
+      import
+      implicit none
+      !....parameters...
+      integer(psb_ipk_), intent(in)        :: np_remap
+      type(psb_desc_type), intent(inout)   :: desc_in
+      type(psb_cspmat_type), intent(inout) :: a_in
+      type(psb_cspmat_type), intent(out)   :: a_out
+      type(psb_desc_type), intent(out)     :: desc_out
+      integer(psb_ipk_), intent(out)       :: ipd 
+      integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:)
+      integer(psb_ipk_), intent(out)       :: info
+    end subroutine psb_c_remap
+  end interface psb_remap
 
 end module psb_c_tools_mod
diff --git a/base/modules/tools/psb_d_tools_mod.F90 b/base/modules/tools/psb_d_tools_mod.F90
index 76a5bdf2..81c75ece 100644
--- a/base/modules/tools/psb_d_tools_mod.F90
+++ b/base/modules/tools/psb_d_tools_mod.F90
@@ -431,5 +431,21 @@ Module psb_d_tools_mod
     end function
   end interface
 
+  interface psb_remap
+    subroutine psb_d_remap(np_remap, desc_in, a_in, &
+         & ipd, isrc, nrsrc, naggr, desc_out, a_out, info)
+      import
+      implicit none
+      !....parameters...
+      integer(psb_ipk_), intent(in)        :: np_remap
+      type(psb_desc_type), intent(inout)   :: desc_in
+      type(psb_dspmat_type), intent(inout) :: a_in
+      type(psb_dspmat_type), intent(out)   :: a_out
+      type(psb_desc_type), intent(out)     :: desc_out
+      integer(psb_ipk_), intent(out)       :: ipd 
+      integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:)
+      integer(psb_ipk_), intent(out)       :: info
+    end subroutine psb_d_remap
+  end interface psb_remap
 
 end module psb_d_tools_mod
diff --git a/base/modules/tools/psb_i_tools_mod.F90 b/base/modules/tools/psb_i_tools_mod.F90
index def96326..5cc6e836 100644
--- a/base/modules/tools/psb_i_tools_mod.F90
+++ b/base/modules/tools/psb_i_tools_mod.F90
@@ -170,5 +170,4 @@ Module psb_i_tools_mod
     end subroutine psb_iins_multivect
   end interface
 
-
 end module psb_i_tools_mod
diff --git a/base/modules/tools/psb_l_tools_mod.F90 b/base/modules/tools/psb_l_tools_mod.F90
index b389ef85..56617798 100644
--- a/base/modules/tools/psb_l_tools_mod.F90
+++ b/base/modules/tools/psb_l_tools_mod.F90
@@ -170,5 +170,4 @@ Module psb_l_tools_mod
     end subroutine psb_lins_multivect
   end interface
 
-
 end module psb_l_tools_mod
diff --git a/base/modules/tools/psb_s_tools_mod.F90 b/base/modules/tools/psb_s_tools_mod.F90
index 2b6058da..fa82a53e 100644
--- a/base/modules/tools/psb_s_tools_mod.F90
+++ b/base/modules/tools/psb_s_tools_mod.F90
@@ -431,5 +431,21 @@ Module psb_s_tools_mod
     end function
   end interface
 
+  interface psb_remap
+    subroutine psb_s_remap(np_remap, desc_in, a_in, &
+         & ipd, isrc, nrsrc, naggr, desc_out, a_out, info)
+      import
+      implicit none
+      !....parameters...
+      integer(psb_ipk_), intent(in)        :: np_remap
+      type(psb_desc_type), intent(inout)   :: desc_in
+      type(psb_sspmat_type), intent(inout) :: a_in
+      type(psb_sspmat_type), intent(out)   :: a_out
+      type(psb_desc_type), intent(out)     :: desc_out
+      integer(psb_ipk_), intent(out)       :: ipd 
+      integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:)
+      integer(psb_ipk_), intent(out)       :: info
+    end subroutine psb_s_remap
+  end interface psb_remap
 
 end module psb_s_tools_mod
diff --git a/base/modules/tools/psb_z_tools_mod.F90 b/base/modules/tools/psb_z_tools_mod.F90
index 09997e94..233f2c20 100644
--- a/base/modules/tools/psb_z_tools_mod.F90
+++ b/base/modules/tools/psb_z_tools_mod.F90
@@ -431,5 +431,21 @@ Module psb_z_tools_mod
     end function
   end interface
 
+  interface psb_remap
+    subroutine psb_z_remap(np_remap, desc_in, a_in, &
+         & ipd, isrc, nrsrc, naggr, desc_out, a_out, info)
+      import
+      implicit none
+      !....parameters...
+      integer(psb_ipk_), intent(in)        :: np_remap
+      type(psb_desc_type), intent(inout)   :: desc_in
+      type(psb_zspmat_type), intent(inout) :: a_in
+      type(psb_zspmat_type), intent(out)   :: a_out
+      type(psb_desc_type), intent(out)     :: desc_out
+      integer(psb_ipk_), intent(out)       :: ipd 
+      integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:)
+      integer(psb_ipk_), intent(out)       :: info
+    end subroutine psb_z_remap
+  end interface psb_remap
 
 end module psb_z_tools_mod
diff --git a/base/psblas/psb_cgetmatinfo.f90 b/base/psblas/psb_cgetmatinfo.F90
similarity index 96%
rename from base/psblas/psb_cgetmatinfo.f90
rename to base/psblas/psb_cgetmatinfo.F90
index f9c77166..fdfb0cba 100644
--- a/base/psblas/psb_cgetmatinfo.f90
+++ b/base/psblas/psb_cgetmatinfo.F90
@@ -37,9 +37,13 @@
 function  psb_cget_nnz(a,desc_a,info) result(res)
   use psb_base_mod, psb_protect_name => psb_cget_nnz
   use psi_mod
-  use mpi
-
-  implicit none
+#ifdef MPI_MOD
+      use mpi
+#endif
+      implicit none
+#ifdef MPI_H
+      include 'mpif.h'
+#endif
 
   integer(psb_lpk_)                     :: res
   type(psb_cspmat_type), intent(in)   :: a
diff --git a/base/psblas/psb_dgetmatinfo.f90 b/base/psblas/psb_dgetmatinfo.F90
similarity index 96%
rename from base/psblas/psb_dgetmatinfo.f90
rename to base/psblas/psb_dgetmatinfo.F90
index 51ef5ca8..16a1d3ca 100644
--- a/base/psblas/psb_dgetmatinfo.f90
+++ b/base/psblas/psb_dgetmatinfo.F90
@@ -37,9 +37,13 @@
 function  psb_dget_nnz(a,desc_a,info) result(res)
   use psb_base_mod, psb_protect_name => psb_dget_nnz
   use psi_mod
-  use mpi
-
-  implicit none
+#ifdef MPI_MOD
+      use mpi
+#endif
+      implicit none
+#ifdef MPI_H
+      include 'mpif.h'
+#endif
 
   integer(psb_lpk_)                     :: res
   type(psb_dspmat_type), intent(in)   :: a
diff --git a/base/psblas/psb_sgetmatinfo.f90 b/base/psblas/psb_sgetmatinfo.F90
similarity index 96%
rename from base/psblas/psb_sgetmatinfo.f90
rename to base/psblas/psb_sgetmatinfo.F90
index 2da00f27..abf1210c 100644
--- a/base/psblas/psb_sgetmatinfo.f90
+++ b/base/psblas/psb_sgetmatinfo.F90
@@ -37,9 +37,13 @@
 function  psb_sget_nnz(a,desc_a,info) result(res)
   use psb_base_mod, psb_protect_name => psb_sget_nnz
   use psi_mod
-  use mpi
-
-  implicit none
+#ifdef MPI_MOD
+      use mpi
+#endif
+      implicit none
+#ifdef MPI_H
+      include 'mpif.h'
+#endif
 
   integer(psb_lpk_)                     :: res
   type(psb_sspmat_type), intent(in)   :: a
diff --git a/base/psblas/psb_zgetmatinfo.f90 b/base/psblas/psb_zgetmatinfo.F90
similarity index 96%
rename from base/psblas/psb_zgetmatinfo.f90
rename to base/psblas/psb_zgetmatinfo.F90
index 08482963..fab395f2 100644
--- a/base/psblas/psb_zgetmatinfo.f90
+++ b/base/psblas/psb_zgetmatinfo.F90
@@ -37,9 +37,13 @@
 function  psb_zget_nnz(a,desc_a,info) result(res)
   use psb_base_mod, psb_protect_name => psb_zget_nnz
   use psi_mod
-  use mpi
-
-  implicit none
+#ifdef MPI_MOD
+      use mpi
+#endif
+      implicit none
+#ifdef MPI_H
+      include 'mpif.h'
+#endif
 
   integer(psb_lpk_)                     :: res
   type(psb_zspmat_type), intent(in)   :: a
diff --git a/base/serial/Makefile b/base/serial/Makefile
index 5bff0b64..0f17a0a4 100644
--- a/base/serial/Makefile
+++ b/base/serial/Makefile
@@ -11,10 +11,10 @@ FOBJS = psb_lsame.o psi_m_serial_impl.o psi_e_serial_impl.o \
 	smmp.o lsmmp.o \
 	psb_sgeprt.o psb_dgeprt.o psb_cgeprt.o psb_zgeprt.o\
 	psb_spdot_srtd.o psb_aspxpby.o psb_spge_dot.o\
-	psb_sgelp.o psb_dgelp.o psb_cgelp.o psb_zgelp.o \
 	psb_samax_s.o psb_damax_s.o psb_camax_s.o psb_zamax_s.o \
 	psb_sasum_s.o psb_dasum_s.o psb_casum_s.o psb_zasum_s.o 
 
+
 LIBDIR=..
 INCDIR=..
 MODDIR=../modules
diff --git a/base/serial/impl/psb_c_base_mat_impl.F90 b/base/serial/impl/psb_c_base_mat_impl.F90
index 6d7824be..17f2cdc8 100644
--- a/base/serial/impl/psb_c_base_mat_impl.F90
+++ b/base/serial/impl/psb_c_base_mat_impl.F90
@@ -250,7 +250,6 @@ subroutine psb_c_base_mv_from_coo(a,b,info)
 
 end subroutine psb_c_base_mv_from_coo
 
-
 subroutine psb_c_base_mv_to_fmt(a,b,info)
   use psb_c_base_mat_mod, psb_protect_name => psb_c_base_mv_to_fmt
   use psb_error_mod
@@ -698,6 +697,8 @@ subroutine psb_c_base_tril(a,l,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -849,6 +850,8 @@ subroutine psb_c_base_triu(a,u,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -914,8 +917,6 @@ subroutine psb_c_base_triu(a,u,info,&
 
 end subroutine psb_c_base_triu
 
-
-
 subroutine psb_c_base_clone(a,b,info)
   use psb_c_base_mat_mod, psb_protect_name => psb_c_base_clone
   use psb_error_mod
@@ -960,6 +961,7 @@ subroutine psb_c_base_make_nonunit(a)
     mnm = min(m,n)
     nz = tmp%get_nzeros()
     call tmp%reallocate(nz+mnm)
+    !$omp parallel do private(i) shared(nz)
     do i=1, mnm
       tmp%val(nz+i) = cone
       tmp%ia(nz+i)  = i
@@ -1506,6 +1508,7 @@ contains
     complex(psb_spk_), intent(out) :: y(*)
     integer(psb_ipk_) :: i
 
+    !$omp parallel do private(i)
     do i=1,n
       y(i) = d(i)*x(i)
     end do
@@ -1519,6 +1522,7 @@ contains
     complex(psb_spk_), intent(inout) :: x(*)
     integer(psb_ipk_) :: i
 
+    !$omp parallel do private(i)
     do i=1,n
       x(i) = d(i)*x(i)
     end do
@@ -3182,6 +3186,8 @@ subroutine psb_lc_base_tril(a,l,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -3334,6 +3340,8 @@ subroutine psb_lc_base_triu(a,u,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -3446,6 +3454,7 @@ subroutine psb_lc_base_make_nonunit(a)
     mnm = min(m,n)
     nz = tmp%get_nzeros()
     call tmp%reallocate(nz+mnm)
+    !$omp parallel do private(i) shared(nz)
     do i=1, mnm
       tmp%val(nz+i) = cone
       tmp%ia(nz+i)  = i
diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90
index 88bdb66f..4eb2f26e 100644
--- a/base/serial/impl/psb_c_coo_impl.F90
+++ b/base/serial/impl/psb_c_coo_impl.F90
@@ -114,6 +114,7 @@ subroutine psb_c_coo_scal(d,a,info,side)
       goto 9999
     end if
 
+    !$omp parallel do private(i,j)
     do i=1,a%get_nzeros()
       j        = a%ia(i)
       a%val(i) = a%val(i) * d(j)
@@ -126,6 +127,7 @@ subroutine psb_c_coo_scal(d,a,info,side)
       goto 9999
     end if
 
+    !$omp parallel do private(i,j)
     do i=1,a%get_nzeros()
       j        = a%ja(i)
       a%val(i) = a%val(i) * d(j)
@@ -201,6 +203,7 @@ subroutine psb_c_coo_scalplusidentity(d,a,info)
   end if
 
   mnm = min(a%get_nrows(),a%get_ncols())
+  !$omp parallel do private(i,j)
   do i=1,a%get_nzeros()
     a%val(i) = a%val(i) * d
     j=a%ia(i)
@@ -253,12 +256,30 @@ subroutine psb_c_coo_spaxpby(alpha,a,beta,b,info)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined (OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = alpha*a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) = beta*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = alpha*a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   ! Move to correct output format
@@ -346,12 +367,30 @@ function psb_c_coo_cmpmat(a,b,tol,info) result(res)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined (OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = alpha*a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) =  (-sone)*beta*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = (-sone)*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   if (info /= psb_success_) then
@@ -728,9 +767,6 @@ subroutine psb_c_coo_print(iout,a,iv,head,ivr,ivc)
 
 end subroutine psb_c_coo_print
 
-
-
-
 function  psb_c_coo_get_nz_row(idx,a) result(res)
   use psb_const_mod
   use psb_sort_mod
@@ -1670,7 +1706,6 @@ subroutine psb_c_coo_csmv(alpha,a,x,beta,y,info,trans)
 
 end subroutine psb_c_coo_csmv
 
-
 subroutine psb_c_coo_csmm(alpha,a,x,beta,y,info,trans)
   use psb_const_mod
   use psb_error_mod
@@ -1709,11 +1744,9 @@ subroutine psb_c_coo_csmm(alpha,a,x,beta,y,info,trans)
     trans_ = 'N'
   end if
 
-
   tra  = (psb_toupper(trans_) == 'T')
   ctra = (psb_toupper(trans_) == 'C')
 
-
   if (tra.or.ctra) then
     m = a%get_ncols()
     n = a%get_nrows()
@@ -1895,7 +1928,15 @@ function psb_c_coo_maxval(a) result(res)
   nnz = a%get_nzeros()
   if (allocated(a%val)) then
     nnz = min(nnz,size(a%val))
+#if defined(OPENMP)
+    res = szero
+    !$omp parallel do private(i) reduction(max: res)
+    do i=1, nnz
+      res = max(res,abs(a%val(i)))
+    end do
+#else
     res = maxval(abs(a%val(1:nnz)))
+#endif
   end if
 
 end function psb_c_coo_maxval
@@ -2275,11 +2316,13 @@ subroutine psb_c_coo_csgetptn(imin,imax,a,nz,ia,ja,info,&
        & iren)
 
   if (rscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ia(i) = ia(i) - imin + 1
     end do
   end if
   if (cscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ja(i) = ja(i) - jmin_ + 1
     end do
@@ -2553,11 +2596,13 @@ subroutine psb_c_coo_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
        & iren)
 
   if (rscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ia(i) = ia(i) - imin + 1
     end do
   end if
   if (cscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ja(i) = ja(i) - jmin_ + 1
     end do
@@ -2768,7 +2813,6 @@ contains
 
 end subroutine psb_c_coo_csgetrow
 
-
 subroutine psb_c_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
   use psb_error_mod
   use psb_realloc_mod
@@ -3021,7 +3065,6 @@ contains
 
 end subroutine psb_c_coo_csput_a
 
-
 subroutine psb_c_cp_coo_to_coo(a,b,info)
   use psb_error_mod
   use psb_c_base_mat_mod, psb_protect_name => psb_c_cp_coo_to_coo
@@ -3045,10 +3088,21 @@ subroutine psb_c_cp_coo_to_coo(a,b,info)
   call b%set_nzeros(nz)
   call b%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+      b%ia(i)  = a%ia(i)
+      b%ja(i)  = a%ja(i)
+      b%val(i) = a%val(i)
+    end do
+  end block
+#else
   b%ia(1:nz)  = a%ia(1:nz)
   b%ja(1:nz)  = a%ja(1:nz)
   b%val(1:nz) = a%val(1:nz)
-
+#endif
   call b%set_host()
 
   if (.not.b%is_by_rows()) call b%fix(info)
@@ -3087,10 +3141,21 @@ subroutine psb_c_cp_coo_from_coo(a,b,info)
   call a%set_nzeros(nz)
   call a%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+       a%ia(i)  = b%ia(i) 
+       a%ja(i)  = b%ja(i) 
+       a%val(i) = b%val(i)
+    end do
+  end block
+#else
   a%ia(1:nz)  = b%ia(1:nz)
   a%ja(1:nz)  = b%ja(1:nz)
   a%val(1:nz) = b%val(1:nz)
-
+#endif
   call a%set_host()
 
   if (.not.a%is_by_rows()) call a%fix(info)
@@ -3445,8 +3510,6 @@ subroutine psb_c_fix_coo(a,info,idir)
 
 end subroutine psb_c_fix_coo
 
-
-
 subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
   use psb_const_mod
   use psb_error_mod
@@ -4174,7 +4237,6 @@ subroutine psb_c_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
 
 end subroutine psb_c_fix_coo_inner
 
-
 subroutine psb_c_cp_coo_to_lcoo(a,b,info)
   use psb_error_mod
   use psb_c_base_mat_mod, psb_protect_name => psb_c_cp_coo_to_lcoo
@@ -4199,10 +4261,21 @@ subroutine psb_c_cp_coo_to_lcoo(a,b,info)
   call b%set_nzeros(nz)
   call b%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+      b%ia(i)  = a%ia(i)
+      b%ja(i)  = a%ja(i)
+      b%val(i) = a%val(i)
+    end do
+  end block
+#else
   b%ia(1:nz)  = a%ia(1:nz)
   b%ja(1:nz)  = a%ja(1:nz)
   b%val(1:nz) = a%val(1:nz)
-
+#endif
   call b%set_host()
 
   if (.not.b%is_by_rows()) call b%fix(info)
@@ -4240,10 +4313,21 @@ subroutine psb_c_cp_coo_from_lcoo(a,b,info)
   call a%set_nzeros(nz)
   call a%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+      a%ia(i)  = b%ia(i) 
+      a%ja(i)  = b%ja(i) 
+      a%val(i) = b%val(i)
+    end do
+  end block
+#else
   a%ia(1:nz)  = b%ia(1:nz)
   a%ja(1:nz)  = b%ja(1:nz)
   a%val(1:nz) = b%val(1:nz)
-
+#endif
   call a%set_host()
 
   if (.not.a%is_by_rows()) call a%fix(info)
@@ -4442,7 +4526,17 @@ function psb_lc_coo_maxval(a) result(res)
   nnz = a%get_nzeros()
   if (allocated(a%val)) then
     nnz = min(nnz,size(a%val))
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nnz
+      res = max(res,abs(a%val(i)))
+    end do
+  end block
+#else
     res = maxval(abs(a%val(1:nnz)))
+#endif
   end if
 
 end function psb_lc_coo_maxval
@@ -4499,7 +4593,17 @@ function psb_lc_coo_csnmi(a) result(res)
       i = a%ia(j)
       vt(i) = vt(i) + abs(a%val(j))
     end do
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, m
+      res = max(res,abs(vt(i)))
+    end do
+  end block 
+#else
     res = maxval(vt(1:m))
+#endif
     deallocate(vt,stat=info)
   end if
 
@@ -4539,7 +4643,17 @@ function psb_lc_coo_csnm1(a) result(res)
     i = a%ja(j)
     vt(i) = vt(i) + abs(a%val(j))
   end do
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, n
+      res = max(res,abs(vt(i)))
+    end do
+  end block 
+#else
   res = maxval(vt(1:n))
+#endif
   deallocate(vt,stat=info)
 
   return
@@ -4584,7 +4698,6 @@ subroutine psb_lc_coo_rowsum(d,a)
     d(i) = d(i) + a%val(j)
   end do
 
-
   return
   call psb_erractionrestore(err_act)
   return
@@ -4592,7 +4705,6 @@ subroutine psb_lc_coo_rowsum(d,a)
 9999 call psb_error_handler(err_act)
 
   return
-
 end subroutine psb_lc_coo_rowsum
 
 subroutine psb_lc_coo_arwsum(d,a)
@@ -4761,6 +4873,7 @@ subroutine psb_lc_coo_scalplusidentity(d,a,info)
   end if
 
   mnm = min(a%get_nrows(),a%get_ncols())
+  !$omp parallel do private(i,j)
   do i=1,a%get_nzeros()
     a%val(i) = a%val(i) * d
     j=a%ia(i)
@@ -4813,12 +4926,30 @@ subroutine psb_lc_coo_spaxpby(alpha,a,beta,b,info)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = alpha*a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) =  beta*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = alpha*a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   ! Move to correct output format
@@ -4906,12 +5037,30 @@ function psb_lc_coo_cmpmat(a,b,tol,info) result(res)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) =  (-1_psb_spk_)*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = (-1_psb_spk_)*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   if (info /= psb_success_) then
@@ -5950,7 +6099,7 @@ subroutine psb_lc_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
 
   if (nz < 0) then
     info = psb_err_iarg_neg_
-    call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
+3    call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
     goto 9999
   end if
   if (size(ia) < nz) then
diff --git a/base/serial/impl/psb_c_csr_impl.f90 b/base/serial/impl/psb_c_csr_impl.f90
index 1f4242fd..25d0a086 100644
--- a/base/serial/impl/psb_c_csr_impl.f90
+++ b/base/serial/impl/psb_c_csr_impl.f90
@@ -130,10 +130,12 @@ contains
 
     if (alpha == czero) then
       if (beta == czero) then
+        !$omp parallel do private(i)
         do i = 1, m
           y(i) = czero
         enddo
       else
+        !$omp parallel do private(i)
         do  i = 1, m
           y(i) = beta*y(i)
         end do
@@ -147,6 +149,7 @@ contains
       if (beta == czero) then
 
         if (alpha == cone) then
+          !$omp parallel do private(i,j, acc) schedule(static)
           do i=1,m
             acc  = czero
             do j=irp(i), irp(i+1)-1
@@ -157,6 +160,7 @@ contains
 
         else if (alpha == -cone) then
 
+          !$omp parallel do private(i,j, acc)
           do i=1,m
             acc  = czero
             do j=irp(i), irp(i+1)-1
@@ -167,6 +171,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = czero
             do j=irp(i), irp(i+1)-1
@@ -181,6 +186,7 @@ contains
       else if (beta == cone) then
 
         if (alpha == cone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = czero
             do j=irp(i), irp(i+1)-1
@@ -191,6 +197,7 @@ contains
 
         else if (alpha == -cone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = czero
             do j=irp(i), irp(i+1)-1
@@ -201,6 +208,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = czero
             do j=irp(i), irp(i+1)-1
@@ -214,6 +222,7 @@ contains
       else if (beta == -cone) then
 
         if (alpha == cone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = czero
             do j=irp(i), irp(i+1)-1
@@ -224,6 +233,7 @@ contains
 
         else if (alpha == -cone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = czero
             do j=irp(i), irp(i+1)-1
@@ -234,6 +244,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = czero
             do j=irp(i), irp(i+1)-1
@@ -247,6 +258,7 @@ contains
       else
 
         if (alpha == cone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = czero
             do j=irp(i), irp(i+1)-1
@@ -257,6 +269,7 @@ contains
 
         else if (alpha == -cone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = czero
             do j=irp(i), irp(i+1)-1
@@ -267,6 +280,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = czero
             do j=irp(i), irp(i+1)-1
@@ -282,16 +296,19 @@ contains
     else if (tra) then
 
       if (beta == czero) then
+        !$omp parallel do private(i)
         do i=1, m
           y(i) = czero
         end do
       else if (beta == cone) then
         ! Do nothing
       else if (beta == -cone) then
+        !$omp parallel do private(i)
         do i=1, m
           y(i) = -y(i)
         end do
       else
+        !$omp parallel do private(i)
         do i=1, m
           y(i) = beta*y(i)
         end do
@@ -476,16 +493,18 @@ contains
     complex(psb_spk_), intent(inout)   :: y(ldy,*)
     logical, intent(in)             :: is_triangle,is_unit,tra,ctra
 
-    complex(psb_spk_), intent(inout)   :: acc(*)
+    complex(psb_spk_), intent(inout)   :: acc(:)
     integer(psb_ipk_) :: i,j, ir
 
 
     if (alpha == czero) then
       if (beta == czero) then
+        !$omp parallel do private(i)
         do i = 1, m
           y(i,1:nc) = czero
         enddo
       else
+        !$omp parallel do private(i)
         do  i = 1, m
           y(i,1:nc) = beta*y(i,1:nc)
         end do
@@ -497,6 +516,7 @@ contains
       if (beta == czero) then
 
         if (alpha == cone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = czero
             do j=irp(i), irp(i+1)-1
@@ -507,6 +527,7 @@ contains
 
         else if (alpha == -cone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = czero
             do j=irp(i), irp(i+1)-1
@@ -517,6 +538,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = czero
             do j=irp(i), irp(i+1)-1
@@ -531,6 +553,7 @@ contains
       else if (beta == cone) then
 
         if (alpha == cone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = czero
             do j=irp(i), irp(i+1)-1
@@ -541,6 +564,7 @@ contains
 
         else if (alpha == -cone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = czero
             do j=irp(i), irp(i+1)-1
@@ -551,6 +575,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = czero
             do j=irp(i), irp(i+1)-1
@@ -564,6 +589,7 @@ contains
       else if (beta == -cone) then
 
         if (alpha == cone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = czero
             do j=irp(i), irp(i+1)-1
@@ -574,6 +600,7 @@ contains
 
         else if (alpha == -cone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = czero
             do j=irp(i), irp(i+1)-1
@@ -584,6 +611,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = czero
             do j=irp(i), irp(i+1)-1
@@ -597,6 +625,7 @@ contains
       else
 
         if (alpha == cone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = czero
             do j=irp(i), irp(i+1)-1
@@ -607,6 +636,7 @@ contains
 
         else if (alpha == -cone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = czero
             do j=irp(i), irp(i+1)-1
@@ -617,6 +647,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = czero
             do j=irp(i), irp(i+1)-1
diff --git a/base/serial/impl/psb_c_mat_impl.F90 b/base/serial/impl/psb_c_mat_impl.F90
index 69c67d02..cc112015 100644
--- a/base/serial/impl/psb_c_mat_impl.F90
+++ b/base/serial/impl/psb_c_mat_impl.F90
@@ -582,7 +582,7 @@ end subroutine psb_c_get_neigh
 
 
 
-subroutine psb_c_csall(nr,nc,a,info,nz)
+subroutine psb_c_csall(nr,nc,a,info,nz,type,mold)
   use psb_c_mat_mod, psb_protect_name => psb_c_csall
   use psb_c_base_mat_mod
   use psb_error_mod
@@ -591,6 +591,8 @@ subroutine psb_c_csall(nr,nc,a,info,nz)
   integer(psb_ipk_), intent(in)             :: nr,nc
   integer(psb_ipk_), intent(out)            :: info
   integer(psb_ipk_), intent(in), optional   :: nz
+  character(len=*), intent(in), optional    :: type
+  class(psb_c_base_sparse_mat), optional, intent(in) :: mold
 
   integer(psb_ipk_) :: err_act
   character(len=20)  :: name='csall'
@@ -601,7 +603,23 @@ subroutine psb_c_csall(nr,nc,a,info,nz)
   call a%free()
 
   info = psb_success_
-  allocate(psb_c_coo_sparse_mat :: a%a, stat=info)
+  if (present(mold)) then
+    allocate(a%a, stat=info, mold=mold)
+  else if (present(type)) then
+    select case (type)
+    case('CSR')
+      allocate(psb_c_csr_sparse_mat :: a%a, stat=info)
+    case('COO')
+      allocate(psb_c_coo_sparse_mat :: a%a, stat=info)
+    case('CSC')
+      allocate(psb_c_csc_sparse_mat :: a%a, stat=info)
+    case default
+      allocate(psb_c_coo_sparse_mat :: a%a, stat=info)
+    end select
+  else
+    allocate(psb_c_coo_sparse_mat :: a%a, stat=info)
+  end if
+
   if (info /= psb_success_) then
     info = psb_err_alloc_dealloc_
     call psb_errpush(info, name)
@@ -3381,7 +3399,7 @@ end subroutine psb_lc_get_neigh
 
 
 
-subroutine psb_lc_csall(nr,nc,a,info,nz)
+subroutine psb_lc_csall(nr,nc,a,info,nz,type,mold)
   use psb_c_mat_mod, psb_protect_name => psb_lc_csall
   use psb_c_base_mat_mod
   use psb_error_mod
@@ -3390,6 +3408,8 @@ subroutine psb_lc_csall(nr,nc,a,info,nz)
   integer(psb_lpk_), intent(in)             :: nr,nc
   integer(psb_ipk_), intent(out)            :: info
   integer(psb_lpk_), intent(in), optional   :: nz
+  character(len=*), intent(in), optional    :: type
+  class(psb_lc_base_sparse_mat), optional, intent(in) :: mold
 
   integer(psb_ipk_) :: err_act
   character(len=20)  :: name='csall'
@@ -3400,7 +3420,22 @@ subroutine psb_lc_csall(nr,nc,a,info,nz)
   call a%free()
 
   info = psb_success_
-  allocate(psb_lc_coo_sparse_mat :: a%a, stat=info)
+  if (present(mold)) then
+    allocate(a%a, stat=info, mold=mold)
+  else if (present(type)) then
+    select case (type)
+    case('CSR')
+      allocate(psb_lc_csr_sparse_mat :: a%a, stat=info)
+    case('COO')
+      allocate(psb_lc_coo_sparse_mat :: a%a, stat=info)
+    case('CSC')
+      allocate(psb_lc_csc_sparse_mat :: a%a, stat=info)
+    case default
+      allocate(psb_lc_coo_sparse_mat :: a%a, stat=info)
+    end select
+  else
+    allocate(psb_lc_coo_sparse_mat :: a%a, stat=info)
+  end if
   if (info /= psb_success_) then
     info = psb_err_alloc_dealloc_
     call psb_errpush(info, name)
diff --git a/base/serial/impl/psb_d_base_mat_impl.F90 b/base/serial/impl/psb_d_base_mat_impl.F90
index 30cb4d1e..69112529 100644
--- a/base/serial/impl/psb_d_base_mat_impl.F90
+++ b/base/serial/impl/psb_d_base_mat_impl.F90
@@ -250,7 +250,6 @@ subroutine psb_d_base_mv_from_coo(a,b,info)
 
 end subroutine psb_d_base_mv_from_coo
 
-
 subroutine psb_d_base_mv_to_fmt(a,b,info)
   use psb_d_base_mat_mod, psb_protect_name => psb_d_base_mv_to_fmt
   use psb_error_mod
@@ -698,6 +697,8 @@ subroutine psb_d_base_tril(a,l,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -849,6 +850,8 @@ subroutine psb_d_base_triu(a,u,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -914,8 +917,6 @@ subroutine psb_d_base_triu(a,u,info,&
 
 end subroutine psb_d_base_triu
 
-
-
 subroutine psb_d_base_clone(a,b,info)
   use psb_d_base_mat_mod, psb_protect_name => psb_d_base_clone
   use psb_error_mod
@@ -960,6 +961,7 @@ subroutine psb_d_base_make_nonunit(a)
     mnm = min(m,n)
     nz = tmp%get_nzeros()
     call tmp%reallocate(nz+mnm)
+    !$omp parallel do private(i) shared(nz)
     do i=1, mnm
       tmp%val(nz+i) = done
       tmp%ia(nz+i)  = i
@@ -1506,6 +1508,7 @@ contains
     real(psb_dpk_), intent(out) :: y(*)
     integer(psb_ipk_) :: i
 
+    !$omp parallel do private(i)
     do i=1,n
       y(i) = d(i)*x(i)
     end do
@@ -1519,6 +1522,7 @@ contains
     real(psb_dpk_), intent(inout) :: x(*)
     integer(psb_ipk_) :: i
 
+    !$omp parallel do private(i)
     do i=1,n
       x(i) = d(i)*x(i)
     end do
@@ -3182,6 +3186,8 @@ subroutine psb_ld_base_tril(a,l,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -3334,6 +3340,8 @@ subroutine psb_ld_base_triu(a,u,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -3446,6 +3454,7 @@ subroutine psb_ld_base_make_nonunit(a)
     mnm = min(m,n)
     nz = tmp%get_nzeros()
     call tmp%reallocate(nz+mnm)
+    !$omp parallel do private(i) shared(nz)
     do i=1, mnm
       tmp%val(nz+i) = done
       tmp%ia(nz+i)  = i
diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90
index cd5ea5a8..6b3aafc8 100644
--- a/base/serial/impl/psb_d_coo_impl.F90
+++ b/base/serial/impl/psb_d_coo_impl.F90
@@ -114,6 +114,7 @@ subroutine psb_d_coo_scal(d,a,info,side)
       goto 9999
     end if
 
+    !$omp parallel do private(i,j)
     do i=1,a%get_nzeros()
       j        = a%ia(i)
       a%val(i) = a%val(i) * d(j)
@@ -126,6 +127,7 @@ subroutine psb_d_coo_scal(d,a,info,side)
       goto 9999
     end if
 
+    !$omp parallel do private(i,j)
     do i=1,a%get_nzeros()
       j        = a%ja(i)
       a%val(i) = a%val(i) * d(j)
@@ -201,6 +203,7 @@ subroutine psb_d_coo_scalplusidentity(d,a,info)
   end if
 
   mnm = min(a%get_nrows(),a%get_ncols())
+  !$omp parallel do private(i,j)
   do i=1,a%get_nzeros()
     a%val(i) = a%val(i) * d
     j=a%ia(i)
@@ -253,12 +256,30 @@ subroutine psb_d_coo_spaxpby(alpha,a,beta,b,info)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined (OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = alpha*a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) = beta*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = alpha*a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   ! Move to correct output format
@@ -346,12 +367,30 @@ function psb_d_coo_cmpmat(a,b,tol,info) result(res)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined (OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = alpha*a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) =  (-done)*beta*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = (-done)*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   if (info /= psb_success_) then
@@ -728,9 +767,6 @@ subroutine psb_d_coo_print(iout,a,iv,head,ivr,ivc)
 
 end subroutine psb_d_coo_print
 
-
-
-
 function  psb_d_coo_get_nz_row(idx,a) result(res)
   use psb_const_mod
   use psb_sort_mod
@@ -1670,7 +1706,6 @@ subroutine psb_d_coo_csmv(alpha,a,x,beta,y,info,trans)
 
 end subroutine psb_d_coo_csmv
 
-
 subroutine psb_d_coo_csmm(alpha,a,x,beta,y,info,trans)
   use psb_const_mod
   use psb_error_mod
@@ -1709,11 +1744,9 @@ subroutine psb_d_coo_csmm(alpha,a,x,beta,y,info,trans)
     trans_ = 'N'
   end if
 
-
   tra  = (psb_toupper(trans_) == 'T')
   ctra = (psb_toupper(trans_) == 'C')
 
-
   if (tra.or.ctra) then
     m = a%get_ncols()
     n = a%get_nrows()
@@ -1895,7 +1928,15 @@ function psb_d_coo_maxval(a) result(res)
   nnz = a%get_nzeros()
   if (allocated(a%val)) then
     nnz = min(nnz,size(a%val))
+#if defined(OPENMP)
+    res = dzero
+    !$omp parallel do private(i) reduction(max: res)
+    do i=1, nnz
+      res = max(res,abs(a%val(i)))
+    end do
+#else
     res = maxval(abs(a%val(1:nnz)))
+#endif
   end if
 
 end function psb_d_coo_maxval
@@ -2275,11 +2316,13 @@ subroutine psb_d_coo_csgetptn(imin,imax,a,nz,ia,ja,info,&
        & iren)
 
   if (rscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ia(i) = ia(i) - imin + 1
     end do
   end if
   if (cscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ja(i) = ja(i) - jmin_ + 1
     end do
@@ -2553,11 +2596,13 @@ subroutine psb_d_coo_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
        & iren)
 
   if (rscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ia(i) = ia(i) - imin + 1
     end do
   end if
   if (cscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ja(i) = ja(i) - jmin_ + 1
     end do
@@ -2768,7 +2813,6 @@ contains
 
 end subroutine psb_d_coo_csgetrow
 
-
 subroutine psb_d_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
   use psb_error_mod
   use psb_realloc_mod
@@ -3021,7 +3065,6 @@ contains
 
 end subroutine psb_d_coo_csput_a
 
-
 subroutine psb_d_cp_coo_to_coo(a,b,info)
   use psb_error_mod
   use psb_d_base_mat_mod, psb_protect_name => psb_d_cp_coo_to_coo
@@ -3045,10 +3088,21 @@ subroutine psb_d_cp_coo_to_coo(a,b,info)
   call b%set_nzeros(nz)
   call b%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+      b%ia(i)  = a%ia(i)
+      b%ja(i)  = a%ja(i)
+      b%val(i) = a%val(i)
+    end do
+  end block
+#else
   b%ia(1:nz)  = a%ia(1:nz)
   b%ja(1:nz)  = a%ja(1:nz)
   b%val(1:nz) = a%val(1:nz)
-
+#endif
   call b%set_host()
 
   if (.not.b%is_by_rows()) call b%fix(info)
@@ -3087,10 +3141,21 @@ subroutine psb_d_cp_coo_from_coo(a,b,info)
   call a%set_nzeros(nz)
   call a%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+       a%ia(i)  = b%ia(i) 
+       a%ja(i)  = b%ja(i) 
+       a%val(i) = b%val(i)
+    end do
+  end block
+#else
   a%ia(1:nz)  = b%ia(1:nz)
   a%ja(1:nz)  = b%ja(1:nz)
   a%val(1:nz) = b%val(1:nz)
-
+#endif
   call a%set_host()
 
   if (.not.a%is_by_rows()) call a%fix(info)
@@ -3445,8 +3510,6 @@ subroutine psb_d_fix_coo(a,info,idir)
 
 end subroutine psb_d_fix_coo
 
-
-
 subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
   use psb_const_mod
   use psb_error_mod
@@ -4174,7 +4237,6 @@ subroutine psb_d_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
 
 end subroutine psb_d_fix_coo_inner
 
-
 subroutine psb_d_cp_coo_to_lcoo(a,b,info)
   use psb_error_mod
   use psb_d_base_mat_mod, psb_protect_name => psb_d_cp_coo_to_lcoo
@@ -4199,10 +4261,21 @@ subroutine psb_d_cp_coo_to_lcoo(a,b,info)
   call b%set_nzeros(nz)
   call b%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+      b%ia(i)  = a%ia(i)
+      b%ja(i)  = a%ja(i)
+      b%val(i) = a%val(i)
+    end do
+  end block
+#else
   b%ia(1:nz)  = a%ia(1:nz)
   b%ja(1:nz)  = a%ja(1:nz)
   b%val(1:nz) = a%val(1:nz)
-
+#endif
   call b%set_host()
 
   if (.not.b%is_by_rows()) call b%fix(info)
@@ -4240,10 +4313,21 @@ subroutine psb_d_cp_coo_from_lcoo(a,b,info)
   call a%set_nzeros(nz)
   call a%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+      a%ia(i)  = b%ia(i) 
+      a%ja(i)  = b%ja(i) 
+      a%val(i) = b%val(i)
+    end do
+  end block
+#else
   a%ia(1:nz)  = b%ia(1:nz)
   a%ja(1:nz)  = b%ja(1:nz)
   a%val(1:nz) = b%val(1:nz)
-
+#endif
   call a%set_host()
 
   if (.not.a%is_by_rows()) call a%fix(info)
@@ -4442,7 +4526,17 @@ function psb_ld_coo_maxval(a) result(res)
   nnz = a%get_nzeros()
   if (allocated(a%val)) then
     nnz = min(nnz,size(a%val))
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nnz
+      res = max(res,abs(a%val(i)))
+    end do
+  end block
+#else
     res = maxval(abs(a%val(1:nnz)))
+#endif
   end if
 
 end function psb_ld_coo_maxval
@@ -4499,7 +4593,17 @@ function psb_ld_coo_csnmi(a) result(res)
       i = a%ia(j)
       vt(i) = vt(i) + abs(a%val(j))
     end do
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, m
+      res = max(res,abs(vt(i)))
+    end do
+  end block 
+#else
     res = maxval(vt(1:m))
+#endif
     deallocate(vt,stat=info)
   end if
 
@@ -4539,7 +4643,17 @@ function psb_ld_coo_csnm1(a) result(res)
     i = a%ja(j)
     vt(i) = vt(i) + abs(a%val(j))
   end do
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, n
+      res = max(res,abs(vt(i)))
+    end do
+  end block 
+#else
   res = maxval(vt(1:n))
+#endif
   deallocate(vt,stat=info)
 
   return
@@ -4584,7 +4698,6 @@ subroutine psb_ld_coo_rowsum(d,a)
     d(i) = d(i) + a%val(j)
   end do
 
-
   return
   call psb_erractionrestore(err_act)
   return
@@ -4592,7 +4705,6 @@ subroutine psb_ld_coo_rowsum(d,a)
 9999 call psb_error_handler(err_act)
 
   return
-
 end subroutine psb_ld_coo_rowsum
 
 subroutine psb_ld_coo_arwsum(d,a)
@@ -4761,6 +4873,7 @@ subroutine psb_ld_coo_scalplusidentity(d,a,info)
   end if
 
   mnm = min(a%get_nrows(),a%get_ncols())
+  !$omp parallel do private(i,j)
   do i=1,a%get_nzeros()
     a%val(i) = a%val(i) * d
     j=a%ia(i)
@@ -4813,12 +4926,30 @@ subroutine psb_ld_coo_spaxpby(alpha,a,beta,b,info)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = alpha*a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) =  beta*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = alpha*a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   ! Move to correct output format
@@ -4906,12 +5037,30 @@ function psb_ld_coo_cmpmat(a,b,tol,info) result(res)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) =  (-1_psb_dpk_)*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = (-1_psb_dpk_)*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   if (info /= psb_success_) then
@@ -5950,7 +6099,7 @@ subroutine psb_ld_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
 
   if (nz < 0) then
     info = psb_err_iarg_neg_
-    call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
+3    call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
     goto 9999
   end if
   if (size(ia) < nz) then
diff --git a/base/serial/impl/psb_d_csr_impl.f90 b/base/serial/impl/psb_d_csr_impl.f90
index 0c572ac3..853476d6 100644
--- a/base/serial/impl/psb_d_csr_impl.f90
+++ b/base/serial/impl/psb_d_csr_impl.f90
@@ -130,10 +130,12 @@ contains
 
     if (alpha == dzero) then
       if (beta == dzero) then
+        !$omp parallel do private(i)
         do i = 1, m
           y(i) = dzero
         enddo
       else
+        !$omp parallel do private(i)
         do  i = 1, m
           y(i) = beta*y(i)
         end do
@@ -147,6 +149,7 @@ contains
       if (beta == dzero) then
 
         if (alpha == done) then
+          !$omp parallel do private(i,j, acc) schedule(static)
           do i=1,m
             acc  = dzero
             do j=irp(i), irp(i+1)-1
@@ -157,6 +160,7 @@ contains
 
         else if (alpha == -done) then
 
+          !$omp parallel do private(i,j, acc)
           do i=1,m
             acc  = dzero
             do j=irp(i), irp(i+1)-1
@@ -167,6 +171,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = dzero
             do j=irp(i), irp(i+1)-1
@@ -181,6 +186,7 @@ contains
       else if (beta == done) then
 
         if (alpha == done) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = dzero
             do j=irp(i), irp(i+1)-1
@@ -191,6 +197,7 @@ contains
 
         else if (alpha == -done) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = dzero
             do j=irp(i), irp(i+1)-1
@@ -201,6 +208,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = dzero
             do j=irp(i), irp(i+1)-1
@@ -214,6 +222,7 @@ contains
       else if (beta == -done) then
 
         if (alpha == done) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = dzero
             do j=irp(i), irp(i+1)-1
@@ -224,6 +233,7 @@ contains
 
         else if (alpha == -done) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = dzero
             do j=irp(i), irp(i+1)-1
@@ -234,6 +244,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = dzero
             do j=irp(i), irp(i+1)-1
@@ -247,6 +258,7 @@ contains
       else
 
         if (alpha == done) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = dzero
             do j=irp(i), irp(i+1)-1
@@ -257,6 +269,7 @@ contains
 
         else if (alpha == -done) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = dzero
             do j=irp(i), irp(i+1)-1
@@ -267,6 +280,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = dzero
             do j=irp(i), irp(i+1)-1
@@ -282,16 +296,19 @@ contains
     else if (tra) then
 
       if (beta == dzero) then
+        !$omp parallel do private(i)
         do i=1, m
           y(i) = dzero
         end do
       else if (beta == done) then
         ! Do nothing
       else if (beta == -done) then
+        !$omp parallel do private(i)
         do i=1, m
           y(i) = -y(i)
         end do
       else
+        !$omp parallel do private(i)
         do i=1, m
           y(i) = beta*y(i)
         end do
@@ -476,16 +493,18 @@ contains
     real(psb_dpk_), intent(inout)   :: y(ldy,*)
     logical, intent(in)             :: is_triangle,is_unit,tra,ctra
 
-    real(psb_dpk_), intent(inout)   :: acc(*)
+    real(psb_dpk_), intent(inout)   :: acc(:)
     integer(psb_ipk_) :: i,j, ir
 
 
     if (alpha == dzero) then
       if (beta == dzero) then
+        !$omp parallel do private(i)
         do i = 1, m
           y(i,1:nc) = dzero
         enddo
       else
+        !$omp parallel do private(i)
         do  i = 1, m
           y(i,1:nc) = beta*y(i,1:nc)
         end do
@@ -497,6 +516,7 @@ contains
       if (beta == dzero) then
 
         if (alpha == done) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = dzero
             do j=irp(i), irp(i+1)-1
@@ -507,6 +527,7 @@ contains
 
         else if (alpha == -done) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = dzero
             do j=irp(i), irp(i+1)-1
@@ -517,6 +538,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = dzero
             do j=irp(i), irp(i+1)-1
@@ -531,6 +553,7 @@ contains
       else if (beta == done) then
 
         if (alpha == done) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = dzero
             do j=irp(i), irp(i+1)-1
@@ -541,6 +564,7 @@ contains
 
         else if (alpha == -done) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = dzero
             do j=irp(i), irp(i+1)-1
@@ -551,6 +575,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = dzero
             do j=irp(i), irp(i+1)-1
@@ -564,6 +589,7 @@ contains
       else if (beta == -done) then
 
         if (alpha == done) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = dzero
             do j=irp(i), irp(i+1)-1
@@ -574,6 +600,7 @@ contains
 
         else if (alpha == -done) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = dzero
             do j=irp(i), irp(i+1)-1
@@ -584,6 +611,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = dzero
             do j=irp(i), irp(i+1)-1
@@ -597,6 +625,7 @@ contains
       else
 
         if (alpha == done) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = dzero
             do j=irp(i), irp(i+1)-1
@@ -607,6 +636,7 @@ contains
 
         else if (alpha == -done) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = dzero
             do j=irp(i), irp(i+1)-1
@@ -617,6 +647,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = dzero
             do j=irp(i), irp(i+1)-1
diff --git a/base/serial/impl/psb_d_mat_impl.F90 b/base/serial/impl/psb_d_mat_impl.F90
index 86de5536..7f4ac0c1 100644
--- a/base/serial/impl/psb_d_mat_impl.F90
+++ b/base/serial/impl/psb_d_mat_impl.F90
@@ -582,7 +582,7 @@ end subroutine psb_d_get_neigh
 
 
 
-subroutine psb_d_csall(nr,nc,a,info,nz)
+subroutine psb_d_csall(nr,nc,a,info,nz,type,mold)
   use psb_d_mat_mod, psb_protect_name => psb_d_csall
   use psb_d_base_mat_mod
   use psb_error_mod
@@ -591,6 +591,8 @@ subroutine psb_d_csall(nr,nc,a,info,nz)
   integer(psb_ipk_), intent(in)             :: nr,nc
   integer(psb_ipk_), intent(out)            :: info
   integer(psb_ipk_), intent(in), optional   :: nz
+  character(len=*), intent(in), optional    :: type
+  class(psb_d_base_sparse_mat), optional, intent(in) :: mold
 
   integer(psb_ipk_) :: err_act
   character(len=20)  :: name='csall'
@@ -601,7 +603,23 @@ subroutine psb_d_csall(nr,nc,a,info,nz)
   call a%free()
 
   info = psb_success_
-  allocate(psb_d_coo_sparse_mat :: a%a, stat=info)
+  if (present(mold)) then
+    allocate(a%a, stat=info, mold=mold)
+  else if (present(type)) then
+    select case (type)
+    case('CSR')
+      allocate(psb_d_csr_sparse_mat :: a%a, stat=info)
+    case('COO')
+      allocate(psb_d_coo_sparse_mat :: a%a, stat=info)
+    case('CSC')
+      allocate(psb_d_csc_sparse_mat :: a%a, stat=info)
+    case default
+      allocate(psb_d_coo_sparse_mat :: a%a, stat=info)
+    end select
+  else
+    allocate(psb_d_coo_sparse_mat :: a%a, stat=info)
+  end if
+
   if (info /= psb_success_) then
     info = psb_err_alloc_dealloc_
     call psb_errpush(info, name)
@@ -3381,7 +3399,7 @@ end subroutine psb_ld_get_neigh
 
 
 
-subroutine psb_ld_csall(nr,nc,a,info,nz)
+subroutine psb_ld_csall(nr,nc,a,info,nz,type,mold)
   use psb_d_mat_mod, psb_protect_name => psb_ld_csall
   use psb_d_base_mat_mod
   use psb_error_mod
@@ -3390,6 +3408,8 @@ subroutine psb_ld_csall(nr,nc,a,info,nz)
   integer(psb_lpk_), intent(in)             :: nr,nc
   integer(psb_ipk_), intent(out)            :: info
   integer(psb_lpk_), intent(in), optional   :: nz
+  character(len=*), intent(in), optional    :: type
+  class(psb_ld_base_sparse_mat), optional, intent(in) :: mold
 
   integer(psb_ipk_) :: err_act
   character(len=20)  :: name='csall'
@@ -3400,7 +3420,22 @@ subroutine psb_ld_csall(nr,nc,a,info,nz)
   call a%free()
 
   info = psb_success_
-  allocate(psb_ld_coo_sparse_mat :: a%a, stat=info)
+  if (present(mold)) then
+    allocate(a%a, stat=info, mold=mold)
+  else if (present(type)) then
+    select case (type)
+    case('CSR')
+      allocate(psb_ld_csr_sparse_mat :: a%a, stat=info)
+    case('COO')
+      allocate(psb_ld_coo_sparse_mat :: a%a, stat=info)
+    case('CSC')
+      allocate(psb_ld_csc_sparse_mat :: a%a, stat=info)
+    case default
+      allocate(psb_ld_coo_sparse_mat :: a%a, stat=info)
+    end select
+  else
+    allocate(psb_ld_coo_sparse_mat :: a%a, stat=info)
+  end if
   if (info /= psb_success_) then
     info = psb_err_alloc_dealloc_
     call psb_errpush(info, name)
diff --git a/base/serial/impl/psb_s_base_mat_impl.F90 b/base/serial/impl/psb_s_base_mat_impl.F90
index 7a3f647d..4a99a684 100644
--- a/base/serial/impl/psb_s_base_mat_impl.F90
+++ b/base/serial/impl/psb_s_base_mat_impl.F90
@@ -250,7 +250,6 @@ subroutine psb_s_base_mv_from_coo(a,b,info)
 
 end subroutine psb_s_base_mv_from_coo
 
-
 subroutine psb_s_base_mv_to_fmt(a,b,info)
   use psb_s_base_mat_mod, psb_protect_name => psb_s_base_mv_to_fmt
   use psb_error_mod
@@ -698,6 +697,8 @@ subroutine psb_s_base_tril(a,l,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -849,6 +850,8 @@ subroutine psb_s_base_triu(a,u,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -914,8 +917,6 @@ subroutine psb_s_base_triu(a,u,info,&
 
 end subroutine psb_s_base_triu
 
-
-
 subroutine psb_s_base_clone(a,b,info)
   use psb_s_base_mat_mod, psb_protect_name => psb_s_base_clone
   use psb_error_mod
@@ -960,6 +961,7 @@ subroutine psb_s_base_make_nonunit(a)
     mnm = min(m,n)
     nz = tmp%get_nzeros()
     call tmp%reallocate(nz+mnm)
+    !$omp parallel do private(i) shared(nz)
     do i=1, mnm
       tmp%val(nz+i) = sone
       tmp%ia(nz+i)  = i
@@ -1506,6 +1508,7 @@ contains
     real(psb_spk_), intent(out) :: y(*)
     integer(psb_ipk_) :: i
 
+    !$omp parallel do private(i)
     do i=1,n
       y(i) = d(i)*x(i)
     end do
@@ -1519,6 +1522,7 @@ contains
     real(psb_spk_), intent(inout) :: x(*)
     integer(psb_ipk_) :: i
 
+    !$omp parallel do private(i)
     do i=1,n
       x(i) = d(i)*x(i)
     end do
@@ -3182,6 +3186,8 @@ subroutine psb_ls_base_tril(a,l,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -3334,6 +3340,8 @@ subroutine psb_ls_base_triu(a,u,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -3446,6 +3454,7 @@ subroutine psb_ls_base_make_nonunit(a)
     mnm = min(m,n)
     nz = tmp%get_nzeros()
     call tmp%reallocate(nz+mnm)
+    !$omp parallel do private(i) shared(nz)
     do i=1, mnm
       tmp%val(nz+i) = sone
       tmp%ia(nz+i)  = i
diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90
index 061fb904..d214b2d5 100644
--- a/base/serial/impl/psb_s_coo_impl.F90
+++ b/base/serial/impl/psb_s_coo_impl.F90
@@ -114,6 +114,7 @@ subroutine psb_s_coo_scal(d,a,info,side)
       goto 9999
     end if
 
+    !$omp parallel do private(i,j)
     do i=1,a%get_nzeros()
       j        = a%ia(i)
       a%val(i) = a%val(i) * d(j)
@@ -126,6 +127,7 @@ subroutine psb_s_coo_scal(d,a,info,side)
       goto 9999
     end if
 
+    !$omp parallel do private(i,j)
     do i=1,a%get_nzeros()
       j        = a%ja(i)
       a%val(i) = a%val(i) * d(j)
@@ -201,6 +203,7 @@ subroutine psb_s_coo_scalplusidentity(d,a,info)
   end if
 
   mnm = min(a%get_nrows(),a%get_ncols())
+  !$omp parallel do private(i,j)
   do i=1,a%get_nzeros()
     a%val(i) = a%val(i) * d
     j=a%ia(i)
@@ -253,12 +256,30 @@ subroutine psb_s_coo_spaxpby(alpha,a,beta,b,info)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined (OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = alpha*a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) = beta*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = alpha*a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   ! Move to correct output format
@@ -346,12 +367,30 @@ function psb_s_coo_cmpmat(a,b,tol,info) result(res)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined (OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = alpha*a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) =  (-sone)*beta*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = (-sone)*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   if (info /= psb_success_) then
@@ -728,9 +767,6 @@ subroutine psb_s_coo_print(iout,a,iv,head,ivr,ivc)
 
 end subroutine psb_s_coo_print
 
-
-
-
 function  psb_s_coo_get_nz_row(idx,a) result(res)
   use psb_const_mod
   use psb_sort_mod
@@ -1670,7 +1706,6 @@ subroutine psb_s_coo_csmv(alpha,a,x,beta,y,info,trans)
 
 end subroutine psb_s_coo_csmv
 
-
 subroutine psb_s_coo_csmm(alpha,a,x,beta,y,info,trans)
   use psb_const_mod
   use psb_error_mod
@@ -1709,11 +1744,9 @@ subroutine psb_s_coo_csmm(alpha,a,x,beta,y,info,trans)
     trans_ = 'N'
   end if
 
-
   tra  = (psb_toupper(trans_) == 'T')
   ctra = (psb_toupper(trans_) == 'C')
 
-
   if (tra.or.ctra) then
     m = a%get_ncols()
     n = a%get_nrows()
@@ -1895,7 +1928,15 @@ function psb_s_coo_maxval(a) result(res)
   nnz = a%get_nzeros()
   if (allocated(a%val)) then
     nnz = min(nnz,size(a%val))
+#if defined(OPENMP)
+    res = szero
+    !$omp parallel do private(i) reduction(max: res)
+    do i=1, nnz
+      res = max(res,abs(a%val(i)))
+    end do
+#else
     res = maxval(abs(a%val(1:nnz)))
+#endif
   end if
 
 end function psb_s_coo_maxval
@@ -2275,11 +2316,13 @@ subroutine psb_s_coo_csgetptn(imin,imax,a,nz,ia,ja,info,&
        & iren)
 
   if (rscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ia(i) = ia(i) - imin + 1
     end do
   end if
   if (cscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ja(i) = ja(i) - jmin_ + 1
     end do
@@ -2553,11 +2596,13 @@ subroutine psb_s_coo_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
        & iren)
 
   if (rscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ia(i) = ia(i) - imin + 1
     end do
   end if
   if (cscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ja(i) = ja(i) - jmin_ + 1
     end do
@@ -2768,7 +2813,6 @@ contains
 
 end subroutine psb_s_coo_csgetrow
 
-
 subroutine psb_s_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
   use psb_error_mod
   use psb_realloc_mod
@@ -3021,7 +3065,6 @@ contains
 
 end subroutine psb_s_coo_csput_a
 
-
 subroutine psb_s_cp_coo_to_coo(a,b,info)
   use psb_error_mod
   use psb_s_base_mat_mod, psb_protect_name => psb_s_cp_coo_to_coo
@@ -3045,10 +3088,21 @@ subroutine psb_s_cp_coo_to_coo(a,b,info)
   call b%set_nzeros(nz)
   call b%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+      b%ia(i)  = a%ia(i)
+      b%ja(i)  = a%ja(i)
+      b%val(i) = a%val(i)
+    end do
+  end block
+#else
   b%ia(1:nz)  = a%ia(1:nz)
   b%ja(1:nz)  = a%ja(1:nz)
   b%val(1:nz) = a%val(1:nz)
-
+#endif
   call b%set_host()
 
   if (.not.b%is_by_rows()) call b%fix(info)
@@ -3087,10 +3141,21 @@ subroutine psb_s_cp_coo_from_coo(a,b,info)
   call a%set_nzeros(nz)
   call a%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+       a%ia(i)  = b%ia(i) 
+       a%ja(i)  = b%ja(i) 
+       a%val(i) = b%val(i)
+    end do
+  end block
+#else
   a%ia(1:nz)  = b%ia(1:nz)
   a%ja(1:nz)  = b%ja(1:nz)
   a%val(1:nz) = b%val(1:nz)
-
+#endif
   call a%set_host()
 
   if (.not.a%is_by_rows()) call a%fix(info)
@@ -3445,8 +3510,6 @@ subroutine psb_s_fix_coo(a,info,idir)
 
 end subroutine psb_s_fix_coo
 
-
-
 subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
   use psb_const_mod
   use psb_error_mod
@@ -4174,7 +4237,6 @@ subroutine psb_s_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
 
 end subroutine psb_s_fix_coo_inner
 
-
 subroutine psb_s_cp_coo_to_lcoo(a,b,info)
   use psb_error_mod
   use psb_s_base_mat_mod, psb_protect_name => psb_s_cp_coo_to_lcoo
@@ -4199,10 +4261,21 @@ subroutine psb_s_cp_coo_to_lcoo(a,b,info)
   call b%set_nzeros(nz)
   call b%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+      b%ia(i)  = a%ia(i)
+      b%ja(i)  = a%ja(i)
+      b%val(i) = a%val(i)
+    end do
+  end block
+#else
   b%ia(1:nz)  = a%ia(1:nz)
   b%ja(1:nz)  = a%ja(1:nz)
   b%val(1:nz) = a%val(1:nz)
-
+#endif
   call b%set_host()
 
   if (.not.b%is_by_rows()) call b%fix(info)
@@ -4240,10 +4313,21 @@ subroutine psb_s_cp_coo_from_lcoo(a,b,info)
   call a%set_nzeros(nz)
   call a%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+      a%ia(i)  = b%ia(i) 
+      a%ja(i)  = b%ja(i) 
+      a%val(i) = b%val(i)
+    end do
+  end block
+#else
   a%ia(1:nz)  = b%ia(1:nz)
   a%ja(1:nz)  = b%ja(1:nz)
   a%val(1:nz) = b%val(1:nz)
-
+#endif
   call a%set_host()
 
   if (.not.a%is_by_rows()) call a%fix(info)
@@ -4442,7 +4526,17 @@ function psb_ls_coo_maxval(a) result(res)
   nnz = a%get_nzeros()
   if (allocated(a%val)) then
     nnz = min(nnz,size(a%val))
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nnz
+      res = max(res,abs(a%val(i)))
+    end do
+  end block
+#else
     res = maxval(abs(a%val(1:nnz)))
+#endif
   end if
 
 end function psb_ls_coo_maxval
@@ -4499,7 +4593,17 @@ function psb_ls_coo_csnmi(a) result(res)
       i = a%ia(j)
       vt(i) = vt(i) + abs(a%val(j))
     end do
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, m
+      res = max(res,abs(vt(i)))
+    end do
+  end block 
+#else
     res = maxval(vt(1:m))
+#endif
     deallocate(vt,stat=info)
   end if
 
@@ -4539,7 +4643,17 @@ function psb_ls_coo_csnm1(a) result(res)
     i = a%ja(j)
     vt(i) = vt(i) + abs(a%val(j))
   end do
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, n
+      res = max(res,abs(vt(i)))
+    end do
+  end block 
+#else
   res = maxval(vt(1:n))
+#endif
   deallocate(vt,stat=info)
 
   return
@@ -4584,7 +4698,6 @@ subroutine psb_ls_coo_rowsum(d,a)
     d(i) = d(i) + a%val(j)
   end do
 
-
   return
   call psb_erractionrestore(err_act)
   return
@@ -4592,7 +4705,6 @@ subroutine psb_ls_coo_rowsum(d,a)
 9999 call psb_error_handler(err_act)
 
   return
-
 end subroutine psb_ls_coo_rowsum
 
 subroutine psb_ls_coo_arwsum(d,a)
@@ -4761,6 +4873,7 @@ subroutine psb_ls_coo_scalplusidentity(d,a,info)
   end if
 
   mnm = min(a%get_nrows(),a%get_ncols())
+  !$omp parallel do private(i,j)
   do i=1,a%get_nzeros()
     a%val(i) = a%val(i) * d
     j=a%ia(i)
@@ -4813,12 +4926,30 @@ subroutine psb_ls_coo_spaxpby(alpha,a,beta,b,info)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = alpha*a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) =  beta*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = alpha*a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   ! Move to correct output format
@@ -4906,12 +5037,30 @@ function psb_ls_coo_cmpmat(a,b,tol,info) result(res)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) =  (-1_psb_spk_)*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = (-1_psb_spk_)*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   if (info /= psb_success_) then
@@ -5950,7 +6099,7 @@ subroutine psb_ls_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
 
   if (nz < 0) then
     info = psb_err_iarg_neg_
-    call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
+3    call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
     goto 9999
   end if
   if (size(ia) < nz) then
diff --git a/base/serial/impl/psb_s_csr_impl.f90 b/base/serial/impl/psb_s_csr_impl.f90
index a0c56d35..76140d07 100644
--- a/base/serial/impl/psb_s_csr_impl.f90
+++ b/base/serial/impl/psb_s_csr_impl.f90
@@ -130,10 +130,12 @@ contains
 
     if (alpha == szero) then
       if (beta == szero) then
+        !$omp parallel do private(i)
         do i = 1, m
           y(i) = szero
         enddo
       else
+        !$omp parallel do private(i)
         do  i = 1, m
           y(i) = beta*y(i)
         end do
@@ -147,6 +149,7 @@ contains
       if (beta == szero) then
 
         if (alpha == sone) then
+          !$omp parallel do private(i,j, acc) schedule(static)
           do i=1,m
             acc  = szero
             do j=irp(i), irp(i+1)-1
@@ -157,6 +160,7 @@ contains
 
         else if (alpha == -sone) then
 
+          !$omp parallel do private(i,j, acc)
           do i=1,m
             acc  = szero
             do j=irp(i), irp(i+1)-1
@@ -167,6 +171,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = szero
             do j=irp(i), irp(i+1)-1
@@ -181,6 +186,7 @@ contains
       else if (beta == sone) then
 
         if (alpha == sone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = szero
             do j=irp(i), irp(i+1)-1
@@ -191,6 +197,7 @@ contains
 
         else if (alpha == -sone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = szero
             do j=irp(i), irp(i+1)-1
@@ -201,6 +208,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = szero
             do j=irp(i), irp(i+1)-1
@@ -214,6 +222,7 @@ contains
       else if (beta == -sone) then
 
         if (alpha == sone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = szero
             do j=irp(i), irp(i+1)-1
@@ -224,6 +233,7 @@ contains
 
         else if (alpha == -sone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = szero
             do j=irp(i), irp(i+1)-1
@@ -234,6 +244,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = szero
             do j=irp(i), irp(i+1)-1
@@ -247,6 +258,7 @@ contains
       else
 
         if (alpha == sone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = szero
             do j=irp(i), irp(i+1)-1
@@ -257,6 +269,7 @@ contains
 
         else if (alpha == -sone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = szero
             do j=irp(i), irp(i+1)-1
@@ -267,6 +280,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = szero
             do j=irp(i), irp(i+1)-1
@@ -282,16 +296,19 @@ contains
     else if (tra) then
 
       if (beta == szero) then
+        !$omp parallel do private(i)
         do i=1, m
           y(i) = szero
         end do
       else if (beta == sone) then
         ! Do nothing
       else if (beta == -sone) then
+        !$omp parallel do private(i)
         do i=1, m
           y(i) = -y(i)
         end do
       else
+        !$omp parallel do private(i)
         do i=1, m
           y(i) = beta*y(i)
         end do
@@ -476,16 +493,18 @@ contains
     real(psb_spk_), intent(inout)   :: y(ldy,*)
     logical, intent(in)             :: is_triangle,is_unit,tra,ctra
 
-    real(psb_spk_), intent(inout)   :: acc(*)
+    real(psb_spk_), intent(inout)   :: acc(:)
     integer(psb_ipk_) :: i,j, ir
 
 
     if (alpha == szero) then
       if (beta == szero) then
+        !$omp parallel do private(i)
         do i = 1, m
           y(i,1:nc) = szero
         enddo
       else
+        !$omp parallel do private(i)
         do  i = 1, m
           y(i,1:nc) = beta*y(i,1:nc)
         end do
@@ -497,6 +516,7 @@ contains
       if (beta == szero) then
 
         if (alpha == sone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = szero
             do j=irp(i), irp(i+1)-1
@@ -507,6 +527,7 @@ contains
 
         else if (alpha == -sone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = szero
             do j=irp(i), irp(i+1)-1
@@ -517,6 +538,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = szero
             do j=irp(i), irp(i+1)-1
@@ -531,6 +553,7 @@ contains
       else if (beta == sone) then
 
         if (alpha == sone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = szero
             do j=irp(i), irp(i+1)-1
@@ -541,6 +564,7 @@ contains
 
         else if (alpha == -sone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = szero
             do j=irp(i), irp(i+1)-1
@@ -551,6 +575,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = szero
             do j=irp(i), irp(i+1)-1
@@ -564,6 +589,7 @@ contains
       else if (beta == -sone) then
 
         if (alpha == sone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = szero
             do j=irp(i), irp(i+1)-1
@@ -574,6 +600,7 @@ contains
 
         else if (alpha == -sone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = szero
             do j=irp(i), irp(i+1)-1
@@ -584,6 +611,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = szero
             do j=irp(i), irp(i+1)-1
@@ -597,6 +625,7 @@ contains
       else
 
         if (alpha == sone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = szero
             do j=irp(i), irp(i+1)-1
@@ -607,6 +636,7 @@ contains
 
         else if (alpha == -sone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = szero
             do j=irp(i), irp(i+1)-1
@@ -617,6 +647,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = szero
             do j=irp(i), irp(i+1)-1
diff --git a/base/serial/impl/psb_s_mat_impl.F90 b/base/serial/impl/psb_s_mat_impl.F90
index 867f9fa4..806a08e3 100644
--- a/base/serial/impl/psb_s_mat_impl.F90
+++ b/base/serial/impl/psb_s_mat_impl.F90
@@ -582,7 +582,7 @@ end subroutine psb_s_get_neigh
 
 
 
-subroutine psb_s_csall(nr,nc,a,info,nz)
+subroutine psb_s_csall(nr,nc,a,info,nz,type,mold)
   use psb_s_mat_mod, psb_protect_name => psb_s_csall
   use psb_s_base_mat_mod
   use psb_error_mod
@@ -591,6 +591,8 @@ subroutine psb_s_csall(nr,nc,a,info,nz)
   integer(psb_ipk_), intent(in)             :: nr,nc
   integer(psb_ipk_), intent(out)            :: info
   integer(psb_ipk_), intent(in), optional   :: nz
+  character(len=*), intent(in), optional    :: type
+  class(psb_s_base_sparse_mat), optional, intent(in) :: mold
 
   integer(psb_ipk_) :: err_act
   character(len=20)  :: name='csall'
@@ -601,7 +603,23 @@ subroutine psb_s_csall(nr,nc,a,info,nz)
   call a%free()
 
   info = psb_success_
-  allocate(psb_s_coo_sparse_mat :: a%a, stat=info)
+  if (present(mold)) then
+    allocate(a%a, stat=info, mold=mold)
+  else if (present(type)) then
+    select case (type)
+    case('CSR')
+      allocate(psb_s_csr_sparse_mat :: a%a, stat=info)
+    case('COO')
+      allocate(psb_s_coo_sparse_mat :: a%a, stat=info)
+    case('CSC')
+      allocate(psb_s_csc_sparse_mat :: a%a, stat=info)
+    case default
+      allocate(psb_s_coo_sparse_mat :: a%a, stat=info)
+    end select
+  else
+    allocate(psb_s_coo_sparse_mat :: a%a, stat=info)
+  end if
+
   if (info /= psb_success_) then
     info = psb_err_alloc_dealloc_
     call psb_errpush(info, name)
@@ -3381,7 +3399,7 @@ end subroutine psb_ls_get_neigh
 
 
 
-subroutine psb_ls_csall(nr,nc,a,info,nz)
+subroutine psb_ls_csall(nr,nc,a,info,nz,type,mold)
   use psb_s_mat_mod, psb_protect_name => psb_ls_csall
   use psb_s_base_mat_mod
   use psb_error_mod
@@ -3390,6 +3408,8 @@ subroutine psb_ls_csall(nr,nc,a,info,nz)
   integer(psb_lpk_), intent(in)             :: nr,nc
   integer(psb_ipk_), intent(out)            :: info
   integer(psb_lpk_), intent(in), optional   :: nz
+  character(len=*), intent(in), optional    :: type
+  class(psb_ls_base_sparse_mat), optional, intent(in) :: mold
 
   integer(psb_ipk_) :: err_act
   character(len=20)  :: name='csall'
@@ -3400,7 +3420,22 @@ subroutine psb_ls_csall(nr,nc,a,info,nz)
   call a%free()
 
   info = psb_success_
-  allocate(psb_ls_coo_sparse_mat :: a%a, stat=info)
+  if (present(mold)) then
+    allocate(a%a, stat=info, mold=mold)
+  else if (present(type)) then
+    select case (type)
+    case('CSR')
+      allocate(psb_ls_csr_sparse_mat :: a%a, stat=info)
+    case('COO')
+      allocate(psb_ls_coo_sparse_mat :: a%a, stat=info)
+    case('CSC')
+      allocate(psb_ls_csc_sparse_mat :: a%a, stat=info)
+    case default
+      allocate(psb_ls_coo_sparse_mat :: a%a, stat=info)
+    end select
+  else
+    allocate(psb_ls_coo_sparse_mat :: a%a, stat=info)
+  end if
   if (info /= psb_success_) then
     info = psb_err_alloc_dealloc_
     call psb_errpush(info, name)
diff --git a/base/serial/impl/psb_z_base_mat_impl.F90 b/base/serial/impl/psb_z_base_mat_impl.F90
index fbbbd83d..404027c5 100644
--- a/base/serial/impl/psb_z_base_mat_impl.F90
+++ b/base/serial/impl/psb_z_base_mat_impl.F90
@@ -250,7 +250,6 @@ subroutine psb_z_base_mv_from_coo(a,b,info)
 
 end subroutine psb_z_base_mv_from_coo
 
-
 subroutine psb_z_base_mv_to_fmt(a,b,info)
   use psb_z_base_mat_mod, psb_protect_name => psb_z_base_mv_to_fmt
   use psb_error_mod
@@ -698,6 +697,8 @@ subroutine psb_z_base_tril(a,l,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -849,6 +850,8 @@ subroutine psb_z_base_triu(a,u,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -914,8 +917,6 @@ subroutine psb_z_base_triu(a,u,info,&
 
 end subroutine psb_z_base_triu
 
-
-
 subroutine psb_z_base_clone(a,b,info)
   use psb_z_base_mat_mod, psb_protect_name => psb_z_base_clone
   use psb_error_mod
@@ -960,6 +961,7 @@ subroutine psb_z_base_make_nonunit(a)
     mnm = min(m,n)
     nz = tmp%get_nzeros()
     call tmp%reallocate(nz+mnm)
+    !$omp parallel do private(i) shared(nz)
     do i=1, mnm
       tmp%val(nz+i) = zone
       tmp%ia(nz+i)  = i
@@ -1506,6 +1508,7 @@ contains
     complex(psb_dpk_), intent(out) :: y(*)
     integer(psb_ipk_) :: i
 
+    !$omp parallel do private(i)
     do i=1,n
       y(i) = d(i)*x(i)
     end do
@@ -1519,6 +1522,7 @@ contains
     complex(psb_dpk_), intent(inout) :: x(*)
     integer(psb_ipk_) :: i
 
+    !$omp parallel do private(i)
     do i=1,n
       x(i) = d(i)*x(i)
     end do
@@ -3182,6 +3186,8 @@ subroutine psb_lz_base_tril(a,l,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -3334,6 +3340,8 @@ subroutine psb_lz_base_triu(a,u,info,&
     call psb_realloc(max(mb,nb),ia,info)
     call psb_realloc(max(mb,nb),ja,info)
     call psb_realloc(max(mb,nb),val,info)
+    ! Implementing this in OpenMP?
+    ! Tricky, to be seen 
     do i=imin_,imax_, nbk
       ibk = min(nbk,imax_-i+1)
       call a%csget(i,i+ibk-1,nzout,ia,ja,val,info,&
@@ -3446,6 +3454,7 @@ subroutine psb_lz_base_make_nonunit(a)
     mnm = min(m,n)
     nz = tmp%get_nzeros()
     call tmp%reallocate(nz+mnm)
+    !$omp parallel do private(i) shared(nz)
     do i=1, mnm
       tmp%val(nz+i) = zone
       tmp%ia(nz+i)  = i
diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90
index 2da38296..7850aeec 100644
--- a/base/serial/impl/psb_z_coo_impl.F90
+++ b/base/serial/impl/psb_z_coo_impl.F90
@@ -114,6 +114,7 @@ subroutine psb_z_coo_scal(d,a,info,side)
       goto 9999
     end if
 
+    !$omp parallel do private(i,j)
     do i=1,a%get_nzeros()
       j        = a%ia(i)
       a%val(i) = a%val(i) * d(j)
@@ -126,6 +127,7 @@ subroutine psb_z_coo_scal(d,a,info,side)
       goto 9999
     end if
 
+    !$omp parallel do private(i,j)
     do i=1,a%get_nzeros()
       j        = a%ja(i)
       a%val(i) = a%val(i) * d(j)
@@ -201,6 +203,7 @@ subroutine psb_z_coo_scalplusidentity(d,a,info)
   end if
 
   mnm = min(a%get_nrows(),a%get_ncols())
+  !$omp parallel do private(i,j)
   do i=1,a%get_nzeros()
     a%val(i) = a%val(i) * d
     j=a%ia(i)
@@ -253,12 +256,30 @@ subroutine psb_z_coo_spaxpby(alpha,a,beta,b,info)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined (OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = alpha*a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) = beta*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = alpha*a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   ! Move to correct output format
@@ -346,12 +367,30 @@ function psb_z_coo_cmpmat(a,b,tol,info) result(res)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined (OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = alpha*a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) =  (-done)*beta*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = (-done)*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   if (info /= psb_success_) then
@@ -728,9 +767,6 @@ subroutine psb_z_coo_print(iout,a,iv,head,ivr,ivc)
 
 end subroutine psb_z_coo_print
 
-
-
-
 function  psb_z_coo_get_nz_row(idx,a) result(res)
   use psb_const_mod
   use psb_sort_mod
@@ -1670,7 +1706,6 @@ subroutine psb_z_coo_csmv(alpha,a,x,beta,y,info,trans)
 
 end subroutine psb_z_coo_csmv
 
-
 subroutine psb_z_coo_csmm(alpha,a,x,beta,y,info,trans)
   use psb_const_mod
   use psb_error_mod
@@ -1709,11 +1744,9 @@ subroutine psb_z_coo_csmm(alpha,a,x,beta,y,info,trans)
     trans_ = 'N'
   end if
 
-
   tra  = (psb_toupper(trans_) == 'T')
   ctra = (psb_toupper(trans_) == 'C')
 
-
   if (tra.or.ctra) then
     m = a%get_ncols()
     n = a%get_nrows()
@@ -1895,7 +1928,15 @@ function psb_z_coo_maxval(a) result(res)
   nnz = a%get_nzeros()
   if (allocated(a%val)) then
     nnz = min(nnz,size(a%val))
+#if defined(OPENMP)
+    res = dzero
+    !$omp parallel do private(i) reduction(max: res)
+    do i=1, nnz
+      res = max(res,abs(a%val(i)))
+    end do
+#else
     res = maxval(abs(a%val(1:nnz)))
+#endif
   end if
 
 end function psb_z_coo_maxval
@@ -2275,11 +2316,13 @@ subroutine psb_z_coo_csgetptn(imin,imax,a,nz,ia,ja,info,&
        & iren)
 
   if (rscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ia(i) = ia(i) - imin + 1
     end do
   end if
   if (cscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ja(i) = ja(i) - jmin_ + 1
     end do
@@ -2553,11 +2596,13 @@ subroutine psb_z_coo_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
        & iren)
 
   if (rscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ia(i) = ia(i) - imin + 1
     end do
   end if
   if (cscale_) then
+    !$omp parallel do private(i)
     do i=nzin_+1, nzin_+nz
       ja(i) = ja(i) - jmin_ + 1
     end do
@@ -2768,7 +2813,6 @@ contains
 
 end subroutine psb_z_coo_csgetrow
 
-
 subroutine psb_z_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
   use psb_error_mod
   use psb_realloc_mod
@@ -3021,7 +3065,6 @@ contains
 
 end subroutine psb_z_coo_csput_a
 
-
 subroutine psb_z_cp_coo_to_coo(a,b,info)
   use psb_error_mod
   use psb_z_base_mat_mod, psb_protect_name => psb_z_cp_coo_to_coo
@@ -3045,10 +3088,21 @@ subroutine psb_z_cp_coo_to_coo(a,b,info)
   call b%set_nzeros(nz)
   call b%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+      b%ia(i)  = a%ia(i)
+      b%ja(i)  = a%ja(i)
+      b%val(i) = a%val(i)
+    end do
+  end block
+#else
   b%ia(1:nz)  = a%ia(1:nz)
   b%ja(1:nz)  = a%ja(1:nz)
   b%val(1:nz) = a%val(1:nz)
-
+#endif
   call b%set_host()
 
   if (.not.b%is_by_rows()) call b%fix(info)
@@ -3087,10 +3141,21 @@ subroutine psb_z_cp_coo_from_coo(a,b,info)
   call a%set_nzeros(nz)
   call a%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+       a%ia(i)  = b%ia(i) 
+       a%ja(i)  = b%ja(i) 
+       a%val(i) = b%val(i)
+    end do
+  end block
+#else
   a%ia(1:nz)  = b%ia(1:nz)
   a%ja(1:nz)  = b%ja(1:nz)
   a%val(1:nz) = b%val(1:nz)
-
+#endif
   call a%set_host()
 
   if (.not.a%is_by_rows()) call a%fix(info)
@@ -3445,8 +3510,6 @@ subroutine psb_z_fix_coo(a,info,idir)
 
 end subroutine psb_z_fix_coo
 
-
-
 subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
   use psb_const_mod
   use psb_error_mod
@@ -4174,7 +4237,6 @@ subroutine psb_z_fix_coo_inner(nr,nc,nzin,dupl,ia,ja,val,nzout,info,idir)
 
 end subroutine psb_z_fix_coo_inner
 
-
 subroutine psb_z_cp_coo_to_lcoo(a,b,info)
   use psb_error_mod
   use psb_z_base_mat_mod, psb_protect_name => psb_z_cp_coo_to_lcoo
@@ -4199,10 +4261,21 @@ subroutine psb_z_cp_coo_to_lcoo(a,b,info)
   call b%set_nzeros(nz)
   call b%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+      b%ia(i)  = a%ia(i)
+      b%ja(i)  = a%ja(i)
+      b%val(i) = a%val(i)
+    end do
+  end block
+#else
   b%ia(1:nz)  = a%ia(1:nz)
   b%ja(1:nz)  = a%ja(1:nz)
   b%val(1:nz) = a%val(1:nz)
-
+#endif
   call b%set_host()
 
   if (.not.b%is_by_rows()) call b%fix(info)
@@ -4240,10 +4313,21 @@ subroutine psb_z_cp_coo_from_lcoo(a,b,info)
   call a%set_nzeros(nz)
   call a%reallocate(nz)
 
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nz
+      a%ia(i)  = b%ia(i) 
+      a%ja(i)  = b%ja(i) 
+      a%val(i) = b%val(i)
+    end do
+  end block
+#else
   a%ia(1:nz)  = b%ia(1:nz)
   a%ja(1:nz)  = b%ja(1:nz)
   a%val(1:nz) = b%val(1:nz)
-
+#endif
   call a%set_host()
 
   if (.not.a%is_by_rows()) call a%fix(info)
@@ -4442,7 +4526,17 @@ function psb_lz_coo_maxval(a) result(res)
   nnz = a%get_nzeros()
   if (allocated(a%val)) then
     nnz = min(nnz,size(a%val))
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nnz
+      res = max(res,abs(a%val(i)))
+    end do
+  end block
+#else
     res = maxval(abs(a%val(1:nnz)))
+#endif
   end if
 
 end function psb_lz_coo_maxval
@@ -4499,7 +4593,17 @@ function psb_lz_coo_csnmi(a) result(res)
       i = a%ia(j)
       vt(i) = vt(i) + abs(a%val(j))
     end do
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, m
+      res = max(res,abs(vt(i)))
+    end do
+  end block 
+#else
     res = maxval(vt(1:m))
+#endif
     deallocate(vt,stat=info)
   end if
 
@@ -4539,7 +4643,17 @@ function psb_lz_coo_csnm1(a) result(res)
     i = a%ja(j)
     vt(i) = vt(i) + abs(a%val(j))
   end do
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, n
+      res = max(res,abs(vt(i)))
+    end do
+  end block 
+#else
   res = maxval(vt(1:n))
+#endif
   deallocate(vt,stat=info)
 
   return
@@ -4584,7 +4698,6 @@ subroutine psb_lz_coo_rowsum(d,a)
     d(i) = d(i) + a%val(j)
   end do
 
-
   return
   call psb_erractionrestore(err_act)
   return
@@ -4592,7 +4705,6 @@ subroutine psb_lz_coo_rowsum(d,a)
 9999 call psb_error_handler(err_act)
 
   return
-
 end subroutine psb_lz_coo_rowsum
 
 subroutine psb_lz_coo_arwsum(d,a)
@@ -4761,6 +4873,7 @@ subroutine psb_lz_coo_scalplusidentity(d,a,info)
   end if
 
   mnm = min(a%get_nrows(),a%get_ncols())
+  !$omp parallel do private(i,j)
   do i=1,a%get_nzeros()
     a%val(i) = a%val(i) * d
     j=a%ia(i)
@@ -4813,12 +4926,30 @@ subroutine psb_lz_coo_spaxpby(alpha,a,beta,b,info)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = alpha*a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) =  beta*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = alpha*a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = beta*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   ! Move to correct output format
@@ -4906,12 +5037,30 @@ function psb_lz_coo_cmpmat(a,b,tol,info) result(res)
   ! Allocate (temporary) space for the solution
   call tcoo%allocate(M,N,(nza+nzb))
   ! Compute the sum
+#if defined(OPENMP)
+  block
+    integer(psb_ipk_) :: i
+    !$omp parallel do private(i)
+    do i=1, nza
+      tcoo%ia(i) = a%ia(i)
+      tcoo%ja(i) = a%ja(i)
+      tcoo%val(i) = a%val(i)
+    end do
+    !$omp parallel do private(i)
+    do i=1, nzb
+      tcoo%ia(nza+i) = bcoo%ia(i)
+      tcoo%ja(nza+i) = bcoo%ja(i)
+      tcoo%val(nza+i) =  (-1_psb_dpk_)*bcoo%val(i)
+    end do
+  end block
+#else
   tcoo%ia(1:nza) = a%ia(1:nza)
   tcoo%ja(1:nza) = a%ja(1:nza)
   tcoo%val(1:nza) = a%val(1:nza)
   tcoo%ia(nza+1:nza+nzb) = bcoo%ia(1:nzb)
   tcoo%ja(nza+1:nza+nzb) = bcoo%ja(1:nzb)
   tcoo%val(nza+1:nza+nzb) = (-1_psb_dpk_)*bcoo%val(1:nzb)
+#endif
   ! Fix the indexes
   call tcoo%fix(info)
   if (info /= psb_success_) then
@@ -5950,7 +6099,7 @@ subroutine psb_lz_coo_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
 
   if (nz < 0) then
     info = psb_err_iarg_neg_
-    call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
+3    call psb_errpush(info,name,i_err=(/1_psb_ipk_/))
     goto 9999
   end if
   if (size(ia) < nz) then
diff --git a/base/serial/impl/psb_z_csr_impl.f90 b/base/serial/impl/psb_z_csr_impl.f90
index 498f2b28..d16e39eb 100644
--- a/base/serial/impl/psb_z_csr_impl.f90
+++ b/base/serial/impl/psb_z_csr_impl.f90
@@ -130,10 +130,12 @@ contains
 
     if (alpha == zzero) then
       if (beta == zzero) then
+        !$omp parallel do private(i)
         do i = 1, m
           y(i) = zzero
         enddo
       else
+        !$omp parallel do private(i)
         do  i = 1, m
           y(i) = beta*y(i)
         end do
@@ -147,6 +149,7 @@ contains
       if (beta == zzero) then
 
         if (alpha == zone) then
+          !$omp parallel do private(i,j, acc) schedule(static)
           do i=1,m
             acc  = zzero
             do j=irp(i), irp(i+1)-1
@@ -157,6 +160,7 @@ contains
 
         else if (alpha == -zone) then
 
+          !$omp parallel do private(i,j, acc)
           do i=1,m
             acc  = zzero
             do j=irp(i), irp(i+1)-1
@@ -167,6 +171,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = zzero
             do j=irp(i), irp(i+1)-1
@@ -181,6 +186,7 @@ contains
       else if (beta == zone) then
 
         if (alpha == zone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = zzero
             do j=irp(i), irp(i+1)-1
@@ -191,6 +197,7 @@ contains
 
         else if (alpha == -zone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = zzero
             do j=irp(i), irp(i+1)-1
@@ -201,6 +208,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = zzero
             do j=irp(i), irp(i+1)-1
@@ -214,6 +222,7 @@ contains
       else if (beta == -zone) then
 
         if (alpha == zone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = zzero
             do j=irp(i), irp(i+1)-1
@@ -224,6 +233,7 @@ contains
 
         else if (alpha == -zone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = zzero
             do j=irp(i), irp(i+1)-1
@@ -234,6 +244,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = zzero
             do j=irp(i), irp(i+1)-1
@@ -247,6 +258,7 @@ contains
       else
 
         if (alpha == zone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = zzero
             do j=irp(i), irp(i+1)-1
@@ -257,6 +269,7 @@ contains
 
         else if (alpha == -zone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = zzero
             do j=irp(i), irp(i+1)-1
@@ -267,6 +280,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc  = zzero
             do j=irp(i), irp(i+1)-1
@@ -282,16 +296,19 @@ contains
     else if (tra) then
 
       if (beta == zzero) then
+        !$omp parallel do private(i)
         do i=1, m
           y(i) = zzero
         end do
       else if (beta == zone) then
         ! Do nothing
       else if (beta == -zone) then
+        !$omp parallel do private(i)
         do i=1, m
           y(i) = -y(i)
         end do
       else
+        !$omp parallel do private(i)
         do i=1, m
           y(i) = beta*y(i)
         end do
@@ -476,16 +493,18 @@ contains
     complex(psb_dpk_), intent(inout)   :: y(ldy,*)
     logical, intent(in)             :: is_triangle,is_unit,tra,ctra
 
-    complex(psb_dpk_), intent(inout)   :: acc(*)
+    complex(psb_dpk_), intent(inout)   :: acc(:)
     integer(psb_ipk_) :: i,j, ir
 
 
     if (alpha == zzero) then
       if (beta == zzero) then
+        !$omp parallel do private(i)
         do i = 1, m
           y(i,1:nc) = zzero
         enddo
       else
+        !$omp parallel do private(i)
         do  i = 1, m
           y(i,1:nc) = beta*y(i,1:nc)
         end do
@@ -497,6 +516,7 @@ contains
       if (beta == zzero) then
 
         if (alpha == zone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = zzero
             do j=irp(i), irp(i+1)-1
@@ -507,6 +527,7 @@ contains
 
         else if (alpha == -zone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = zzero
             do j=irp(i), irp(i+1)-1
@@ -517,6 +538,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = zzero
             do j=irp(i), irp(i+1)-1
@@ -531,6 +553,7 @@ contains
       else if (beta == zone) then
 
         if (alpha == zone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = zzero
             do j=irp(i), irp(i+1)-1
@@ -541,6 +564,7 @@ contains
 
         else if (alpha == -zone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = zzero
             do j=irp(i), irp(i+1)-1
@@ -551,6 +575,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = zzero
             do j=irp(i), irp(i+1)-1
@@ -564,6 +589,7 @@ contains
       else if (beta == -zone) then
 
         if (alpha == zone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = zzero
             do j=irp(i), irp(i+1)-1
@@ -574,6 +600,7 @@ contains
 
         else if (alpha == -zone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = zzero
             do j=irp(i), irp(i+1)-1
@@ -584,6 +611,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = zzero
             do j=irp(i), irp(i+1)-1
@@ -597,6 +625,7 @@ contains
       else
 
         if (alpha == zone) then
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = zzero
             do j=irp(i), irp(i+1)-1
@@ -607,6 +636,7 @@ contains
 
         else if (alpha == -zone) then
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = zzero
             do j=irp(i), irp(i+1)-1
@@ -617,6 +647,7 @@ contains
 
         else
 
+          !$omp parallel do private(i,j,acc)
           do i=1,m
             acc(1:nc)  = zzero
             do j=irp(i), irp(i+1)-1
diff --git a/base/serial/impl/psb_z_mat_impl.F90 b/base/serial/impl/psb_z_mat_impl.F90
index 07616c05..422a664d 100644
--- a/base/serial/impl/psb_z_mat_impl.F90
+++ b/base/serial/impl/psb_z_mat_impl.F90
@@ -582,7 +582,7 @@ end subroutine psb_z_get_neigh
 
 
 
-subroutine psb_z_csall(nr,nc,a,info,nz)
+subroutine psb_z_csall(nr,nc,a,info,nz,type,mold)
   use psb_z_mat_mod, psb_protect_name => psb_z_csall
   use psb_z_base_mat_mod
   use psb_error_mod
@@ -591,6 +591,8 @@ subroutine psb_z_csall(nr,nc,a,info,nz)
   integer(psb_ipk_), intent(in)             :: nr,nc
   integer(psb_ipk_), intent(out)            :: info
   integer(psb_ipk_), intent(in), optional   :: nz
+  character(len=*), intent(in), optional    :: type
+  class(psb_z_base_sparse_mat), optional, intent(in) :: mold
 
   integer(psb_ipk_) :: err_act
   character(len=20)  :: name='csall'
@@ -601,7 +603,23 @@ subroutine psb_z_csall(nr,nc,a,info,nz)
   call a%free()
 
   info = psb_success_
-  allocate(psb_z_coo_sparse_mat :: a%a, stat=info)
+  if (present(mold)) then
+    allocate(a%a, stat=info, mold=mold)
+  else if (present(type)) then
+    select case (type)
+    case('CSR')
+      allocate(psb_z_csr_sparse_mat :: a%a, stat=info)
+    case('COO')
+      allocate(psb_z_coo_sparse_mat :: a%a, stat=info)
+    case('CSC')
+      allocate(psb_z_csc_sparse_mat :: a%a, stat=info)
+    case default
+      allocate(psb_z_coo_sparse_mat :: a%a, stat=info)
+    end select
+  else
+    allocate(psb_z_coo_sparse_mat :: a%a, stat=info)
+  end if
+
   if (info /= psb_success_) then
     info = psb_err_alloc_dealloc_
     call psb_errpush(info, name)
@@ -3381,7 +3399,7 @@ end subroutine psb_lz_get_neigh
 
 
 
-subroutine psb_lz_csall(nr,nc,a,info,nz)
+subroutine psb_lz_csall(nr,nc,a,info,nz,type,mold)
   use psb_z_mat_mod, psb_protect_name => psb_lz_csall
   use psb_z_base_mat_mod
   use psb_error_mod
@@ -3390,6 +3408,8 @@ subroutine psb_lz_csall(nr,nc,a,info,nz)
   integer(psb_lpk_), intent(in)             :: nr,nc
   integer(psb_ipk_), intent(out)            :: info
   integer(psb_lpk_), intent(in), optional   :: nz
+  character(len=*), intent(in), optional    :: type
+  class(psb_lz_base_sparse_mat), optional, intent(in) :: mold
 
   integer(psb_ipk_) :: err_act
   character(len=20)  :: name='csall'
@@ -3400,7 +3420,22 @@ subroutine psb_lz_csall(nr,nc,a,info,nz)
   call a%free()
 
   info = psb_success_
-  allocate(psb_lz_coo_sparse_mat :: a%a, stat=info)
+  if (present(mold)) then
+    allocate(a%a, stat=info, mold=mold)
+  else if (present(type)) then
+    select case (type)
+    case('CSR')
+      allocate(psb_lz_csr_sparse_mat :: a%a, stat=info)
+    case('COO')
+      allocate(psb_lz_coo_sparse_mat :: a%a, stat=info)
+    case('CSC')
+      allocate(psb_lz_csc_sparse_mat :: a%a, stat=info)
+    case default
+      allocate(psb_lz_coo_sparse_mat :: a%a, stat=info)
+    end select
+  else
+    allocate(psb_lz_coo_sparse_mat :: a%a, stat=info)
+  end if
   if (info /= psb_success_) then
     info = psb_err_alloc_dealloc_
     call psb_errpush(info, name)
diff --git a/base/serial/psb_cgelp.f90 b/base/serial/psb_cgelp.f90
deleted file mode 100644
index 6b3b2def..00000000
--- a/base/serial/psb_cgelp.f90
+++ /dev/null
@@ -1,246 +0,0 @@
-!   
-!                Parallel Sparse BLAS  version 3.5
-!      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!                         Alfredo Buttari        University of Rome Tor Vergata
-!   
-!    Redistribution and use in source and binary forms, with or without
-!    modification, are permitted provided that the following conditions
-!    are met:
-!      1. Redistributions of source code must retain the above copyright
-!         notice, this list of conditions and the following disclaimer.
-!      2. Redistributions in binary form must reproduce the above copyright
-!         notice, this list of conditions, and the following disclaimer in the
-!         documentation and/or other materials provided with the distribution.
-!      3. The name of the PSBLAS group or the names of its contributors may
-!         not be used to endorse or promote products derived from this
-!         software without specific written permission.
-!   
-!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
-!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-!    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
-! File: psb_cgelp.f90
-!
-!
-! Subroutine: psb_cgelp
-!             Apply a left permutation to a dense matrix
-!
-! Arguments:
-! trans    - character. 
-! iperm    - integer.
-! x        - real, dimension(:,:).
-! info     - integer.                 Return code.
-subroutine psb_cgelp(trans,iperm,x,info)
-  use psb_serial_mod, psb_protect_name => psb_cgelp
-  use psb_const_mod
-  use psb_error_mod
-  implicit none
-
-  complex(psb_spk_), intent(inout) ::  x(:,:)
-  integer(psb_ipk_), intent(in)           ::  iperm(:)
-  integer(psb_ipk_), intent(out)          ::  info
-  character, intent(in)         :: trans
-
-  ! local variables
-  complex(psb_spk_),allocatable :: temp(:)
-  integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j
-  integer(psb_ipk_), allocatable          :: itemp(:)
-  complex(psb_spk_),parameter   :: one=1
-  integer(psb_ipk_) :: debug_level, debug_unit
-
-  character(len=20)   :: name, ch_err
-  name = 'psb_cgelp'
-
-  if(psb_get_errstatus() /= 0) return 
-  info=psb_success_
-  call psb_erractionsave(err_act)
-  debug_unit  = psb_get_debug_unit()
-  debug_level = psb_get_debug_level()
-
-  i1sz    = size(x,dim=1)
-  i2sz    = size(x,dim=2)
-
-  if (debug_level >= psb_debug_serial_)&
-       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
-
-  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
-  if (info /= psb_success_) then
-    info=2040
-    call psb_errpush(info,name)
-    goto 9999
-  end if
-  itemp(:) = iperm(:) 
-
-  if (.not.psb_isaperm(i1sz,itemp)) then
-    info=psb_err_iarg_invalid_value_
-    int_err(1) = 1      
-    call psb_errpush(info,name,i_err=int_err)
-    goto 9999
-  endif
-  select case( psb_toupper(trans))
-  case('N') 
-    do j=1,i2sz
-      do i=1,i1sz
-        temp(i) = x(itemp(i),j)
-      end do
-      do i=1,i1sz
-        x(i,j) = temp(i) 
-      end do
-    end do
-  case('T')
-    do j=1,i2sz
-      do i=1,i1sz
-        temp(itemp(i)) = x(i,j)
-      end do
-      do i=1,i1sz
-        x(i,j) = temp(i) 
-      end do
-    end do
-  case default
-    info=psb_err_from_subroutine_
-    ch_err='dgelp'
-    call psb_errpush(info,name,a_err=ch_err)
-  end select
-
-  deallocate(temp,itemp)
-
-  call psb_erractionrestore(err_act)
-  return
-
-9999 call psb_error_handler(err_act)
-
-  return
-
-end subroutine psb_cgelp
-
-
-
-!!$ 
-!!$              Parallel Sparse BLAS  version 3.5
-!!$    (C) Copyright 2006-2018
-!!$                       Salvatore Filippone    University of Rome Tor Vergata
-!!$                       Alfredo Buttari        University of Rome Tor Vergata
-!!$ 
-!!$  Redistribution and use in source and binary forms, with or without
-!!$  modification, are permitted provided that the following conditions
-!!$  are met:
-!!$    1. Redistributions of source code must retain the above copyright
-!!$       notice, this list of conditions and the following disclaimer.
-!!$    2. Redistributions in binary form must reproduce the above copyright
-!!$       notice, this list of conditions, and the following disclaimer in the
-!!$       documentation and/or other materials provided with the distribution.
-!!$    3. The name of the PSBLAS group or the names of its contributors may
-!!$       not be used to endorse or promote products derived from this
-!!$       software without specific written permission.
-!!$ 
-!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
-!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-!!$  POSSIBILITY OF SUCH DAMAGE.
-!!$ 
-!!$  
-!
-!
-! Subroutine: psb_cgelpv
-!             Apply a left permutation to a dense matrix
-!
-! Arguments:
-! trans    - character. 
-! iperm    - integer.
-! x        - real, dimension(:).
-! info     - integer.                 Return code.
-subroutine psb_cgelpv(trans,iperm,x,info)
-  use psb_serial_mod, psb_protect_name => psb_cgelpv
-  use psb_const_mod
-  use psb_error_mod
-  implicit none
-
-  complex(psb_spk_), intent(inout) ::  x(:)
-  integer(psb_ipk_), intent(in)           ::  iperm(:)
-  integer(psb_ipk_), intent(out)          ::  info
-  character, intent(in)         ::  trans
-
-  ! local variables
-  integer(psb_ipk_) :: int_err(5), i1sz, err_act, i
-  complex(psb_spk_),allocatable ::  temp(:)
-  integer(psb_ipk_), allocatable          :: itemp(:)
-  complex(psb_spk_),parameter   :: one=1
-  integer(psb_ipk_) :: debug_level, debug_unit
-
-  character(len=20)   :: name, ch_err
-  name = 'psb_cgelpv'
-
-  if(psb_get_errstatus() /= 0) return 
-  info=psb_success_
-  call psb_erractionsave(err_act)
-  debug_unit  = psb_get_debug_unit()
-  debug_level = psb_get_debug_level()
-
-  i1sz = min(size(x),size(iperm))
-
-  if (debug_level >= psb_debug_serial_)&
-       & write(debug_unit,*)  trim(name),': size',i1sz
-  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
-  if (info /= psb_success_) then
-    info=2040
-    call psb_errpush(info,name)
-    goto 9999
-  end if
-  itemp(:) = iperm(:) 
-
-  if (.not.psb_isaperm(i1sz,itemp)) then
-    info=psb_err_iarg_invalid_value_
-    int_err(1) = 1      
-    call psb_errpush(info,name,i_err=int_err)
-    goto 9999
-  endif
-
-  select case( psb_toupper(trans))
-  case('N') 
-    do i=1,i1sz
-      temp(i) = x(itemp(i))
-    end do
-    do i=1,i1sz
-      x(i) = temp(i) 
-    end do
-  case('T')
-    do i=1,i1sz
-      temp(itemp(i)) = x(i)
-    end do
-    do i=1,i1sz
-      x(i) = temp(i) 
-    end do
-  case default
-    info=psb_err_from_subroutine_
-    ch_err='dgelp'
-    call psb_errpush(info,name,a_err=ch_err)
-  end select
-
-  deallocate(temp,itemp)
-
-  call psb_erractionrestore(err_act)
-  return
-
-9999 call psb_error_handler(err_act)
-
-  return
-
-end subroutine psb_cgelpv
-
diff --git a/base/serial/psb_dgelp.f90 b/base/serial/psb_dgelp.f90
deleted file mode 100644
index 7dc4b132..00000000
--- a/base/serial/psb_dgelp.f90
+++ /dev/null
@@ -1,246 +0,0 @@
-!   
-!                Parallel Sparse BLAS  version 3.5
-!      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!                         Alfredo Buttari        University of Rome Tor Vergata
-!   
-!    Redistribution and use in source and binary forms, with or without
-!    modification, are permitted provided that the following conditions
-!    are met:
-!      1. Redistributions of source code must retain the above copyright
-!         notice, this list of conditions and the following disclaimer.
-!      2. Redistributions in binary form must reproduce the above copyright
-!         notice, this list of conditions, and the following disclaimer in the
-!         documentation and/or other materials provided with the distribution.
-!      3. The name of the PSBLAS group or the names of its contributors may
-!         not be used to endorse or promote products derived from this
-!         software without specific written permission.
-!   
-!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
-!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-!    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
-! File: psb_dgelp.f90
-!
-!
-! Subroutine: psb_dgelp
-!             Apply a left permutation to a dense matrix
-!
-! Arguments:
-! trans    - character. 
-! iperm    - integer.
-! x        - real, dimension(:,:).
-! info     - integer.                 Return code.
-subroutine psb_dgelp(trans,iperm,x,info)
-  use psb_serial_mod, psb_protect_name => psb_dgelp
-  use psb_const_mod
-  use psb_error_mod
-  implicit none
-
-  real(psb_dpk_), intent(inout) ::  x(:,:)
-  integer(psb_ipk_), intent(in)           ::  iperm(:)
-  integer(psb_ipk_), intent(out)          ::  info
-  character, intent(in)         :: trans
-
-  ! local variables
-  real(psb_dpk_),allocatable :: temp(:)
-  integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j
-  integer(psb_ipk_), allocatable       :: itemp(:)
-  real(psb_dpk_),parameter   :: one=1
-  integer(psb_ipk_) :: debug_level, debug_unit
-
-  character(len=20)   :: name, ch_err
-  name = 'psb_dgelp'
-
-  if(psb_get_errstatus() /= 0) return 
-  info=psb_success_
-  call psb_erractionsave(err_act)
-  debug_unit  = psb_get_debug_unit()
-  debug_level = psb_get_debug_level()
-
-  i1sz    = size(x,dim=1)
-  i2sz    = size(x,dim=2)
-
-  if (debug_level >= psb_debug_serial_)&
-       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
-
-  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
-  if (info /= psb_success_) then
-    info=2040
-    call psb_errpush(info,name)
-    goto 9999
-  end if
-  itemp(:) = iperm(:) 
-
-  if (.not.psb_isaperm(i1sz,itemp)) then
-    info=psb_err_iarg_invalid_value_
-    int_err(1) = 1      
-    call psb_errpush(info,name,i_err=int_err)
-    goto 9999
-  endif
-  select case( psb_toupper(trans))
-  case('N') 
-    do j=1,i2sz
-      do i=1,i1sz
-        temp(i) = x(itemp(i),j)
-      end do
-      do i=1,i1sz
-        x(i,j) = temp(i) 
-      end do
-    end do
-  case('T')
-    do j=1,i2sz
-      do i=1,i1sz
-        temp(itemp(i)) = x(i,j)
-      end do
-      do i=1,i1sz
-        x(i,j) = temp(i) 
-      end do
-    end do
-  case default
-    info=psb_err_from_subroutine_
-    ch_err='dgelp'
-    call psb_errpush(info,name,a_err=ch_err)
-  end select
-
-  deallocate(temp,itemp)
-
-  call psb_erractionrestore(err_act)
-  return
-
-9999 call psb_error_handler(err_act)
-
-  return
-
-end subroutine psb_dgelp
-
-
-
-!!$ 
-!!$              Parallel Sparse BLAS  version 3.5
-!!$    (C) Copyright 2006-2018
-!!$                       Salvatore Filippone    University of Rome Tor Vergata
-!!$                       Alfredo Buttari        University of Rome Tor Vergata
-!!$ 
-!!$  Redistribution and use in source and binary forms, with or without
-!!$  modification, are permitted provided that the following conditions
-!!$  are met:
-!!$    1. Redistributions of source code must retain the above copyright
-!!$       notice, this list of conditions and the following disclaimer.
-!!$    2. Redistributions in binary form must reproduce the above copyright
-!!$       notice, this list of conditions, and the following disclaimer in the
-!!$       documentation and/or other materials provided with the distribution.
-!!$    3. The name of the PSBLAS group or the names of its contributors may
-!!$       not be used to endorse or promote products derived from this
-!!$       software without specific written permission.
-!!$ 
-!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
-!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-!!$  POSSIBILITY OF SUCH DAMAGE.
-!!$ 
-!!$  
-!
-!
-! Subroutine: psb_dgelpv
-!             Apply a left permutation to a dense matrix
-!
-! Arguments:
-! trans    - character. 
-! iperm    - integer.
-! x        - real, dimension(:).
-! info     - integer.                 Return code.
-subroutine psb_dgelpv(trans,iperm,x,info)
-  use psb_serial_mod, psb_protect_name => psb_dgelpv
-  use psb_const_mod
-  use psb_error_mod
-  implicit none
-
-  real(psb_dpk_), intent(inout) ::  x(:)
-  integer(psb_ipk_), intent(in)           ::  iperm(:)
-  integer(psb_ipk_), intent(out)          ::  info
-  character, intent(in)         ::  trans
-
-  ! local variables
-  integer(psb_ipk_) :: int_err(5), i1sz, err_act, i
-  real(psb_dpk_),allocatable ::  temp(:)
-  integer(psb_ipk_), allocatable       :: itemp(:)
-  real(psb_dpk_),parameter   :: one=1
-  integer(psb_ipk_) :: debug_level, debug_unit
-
-  character(len=20)   :: name, ch_err
-  name = 'psb_dgelpv'
-
-  if(psb_get_errstatus() /= 0) return 
-  info=psb_success_
-  call psb_erractionsave(err_act)
-  debug_unit  = psb_get_debug_unit()
-  debug_level = psb_get_debug_level()
-
-  i1sz = min(size(x),size(iperm))
-
-  if (debug_level >= psb_debug_serial_)&
-       & write(debug_unit,*)  trim(name),': size',i1sz
-  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
-  if (info /= psb_success_) then
-    info=2040
-    call psb_errpush(info,name)
-    goto 9999
-  end if
-  itemp(:) = iperm(:) 
-
-  if (.not.psb_isaperm(i1sz,itemp)) then
-    info=psb_err_iarg_invalid_value_
-    int_err(1) = 1      
-    call psb_errpush(info,name,i_err=int_err)
-    goto 9999
-  endif
-
-  select case( psb_toupper(trans))
-  case('N') 
-    do i=1,i1sz
-      temp(i) = x(itemp(i))
-    end do
-    do i=1,i1sz
-      x(i) = temp(i) 
-    end do
-  case('T')
-    do i=1,i1sz
-      temp(itemp(i)) = x(i)
-    end do
-    do i=1,i1sz
-      x(i) = temp(i) 
-    end do
-  case default
-    info=psb_err_from_subroutine_
-    ch_err='dgelp'
-    call psb_errpush(info,name,a_err=ch_err)
-  end select
-
-  deallocate(temp,itemp)
-
-  call psb_erractionrestore(err_act)
-  return
-
-9999 call psb_error_handler(err_act)
-
-  return
-
-end subroutine psb_dgelpv
-
diff --git a/base/serial/psb_sgelp.f90 b/base/serial/psb_sgelp.f90
deleted file mode 100644
index 73e46c71..00000000
--- a/base/serial/psb_sgelp.f90
+++ /dev/null
@@ -1,247 +0,0 @@
-!   
-!                Parallel Sparse BLAS  version 3.5
-!      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!                         Alfredo Buttari        University of Rome Tor Vergata
-!   
-!    Redistribution and use in source and binary forms, with or without
-!    modification, are permitted provided that the following conditions
-!    are met:
-!      1. Redistributions of source code must retain the above copyright
-!         notice, this list of conditions and the following disclaimer.
-!      2. Redistributions in binary form must reproduce the above copyright
-!         notice, this list of conditions, and the following disclaimer in the
-!         documentation and/or other materials provided with the distribution.
-!      3. The name of the PSBLAS group or the names of its contributors may
-!         not be used to endorse or promote products derived from this
-!         software without specific written permission.
-!   
-!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
-!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-!    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
-! File: psb_sgelp.f90
-!
-!
-! Subroutine: psb_sgelp
-!             Apply a left permutation to a dense matrix
-!
-! Arguments:
-! trans    - character. 
-! iperm    - integer.
-! x        - real, dimension(:,:).
-! info     - integer.                 Return code.
-subroutine psb_sgelp(trans,iperm,x,info)
-  use psb_serial_mod, psb_protect_name => psb_sgelp
-  use psb_const_mod
-  use psb_error_mod
-  implicit none
-
-  real(psb_spk_), intent(inout) ::  x(:,:)
-  integer(psb_ipk_), intent(in)           ::  iperm(:)
-  integer(psb_ipk_), intent(out)          ::  info
-  character, intent(in)         :: trans
-  ! local variables
-  integer(psb_ipk_) :: ctxt
-  real(psb_spk_),allocatable :: temp(:)
-  integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j
-  integer(psb_ipk_), allocatable       :: itemp(:)
-  real(psb_spk_),parameter   :: one=1
-  integer(psb_ipk_) :: debug_level, debug_unit
-
-  character(len=20)   :: name, ch_err
-  name = 'psb_sgelp'
-
-  if(psb_get_errstatus() /= 0) return 
-  info=psb_success_
-  call psb_erractionsave(err_act)
-  debug_unit  = psb_get_debug_unit()
-  debug_level = psb_get_debug_level()
-
-  i1sz    = size(x,dim=1)
-  i2sz    = size(x,dim=2)
-
-  if (debug_level >= psb_debug_serial_)&
-       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
-
-  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
-  if (info /= psb_success_) then
-    info=2040
-    call psb_errpush(info,name)
-    goto 9999
-  end if
-  itemp(:) = iperm(:) 
-
-  if (.not.psb_isaperm(i1sz,itemp)) then
-    info=psb_err_iarg_invalid_value_
-    int_err(1) = 1      
-    call psb_errpush(info,name,i_err=int_err)
-    goto 9999
-  endif
-  select case( psb_toupper(trans))
-  case('N') 
-    do j=1,i2sz
-      do i=1,i1sz
-        temp(i) = x(itemp(i),j)
-      end do
-      do i=1,i1sz
-        x(i,j) = temp(i) 
-      end do
-    end do
-  case('T')
-    do j=1,i2sz
-      do i=1,i1sz
-        temp(itemp(i)) = x(i,j)
-      end do
-      do i=1,i1sz
-        x(i,j) = temp(i) 
-      end do
-    end do
-  case default
-    info=psb_err_from_subroutine_
-    ch_err='dgelp'
-    call psb_errpush(info,name,a_err=ch_err)
-  end select
-
-  deallocate(temp,itemp)
-
-  call psb_erractionrestore(err_act)
-  return
-
-9999 call psb_error_handler(err_act)
-
-  return
-
-end subroutine psb_sgelp
-
-
-
-!!$ 
-!!$              Parallel Sparse BLAS  version 3.5
-!!$    (C) Copyright 2006-2018
-!!$                       Salvatore Filippone    University of Rome Tor Vergata
-!!$                       Alfredo Buttari        University of Rome Tor Vergata
-!!$ 
-!!$  Redistribution and use in source and binary forms, with or without
-!!$  modification, are permitted provided that the following conditions
-!!$  are met:
-!!$    1. Redistributions of source code must retain the above copyright
-!!$       notice, this list of conditions and the following disclaimer.
-!!$    2. Redistributions in binary form must reproduce the above copyright
-!!$       notice, this list of conditions, and the following disclaimer in the
-!!$       documentation and/or other materials provided with the distribution.
-!!$    3. The name of the PSBLAS group or the names of its contributors may
-!!$       not be used to endorse or promote products derived from this
-!!$       software without specific written permission.
-!!$ 
-!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
-!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-!!$  POSSIBILITY OF SUCH DAMAGE.
-!!$ 
-!!$  
-!
-!
-! Subroutine: psb_sgelpv
-!             Apply a left permutation to a dense matrix
-!
-! Arguments:
-! trans    - character. 
-! iperm    - integer.
-! x        - real, dimension(:).
-! info     - integer.                 Return code.
-subroutine psb_sgelpv(trans,iperm,x,info)
-  use psb_serial_mod, psb_protect_name => psb_sgelpv
-  use psb_const_mod
-  use psb_error_mod
-  implicit none
-
-  real(psb_spk_), intent(inout) ::  x(:)
-  integer(psb_ipk_), intent(in)           ::  iperm(:)
-  integer(psb_ipk_), intent(out)          ::  info
-  character, intent(in)         ::  trans
-
-  ! local variables
-  integer(psb_ipk_) :: ctxt
-  integer(psb_ipk_) :: int_err(5), i1sz, err_act, i
-  real(psb_spk_),allocatable  ::  temp(:)
-  integer(psb_ipk_), allocatable        :: itemp(:)
-  real(psb_spk_),parameter    :: one=1
-  integer(psb_ipk_) :: debug_level, debug_unit
-
-  character(len=20)   :: name, ch_err
-  name = 'psb_sgelpv'
-
-  if(psb_get_errstatus() /= 0) return 
-  info=psb_success_
-  call psb_erractionsave(err_act)
-  debug_unit  = psb_get_debug_unit()
-  debug_level = psb_get_debug_level()
-
-  i1sz = min(size(x),size(iperm))
-
-  if (debug_level >= psb_debug_serial_)&
-       & write(debug_unit,*)  trim(name),': size',i1sz
-  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
-  if (info /= psb_success_) then
-    info=2040
-    call psb_errpush(info,name)
-    goto 9999
-  end if
-  itemp(:) = iperm(:) 
-  
-  if (.not.psb_isaperm(i1sz,itemp)) then
-    info=psb_err_iarg_invalid_value_
-    int_err(1) = 1      
-    call psb_errpush(info,name,i_err=int_err)
-    goto 9999
-  endif
-
-  select case( psb_toupper(trans))
-  case('N') 
-    do i=1,i1sz
-      temp(i) = x(itemp(i))
-    end do
-    do i=1,i1sz
-      x(i) = temp(i) 
-    end do
-  case('T')
-    do i=1,i1sz
-      temp(itemp(i)) = x(i)
-    end do
-    do i=1,i1sz
-      x(i) = temp(i) 
-    end do
-  case default
-    info=psb_err_from_subroutine_
-    ch_err='dgelp'
-    call psb_errpush(info,name,a_err=ch_err)
-  end select
-
-  deallocate(temp,itemp)
-
-  call psb_erractionrestore(err_act)
-  return
-
-9999 call psb_error_handler(err_act)
-
-  return
-
-end subroutine psb_sgelpv
-
diff --git a/base/serial/psb_zgelp.f90 b/base/serial/psb_zgelp.f90
deleted file mode 100644
index 609e9657..00000000
--- a/base/serial/psb_zgelp.f90
+++ /dev/null
@@ -1,246 +0,0 @@
-!   
-!                Parallel Sparse BLAS  version 3.5
-!      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!                         Alfredo Buttari        University of Rome Tor Vergata
-!   
-!    Redistribution and use in source and binary forms, with or without
-!    modification, are permitted provided that the following conditions
-!    are met:
-!      1. Redistributions of source code must retain the above copyright
-!         notice, this list of conditions and the following disclaimer.
-!      2. Redistributions in binary form must reproduce the above copyright
-!         notice, this list of conditions, and the following disclaimer in the
-!         documentation and/or other materials provided with the distribution.
-!      3. The name of the PSBLAS group or the names of its contributors may
-!         not be used to endorse or promote products derived from this
-!         software without specific written permission.
-!   
-!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
-!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-!    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
-! File: psb_zgelp.f90
-!
-!
-! Subroutine: psb_zgelp
-!             Apply a left permutation to a dense matrix
-!
-! Arguments:
-! trans    - character. 
-! iperm    - integer.
-! x        - real, dimension(:,:).
-! info     - integer.                 Return code.
-subroutine psb_zgelp(trans,iperm,x,info)
-  use psb_serial_mod, psb_protect_name => psb_zgelp
-  use psb_const_mod
-  use psb_error_mod
-  implicit none
-
-  complex(psb_dpk_), intent(inout) ::  x(:,:)
-  integer(psb_ipk_), intent(in)           ::  iperm(:)
-  integer(psb_ipk_), intent(out)          ::  info
-  character, intent(in)         :: trans
-
-  ! local variables
-  complex(psb_dpk_),allocatable :: temp(:)
-  integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j
-  integer(psb_ipk_), allocatable          :: itemp(:)
-  complex(psb_dpk_),parameter   :: one=1
-  integer(psb_ipk_) :: debug_level, debug_unit
-
-  character(len=20)   :: name, ch_err
-  name = 'psb_zgelp'
-
-  if(psb_get_errstatus() /= 0) return 
-  info=psb_success_
-  call psb_erractionsave(err_act)
-  debug_unit  = psb_get_debug_unit()
-  debug_level = psb_get_debug_level()
-
-  i1sz    = size(x,dim=1)
-  i2sz    = size(x,dim=2)
-
-  if (debug_level >= psb_debug_serial_)&
-       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
-
-  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
-  if (info /= psb_success_) then
-    info=2040
-    call psb_errpush(info,name)
-    goto 9999
-  end if
-  itemp(:) = iperm(:) 
-
-  if (.not.psb_isaperm(i1sz,itemp)) then
-    info=psb_err_iarg_invalid_value_
-    int_err(1) = 1      
-    call psb_errpush(info,name,i_err=int_err)
-    goto 9999
-  endif
-  select case( psb_toupper(trans))
-  case('N') 
-    do j=1,i2sz
-      do i=1,i1sz
-        temp(i) = x(itemp(i),j)
-      end do
-      do i=1,i1sz
-        x(i,j) = temp(i) 
-      end do
-    end do
-  case('T')
-    do j=1,i2sz
-      do i=1,i1sz
-        temp(itemp(i)) = x(i,j)
-      end do
-      do i=1,i1sz
-        x(i,j) = temp(i) 
-      end do
-    end do
-  case default
-    info=psb_err_from_subroutine_
-    ch_err='dgelp'
-    call psb_errpush(info,name,a_err=ch_err)
-  end select
-
-  deallocate(temp,itemp)
-
-  call psb_erractionrestore(err_act)
-  return
-
-9999 call psb_error_handler(err_act)
-
-  return
-
-end subroutine psb_zgelp
-
-
-
-!!$ 
-!!$              Parallel Sparse BLAS  version 3.5
-!!$    (C) Copyright 2006-2018
-!!$                       Salvatore Filippone    University of Rome Tor Vergata
-!!$                       Alfredo Buttari        University of Rome Tor Vergata
-!!$ 
-!!$  Redistribution and use in source and binary forms, with or without
-!!$  modification, are permitted provided that the following conditions
-!!$  are met:
-!!$    1. Redistributions of source code must retain the above copyright
-!!$       notice, this list of conditions and the following disclaimer.
-!!$    2. Redistributions in binary form must reproduce the above copyright
-!!$       notice, this list of conditions, and the following disclaimer in the
-!!$       documentation and/or other materials provided with the distribution.
-!!$    3. The name of the PSBLAS group or the names of its contributors may
-!!$       not be used to endorse or promote products derived from this
-!!$       software without specific written permission.
-!!$ 
-!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
-!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-!!$  POSSIBILITY OF SUCH DAMAGE.
-!!$ 
-!!$  
-!
-!
-! Subroutine: psb_zgelpv
-!             Apply a left permutation to a dense matrix
-!
-! Arguments:
-! trans    - character. 
-! iperm    - integer.
-! x        - real, dimension(:).
-! info     - integer.                 Return code.
-subroutine psb_zgelpv(trans,iperm,x,info)
-  use psb_serial_mod, psb_protect_name => psb_zgelpv
-  use psb_const_mod
-  use psb_error_mod
-  implicit none
-
-  complex(psb_dpk_), intent(inout) ::  x(:)
-  integer(psb_ipk_), intent(in)           ::  iperm(:)
-  integer(psb_ipk_), intent(out)          ::  info
-  character, intent(in)         ::  trans
-
-  ! local variables
-  integer(psb_ipk_) :: int_err(5), i1sz, err_act, i
-  complex(psb_dpk_),allocatable ::  temp(:)
-  integer(psb_ipk_), allocatable          :: itemp(:)
-  complex(psb_dpk_),parameter   :: one=1
-  integer(psb_ipk_) :: debug_level, debug_unit
-
-  character(len=20)   :: name, ch_err
-  name = 'psb_zgelpv'
-
-  if(psb_get_errstatus() /= 0) return 
-  info=psb_success_
-  call psb_erractionsave(err_act)
-  debug_unit  = psb_get_debug_unit()
-  debug_level = psb_get_debug_level()
-
-  i1sz = min(size(x),size(iperm))
-
-  if (debug_level >= psb_debug_serial_)&
-       & write(debug_unit,*)  trim(name),': size',i1sz
-  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
-  if (info /= psb_success_) then
-    info=2040
-    call psb_errpush(info,name)
-    goto 9999
-  end if
-  itemp(:) = iperm(:) 
-
-  if (.not.psb_isaperm(i1sz,itemp)) then
-    info=psb_err_iarg_invalid_value_
-    int_err(1) = 1      
-    call psb_errpush(info,name,i_err=int_err)
-    goto 9999
-  endif
-
-  select case( psb_toupper(trans))
-  case('N') 
-    do i=1,i1sz
-      temp(i) = x(itemp(i))
-    end do
-    do i=1,i1sz
-      x(i) = temp(i) 
-    end do
-  case('T')
-    do i=1,i1sz
-      temp(itemp(i)) = x(i)
-    end do
-    do i=1,i1sz
-      x(i) = temp(i) 
-    end do
-  case default
-    info=psb_err_from_subroutine_
-    ch_err='dgelp'
-    call psb_errpush(info,name,a_err=ch_err)
-  end select
-
-  deallocate(temp,itemp)
-
-  call psb_erractionrestore(err_act)
-  return
-
-9999 call psb_error_handler(err_act)
-
-  return
-
-end subroutine psb_zgelpv
-
diff --git a/base/serial/psi_c_serial_impl.f90 b/base/serial/psi_c_serial_impl.F90
similarity index 50%
rename from base/serial/psi_c_serial_impl.f90
rename to base/serial/psi_c_serial_impl.F90
index 2120683d..1da2ce6e 100644
--- a/base/serial/psi_c_serial_impl.f90
+++ b/base/serial/psi_c_serial_impl.F90
@@ -29,6 +29,407 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !
 !
+subroutine psb_m_cgelp(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_m_cgelp
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  complex(psb_spk_), intent(inout) ::  x(:,:)
+  integer(psb_mpk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         :: trans
+
+  ! local variables
+  complex(psb_spk_),allocatable :: temp(:)
+  integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j
+  integer(psb_ipk_), allocatable          :: itemp(:)
+  complex(psb_spk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_cgelp'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz    = size(x,dim=1)
+  i2sz    = size(x,dim=2)
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
+
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+  select case( psb_toupper(trans))
+  case('N') 
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(i) = x(itemp(i),j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case('T')
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(itemp(i)) = x(i,j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='cgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_m_cgelp
+
+
+
+!!$ 
+!!$              Parallel Sparse BLAS  version 3.5
+!!$    (C) Copyright 2006-2018
+!!$                       Salvatore Filippone    University of Rome Tor Vergata
+!!$                       Alfredo Buttari        University of Rome Tor Vergata
+!!$ 
+!!$  Redistribution and use in source and binary forms, with or without
+!!$  modification, are permitted provided that the following conditions
+!!$  are met:
+!!$    1. Redistributions of source code must retain the above copyright
+!!$       notice, this list of conditions and the following disclaimer.
+!!$    2. Redistributions in binary form must reproduce the above copyright
+!!$       notice, this list of conditions, and the following disclaimer in the
+!!$       documentation and/or other materials provided with the distribution.
+!!$    3. The name of the PSBLAS group or the names of its contributors may
+!!$       not be used to endorse or promote products derived from this
+!!$       software without specific written permission.
+!!$ 
+!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!!$  POSSIBILITY OF SUCH DAMAGE.
+!!$ 
+!!$  
+!
+!
+! Subroutine: psb_cgelpv
+!             Apply a left permutation to a dense matrix
+!
+! Arguments:
+! trans    - character. 
+! iperm    - integer.
+! x        - real, dimension(:).
+! info     - integer.                 Return code.
+subroutine psb_m_cgelpv(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_m_cgelpv
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  complex(psb_spk_), intent(inout) ::  x(:)
+  integer(psb_mpk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         ::  trans
+
+  ! local variables
+  integer(psb_ipk_) :: int_err(5), i1sz, err_act, i
+  complex(psb_spk_),allocatable ::  temp(:)
+  integer(psb_ipk_), allocatable          :: itemp(:)
+  complex(psb_spk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_cgelpv'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz = min(size(x),size(iperm))
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+
+  select case( psb_toupper(trans))
+  case('N') 
+    do i=1,i1sz
+      temp(i) = x(itemp(i))
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case('T')
+    do i=1,i1sz
+      temp(itemp(i)) = x(i)
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='cgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_m_cgelpv
+
+subroutine psb_e_cgelp(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_e_cgelp
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  complex(psb_spk_), intent(inout) ::  x(:,:)
+  integer(psb_epk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         :: trans
+
+  ! local variables
+  complex(psb_spk_),allocatable :: temp(:)
+  integer(psb_ipk_) :: int_err(5), err_act
+  integer(psb_epk_) :: i1sz, i2sz, i, j
+  integer(psb_epk_), allocatable          :: itemp(:)
+  complex(psb_spk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_cgelp'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz    = size(x,dim=1)
+  i2sz    = size(x,dim=2)
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
+
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+  select case( psb_toupper(trans))
+  case('N') 
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(i) = x(itemp(i),j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case('T')
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(itemp(i)) = x(i,j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='cgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_e_cgelp
+
+
+
+!!$ 
+!!$              Parallel Sparse BLAS  version 3.5
+!!$    (C) Copyright 2006-2018
+!!$                       Salvatore Filippone    University of Rome Tor Vergata
+!!$                       Alfredo Buttari        University of Rome Tor Vergata
+!!$ 
+!!$  Redistribution and use in source and binary forms, with or without
+!!$  modification, are permitted provided that the following conditions
+!!$  are met:
+!!$    1. Redistributions of source code must retain the above copyright
+!!$       notice, this list of conditions and the following disclaimer.
+!!$    2. Redistributions in binary form must reproduce the above copyright
+!!$       notice, this list of conditions, and the following disclaimer in the
+!!$       documentation and/or other materials provided with the distribution.
+!!$    3. The name of the PSBLAS group or the names of its contributors may
+!!$       not be used to endorse or promote products derived from this
+!!$       software without specific written permission.
+!!$ 
+!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!!$  POSSIBILITY OF SUCH DAMAGE.
+!!$ 
+!!$  
+!
+!
+! Subroutine: psb_cgelpv
+!             Apply a left permutation to a dense matrix
+!
+! Arguments:
+! trans    - character. 
+! iperm    - integer.
+! x        - real, dimension(:).
+! info     - integer.                 Return code.
+subroutine psb_e_cgelpv(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_e_cgelpv
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  complex(psb_spk_), intent(inout) ::  x(:)
+  integer(psb_epk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         ::  trans
+
+  ! local variables
+  integer(psb_ipk_) :: int_err(5), err_act
+  complex(psb_spk_),allocatable ::  temp(:)
+  integer(psb_epk_) :: i1sz, i
+  integer(psb_epk_), allocatable          :: itemp(:)
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20) :: name
+
+  name = 'psb_cgelp'
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz = min(size(x),size(iperm))
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+
+  select case( psb_toupper(trans))
+  case('N') 
+    do i=1,i1sz
+      temp(i) = x(itemp(i))
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case('T')
+    do i=1,i1sz
+      temp(itemp(i)) = x(i)
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='cgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_e_cgelpv
+
 subroutine psi_caxpby(m,n,alpha, x, beta, y, info)
 
   use psb_const_mod
@@ -40,9 +441,9 @@ subroutine psi_caxpby(m,n,alpha, x, beta, y, info)
   complex(psb_spk_), intent (in)       ::  alpha, beta
   integer(psb_ipk_), intent(out)     :: info
   integer(psb_ipk_) :: err_act
-  integer(psb_ipk_) :: lx, ly
+  integer(psb_ipk_) :: lx, ly, i
   integer(psb_ipk_) :: ierr(5)
-  character(len=20)        :: name, ch_err
+  character(len=20) :: name, ch_err
 
   name='psb_geaxpby'
   info=psb_success_
@@ -101,7 +502,8 @@ subroutine psi_caxpbyv(m,alpha, x, beta, y, info)
   integer(psb_ipk_) :: err_act
   integer(psb_ipk_) :: lx, ly
   integer(psb_ipk_) :: ierr(5)
-  character(len=20)        :: name, ch_err
+  integer(psb_ipk_) :: i
+  character(len=20) :: name, ch_err
 
   name='psb_geaxpby'
   info=psb_success_
@@ -131,7 +533,106 @@ subroutine psi_caxpbyv(m,alpha, x, beta, y, info)
     goto 9999
   end if
 
-  if (m>0) call caxpby(m,ione,alpha,x,lx,beta,y,ly,info)
+!  if (m>0) call caxpby(m,ione,alpha,x,lx,beta,y,ly,info)
+
+  if (alpha.eq.czero) then
+    if (beta.eq.czero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = czero
+      enddo
+    else if (beta.eq.cone) then
+      !
+      !        Do nothing!
+      !
+
+    else if (beta.eq.-cone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) =  beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.cone) then
+
+    if (beta.eq.czero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i)
+      enddo
+    else if (beta.eq.cone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-cone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i) + beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.-cone) then
+
+    if (beta.eq.czero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i)
+      enddo
+    else if (beta.eq.cone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i) + y(i)
+      enddo
+    else if (beta.eq.-cone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i) + beta*y(i)
+      enddo
+    endif
+
+  else
+
+    if (beta.eq.czero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i)
+      enddo
+    else if (beta.eq.cone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i) + y(i)
+      enddo
+    else if (beta.eq.-cone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i) + beta*y(i)
+      enddo
+    endif
+
+  endif
+
 
   call psb_erractionrestore(err_act)
   return
@@ -154,7 +655,7 @@ subroutine psi_caxpbyv2(m,alpha, x, beta, y, z, info)
   complex(psb_spk_), intent (in)       :: alpha, beta
   integer(psb_ipk_), intent(out)     :: info
   integer(psb_ipk_) :: err_act
-  integer(psb_ipk_) :: lx, ly, lz
+  integer(psb_ipk_) :: lx, ly, lz, i
   integer(psb_ipk_) :: ierr(5)
   character(len=20)        :: name, ch_err
 
@@ -193,7 +694,105 @@ subroutine psi_caxpbyv2(m,alpha, x, beta, y, z, info)
     goto 9999
   end if
 
-  if (m>0) call caxpbyv2(m,ione,alpha,x,lx,beta,y,ly,z,lz,info)
+  if (alpha.eq.czero) then
+    if (beta.eq.czero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = czero
+      enddo
+    else if (beta.eq.cone) then
+      !
+      !        Do nothing!
+      !
+
+    else if (beta.eq.-cone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) =  beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.cone) then
+
+    if (beta.eq.czero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = x(i)
+      enddo
+    else if (beta.eq.cone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-cone) then
+      !$omp parallel do private(i)
+      do i=1,m
+          Z(i) = x(i) - y(i)
+        enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = x(i) + beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.-cone) then
+
+    if (beta.eq.czero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i)
+      enddo
+    else if (beta.eq.cone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-cone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i) + beta*y(i)
+      enddo
+    endif
+
+  else
+
+    if (beta.eq.czero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = alpha*x(i)
+      enddo
+    else if (beta.eq.cone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = alpha*x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-cone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = alpha*x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+          Z(i) = alpha*x(i) + beta*y(i)
+      enddo
+    endif
+
+  endif
 
   call psb_erractionrestore(err_act)
   return
@@ -541,6 +1140,7 @@ subroutine  caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
   if (alpha.eq.czero) then
     if (beta.eq.czero) then
       do j=1, n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = czero
         enddo
@@ -552,12 +1152,14 @@ subroutine  caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-cone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) =  beta*y(i,j)
         enddo
@@ -568,12 +1170,14 @@ subroutine  caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     if (beta.eq.czero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j)
         enddo
       enddo
     else if (beta.eq.cone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j) + y(i,j)
         enddo
@@ -581,12 +1185,14 @@ subroutine  caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-cone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j) + beta*y(i,j)
         enddo
@@ -597,12 +1203,14 @@ subroutine  caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     if (beta.eq.czero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j)
         enddo
       enddo
     else if (beta.eq.cone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j) + y(i,j)
         enddo
@@ -610,12 +1218,14 @@ subroutine  caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-cone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j) + beta*y(i,j)
         enddo
@@ -626,12 +1236,14 @@ subroutine  caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     if (beta.eq.czero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j)
         enddo
       enddo
     else if (beta.eq.cone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j) + y(i,j)
         enddo
@@ -639,12 +1251,14 @@ subroutine  caxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-cone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j) + beta*y(i,j)
         enddo
@@ -730,12 +1344,14 @@ subroutine  caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-cone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) =  beta*y(i,j)
         enddo
@@ -746,12 +1362,14 @@ subroutine  caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     if (beta.eq.czero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j)
         enddo
       enddo
     else if (beta.eq.cone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j) + y(i,j)
         enddo
@@ -759,12 +1377,14 @@ subroutine  caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-cone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j) + beta*y(i,j)
         enddo
@@ -775,12 +1395,14 @@ subroutine  caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     if (beta.eq.czero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j)
         enddo
       enddo
     else if (beta.eq.cone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j) + y(i,j)
         enddo
@@ -788,12 +1410,14 @@ subroutine  caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-cone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j) + beta*y(i,j)
         enddo
@@ -804,12 +1428,14 @@ subroutine  caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     if (beta.eq.czero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j)
         enddo
       enddo
     else if (beta.eq.cone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j) + y(i,j)
         enddo
@@ -817,12 +1443,14 @@ subroutine  caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-cone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j) + beta*y(i,j)
         enddo
diff --git a/base/serial/psi_d_serial_impl.f90 b/base/serial/psi_d_serial_impl.F90
similarity index 50%
rename from base/serial/psi_d_serial_impl.f90
rename to base/serial/psi_d_serial_impl.F90
index 0d80f459..8c65b349 100644
--- a/base/serial/psi_d_serial_impl.f90
+++ b/base/serial/psi_d_serial_impl.F90
@@ -29,6 +29,407 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !
 !
+subroutine psb_m_dgelp(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_m_dgelp
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  real(psb_dpk_), intent(inout) ::  x(:,:)
+  integer(psb_mpk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         :: trans
+
+  ! local variables
+  real(psb_dpk_),allocatable :: temp(:)
+  integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j
+  integer(psb_ipk_), allocatable          :: itemp(:)
+  real(psb_dpk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_dgelp'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz    = size(x,dim=1)
+  i2sz    = size(x,dim=2)
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
+
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+  select case( psb_toupper(trans))
+  case('N') 
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(i) = x(itemp(i),j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case('T')
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(itemp(i)) = x(i,j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='dgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_m_dgelp
+
+
+
+!!$ 
+!!$              Parallel Sparse BLAS  version 3.5
+!!$    (C) Copyright 2006-2018
+!!$                       Salvatore Filippone    University of Rome Tor Vergata
+!!$                       Alfredo Buttari        University of Rome Tor Vergata
+!!$ 
+!!$  Redistribution and use in source and binary forms, with or without
+!!$  modification, are permitted provided that the following conditions
+!!$  are met:
+!!$    1. Redistributions of source code must retain the above copyright
+!!$       notice, this list of conditions and the following disclaimer.
+!!$    2. Redistributions in binary form must reproduce the above copyright
+!!$       notice, this list of conditions, and the following disclaimer in the
+!!$       documentation and/or other materials provided with the distribution.
+!!$    3. The name of the PSBLAS group or the names of its contributors may
+!!$       not be used to endorse or promote products derived from this
+!!$       software without specific written permission.
+!!$ 
+!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!!$  POSSIBILITY OF SUCH DAMAGE.
+!!$ 
+!!$  
+!
+!
+! Subroutine: psb_dgelpv
+!             Apply a left permutation to a dense matrix
+!
+! Arguments:
+! trans    - character. 
+! iperm    - integer.
+! x        - real, dimension(:).
+! info     - integer.                 Return code.
+subroutine psb_m_dgelpv(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_m_dgelpv
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  real(psb_dpk_), intent(inout) ::  x(:)
+  integer(psb_mpk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         ::  trans
+
+  ! local variables
+  integer(psb_ipk_) :: int_err(5), i1sz, err_act, i
+  real(psb_dpk_),allocatable ::  temp(:)
+  integer(psb_ipk_), allocatable          :: itemp(:)
+  real(psb_dpk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_dgelpv'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz = min(size(x),size(iperm))
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+
+  select case( psb_toupper(trans))
+  case('N') 
+    do i=1,i1sz
+      temp(i) = x(itemp(i))
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case('T')
+    do i=1,i1sz
+      temp(itemp(i)) = x(i)
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='dgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_m_dgelpv
+
+subroutine psb_e_dgelp(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_e_dgelp
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  real(psb_dpk_), intent(inout) ::  x(:,:)
+  integer(psb_epk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         :: trans
+
+  ! local variables
+  real(psb_dpk_),allocatable :: temp(:)
+  integer(psb_ipk_) :: int_err(5), err_act
+  integer(psb_epk_) :: i1sz, i2sz, i, j
+  integer(psb_epk_), allocatable          :: itemp(:)
+  real(psb_dpk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_dgelp'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz    = size(x,dim=1)
+  i2sz    = size(x,dim=2)
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
+
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+  select case( psb_toupper(trans))
+  case('N') 
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(i) = x(itemp(i),j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case('T')
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(itemp(i)) = x(i,j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='dgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_e_dgelp
+
+
+
+!!$ 
+!!$              Parallel Sparse BLAS  version 3.5
+!!$    (C) Copyright 2006-2018
+!!$                       Salvatore Filippone    University of Rome Tor Vergata
+!!$                       Alfredo Buttari        University of Rome Tor Vergata
+!!$ 
+!!$  Redistribution and use in source and binary forms, with or without
+!!$  modification, are permitted provided that the following conditions
+!!$  are met:
+!!$    1. Redistributions of source code must retain the above copyright
+!!$       notice, this list of conditions and the following disclaimer.
+!!$    2. Redistributions in binary form must reproduce the above copyright
+!!$       notice, this list of conditions, and the following disclaimer in the
+!!$       documentation and/or other materials provided with the distribution.
+!!$    3. The name of the PSBLAS group or the names of its contributors may
+!!$       not be used to endorse or promote products derived from this
+!!$       software without specific written permission.
+!!$ 
+!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!!$  POSSIBILITY OF SUCH DAMAGE.
+!!$ 
+!!$  
+!
+!
+! Subroutine: psb_dgelpv
+!             Apply a left permutation to a dense matrix
+!
+! Arguments:
+! trans    - character. 
+! iperm    - integer.
+! x        - real, dimension(:).
+! info     - integer.                 Return code.
+subroutine psb_e_dgelpv(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_e_dgelpv
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  real(psb_dpk_), intent(inout) ::  x(:)
+  integer(psb_epk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         ::  trans
+
+  ! local variables
+  integer(psb_ipk_) :: int_err(5), err_act
+  real(psb_dpk_),allocatable ::  temp(:)
+  integer(psb_epk_) :: i1sz, i
+  integer(psb_epk_), allocatable          :: itemp(:)
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20) :: name
+
+  name = 'psb_dgelp'
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz = min(size(x),size(iperm))
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+
+  select case( psb_toupper(trans))
+  case('N') 
+    do i=1,i1sz
+      temp(i) = x(itemp(i))
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case('T')
+    do i=1,i1sz
+      temp(itemp(i)) = x(i)
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='dgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_e_dgelpv
+
 subroutine psi_daxpby(m,n,alpha, x, beta, y, info)
 
   use psb_const_mod
@@ -40,9 +441,9 @@ subroutine psi_daxpby(m,n,alpha, x, beta, y, info)
   real(psb_dpk_), intent (in)       ::  alpha, beta
   integer(psb_ipk_), intent(out)     :: info
   integer(psb_ipk_) :: err_act
-  integer(psb_ipk_) :: lx, ly
+  integer(psb_ipk_) :: lx, ly, i
   integer(psb_ipk_) :: ierr(5)
-  character(len=20)        :: name, ch_err
+  character(len=20) :: name, ch_err
 
   name='psb_geaxpby'
   info=psb_success_
@@ -101,7 +502,8 @@ subroutine psi_daxpbyv(m,alpha, x, beta, y, info)
   integer(psb_ipk_) :: err_act
   integer(psb_ipk_) :: lx, ly
   integer(psb_ipk_) :: ierr(5)
-  character(len=20)        :: name, ch_err
+  integer(psb_ipk_) :: i
+  character(len=20) :: name, ch_err
 
   name='psb_geaxpby'
   info=psb_success_
@@ -131,7 +533,106 @@ subroutine psi_daxpbyv(m,alpha, x, beta, y, info)
     goto 9999
   end if
 
-  if (m>0) call daxpby(m,ione,alpha,x,lx,beta,y,ly,info)
+!  if (m>0) call daxpby(m,ione,alpha,x,lx,beta,y,ly,info)
+
+  if (alpha.eq.dzero) then
+    if (beta.eq.dzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = dzero
+      enddo
+    else if (beta.eq.done) then
+      !
+      !        Do nothing!
+      !
+
+    else if (beta.eq.-done) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) =  beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.done) then
+
+    if (beta.eq.dzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i)
+      enddo
+    else if (beta.eq.done) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-done) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i) + beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.-done) then
+
+    if (beta.eq.dzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i)
+      enddo
+    else if (beta.eq.done) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i) + y(i)
+      enddo
+    else if (beta.eq.-done) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i) + beta*y(i)
+      enddo
+    endif
+
+  else
+
+    if (beta.eq.dzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i)
+      enddo
+    else if (beta.eq.done) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i) + y(i)
+      enddo
+    else if (beta.eq.-done) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i) + beta*y(i)
+      enddo
+    endif
+
+  endif
+
 
   call psb_erractionrestore(err_act)
   return
@@ -154,7 +655,7 @@ subroutine psi_daxpbyv2(m,alpha, x, beta, y, z, info)
   real(psb_dpk_), intent (in)       :: alpha, beta
   integer(psb_ipk_), intent(out)     :: info
   integer(psb_ipk_) :: err_act
-  integer(psb_ipk_) :: lx, ly, lz
+  integer(psb_ipk_) :: lx, ly, lz, i
   integer(psb_ipk_) :: ierr(5)
   character(len=20)        :: name, ch_err
 
@@ -193,7 +694,105 @@ subroutine psi_daxpbyv2(m,alpha, x, beta, y, z, info)
     goto 9999
   end if
 
-  if (m>0) call daxpbyv2(m,ione,alpha,x,lx,beta,y,ly,z,lz,info)
+  if (alpha.eq.dzero) then
+    if (beta.eq.dzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = dzero
+      enddo
+    else if (beta.eq.done) then
+      !
+      !        Do nothing!
+      !
+
+    else if (beta.eq.-done) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) =  beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.done) then
+
+    if (beta.eq.dzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = x(i)
+      enddo
+    else if (beta.eq.done) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-done) then
+      !$omp parallel do private(i)
+      do i=1,m
+          Z(i) = x(i) - y(i)
+        enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = x(i) + beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.-done) then
+
+    if (beta.eq.dzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i)
+      enddo
+    else if (beta.eq.done) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-done) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i) + beta*y(i)
+      enddo
+    endif
+
+  else
+
+    if (beta.eq.dzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = alpha*x(i)
+      enddo
+    else if (beta.eq.done) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = alpha*x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-done) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = alpha*x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+          Z(i) = alpha*x(i) + beta*y(i)
+      enddo
+    endif
+
+  endif
 
   call psb_erractionrestore(err_act)
   return
@@ -541,6 +1140,7 @@ subroutine  daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
   if (alpha.eq.dzero) then
     if (beta.eq.dzero) then
       do j=1, n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = dzero
         enddo
@@ -552,12 +1152,14 @@ subroutine  daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-done) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) =  beta*y(i,j)
         enddo
@@ -568,12 +1170,14 @@ subroutine  daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     if (beta.eq.dzero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j)
         enddo
       enddo
     else if (beta.eq.done) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j) + y(i,j)
         enddo
@@ -581,12 +1185,14 @@ subroutine  daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-done) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j) + beta*y(i,j)
         enddo
@@ -597,12 +1203,14 @@ subroutine  daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     if (beta.eq.dzero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j)
         enddo
       enddo
     else if (beta.eq.done) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j) + y(i,j)
         enddo
@@ -610,12 +1218,14 @@ subroutine  daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-done) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j) + beta*y(i,j)
         enddo
@@ -626,12 +1236,14 @@ subroutine  daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     if (beta.eq.dzero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j)
         enddo
       enddo
     else if (beta.eq.done) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j) + y(i,j)
         enddo
@@ -639,12 +1251,14 @@ subroutine  daxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-done) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j) + beta*y(i,j)
         enddo
@@ -730,12 +1344,14 @@ subroutine  daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-done) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) =  beta*y(i,j)
         enddo
@@ -746,12 +1362,14 @@ subroutine  daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     if (beta.eq.dzero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j)
         enddo
       enddo
     else if (beta.eq.done) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j) + y(i,j)
         enddo
@@ -759,12 +1377,14 @@ subroutine  daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-done) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j) + beta*y(i,j)
         enddo
@@ -775,12 +1395,14 @@ subroutine  daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     if (beta.eq.dzero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j)
         enddo
       enddo
     else if (beta.eq.done) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j) + y(i,j)
         enddo
@@ -788,12 +1410,14 @@ subroutine  daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-done) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j) + beta*y(i,j)
         enddo
@@ -804,12 +1428,14 @@ subroutine  daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     if (beta.eq.dzero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j)
         enddo
       enddo
     else if (beta.eq.done) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j) + y(i,j)
         enddo
@@ -817,12 +1443,14 @@ subroutine  daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-done) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j) + beta*y(i,j)
         enddo
diff --git a/base/serial/psi_e_serial_impl.f90 b/base/serial/psi_e_serial_impl.F90
similarity index 60%
rename from base/serial/psi_e_serial_impl.f90
rename to base/serial/psi_e_serial_impl.F90
index 0595d87e..988bad52 100644
--- a/base/serial/psi_e_serial_impl.f90
+++ b/base/serial/psi_e_serial_impl.F90
@@ -29,6 +29,407 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !
 !
+subroutine psb_m_egelp(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_m_egelp
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  integer(psb_epk_), intent(inout) ::  x(:,:)
+  integer(psb_mpk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         :: trans
+
+  ! local variables
+  integer(psb_epk_),allocatable :: temp(:)
+  integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j
+  integer(psb_ipk_), allocatable          :: itemp(:)
+  integer(psb_epk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_egelp'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz    = size(x,dim=1)
+  i2sz    = size(x,dim=2)
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
+
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+  select case( psb_toupper(trans))
+  case('N') 
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(i) = x(itemp(i),j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case('T')
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(itemp(i)) = x(i,j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='egelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_m_egelp
+
+
+
+!!$ 
+!!$              Parallel Sparse BLAS  version 3.5
+!!$    (C) Copyright 2006-2018
+!!$                       Salvatore Filippone    University of Rome Tor Vergata
+!!$                       Alfredo Buttari        University of Rome Tor Vergata
+!!$ 
+!!$  Redistribution and use in source and binary forms, with or without
+!!$  modification, are permitted provided that the following conditions
+!!$  are met:
+!!$    1. Redistributions of source code must retain the above copyright
+!!$       notice, this list of conditions and the following disclaimer.
+!!$    2. Redistributions in binary form must reproduce the above copyright
+!!$       notice, this list of conditions, and the following disclaimer in the
+!!$       documentation and/or other materials provided with the distribution.
+!!$    3. The name of the PSBLAS group or the names of its contributors may
+!!$       not be used to endorse or promote products derived from this
+!!$       software without specific written permission.
+!!$ 
+!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!!$  POSSIBILITY OF SUCH DAMAGE.
+!!$ 
+!!$  
+!
+!
+! Subroutine: psb_egelpv
+!             Apply a left permutation to a dense matrix
+!
+! Arguments:
+! trans    - character. 
+! iperm    - integer.
+! x        - real, dimension(:).
+! info     - integer.                 Return code.
+subroutine psb_m_egelpv(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_m_egelpv
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  integer(psb_epk_), intent(inout) ::  x(:)
+  integer(psb_mpk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         ::  trans
+
+  ! local variables
+  integer(psb_ipk_) :: int_err(5), i1sz, err_act, i
+  integer(psb_epk_),allocatable ::  temp(:)
+  integer(psb_ipk_), allocatable          :: itemp(:)
+  integer(psb_epk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_egelpv'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz = min(size(x),size(iperm))
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+
+  select case( psb_toupper(trans))
+  case('N') 
+    do i=1,i1sz
+      temp(i) = x(itemp(i))
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case('T')
+    do i=1,i1sz
+      temp(itemp(i)) = x(i)
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='egelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_m_egelpv
+
+subroutine psb_e_egelp(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_e_egelp
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  integer(psb_epk_), intent(inout) ::  x(:,:)
+  integer(psb_epk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         :: trans
+
+  ! local variables
+  integer(psb_epk_),allocatable :: temp(:)
+  integer(psb_ipk_) :: int_err(5), err_act
+  integer(psb_epk_) :: i1sz, i2sz, i, j
+  integer(psb_epk_), allocatable          :: itemp(:)
+  integer(psb_epk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_egelp'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz    = size(x,dim=1)
+  i2sz    = size(x,dim=2)
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
+
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+  select case( psb_toupper(trans))
+  case('N') 
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(i) = x(itemp(i),j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case('T')
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(itemp(i)) = x(i,j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='egelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_e_egelp
+
+
+
+!!$ 
+!!$              Parallel Sparse BLAS  version 3.5
+!!$    (C) Copyright 2006-2018
+!!$                       Salvatore Filippone    University of Rome Tor Vergata
+!!$                       Alfredo Buttari        University of Rome Tor Vergata
+!!$ 
+!!$  Redistribution and use in source and binary forms, with or without
+!!$  modification, are permitted provided that the following conditions
+!!$  are met:
+!!$    1. Redistributions of source code must retain the above copyright
+!!$       notice, this list of conditions and the following disclaimer.
+!!$    2. Redistributions in binary form must reproduce the above copyright
+!!$       notice, this list of conditions, and the following disclaimer in the
+!!$       documentation and/or other materials provided with the distribution.
+!!$    3. The name of the PSBLAS group or the names of its contributors may
+!!$       not be used to endorse or promote products derived from this
+!!$       software without specific written permission.
+!!$ 
+!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!!$  POSSIBILITY OF SUCH DAMAGE.
+!!$ 
+!!$  
+!
+!
+! Subroutine: psb_egelpv
+!             Apply a left permutation to a dense matrix
+!
+! Arguments:
+! trans    - character. 
+! iperm    - integer.
+! x        - real, dimension(:).
+! info     - integer.                 Return code.
+subroutine psb_e_egelpv(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_e_egelpv
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  integer(psb_epk_), intent(inout) ::  x(:)
+  integer(psb_epk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         ::  trans
+
+  ! local variables
+  integer(psb_ipk_) :: int_err(5), err_act
+  integer(psb_epk_),allocatable ::  temp(:)
+  integer(psb_epk_) :: i1sz, i
+  integer(psb_epk_), allocatable          :: itemp(:)
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20) :: name
+
+  name = 'psb_egelp'
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz = min(size(x),size(iperm))
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+
+  select case( psb_toupper(trans))
+  case('N') 
+    do i=1,i1sz
+      temp(i) = x(itemp(i))
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case('T')
+    do i=1,i1sz
+      temp(itemp(i)) = x(i)
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='egelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_e_egelpv
+
 subroutine psi_eaxpby(m,n,alpha, x, beta, y, info)
 
   use psb_const_mod
diff --git a/base/serial/psi_i2_serial_impl.f90 b/base/serial/psi_i2_serial_impl.F90
similarity index 60%
rename from base/serial/psi_i2_serial_impl.f90
rename to base/serial/psi_i2_serial_impl.F90
index 59d579f2..83b078f0 100644
--- a/base/serial/psi_i2_serial_impl.f90
+++ b/base/serial/psi_i2_serial_impl.F90
@@ -29,6 +29,407 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !
 !
+subroutine psb_m_i2gelp(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_m_i2gelp
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  integer(psb_i2pk_), intent(inout) ::  x(:,:)
+  integer(psb_mpk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         :: trans
+
+  ! local variables
+  integer(psb_i2pk_),allocatable :: temp(:)
+  integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j
+  integer(psb_ipk_), allocatable          :: itemp(:)
+  integer(psb_i2pk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_i2gelp'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz    = size(x,dim=1)
+  i2sz    = size(x,dim=2)
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
+
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+  select case( psb_toupper(trans))
+  case('N') 
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(i) = x(itemp(i),j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case('T')
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(itemp(i)) = x(i,j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='i2gelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_m_i2gelp
+
+
+
+!!$ 
+!!$              Parallel Sparse BLAS  version 3.5
+!!$    (C) Copyright 2006-2018
+!!$                       Salvatore Filippone    University of Rome Tor Vergata
+!!$                       Alfredo Buttari        University of Rome Tor Vergata
+!!$ 
+!!$  Redistribution and use in source and binary forms, with or without
+!!$  modification, are permitted provided that the following conditions
+!!$  are met:
+!!$    1. Redistributions of source code must retain the above copyright
+!!$       notice, this list of conditions and the following disclaimer.
+!!$    2. Redistributions in binary form must reproduce the above copyright
+!!$       notice, this list of conditions, and the following disclaimer in the
+!!$       documentation and/or other materials provided with the distribution.
+!!$    3. The name of the PSBLAS group or the names of its contributors may
+!!$       not be used to endorse or promote products derived from this
+!!$       software without specific written permission.
+!!$ 
+!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!!$  POSSIBILITY OF SUCH DAMAGE.
+!!$ 
+!!$  
+!
+!
+! Subroutine: psb_i2gelpv
+!             Apply a left permutation to a dense matrix
+!
+! Arguments:
+! trans    - character. 
+! iperm    - integer.
+! x        - real, dimension(:).
+! info     - integer.                 Return code.
+subroutine psb_m_i2gelpv(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_m_i2gelpv
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  integer(psb_i2pk_), intent(inout) ::  x(:)
+  integer(psb_mpk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         ::  trans
+
+  ! local variables
+  integer(psb_ipk_) :: int_err(5), i1sz, err_act, i
+  integer(psb_i2pk_),allocatable ::  temp(:)
+  integer(psb_ipk_), allocatable          :: itemp(:)
+  integer(psb_i2pk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_i2gelpv'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz = min(size(x),size(iperm))
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+
+  select case( psb_toupper(trans))
+  case('N') 
+    do i=1,i1sz
+      temp(i) = x(itemp(i))
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case('T')
+    do i=1,i1sz
+      temp(itemp(i)) = x(i)
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='i2gelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_m_i2gelpv
+
+subroutine psb_e_i2gelp(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_e_i2gelp
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  integer(psb_i2pk_), intent(inout) ::  x(:,:)
+  integer(psb_epk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         :: trans
+
+  ! local variables
+  integer(psb_i2pk_),allocatable :: temp(:)
+  integer(psb_ipk_) :: int_err(5), err_act
+  integer(psb_epk_) :: i1sz, i2sz, i, j
+  integer(psb_epk_), allocatable          :: itemp(:)
+  integer(psb_i2pk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_i2gelp'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz    = size(x,dim=1)
+  i2sz    = size(x,dim=2)
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
+
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+  select case( psb_toupper(trans))
+  case('N') 
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(i) = x(itemp(i),j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case('T')
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(itemp(i)) = x(i,j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='i2gelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_e_i2gelp
+
+
+
+!!$ 
+!!$              Parallel Sparse BLAS  version 3.5
+!!$    (C) Copyright 2006-2018
+!!$                       Salvatore Filippone    University of Rome Tor Vergata
+!!$                       Alfredo Buttari        University of Rome Tor Vergata
+!!$ 
+!!$  Redistribution and use in source and binary forms, with or without
+!!$  modification, are permitted provided that the following conditions
+!!$  are met:
+!!$    1. Redistributions of source code must retain the above copyright
+!!$       notice, this list of conditions and the following disclaimer.
+!!$    2. Redistributions in binary form must reproduce the above copyright
+!!$       notice, this list of conditions, and the following disclaimer in the
+!!$       documentation and/or other materials provided with the distribution.
+!!$    3. The name of the PSBLAS group or the names of its contributors may
+!!$       not be used to endorse or promote products derived from this
+!!$       software without specific written permission.
+!!$ 
+!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!!$  POSSIBILITY OF SUCH DAMAGE.
+!!$ 
+!!$  
+!
+!
+! Subroutine: psb_i2gelpv
+!             Apply a left permutation to a dense matrix
+!
+! Arguments:
+! trans    - character. 
+! iperm    - integer.
+! x        - real, dimension(:).
+! info     - integer.                 Return code.
+subroutine psb_e_i2gelpv(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_e_i2gelpv
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  integer(psb_i2pk_), intent(inout) ::  x(:)
+  integer(psb_epk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         ::  trans
+
+  ! local variables
+  integer(psb_ipk_) :: int_err(5), err_act
+  integer(psb_i2pk_),allocatable ::  temp(:)
+  integer(psb_epk_) :: i1sz, i
+  integer(psb_epk_), allocatable          :: itemp(:)
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20) :: name
+
+  name = 'psb_i2gelp'
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz = min(size(x),size(iperm))
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+
+  select case( psb_toupper(trans))
+  case('N') 
+    do i=1,i1sz
+      temp(i) = x(itemp(i))
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case('T')
+    do i=1,i1sz
+      temp(itemp(i)) = x(i)
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='i2gelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_e_i2gelpv
+
 subroutine psi_i2axpby(m,n,alpha, x, beta, y, info)
 
   use psb_const_mod
diff --git a/base/serial/psi_m_serial_impl.f90 b/base/serial/psi_m_serial_impl.F90
similarity index 60%
rename from base/serial/psi_m_serial_impl.f90
rename to base/serial/psi_m_serial_impl.F90
index cc8b9f4f..950e2358 100644
--- a/base/serial/psi_m_serial_impl.f90
+++ b/base/serial/psi_m_serial_impl.F90
@@ -29,6 +29,407 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !
 !
+subroutine psb_m_mgelp(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_m_mgelp
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  integer(psb_mpk_), intent(inout) ::  x(:,:)
+  integer(psb_mpk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         :: trans
+
+  ! local variables
+  integer(psb_mpk_),allocatable :: temp(:)
+  integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j
+  integer(psb_ipk_), allocatable          :: itemp(:)
+  integer(psb_mpk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_mgelp'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz    = size(x,dim=1)
+  i2sz    = size(x,dim=2)
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
+
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+  select case( psb_toupper(trans))
+  case('N') 
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(i) = x(itemp(i),j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case('T')
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(itemp(i)) = x(i,j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='mgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_m_mgelp
+
+
+
+!!$ 
+!!$              Parallel Sparse BLAS  version 3.5
+!!$    (C) Copyright 2006-2018
+!!$                       Salvatore Filippone    University of Rome Tor Vergata
+!!$                       Alfredo Buttari        University of Rome Tor Vergata
+!!$ 
+!!$  Redistribution and use in source and binary forms, with or without
+!!$  modification, are permitted provided that the following conditions
+!!$  are met:
+!!$    1. Redistributions of source code must retain the above copyright
+!!$       notice, this list of conditions and the following disclaimer.
+!!$    2. Redistributions in binary form must reproduce the above copyright
+!!$       notice, this list of conditions, and the following disclaimer in the
+!!$       documentation and/or other materials provided with the distribution.
+!!$    3. The name of the PSBLAS group or the names of its contributors may
+!!$       not be used to endorse or promote products derived from this
+!!$       software without specific written permission.
+!!$ 
+!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!!$  POSSIBILITY OF SUCH DAMAGE.
+!!$ 
+!!$  
+!
+!
+! Subroutine: psb_mgelpv
+!             Apply a left permutation to a dense matrix
+!
+! Arguments:
+! trans    - character. 
+! iperm    - integer.
+! x        - real, dimension(:).
+! info     - integer.                 Return code.
+subroutine psb_m_mgelpv(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_m_mgelpv
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  integer(psb_mpk_), intent(inout) ::  x(:)
+  integer(psb_mpk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         ::  trans
+
+  ! local variables
+  integer(psb_ipk_) :: int_err(5), i1sz, err_act, i
+  integer(psb_mpk_),allocatable ::  temp(:)
+  integer(psb_ipk_), allocatable          :: itemp(:)
+  integer(psb_mpk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_mgelpv'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz = min(size(x),size(iperm))
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+
+  select case( psb_toupper(trans))
+  case('N') 
+    do i=1,i1sz
+      temp(i) = x(itemp(i))
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case('T')
+    do i=1,i1sz
+      temp(itemp(i)) = x(i)
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='mgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_m_mgelpv
+
+subroutine psb_e_mgelp(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_e_mgelp
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  integer(psb_mpk_), intent(inout) ::  x(:,:)
+  integer(psb_epk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         :: trans
+
+  ! local variables
+  integer(psb_mpk_),allocatable :: temp(:)
+  integer(psb_ipk_) :: int_err(5), err_act
+  integer(psb_epk_) :: i1sz, i2sz, i, j
+  integer(psb_epk_), allocatable          :: itemp(:)
+  integer(psb_mpk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_mgelp'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz    = size(x,dim=1)
+  i2sz    = size(x,dim=2)
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
+
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+  select case( psb_toupper(trans))
+  case('N') 
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(i) = x(itemp(i),j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case('T')
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(itemp(i)) = x(i,j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='mgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_e_mgelp
+
+
+
+!!$ 
+!!$              Parallel Sparse BLAS  version 3.5
+!!$    (C) Copyright 2006-2018
+!!$                       Salvatore Filippone    University of Rome Tor Vergata
+!!$                       Alfredo Buttari        University of Rome Tor Vergata
+!!$ 
+!!$  Redistribution and use in source and binary forms, with or without
+!!$  modification, are permitted provided that the following conditions
+!!$  are met:
+!!$    1. Redistributions of source code must retain the above copyright
+!!$       notice, this list of conditions and the following disclaimer.
+!!$    2. Redistributions in binary form must reproduce the above copyright
+!!$       notice, this list of conditions, and the following disclaimer in the
+!!$       documentation and/or other materials provided with the distribution.
+!!$    3. The name of the PSBLAS group or the names of its contributors may
+!!$       not be used to endorse or promote products derived from this
+!!$       software without specific written permission.
+!!$ 
+!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!!$  POSSIBILITY OF SUCH DAMAGE.
+!!$ 
+!!$  
+!
+!
+! Subroutine: psb_mgelpv
+!             Apply a left permutation to a dense matrix
+!
+! Arguments:
+! trans    - character. 
+! iperm    - integer.
+! x        - real, dimension(:).
+! info     - integer.                 Return code.
+subroutine psb_e_mgelpv(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_e_mgelpv
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  integer(psb_mpk_), intent(inout) ::  x(:)
+  integer(psb_epk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         ::  trans
+
+  ! local variables
+  integer(psb_ipk_) :: int_err(5), err_act
+  integer(psb_mpk_),allocatable ::  temp(:)
+  integer(psb_epk_) :: i1sz, i
+  integer(psb_epk_), allocatable          :: itemp(:)
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20) :: name
+
+  name = 'psb_mgelp'
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz = min(size(x),size(iperm))
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+
+  select case( psb_toupper(trans))
+  case('N') 
+    do i=1,i1sz
+      temp(i) = x(itemp(i))
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case('T')
+    do i=1,i1sz
+      temp(itemp(i)) = x(i)
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='mgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_e_mgelpv
+
 subroutine psi_maxpby(m,n,alpha, x, beta, y, info)
 
   use psb_const_mod
diff --git a/base/serial/psi_s_serial_impl.f90 b/base/serial/psi_s_serial_impl.F90
similarity index 50%
rename from base/serial/psi_s_serial_impl.f90
rename to base/serial/psi_s_serial_impl.F90
index dfe2559b..6c8e21e2 100644
--- a/base/serial/psi_s_serial_impl.f90
+++ b/base/serial/psi_s_serial_impl.F90
@@ -29,6 +29,407 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !
 !
+subroutine psb_m_sgelp(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_m_sgelp
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  real(psb_spk_), intent(inout) ::  x(:,:)
+  integer(psb_mpk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         :: trans
+
+  ! local variables
+  real(psb_spk_),allocatable :: temp(:)
+  integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j
+  integer(psb_ipk_), allocatable          :: itemp(:)
+  real(psb_spk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_sgelp'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz    = size(x,dim=1)
+  i2sz    = size(x,dim=2)
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
+
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+  select case( psb_toupper(trans))
+  case('N') 
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(i) = x(itemp(i),j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case('T')
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(itemp(i)) = x(i,j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='sgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_m_sgelp
+
+
+
+!!$ 
+!!$              Parallel Sparse BLAS  version 3.5
+!!$    (C) Copyright 2006-2018
+!!$                       Salvatore Filippone    University of Rome Tor Vergata
+!!$                       Alfredo Buttari        University of Rome Tor Vergata
+!!$ 
+!!$  Redistribution and use in source and binary forms, with or without
+!!$  modification, are permitted provided that the following conditions
+!!$  are met:
+!!$    1. Redistributions of source code must retain the above copyright
+!!$       notice, this list of conditions and the following disclaimer.
+!!$    2. Redistributions in binary form must reproduce the above copyright
+!!$       notice, this list of conditions, and the following disclaimer in the
+!!$       documentation and/or other materials provided with the distribution.
+!!$    3. The name of the PSBLAS group or the names of its contributors may
+!!$       not be used to endorse or promote products derived from this
+!!$       software without specific written permission.
+!!$ 
+!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!!$  POSSIBILITY OF SUCH DAMAGE.
+!!$ 
+!!$  
+!
+!
+! Subroutine: psb_sgelpv
+!             Apply a left permutation to a dense matrix
+!
+! Arguments:
+! trans    - character. 
+! iperm    - integer.
+! x        - real, dimension(:).
+! info     - integer.                 Return code.
+subroutine psb_m_sgelpv(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_m_sgelpv
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  real(psb_spk_), intent(inout) ::  x(:)
+  integer(psb_mpk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         ::  trans
+
+  ! local variables
+  integer(psb_ipk_) :: int_err(5), i1sz, err_act, i
+  real(psb_spk_),allocatable ::  temp(:)
+  integer(psb_ipk_), allocatable          :: itemp(:)
+  real(psb_spk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_sgelpv'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz = min(size(x),size(iperm))
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+
+  select case( psb_toupper(trans))
+  case('N') 
+    do i=1,i1sz
+      temp(i) = x(itemp(i))
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case('T')
+    do i=1,i1sz
+      temp(itemp(i)) = x(i)
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='sgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_m_sgelpv
+
+subroutine psb_e_sgelp(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_e_sgelp
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  real(psb_spk_), intent(inout) ::  x(:,:)
+  integer(psb_epk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         :: trans
+
+  ! local variables
+  real(psb_spk_),allocatable :: temp(:)
+  integer(psb_ipk_) :: int_err(5), err_act
+  integer(psb_epk_) :: i1sz, i2sz, i, j
+  integer(psb_epk_), allocatable          :: itemp(:)
+  real(psb_spk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_sgelp'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz    = size(x,dim=1)
+  i2sz    = size(x,dim=2)
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
+
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+  select case( psb_toupper(trans))
+  case('N') 
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(i) = x(itemp(i),j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case('T')
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(itemp(i)) = x(i,j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='sgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_e_sgelp
+
+
+
+!!$ 
+!!$              Parallel Sparse BLAS  version 3.5
+!!$    (C) Copyright 2006-2018
+!!$                       Salvatore Filippone    University of Rome Tor Vergata
+!!$                       Alfredo Buttari        University of Rome Tor Vergata
+!!$ 
+!!$  Redistribution and use in source and binary forms, with or without
+!!$  modification, are permitted provided that the following conditions
+!!$  are met:
+!!$    1. Redistributions of source code must retain the above copyright
+!!$       notice, this list of conditions and the following disclaimer.
+!!$    2. Redistributions in binary form must reproduce the above copyright
+!!$       notice, this list of conditions, and the following disclaimer in the
+!!$       documentation and/or other materials provided with the distribution.
+!!$    3. The name of the PSBLAS group or the names of its contributors may
+!!$       not be used to endorse or promote products derived from this
+!!$       software without specific written permission.
+!!$ 
+!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!!$  POSSIBILITY OF SUCH DAMAGE.
+!!$ 
+!!$  
+!
+!
+! Subroutine: psb_sgelpv
+!             Apply a left permutation to a dense matrix
+!
+! Arguments:
+! trans    - character. 
+! iperm    - integer.
+! x        - real, dimension(:).
+! info     - integer.                 Return code.
+subroutine psb_e_sgelpv(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_e_sgelpv
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  real(psb_spk_), intent(inout) ::  x(:)
+  integer(psb_epk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         ::  trans
+
+  ! local variables
+  integer(psb_ipk_) :: int_err(5), err_act
+  real(psb_spk_),allocatable ::  temp(:)
+  integer(psb_epk_) :: i1sz, i
+  integer(psb_epk_), allocatable          :: itemp(:)
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20) :: name
+
+  name = 'psb_sgelp'
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz = min(size(x),size(iperm))
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+
+  select case( psb_toupper(trans))
+  case('N') 
+    do i=1,i1sz
+      temp(i) = x(itemp(i))
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case('T')
+    do i=1,i1sz
+      temp(itemp(i)) = x(i)
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='sgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_e_sgelpv
+
 subroutine psi_saxpby(m,n,alpha, x, beta, y, info)
 
   use psb_const_mod
@@ -40,9 +441,9 @@ subroutine psi_saxpby(m,n,alpha, x, beta, y, info)
   real(psb_spk_), intent (in)       ::  alpha, beta
   integer(psb_ipk_), intent(out)     :: info
   integer(psb_ipk_) :: err_act
-  integer(psb_ipk_) :: lx, ly
+  integer(psb_ipk_) :: lx, ly, i
   integer(psb_ipk_) :: ierr(5)
-  character(len=20)        :: name, ch_err
+  character(len=20) :: name, ch_err
 
   name='psb_geaxpby'
   info=psb_success_
@@ -101,7 +502,8 @@ subroutine psi_saxpbyv(m,alpha, x, beta, y, info)
   integer(psb_ipk_) :: err_act
   integer(psb_ipk_) :: lx, ly
   integer(psb_ipk_) :: ierr(5)
-  character(len=20)        :: name, ch_err
+  integer(psb_ipk_) :: i
+  character(len=20) :: name, ch_err
 
   name='psb_geaxpby'
   info=psb_success_
@@ -131,7 +533,106 @@ subroutine psi_saxpbyv(m,alpha, x, beta, y, info)
     goto 9999
   end if
 
-  if (m>0) call saxpby(m,ione,alpha,x,lx,beta,y,ly,info)
+!  if (m>0) call saxpby(m,ione,alpha,x,lx,beta,y,ly,info)
+
+  if (alpha.eq.szero) then
+    if (beta.eq.szero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = szero
+      enddo
+    else if (beta.eq.sone) then
+      !
+      !        Do nothing!
+      !
+
+    else if (beta.eq.-sone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) =  beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.sone) then
+
+    if (beta.eq.szero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i)
+      enddo
+    else if (beta.eq.sone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-sone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i) + beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.-sone) then
+
+    if (beta.eq.szero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i)
+      enddo
+    else if (beta.eq.sone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i) + y(i)
+      enddo
+    else if (beta.eq.-sone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i) + beta*y(i)
+      enddo
+    endif
+
+  else
+
+    if (beta.eq.szero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i)
+      enddo
+    else if (beta.eq.sone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i) + y(i)
+      enddo
+    else if (beta.eq.-sone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i) + beta*y(i)
+      enddo
+    endif
+
+  endif
+
 
   call psb_erractionrestore(err_act)
   return
@@ -154,7 +655,7 @@ subroutine psi_saxpbyv2(m,alpha, x, beta, y, z, info)
   real(psb_spk_), intent (in)       :: alpha, beta
   integer(psb_ipk_), intent(out)     :: info
   integer(psb_ipk_) :: err_act
-  integer(psb_ipk_) :: lx, ly, lz
+  integer(psb_ipk_) :: lx, ly, lz, i
   integer(psb_ipk_) :: ierr(5)
   character(len=20)        :: name, ch_err
 
@@ -193,7 +694,105 @@ subroutine psi_saxpbyv2(m,alpha, x, beta, y, z, info)
     goto 9999
   end if
 
-  if (m>0) call saxpbyv2(m,ione,alpha,x,lx,beta,y,ly,z,lz,info)
+  if (alpha.eq.szero) then
+    if (beta.eq.szero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = szero
+      enddo
+    else if (beta.eq.sone) then
+      !
+      !        Do nothing!
+      !
+
+    else if (beta.eq.-sone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) =  beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.sone) then
+
+    if (beta.eq.szero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = x(i)
+      enddo
+    else if (beta.eq.sone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-sone) then
+      !$omp parallel do private(i)
+      do i=1,m
+          Z(i) = x(i) - y(i)
+        enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = x(i) + beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.-sone) then
+
+    if (beta.eq.szero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i)
+      enddo
+    else if (beta.eq.sone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-sone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i) + beta*y(i)
+      enddo
+    endif
+
+  else
+
+    if (beta.eq.szero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = alpha*x(i)
+      enddo
+    else if (beta.eq.sone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = alpha*x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-sone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = alpha*x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+          Z(i) = alpha*x(i) + beta*y(i)
+      enddo
+    endif
+
+  endif
 
   call psb_erractionrestore(err_act)
   return
@@ -541,6 +1140,7 @@ subroutine  saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
   if (alpha.eq.szero) then
     if (beta.eq.szero) then
       do j=1, n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = szero
         enddo
@@ -552,12 +1152,14 @@ subroutine  saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-sone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) =  beta*y(i,j)
         enddo
@@ -568,12 +1170,14 @@ subroutine  saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     if (beta.eq.szero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j)
         enddo
       enddo
     else if (beta.eq.sone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j) + y(i,j)
         enddo
@@ -581,12 +1185,14 @@ subroutine  saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-sone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j) + beta*y(i,j)
         enddo
@@ -597,12 +1203,14 @@ subroutine  saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     if (beta.eq.szero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j)
         enddo
       enddo
     else if (beta.eq.sone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j) + y(i,j)
         enddo
@@ -610,12 +1218,14 @@ subroutine  saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-sone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j) + beta*y(i,j)
         enddo
@@ -626,12 +1236,14 @@ subroutine  saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     if (beta.eq.szero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j)
         enddo
       enddo
     else if (beta.eq.sone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j) + y(i,j)
         enddo
@@ -639,12 +1251,14 @@ subroutine  saxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-sone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j) + beta*y(i,j)
         enddo
@@ -730,12 +1344,14 @@ subroutine  saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-sone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) =  beta*y(i,j)
         enddo
@@ -746,12 +1362,14 @@ subroutine  saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     if (beta.eq.szero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j)
         enddo
       enddo
     else if (beta.eq.sone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j) + y(i,j)
         enddo
@@ -759,12 +1377,14 @@ subroutine  saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-sone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j) + beta*y(i,j)
         enddo
@@ -775,12 +1395,14 @@ subroutine  saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     if (beta.eq.szero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j)
         enddo
       enddo
     else if (beta.eq.sone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j) + y(i,j)
         enddo
@@ -788,12 +1410,14 @@ subroutine  saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-sone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j) + beta*y(i,j)
         enddo
@@ -804,12 +1428,14 @@ subroutine  saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     if (beta.eq.szero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j)
         enddo
       enddo
     else if (beta.eq.sone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j) + y(i,j)
         enddo
@@ -817,12 +1443,14 @@ subroutine  saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-sone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j) + beta*y(i,j)
         enddo
diff --git a/base/serial/psi_z_serial_impl.f90 b/base/serial/psi_z_serial_impl.F90
similarity index 50%
rename from base/serial/psi_z_serial_impl.f90
rename to base/serial/psi_z_serial_impl.F90
index 5b7036e6..f3087992 100644
--- a/base/serial/psi_z_serial_impl.f90
+++ b/base/serial/psi_z_serial_impl.F90
@@ -29,6 +29,407 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !
 !
+subroutine psb_m_zgelp(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_m_zgelp
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  complex(psb_dpk_), intent(inout) ::  x(:,:)
+  integer(psb_mpk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         :: trans
+
+  ! local variables
+  complex(psb_dpk_),allocatable :: temp(:)
+  integer(psb_ipk_) :: int_err(5), i1sz, i2sz, err_act,i,j
+  integer(psb_ipk_), allocatable          :: itemp(:)
+  complex(psb_dpk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_zgelp'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz    = size(x,dim=1)
+  i2sz    = size(x,dim=2)
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
+
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+  select case( psb_toupper(trans))
+  case('N') 
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(i) = x(itemp(i),j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case('T')
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(itemp(i)) = x(i,j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='zgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_m_zgelp
+
+
+
+!!$ 
+!!$              Parallel Sparse BLAS  version 3.5
+!!$    (C) Copyright 2006-2018
+!!$                       Salvatore Filippone    University of Rome Tor Vergata
+!!$                       Alfredo Buttari        University of Rome Tor Vergata
+!!$ 
+!!$  Redistribution and use in source and binary forms, with or without
+!!$  modification, are permitted provided that the following conditions
+!!$  are met:
+!!$    1. Redistributions of source code must retain the above copyright
+!!$       notice, this list of conditions and the following disclaimer.
+!!$    2. Redistributions in binary form must reproduce the above copyright
+!!$       notice, this list of conditions, and the following disclaimer in the
+!!$       documentation and/or other materials provided with the distribution.
+!!$    3. The name of the PSBLAS group or the names of its contributors may
+!!$       not be used to endorse or promote products derived from this
+!!$       software without specific written permission.
+!!$ 
+!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!!$  POSSIBILITY OF SUCH DAMAGE.
+!!$ 
+!!$  
+!
+!
+! Subroutine: psb_zgelpv
+!             Apply a left permutation to a dense matrix
+!
+! Arguments:
+! trans    - character. 
+! iperm    - integer.
+! x        - real, dimension(:).
+! info     - integer.                 Return code.
+subroutine psb_m_zgelpv(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_m_zgelpv
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  complex(psb_dpk_), intent(inout) ::  x(:)
+  integer(psb_mpk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         ::  trans
+
+  ! local variables
+  integer(psb_ipk_) :: int_err(5), i1sz, err_act, i
+  complex(psb_dpk_),allocatable ::  temp(:)
+  integer(psb_ipk_), allocatable          :: itemp(:)
+  complex(psb_dpk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_zgelpv'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz = min(size(x),size(iperm))
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+
+  select case( psb_toupper(trans))
+  case('N') 
+    do i=1,i1sz
+      temp(i) = x(itemp(i))
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case('T')
+    do i=1,i1sz
+      temp(itemp(i)) = x(i)
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='zgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_m_zgelpv
+
+subroutine psb_e_zgelp(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_e_zgelp
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  complex(psb_dpk_), intent(inout) ::  x(:,:)
+  integer(psb_epk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         :: trans
+
+  ! local variables
+  complex(psb_dpk_),allocatable :: temp(:)
+  integer(psb_ipk_) :: int_err(5), err_act
+  integer(psb_epk_) :: i1sz, i2sz, i, j
+  integer(psb_epk_), allocatable          :: itemp(:)
+  complex(psb_dpk_),parameter   :: one=1
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20)   :: name
+  name = 'psb_zgelp'
+
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz    = size(x,dim=1)
+  i2sz    = size(x,dim=2)
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz,i2sz
+
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+  select case( psb_toupper(trans))
+  case('N') 
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(i) = x(itemp(i),j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case('T')
+    do j=1,i2sz
+      do i=1,i1sz
+        temp(itemp(i)) = x(i,j)
+      end do
+      do i=1,i1sz
+        x(i,j) = temp(i) 
+      end do
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='zgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_e_zgelp
+
+
+
+!!$ 
+!!$              Parallel Sparse BLAS  version 3.5
+!!$    (C) Copyright 2006-2018
+!!$                       Salvatore Filippone    University of Rome Tor Vergata
+!!$                       Alfredo Buttari        University of Rome Tor Vergata
+!!$ 
+!!$  Redistribution and use in source and binary forms, with or without
+!!$  modification, are permitted provided that the following conditions
+!!$  are met:
+!!$    1. Redistributions of source code must retain the above copyright
+!!$       notice, this list of conditions and the following disclaimer.
+!!$    2. Redistributions in binary form must reproduce the above copyright
+!!$       notice, this list of conditions, and the following disclaimer in the
+!!$       documentation and/or other materials provided with the distribution.
+!!$    3. The name of the PSBLAS group or the names of its contributors may
+!!$       not be used to endorse or promote products derived from this
+!!$       software without specific written permission.
+!!$ 
+!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!!$  POSSIBILITY OF SUCH DAMAGE.
+!!$ 
+!!$  
+!
+!
+! Subroutine: psb_zgelpv
+!             Apply a left permutation to a dense matrix
+!
+! Arguments:
+! trans    - character. 
+! iperm    - integer.
+! x        - real, dimension(:).
+! info     - integer.                 Return code.
+subroutine psb_e_zgelpv(trans,iperm,x,info)
+  use psb_serial_mod, psb_protect_name => psb_e_zgelpv
+  use psb_const_mod
+  use psb_error_mod
+  implicit none
+
+  complex(psb_dpk_), intent(inout) ::  x(:)
+  integer(psb_epk_), intent(in)           ::  iperm(:)
+  integer(psb_ipk_), intent(out)          ::  info
+  character, intent(in)         ::  trans
+
+  ! local variables
+  integer(psb_ipk_) :: int_err(5), err_act
+  complex(psb_dpk_),allocatable ::  temp(:)
+  integer(psb_epk_) :: i1sz, i
+  integer(psb_epk_), allocatable          :: itemp(:)
+  integer(psb_ipk_) :: debug_level, debug_unit
+  character(len=20) :: name
+
+  name = 'psb_zgelp'
+  if(psb_get_errstatus() /= 0) return 
+  info=psb_success_
+  call psb_erractionsave(err_act)
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  i1sz = min(size(x),size(iperm))
+
+  if (debug_level >= psb_debug_serial_)&
+       & write(debug_unit,*)  trim(name),': size',i1sz
+  allocate(temp(i1sz),itemp(size(iperm)),stat=info)
+  if (info /= psb_success_) then
+    info=2040
+    call psb_errpush(info,name)
+    goto 9999
+  end if
+  itemp(:) = iperm(:) 
+
+  if (.not.psb_isaperm(i1sz,itemp)) then
+    info=psb_err_iarg_invalid_value_
+    int_err(1) = 1      
+    call psb_errpush(info,name,i_err=int_err)
+    goto 9999
+  endif
+
+  select case( psb_toupper(trans))
+  case('N') 
+    do i=1,i1sz
+      temp(i) = x(itemp(i))
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case('T')
+    do i=1,i1sz
+      temp(itemp(i)) = x(i)
+    end do
+    do i=1,i1sz
+      x(i) = temp(i) 
+    end do
+  case default
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='zgelp')
+  end select
+
+  deallocate(temp,itemp)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+
+  return
+
+end subroutine psb_e_zgelpv
+
 subroutine psi_zaxpby(m,n,alpha, x, beta, y, info)
 
   use psb_const_mod
@@ -40,9 +441,9 @@ subroutine psi_zaxpby(m,n,alpha, x, beta, y, info)
   complex(psb_dpk_), intent (in)       ::  alpha, beta
   integer(psb_ipk_), intent(out)     :: info
   integer(psb_ipk_) :: err_act
-  integer(psb_ipk_) :: lx, ly
+  integer(psb_ipk_) :: lx, ly, i
   integer(psb_ipk_) :: ierr(5)
-  character(len=20)        :: name, ch_err
+  character(len=20) :: name, ch_err
 
   name='psb_geaxpby'
   info=psb_success_
@@ -101,7 +502,8 @@ subroutine psi_zaxpbyv(m,alpha, x, beta, y, info)
   integer(psb_ipk_) :: err_act
   integer(psb_ipk_) :: lx, ly
   integer(psb_ipk_) :: ierr(5)
-  character(len=20)        :: name, ch_err
+  integer(psb_ipk_) :: i
+  character(len=20) :: name, ch_err
 
   name='psb_geaxpby'
   info=psb_success_
@@ -131,7 +533,106 @@ subroutine psi_zaxpbyv(m,alpha, x, beta, y, info)
     goto 9999
   end if
 
-  if (m>0) call zaxpby(m,ione,alpha,x,lx,beta,y,ly,info)
+!  if (m>0) call zaxpby(m,ione,alpha,x,lx,beta,y,ly,info)
+
+  if (alpha.eq.zzero) then
+    if (beta.eq.zzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = zzero
+      enddo
+    else if (beta.eq.zone) then
+      !
+      !        Do nothing!
+      !
+
+    else if (beta.eq.-zone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) =  beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.zone) then
+
+    if (beta.eq.zzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i)
+      enddo
+    else if (beta.eq.zone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-zone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = x(i) + beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.-zone) then
+
+    if (beta.eq.zzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i)
+      enddo
+    else if (beta.eq.zone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i) + y(i)
+      enddo
+    else if (beta.eq.-zone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = -x(i) + beta*y(i)
+      enddo
+    endif
+
+  else
+
+    if (beta.eq.zzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i)
+      enddo
+    else if (beta.eq.zone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i) + y(i)
+      enddo
+    else if (beta.eq.-zone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        y(i) = alpha*x(i) + beta*y(i)
+      enddo
+    endif
+
+  endif
+
 
   call psb_erractionrestore(err_act)
   return
@@ -154,7 +655,7 @@ subroutine psi_zaxpbyv2(m,alpha, x, beta, y, z, info)
   complex(psb_dpk_), intent (in)       :: alpha, beta
   integer(psb_ipk_), intent(out)     :: info
   integer(psb_ipk_) :: err_act
-  integer(psb_ipk_) :: lx, ly, lz
+  integer(psb_ipk_) :: lx, ly, lz, i
   integer(psb_ipk_) :: ierr(5)
   character(len=20)        :: name, ch_err
 
@@ -193,7 +694,105 @@ subroutine psi_zaxpbyv2(m,alpha, x, beta, y, z, info)
     goto 9999
   end if
 
-  if (m>0) call zaxpbyv2(m,ione,alpha,x,lx,beta,y,ly,z,lz,info)
+  if (alpha.eq.zzero) then
+    if (beta.eq.zzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = zzero
+      enddo
+    else if (beta.eq.zone) then
+      !
+      !        Do nothing!
+      !
+
+    else if (beta.eq.-zone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) =  beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.zone) then
+
+    if (beta.eq.zzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = x(i)
+      enddo
+    else if (beta.eq.zone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-zone) then
+      !$omp parallel do private(i)
+      do i=1,m
+          Z(i) = x(i) - y(i)
+        enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = x(i) + beta*y(i)
+      enddo
+    endif
+
+  else if (alpha.eq.-zone) then
+
+    if (beta.eq.zzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i)
+      enddo
+    else if (beta.eq.zone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-zone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = -x(i) + beta*y(i)
+      enddo
+    endif
+
+  else
+
+    if (beta.eq.zzero) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = alpha*x(i)
+      enddo
+    else if (beta.eq.zone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = alpha*x(i) + y(i)
+      enddo
+
+    else if (beta.eq.-zone) then
+      !$omp parallel do private(i)
+      do i=1,m
+        Z(i) = alpha*x(i) - y(i)
+      enddo
+    else
+      !$omp parallel do private(i)
+      do i=1,m
+          Z(i) = alpha*x(i) + beta*y(i)
+      enddo
+    endif
+
+  endif
 
   call psb_erractionrestore(err_act)
   return
@@ -541,6 +1140,7 @@ subroutine  zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
   if (alpha.eq.zzero) then
     if (beta.eq.zzero) then
       do j=1, n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = zzero
         enddo
@@ -552,12 +1152,14 @@ subroutine  zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-zone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) =  beta*y(i,j)
         enddo
@@ -568,12 +1170,14 @@ subroutine  zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     if (beta.eq.zzero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j)
         enddo
       enddo
     else if (beta.eq.zone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j) + y(i,j)
         enddo
@@ -581,12 +1185,14 @@ subroutine  zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-zone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = x(i,j) + beta*y(i,j)
         enddo
@@ -597,12 +1203,14 @@ subroutine  zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     if (beta.eq.zzero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j)
         enddo
       enddo
     else if (beta.eq.zone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j) + y(i,j)
         enddo
@@ -610,12 +1218,14 @@ subroutine  zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-zone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = -x(i,j) + beta*y(i,j)
         enddo
@@ -626,12 +1236,14 @@ subroutine  zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     if (beta.eq.zzero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j)
         enddo
       enddo
     else if (beta.eq.zone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j) + y(i,j)
         enddo
@@ -639,12 +1251,14 @@ subroutine  zaxpby(m, n, alpha, X, lldx, beta, Y, lldy, info)
 
     else if (beta.eq.-zone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           y(i,j) = alpha*x(i,j) + beta*y(i,j)
         enddo
@@ -730,12 +1344,14 @@ subroutine  zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-zone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) =  beta*y(i,j)
         enddo
@@ -746,12 +1362,14 @@ subroutine  zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     if (beta.eq.zzero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j)
         enddo
       enddo
     else if (beta.eq.zone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j) + y(i,j)
         enddo
@@ -759,12 +1377,14 @@ subroutine  zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-zone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = x(i,j) + beta*y(i,j)
         enddo
@@ -775,12 +1395,14 @@ subroutine  zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     if (beta.eq.zzero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j)
         enddo
       enddo
     else if (beta.eq.zone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j) + y(i,j)
         enddo
@@ -788,12 +1410,14 @@ subroutine  zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-zone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = -x(i,j) + beta*y(i,j)
         enddo
@@ -804,12 +1428,14 @@ subroutine  zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     if (beta.eq.zzero) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j)
         enddo
       enddo
     else if (beta.eq.zone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j) + y(i,j)
         enddo
@@ -817,12 +1443,14 @@ subroutine  zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
     else if (beta.eq.-zone) then
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j) - y(i,j)
         enddo
       enddo
     else
       do j=1,n
+        !$omp parallel do private(i)
         do i=1,m
           Z(i,j) = alpha*x(i,j) + beta*y(i,j)
         enddo
diff --git a/base/tools/Makefile b/base/tools/Makefile
index c8b488d3..1227da3f 100644
--- a/base/tools/Makefile
+++ b/base/tools/Makefile
@@ -27,7 +27,8 @@ FOBJS = psb_cdall.o psb_cdals.o psb_cdalv.o psb_cd_inloc.o psb_cdins.o psb_cdprt
 	psb_s_map.o psb_d_map.o psb_c_map.o psb_z_map.o \
 	psb_s_par_csr_spspmm.o psb_d_par_csr_spspmm.o psb_c_par_csr_spspmm.o psb_z_par_csr_spspmm.o \
 	psb_s_glob_transpose.o psb_d_glob_transpose.o psb_c_glob_transpose.o psb_z_glob_transpose.o \
-	psb_cgetelem.o psb_dgetelem.o psb_sgetelem.o psb_zgetelem.o   
+	psb_cgetelem.o psb_dgetelem.o psb_sgetelem.o psb_zgetelem.o   \
+	psb_c_remap.o psb_s_remap.o psb_d_remap.o psb_z_remap.o
 #	psb_lallc.o psb_lasb.o psb_lfree.o psb_lins.o \
 
 MPFOBJS = psb_icdasb.o psb_ssphalo.o  psb_dsphalo.o psb_csphalo.o psb_zsphalo.o  \
diff --git a/base/tools/psb_c_remap.F90 b/base/tools/psb_c_remap.F90
new file mode 100644
index 00000000..881b2ad0
--- /dev/null
+++ b/base/tools/psb_c_remap.F90
@@ -0,0 +1,255 @@
+!   
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone    
+!        Alfredo Buttari      
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+!    
+!
+! Subroutine: psb_c_remap
+! 
+! Arguments: 
+!    desc_in  - type(psb_desc_type).       The communication descriptor to be cloned.
+!    desc_out - type(psb_desc_type).       The output communication descriptor.
+!    info     - integer.                       Return code.
+subroutine psb_c_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, &
+     & desc_out, a_out, info)
+
+  use psb_base_mod, psb_protect_name => psb_c_remap
+
+  implicit none
+  !....parameters...
+  integer(psb_ipk_), intent(in)        :: np_remap
+  type(psb_desc_type), intent(inout)   :: desc_in
+  type(psb_cspmat_type), intent(inout) :: a_in
+  type(psb_cspmat_type), intent(out)   :: a_out
+  type(psb_desc_type), intent(out)     :: desc_out
+  integer(psb_ipk_), intent(out)       :: ipd 
+  integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:)
+  integer(psb_ipk_), intent(out)       :: info
+
+
+  ! locals
+  type(psb_ctxt_type) :: ctxt, newctxt
+  integer(psb_ipk_) :: np, me, err_act
+  integer(psb_ipk_) :: rnp, rme
+  integer(psb_ipk_) :: ipdest, id1, id2, imd, i, nsrc
+  integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:), ids(:) 
+  type(psb_lc_coo_sparse_mat) :: acoo_snd, acoo_rcv
+  integer(psb_ipk_) :: debug_level, debug_unit  
+  character(len=20)   :: name
+
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  if (psb_get_errstatus() /= 0) return 
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  name = 'psb_cdcpy'
+
+  ctxt = desc_in%get_context()
+
+  ! check on blacs grid 
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_ext_) &
+       & write(debug_unit,*) me,' ',trim(name),': Entered'
+  if (np == -1) then
+    info = psb_err_context_error_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+!!$  write(0,*) ' Remapping from ',np,' onto ', np_remap
+
+  if (desc_in%get_fmt() == 'BLOCK') then
+    !
+    ! Should we spread the processes in the new context,
+    ! or should we keep them close? 
+    ! 
+    if (.true.) then 
+      allocate(ids(0:np_remap-1))
+      if (np_remap <= np/2) then
+        ids(0) = 0
+        do ipdest=1,np_remap -1
+          ids(ipdest) = ids(ipdest-1) + np/np_remap
+        end do
+!!$        write(0,*) ' IDS ',ids(:) 
+      else
+        do ipdest = 0, np_remap-1
+          ids(ipdest) = ipdest
+        end do
+      end if
+      call psb_init(newctxt,np=np_remap,basectxt=ctxt,ids=ids)
+    else
+      call psb_init(newctxt,np=np_remap,basectxt=ctxt)
+    end if
+
+    call psb_info(newctxt,rme,rnp)
+!!$    write(0,*) 'Old context: ',me,np,' New context: ',rme,rnp
+    call psb_bcast(ctxt,rnp)
+    allocate(newnl(rnp),naggr(np),stat=info)
+    if (info /= 0) then
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info,name)
+      goto 9999
+    endif
+    if (rnp >= np) then 
+      write(0,*) ' No remapping on larger proc count now'
+      info = psb_err_internal_error_
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+    naggr = 0
+
+    !
+    ! Compute destination for my data.
+    ! Simplistic reallocation: divide the NP processes
+    ! across the new ones (as balanced as possible),
+    ! then send all data from old to new process
+    !
+    id2 = np/rnp
+    id1 = id2+1
+    imd = mod(np,rnp)
+    if (me < (imd*id1)) then
+      ipdest = (me/id1)
+    else
+      ipdest = ( ((me-imd*id1)/id2) +  imd)
+    end if
+    if (allocated(ids)) then
+      ipd = ids(ipdest)
+    else
+      ipd = ipdest
+    end if
+!!$    write(0,*) ' Sending my data from ',me,' to ', &
+!!$         & ipd, 'out of ',rnp,rnp-1
+
+    !
+    ! Compute local rows for all new
+    ! processes; will have a BLOCK distribution
+    ! 
+    newnl = 0
+    newnl(ipdest+1) = desc_in%get_local_rows()    
+    call psb_sum(ctxt,newnl)
+
+    if (rme>=0) then
+      ! 
+      if (rme < imd) then
+        isrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ]
+      else
+        isrc = [ (i, i=  imd*id1+((rme-imd))*id2,&
+             & min(imd*id1+(rme-imd)*id2+id2-1,np-1) ) ]            
+      end if
+!!$      write(0,*) me,rme,imd,' ISRC: ',isrc(:)
+      nsrc = size(isrc)
+!!$      write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),&
+!!$           & ' out ',desc_out%get_local_rows(),desc_out%get_global_rows()
+    else 
+!!$      write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),&
+!!$           & ' out ',0,0
+    end if
+  else
+    write(0,*) 'Right now only BLOCK on input '
+    info = psb_err_invalid_cd_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+
+  !
+  ! Collect matrices on their destinations
+  !
+  block
+    integer(psb_ipk_) :: nzsnd, nzrcv, ip
+    integer(psb_ipk_) :: nrl, ncl, nzl, nzp
+    call a_in%cp_to(acoo_snd)
+    nzsnd = acoo_snd%get_nzeros()
+    call psb_snd(ctxt,nzsnd,ipd)
+    call psb_snd(ctxt,desc_in%get_local_rows(),ipd)
+    ! Convert to global numbering
+    call psb_loc_to_glob(acoo_snd%ia(1:nzsnd),desc_in,info)
+    call psb_loc_to_glob(acoo_snd%ja(1:nzsnd),desc_in,info)
+
+    call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),ipd)
+    call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),ipd)
+    call psb_snd(ctxt,acoo_snd%val(1:nzsnd),ipd)
+
+    if (rme>=0) then
+      ! prepare to receive
+      nzsrc = isrc
+      nrsrc = isrc
+      nzl = 0
+      do ip=1, nsrc
+        call psb_rcv(ctxt,nzsrc(ip),isrc(ip))
+        call psb_rcv(ctxt,nrsrc(ip),isrc(ip))
+        nzl = nzl + nzsrc(ip)
+      end do
+!!$      write(0,*) rme,' Check on NR:',newnl(rme+1),sum(nrsrc)
+      call acoo_rcv%allocate(newnl(rme+1),newnl(rme+1),nzl)
+      nrl = acoo_rcv%get_nrows()
+      ncl = acoo_rcv%get_ncols()
+      nzp = 0
+      do ip=1, nsrc
+        call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),isrc(ip))
+        call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),isrc(ip))
+        call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),isrc(ip))
+        nzp = nzp + nzsrc(ip)
+      end do
+      call acoo_rcv%set_nzeros(nzp)
+!!$      write(0,*) rme,' Collected: ',&
+!!$           & acoo_rcv%get_nrows(),acoo_rcv%get_ncols(),acoo_rcv%get_nzeros()
+
+      !
+      !  New descriptor
+      !    
+      call psb_cdall(newctxt,desc_out,info,nl=newnl(rme+1))
+      ! Insert
+      call psb_spall(a_out,desc_out,info)
+      call psb_spins(nzp,acoo_rcv%ia(1:nzp),acoo_rcv%ja(1:nzp),&
+           & acoo_rcv%val(1:nzp),a_out,desc_out,info)      
+      ! Assemble
+      call psb_cdasb(desc_out,info)
+      call psb_spasb(a_out,desc_out,info)
+
+!!$      write(0,*) rme,' Regenerated: ',&
+!!$           & desc_out%get_local_rows(), desc_out%get_local_cols(),&
+!!$           & a_out%get_nrows(),a_out%get_ncols(),a_out%get_nzeros()
+      naggr(me+1) = desc_out%get_local_rows()
+    else
+      naggr(me+1) = 0
+    end if
+    call psb_sum(ctxt,naggr)
+    
+  end block
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+  return  
+
+end subroutine psb_c_remap
diff --git a/base/tools/psb_cspalloc.f90 b/base/tools/psb_cspalloc.f90
index cac6c1e6..69b9e1c2 100644
--- a/base/tools/psb_cspalloc.f90
+++ b/base/tools/psb_cspalloc.f90
@@ -102,7 +102,7 @@ subroutine psb_cspalloc(a, desc_a, info, nnz)
        & ':allocating size:',loc_row,loc_col,nnz_
   call a%free()
   !....allocate aspk, ia1, ia2.....
-  call a%csall(loc_row,loc_col,info,nz=nnz_)
+  call a%allocate(loc_row,loc_col,info,nz=nnz_)
   if(info /= psb_success_) then
     info=psb_err_from_subroutine_
     call psb_errpush(info,name,a_err='sp_all')
diff --git a/base/tools/psb_d_remap.F90 b/base/tools/psb_d_remap.F90
new file mode 100644
index 00000000..2157b56b
--- /dev/null
+++ b/base/tools/psb_d_remap.F90
@@ -0,0 +1,255 @@
+!   
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone    
+!        Alfredo Buttari      
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+!    
+!
+! Subroutine: psb_d_remap
+! 
+! Arguments: 
+!    desc_in  - type(psb_desc_type).       The communication descriptor to be cloned.
+!    desc_out - type(psb_desc_type).       The output communication descriptor.
+!    info     - integer.                       Return code.
+subroutine psb_d_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, &
+     & desc_out, a_out, info)
+
+  use psb_base_mod, psb_protect_name => psb_d_remap
+
+  implicit none
+  !....parameters...
+  integer(psb_ipk_), intent(in)        :: np_remap
+  type(psb_desc_type), intent(inout)   :: desc_in
+  type(psb_dspmat_type), intent(inout) :: a_in
+  type(psb_dspmat_type), intent(out)   :: a_out
+  type(psb_desc_type), intent(out)     :: desc_out
+  integer(psb_ipk_), intent(out)       :: ipd 
+  integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:)
+  integer(psb_ipk_), intent(out)       :: info
+
+
+  ! locals
+  type(psb_ctxt_type) :: ctxt, newctxt
+  integer(psb_ipk_) :: np, me, err_act
+  integer(psb_ipk_) :: rnp, rme
+  integer(psb_ipk_) :: ipdest, id1, id2, imd, i, nsrc
+  integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:), ids(:) 
+  type(psb_ld_coo_sparse_mat) :: acoo_snd, acoo_rcv
+  integer(psb_ipk_) :: debug_level, debug_unit  
+  character(len=20)   :: name
+
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  if (psb_get_errstatus() /= 0) return 
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  name = 'psb_cdcpy'
+
+  ctxt = desc_in%get_context()
+
+  ! check on blacs grid 
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_ext_) &
+       & write(debug_unit,*) me,' ',trim(name),': Entered'
+  if (np == -1) then
+    info = psb_err_context_error_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+!!$  write(0,*) ' Remapping from ',np,' onto ', np_remap
+
+  if (desc_in%get_fmt() == 'BLOCK') then
+    !
+    ! Should we spread the processes in the new context,
+    ! or should we keep them close? 
+    ! 
+    if (.true.) then 
+      allocate(ids(0:np_remap-1))
+      if (np_remap <= np/2) then
+        ids(0) = 0
+        do ipdest=1,np_remap -1
+          ids(ipdest) = ids(ipdest-1) + np/np_remap
+        end do
+!!$        write(0,*) ' IDS ',ids(:) 
+      else
+        do ipdest = 0, np_remap-1
+          ids(ipdest) = ipdest
+        end do
+      end if
+      call psb_init(newctxt,np=np_remap,basectxt=ctxt,ids=ids)
+    else
+      call psb_init(newctxt,np=np_remap,basectxt=ctxt)
+    end if
+
+    call psb_info(newctxt,rme,rnp)
+!!$    write(0,*) 'Old context: ',me,np,' New context: ',rme,rnp
+    call psb_bcast(ctxt,rnp)
+    allocate(newnl(rnp),naggr(np),stat=info)
+    if (info /= 0) then
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info,name)
+      goto 9999
+    endif
+    if (rnp >= np) then 
+      write(0,*) ' No remapping on larger proc count now'
+      info = psb_err_internal_error_
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+    naggr = 0
+
+    !
+    ! Compute destination for my data.
+    ! Simplistic reallocation: divide the NP processes
+    ! across the new ones (as balanced as possible),
+    ! then send all data from old to new process
+    !
+    id2 = np/rnp
+    id1 = id2+1
+    imd = mod(np,rnp)
+    if (me < (imd*id1)) then
+      ipdest = (me/id1)
+    else
+      ipdest = ( ((me-imd*id1)/id2) +  imd)
+    end if
+    if (allocated(ids)) then
+      ipd = ids(ipdest)
+    else
+      ipd = ipdest
+    end if
+!!$    write(0,*) ' Sending my data from ',me,' to ', &
+!!$         & ipd, 'out of ',rnp,rnp-1
+
+    !
+    ! Compute local rows for all new
+    ! processes; will have a BLOCK distribution
+    ! 
+    newnl = 0
+    newnl(ipdest+1) = desc_in%get_local_rows()    
+    call psb_sum(ctxt,newnl)
+
+    if (rme>=0) then
+      ! 
+      if (rme < imd) then
+        isrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ]
+      else
+        isrc = [ (i, i=  imd*id1+((rme-imd))*id2,&
+             & min(imd*id1+(rme-imd)*id2+id2-1,np-1) ) ]            
+      end if
+!!$      write(0,*) me,rme,imd,' ISRC: ',isrc(:)
+      nsrc = size(isrc)
+!!$      write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),&
+!!$           & ' out ',desc_out%get_local_rows(),desc_out%get_global_rows()
+    else 
+!!$      write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),&
+!!$           & ' out ',0,0
+    end if
+  else
+    write(0,*) 'Right now only BLOCK on input '
+    info = psb_err_invalid_cd_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+
+  !
+  ! Collect matrices on their destinations
+  !
+  block
+    integer(psb_ipk_) :: nzsnd, nzrcv, ip
+    integer(psb_ipk_) :: nrl, ncl, nzl, nzp
+    call a_in%cp_to(acoo_snd)
+    nzsnd = acoo_snd%get_nzeros()
+    call psb_snd(ctxt,nzsnd,ipd)
+    call psb_snd(ctxt,desc_in%get_local_rows(),ipd)
+    ! Convert to global numbering
+    call psb_loc_to_glob(acoo_snd%ia(1:nzsnd),desc_in,info)
+    call psb_loc_to_glob(acoo_snd%ja(1:nzsnd),desc_in,info)
+
+    call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),ipd)
+    call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),ipd)
+    call psb_snd(ctxt,acoo_snd%val(1:nzsnd),ipd)
+
+    if (rme>=0) then
+      ! prepare to receive
+      nzsrc = isrc
+      nrsrc = isrc
+      nzl = 0
+      do ip=1, nsrc
+        call psb_rcv(ctxt,nzsrc(ip),isrc(ip))
+        call psb_rcv(ctxt,nrsrc(ip),isrc(ip))
+        nzl = nzl + nzsrc(ip)
+      end do
+!!$      write(0,*) rme,' Check on NR:',newnl(rme+1),sum(nrsrc)
+      call acoo_rcv%allocate(newnl(rme+1),newnl(rme+1),nzl)
+      nrl = acoo_rcv%get_nrows()
+      ncl = acoo_rcv%get_ncols()
+      nzp = 0
+      do ip=1, nsrc
+        call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),isrc(ip))
+        call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),isrc(ip))
+        call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),isrc(ip))
+        nzp = nzp + nzsrc(ip)
+      end do
+      call acoo_rcv%set_nzeros(nzp)
+!!$      write(0,*) rme,' Collected: ',&
+!!$           & acoo_rcv%get_nrows(),acoo_rcv%get_ncols(),acoo_rcv%get_nzeros()
+
+      !
+      !  New descriptor
+      !    
+      call psb_cdall(newctxt,desc_out,info,nl=newnl(rme+1))
+      ! Insert
+      call psb_spall(a_out,desc_out,info)
+      call psb_spins(nzp,acoo_rcv%ia(1:nzp),acoo_rcv%ja(1:nzp),&
+           & acoo_rcv%val(1:nzp),a_out,desc_out,info)      
+      ! Assemble
+      call psb_cdasb(desc_out,info)
+      call psb_spasb(a_out,desc_out,info)
+
+!!$      write(0,*) rme,' Regenerated: ',&
+!!$           & desc_out%get_local_rows(), desc_out%get_local_cols(),&
+!!$           & a_out%get_nrows(),a_out%get_ncols(),a_out%get_nzeros()
+      naggr(me+1) = desc_out%get_local_rows()
+    else
+      naggr(me+1) = 0
+    end if
+    call psb_sum(ctxt,naggr)
+    
+  end block
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+  return  
+
+end subroutine psb_d_remap
diff --git a/base/tools/psb_dspalloc.f90 b/base/tools/psb_dspalloc.f90
index cae01838..56ad6c93 100644
--- a/base/tools/psb_dspalloc.f90
+++ b/base/tools/psb_dspalloc.f90
@@ -102,7 +102,7 @@ subroutine psb_dspalloc(a, desc_a, info, nnz)
        & ':allocating size:',loc_row,loc_col,nnz_
   call a%free()
   !....allocate aspk, ia1, ia2.....
-  call a%csall(loc_row,loc_col,info,nz=nnz_)
+  call a%allocate(loc_row,loc_col,info,nz=nnz_)
   if(info /= psb_success_) then
     info=psb_err_from_subroutine_
     call psb_errpush(info,name,a_err='sp_all')
diff --git a/base/tools/psb_s_remap.F90 b/base/tools/psb_s_remap.F90
new file mode 100644
index 00000000..899c1b26
--- /dev/null
+++ b/base/tools/psb_s_remap.F90
@@ -0,0 +1,255 @@
+!   
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone    
+!        Alfredo Buttari      
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+!    
+!
+! Subroutine: psb_s_remap
+! 
+! Arguments: 
+!    desc_in  - type(psb_desc_type).       The communication descriptor to be cloned.
+!    desc_out - type(psb_desc_type).       The output communication descriptor.
+!    info     - integer.                       Return code.
+subroutine psb_s_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, &
+     & desc_out, a_out, info)
+
+  use psb_base_mod, psb_protect_name => psb_s_remap
+
+  implicit none
+  !....parameters...
+  integer(psb_ipk_), intent(in)        :: np_remap
+  type(psb_desc_type), intent(inout)   :: desc_in
+  type(psb_sspmat_type), intent(inout) :: a_in
+  type(psb_sspmat_type), intent(out)   :: a_out
+  type(psb_desc_type), intent(out)     :: desc_out
+  integer(psb_ipk_), intent(out)       :: ipd 
+  integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:)
+  integer(psb_ipk_), intent(out)       :: info
+
+
+  ! locals
+  type(psb_ctxt_type) :: ctxt, newctxt
+  integer(psb_ipk_) :: np, me, err_act
+  integer(psb_ipk_) :: rnp, rme
+  integer(psb_ipk_) :: ipdest, id1, id2, imd, i, nsrc
+  integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:), ids(:) 
+  type(psb_ls_coo_sparse_mat) :: acoo_snd, acoo_rcv
+  integer(psb_ipk_) :: debug_level, debug_unit  
+  character(len=20)   :: name
+
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  if (psb_get_errstatus() /= 0) return 
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  name = 'psb_cdcpy'
+
+  ctxt = desc_in%get_context()
+
+  ! check on blacs grid 
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_ext_) &
+       & write(debug_unit,*) me,' ',trim(name),': Entered'
+  if (np == -1) then
+    info = psb_err_context_error_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+!!$  write(0,*) ' Remapping from ',np,' onto ', np_remap
+
+  if (desc_in%get_fmt() == 'BLOCK') then
+    !
+    ! Should we spread the processes in the new context,
+    ! or should we keep them close? 
+    ! 
+    if (.true.) then 
+      allocate(ids(0:np_remap-1))
+      if (np_remap <= np/2) then
+        ids(0) = 0
+        do ipdest=1,np_remap -1
+          ids(ipdest) = ids(ipdest-1) + np/np_remap
+        end do
+!!$        write(0,*) ' IDS ',ids(:) 
+      else
+        do ipdest = 0, np_remap-1
+          ids(ipdest) = ipdest
+        end do
+      end if
+      call psb_init(newctxt,np=np_remap,basectxt=ctxt,ids=ids)
+    else
+      call psb_init(newctxt,np=np_remap,basectxt=ctxt)
+    end if
+
+    call psb_info(newctxt,rme,rnp)
+!!$    write(0,*) 'Old context: ',me,np,' New context: ',rme,rnp
+    call psb_bcast(ctxt,rnp)
+    allocate(newnl(rnp),naggr(np),stat=info)
+    if (info /= 0) then
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info,name)
+      goto 9999
+    endif
+    if (rnp >= np) then 
+      write(0,*) ' No remapping on larger proc count now'
+      info = psb_err_internal_error_
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+    naggr = 0
+
+    !
+    ! Compute destination for my data.
+    ! Simplistic reallocation: divide the NP processes
+    ! across the new ones (as balanced as possible),
+    ! then send all data from old to new process
+    !
+    id2 = np/rnp
+    id1 = id2+1
+    imd = mod(np,rnp)
+    if (me < (imd*id1)) then
+      ipdest = (me/id1)
+    else
+      ipdest = ( ((me-imd*id1)/id2) +  imd)
+    end if
+    if (allocated(ids)) then
+      ipd = ids(ipdest)
+    else
+      ipd = ipdest
+    end if
+!!$    write(0,*) ' Sending my data from ',me,' to ', &
+!!$         & ipd, 'out of ',rnp,rnp-1
+
+    !
+    ! Compute local rows for all new
+    ! processes; will have a BLOCK distribution
+    ! 
+    newnl = 0
+    newnl(ipdest+1) = desc_in%get_local_rows()    
+    call psb_sum(ctxt,newnl)
+
+    if (rme>=0) then
+      ! 
+      if (rme < imd) then
+        isrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ]
+      else
+        isrc = [ (i, i=  imd*id1+((rme-imd))*id2,&
+             & min(imd*id1+(rme-imd)*id2+id2-1,np-1) ) ]            
+      end if
+!!$      write(0,*) me,rme,imd,' ISRC: ',isrc(:)
+      nsrc = size(isrc)
+!!$      write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),&
+!!$           & ' out ',desc_out%get_local_rows(),desc_out%get_global_rows()
+    else 
+!!$      write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),&
+!!$           & ' out ',0,0
+    end if
+  else
+    write(0,*) 'Right now only BLOCK on input '
+    info = psb_err_invalid_cd_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+
+  !
+  ! Collect matrices on their destinations
+  !
+  block
+    integer(psb_ipk_) :: nzsnd, nzrcv, ip
+    integer(psb_ipk_) :: nrl, ncl, nzl, nzp
+    call a_in%cp_to(acoo_snd)
+    nzsnd = acoo_snd%get_nzeros()
+    call psb_snd(ctxt,nzsnd,ipd)
+    call psb_snd(ctxt,desc_in%get_local_rows(),ipd)
+    ! Convert to global numbering
+    call psb_loc_to_glob(acoo_snd%ia(1:nzsnd),desc_in,info)
+    call psb_loc_to_glob(acoo_snd%ja(1:nzsnd),desc_in,info)
+
+    call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),ipd)
+    call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),ipd)
+    call psb_snd(ctxt,acoo_snd%val(1:nzsnd),ipd)
+
+    if (rme>=0) then
+      ! prepare to receive
+      nzsrc = isrc
+      nrsrc = isrc
+      nzl = 0
+      do ip=1, nsrc
+        call psb_rcv(ctxt,nzsrc(ip),isrc(ip))
+        call psb_rcv(ctxt,nrsrc(ip),isrc(ip))
+        nzl = nzl + nzsrc(ip)
+      end do
+!!$      write(0,*) rme,' Check on NR:',newnl(rme+1),sum(nrsrc)
+      call acoo_rcv%allocate(newnl(rme+1),newnl(rme+1),nzl)
+      nrl = acoo_rcv%get_nrows()
+      ncl = acoo_rcv%get_ncols()
+      nzp = 0
+      do ip=1, nsrc
+        call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),isrc(ip))
+        call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),isrc(ip))
+        call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),isrc(ip))
+        nzp = nzp + nzsrc(ip)
+      end do
+      call acoo_rcv%set_nzeros(nzp)
+!!$      write(0,*) rme,' Collected: ',&
+!!$           & acoo_rcv%get_nrows(),acoo_rcv%get_ncols(),acoo_rcv%get_nzeros()
+
+      !
+      !  New descriptor
+      !    
+      call psb_cdall(newctxt,desc_out,info,nl=newnl(rme+1))
+      ! Insert
+      call psb_spall(a_out,desc_out,info)
+      call psb_spins(nzp,acoo_rcv%ia(1:nzp),acoo_rcv%ja(1:nzp),&
+           & acoo_rcv%val(1:nzp),a_out,desc_out,info)      
+      ! Assemble
+      call psb_cdasb(desc_out,info)
+      call psb_spasb(a_out,desc_out,info)
+
+!!$      write(0,*) rme,' Regenerated: ',&
+!!$           & desc_out%get_local_rows(), desc_out%get_local_cols(),&
+!!$           & a_out%get_nrows(),a_out%get_ncols(),a_out%get_nzeros()
+      naggr(me+1) = desc_out%get_local_rows()
+    else
+      naggr(me+1) = 0
+    end if
+    call psb_sum(ctxt,naggr)
+    
+  end block
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+  return  
+
+end subroutine psb_s_remap
diff --git a/base/tools/psb_sspalloc.f90 b/base/tools/psb_sspalloc.f90
index 10e37d58..15c3c538 100644
--- a/base/tools/psb_sspalloc.f90
+++ b/base/tools/psb_sspalloc.f90
@@ -102,7 +102,7 @@ subroutine psb_sspalloc(a, desc_a, info, nnz)
        & ':allocating size:',loc_row,loc_col,nnz_
   call a%free()
   !....allocate aspk, ia1, ia2.....
-  call a%csall(loc_row,loc_col,info,nz=nnz_)
+  call a%allocate(loc_row,loc_col,info,nz=nnz_)
   if(info /= psb_success_) then
     info=psb_err_from_subroutine_
     call psb_errpush(info,name,a_err='sp_all')
diff --git a/base/tools/psb_z_remap.F90 b/base/tools/psb_z_remap.F90
new file mode 100644
index 00000000..f9c5c39c
--- /dev/null
+++ b/base/tools/psb_z_remap.F90
@@ -0,0 +1,255 @@
+!   
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone    
+!        Alfredo Buttari      
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+!    
+!
+! Subroutine: psb_z_remap
+! 
+! Arguments: 
+!    desc_in  - type(psb_desc_type).       The communication descriptor to be cloned.
+!    desc_out - type(psb_desc_type).       The output communication descriptor.
+!    info     - integer.                       Return code.
+subroutine psb_z_remap(np_remap, desc_in, a_in, ipd, isrc, nrsrc, naggr, &
+     & desc_out, a_out, info)
+
+  use psb_base_mod, psb_protect_name => psb_z_remap
+
+  implicit none
+  !....parameters...
+  integer(psb_ipk_), intent(in)        :: np_remap
+  type(psb_desc_type), intent(inout)   :: desc_in
+  type(psb_zspmat_type), intent(inout) :: a_in
+  type(psb_zspmat_type), intent(out)   :: a_out
+  type(psb_desc_type), intent(out)     :: desc_out
+  integer(psb_ipk_), intent(out)       :: ipd 
+  integer(psb_ipk_), allocatable, intent(out) :: isrc(:), nrsrc(:), naggr(:)
+  integer(psb_ipk_), intent(out)       :: info
+
+
+  ! locals
+  type(psb_ctxt_type) :: ctxt, newctxt
+  integer(psb_ipk_) :: np, me, err_act
+  integer(psb_ipk_) :: rnp, rme
+  integer(psb_ipk_) :: ipdest, id1, id2, imd, i, nsrc
+  integer(psb_ipk_), allocatable :: newnl(:), nzsrc(:), ids(:) 
+  type(psb_lz_coo_sparse_mat) :: acoo_snd, acoo_rcv
+  integer(psb_ipk_) :: debug_level, debug_unit  
+  character(len=20)   :: name
+
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+
+  if (psb_get_errstatus() /= 0) return 
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  name = 'psb_cdcpy'
+
+  ctxt = desc_in%get_context()
+
+  ! check on blacs grid 
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_ext_) &
+       & write(debug_unit,*) me,' ',trim(name),': Entered'
+  if (np == -1) then
+    info = psb_err_context_error_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+!!$  write(0,*) ' Remapping from ',np,' onto ', np_remap
+
+  if (desc_in%get_fmt() == 'BLOCK') then
+    !
+    ! Should we spread the processes in the new context,
+    ! or should we keep them close? 
+    ! 
+    if (.true.) then 
+      allocate(ids(0:np_remap-1))
+      if (np_remap <= np/2) then
+        ids(0) = 0
+        do ipdest=1,np_remap -1
+          ids(ipdest) = ids(ipdest-1) + np/np_remap
+        end do
+!!$        write(0,*) ' IDS ',ids(:) 
+      else
+        do ipdest = 0, np_remap-1
+          ids(ipdest) = ipdest
+        end do
+      end if
+      call psb_init(newctxt,np=np_remap,basectxt=ctxt,ids=ids)
+    else
+      call psb_init(newctxt,np=np_remap,basectxt=ctxt)
+    end if
+
+    call psb_info(newctxt,rme,rnp)
+!!$    write(0,*) 'Old context: ',me,np,' New context: ',rme,rnp
+    call psb_bcast(ctxt,rnp)
+    allocate(newnl(rnp),naggr(np),stat=info)
+    if (info /= 0) then
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info,name)
+      goto 9999
+    endif
+    if (rnp >= np) then 
+      write(0,*) ' No remapping on larger proc count now'
+      info = psb_err_internal_error_
+      call psb_errpush(info,name)
+      goto 9999
+    end if
+    naggr = 0
+
+    !
+    ! Compute destination for my data.
+    ! Simplistic reallocation: divide the NP processes
+    ! across the new ones (as balanced as possible),
+    ! then send all data from old to new process
+    !
+    id2 = np/rnp
+    id1 = id2+1
+    imd = mod(np,rnp)
+    if (me < (imd*id1)) then
+      ipdest = (me/id1)
+    else
+      ipdest = ( ((me-imd*id1)/id2) +  imd)
+    end if
+    if (allocated(ids)) then
+      ipd = ids(ipdest)
+    else
+      ipd = ipdest
+    end if
+!!$    write(0,*) ' Sending my data from ',me,' to ', &
+!!$         & ipd, 'out of ',rnp,rnp-1
+
+    !
+    ! Compute local rows for all new
+    ! processes; will have a BLOCK distribution
+    ! 
+    newnl = 0
+    newnl(ipdest+1) = desc_in%get_local_rows()    
+    call psb_sum(ctxt,newnl)
+
+    if (rme>=0) then
+      ! 
+      if (rme < imd) then
+        isrc = [ (i, i=rme*id1,min(rme*id1+id1-1,np-1)) ]
+      else
+        isrc = [ (i, i=  imd*id1+((rme-imd))*id2,&
+             & min(imd*id1+(rme-imd)*id2+id2-1,np-1) ) ]            
+      end if
+!!$      write(0,*) me,rme,imd,' ISRC: ',isrc(:)
+      nsrc = size(isrc)
+!!$      write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),&
+!!$           & ' out ',desc_out%get_local_rows(),desc_out%get_global_rows()
+    else 
+!!$      write(0,*) me,rme,'In ',desc_in%get_local_rows(),desc_in%get_global_rows(),&
+!!$           & ' out ',0,0
+    end if
+  else
+    write(0,*) 'Right now only BLOCK on input '
+    info = psb_err_invalid_cd_state_
+    call psb_errpush(info,name)
+    goto 9999
+  endif
+
+
+  !
+  ! Collect matrices on their destinations
+  !
+  block
+    integer(psb_ipk_) :: nzsnd, nzrcv, ip
+    integer(psb_ipk_) :: nrl, ncl, nzl, nzp
+    call a_in%cp_to(acoo_snd)
+    nzsnd = acoo_snd%get_nzeros()
+    call psb_snd(ctxt,nzsnd,ipd)
+    call psb_snd(ctxt,desc_in%get_local_rows(),ipd)
+    ! Convert to global numbering
+    call psb_loc_to_glob(acoo_snd%ia(1:nzsnd),desc_in,info)
+    call psb_loc_to_glob(acoo_snd%ja(1:nzsnd),desc_in,info)
+
+    call psb_snd(ctxt,acoo_snd%ia(1:nzsnd),ipd)
+    call psb_snd(ctxt,acoo_snd%ja(1:nzsnd),ipd)
+    call psb_snd(ctxt,acoo_snd%val(1:nzsnd),ipd)
+
+    if (rme>=0) then
+      ! prepare to receive
+      nzsrc = isrc
+      nrsrc = isrc
+      nzl = 0
+      do ip=1, nsrc
+        call psb_rcv(ctxt,nzsrc(ip),isrc(ip))
+        call psb_rcv(ctxt,nrsrc(ip),isrc(ip))
+        nzl = nzl + nzsrc(ip)
+      end do
+!!$      write(0,*) rme,' Check on NR:',newnl(rme+1),sum(nrsrc)
+      call acoo_rcv%allocate(newnl(rme+1),newnl(rme+1),nzl)
+      nrl = acoo_rcv%get_nrows()
+      ncl = acoo_rcv%get_ncols()
+      nzp = 0
+      do ip=1, nsrc
+        call psb_rcv(ctxt,acoo_rcv%ia(nzp+1:nzp+nzsrc(ip)),isrc(ip))
+        call psb_rcv(ctxt,acoo_rcv%ja(nzp+1:nzp+nzsrc(ip)),isrc(ip))
+        call psb_rcv(ctxt,acoo_rcv%val(nzp+1:nzp+nzsrc(ip)),isrc(ip))
+        nzp = nzp + nzsrc(ip)
+      end do
+      call acoo_rcv%set_nzeros(nzp)
+!!$      write(0,*) rme,' Collected: ',&
+!!$           & acoo_rcv%get_nrows(),acoo_rcv%get_ncols(),acoo_rcv%get_nzeros()
+
+      !
+      !  New descriptor
+      !    
+      call psb_cdall(newctxt,desc_out,info,nl=newnl(rme+1))
+      ! Insert
+      call psb_spall(a_out,desc_out,info)
+      call psb_spins(nzp,acoo_rcv%ia(1:nzp),acoo_rcv%ja(1:nzp),&
+           & acoo_rcv%val(1:nzp),a_out,desc_out,info)      
+      ! Assemble
+      call psb_cdasb(desc_out,info)
+      call psb_spasb(a_out,desc_out,info)
+
+!!$      write(0,*) rme,' Regenerated: ',&
+!!$           & desc_out%get_local_rows(), desc_out%get_local_cols(),&
+!!$           & a_out%get_nrows(),a_out%get_ncols(),a_out%get_nzeros()
+      naggr(me+1) = desc_out%get_local_rows()
+    else
+      naggr(me+1) = 0
+    end if
+    call psb_sum(ctxt,naggr)
+    
+  end block
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+  return  
+
+end subroutine psb_z_remap
diff --git a/base/tools/psb_zspalloc.f90 b/base/tools/psb_zspalloc.f90
index 823fee7a..16d48734 100644
--- a/base/tools/psb_zspalloc.f90
+++ b/base/tools/psb_zspalloc.f90
@@ -102,7 +102,7 @@ subroutine psb_zspalloc(a, desc_a, info, nnz)
        & ':allocating size:',loc_row,loc_col,nnz_
   call a%free()
   !....allocate aspk, ia1, ia2.....
-  call a%csall(loc_row,loc_col,info,nz=nnz_)
+  call a%allocate(loc_row,loc_col,info,nz=nnz_)
   if(info /= psb_success_) then
     info=psb_err_from_subroutine_
     call psb_errpush(info,name,a_err='sp_all')
diff --git a/config/pac.m4 b/config/pac.m4
index 46a56b68..ecf8e475 100644
--- a/config/pac.m4
+++ b/config/pac.m4
@@ -363,6 +363,45 @@ fi
 ]
 )
 
+dnl @synopsis PAC_ARG_OPENMP
+dnl
+dnl Test for --enable-openmp
+dnl 
+dnl 
+dnl
+dnl Example use:
+dnl
+dnl
+dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
+dnl
+AC_DEFUN([PAC_ARG_OPENMP],
+[AC_MSG_CHECKING([whether we want openmp ])
+AC_ARG_ENABLE(openmp,
+AC_HELP_STRING([--enable-openmp], 
+[Specify whether to enable openmp. ]),
+[
+pac_cv_openmp="yes";
+]
+dnl ,
+dnl [pac_cv_openmp="no";]
+	     )
+if test x"$pac_cv_openmp" == x"yes" ; then
+   AC_MSG_RESULT([yes.])
+   AC_LANG_PUSH([Fortran])
+   AC_OPENMP() 
+   pac_cv_openmp_fcopt="$OPENMP_FCFLAGS";
+   AC_LANG_POP()
+   AC_LANG_PUSH([C])
+   AC_OPENMP() 
+   pac_cv_openmp_ccopt="$OPENMP_CFLAGS";
+   AC_LANG_POP()
+else
+ pac_cv_openmp="no";
+ AC_MSG_RESULT([no.])
+fi
+]
+)
+
 dnl @synopsis PAC_ARG_LONG_INTEGERS
 dnl
 dnl Test for --enable-long-integers
diff --git a/configure b/configure
index e8688547..e70bbfb7 100755
--- a/configure
+++ b/configure
@@ -663,6 +663,8 @@ BASEMODNAME
 CDEFINES
 FDEFINES
 LAPACK_LIBS
+OPENMP_CFLAGS
+OPENMP_FCFLAGS
 EGREP
 GREP
 CPP
@@ -771,6 +773,7 @@ enable_dependency_tracking
 enable_silent_rules
 with_ipk
 with_lpk
+enable_openmp
 with_blas
 with_blasdir
 with_lapack
@@ -1420,6 +1423,8 @@ Optional Features:
                           speeds up one-time build
   --enable-silent-rules   less verbose build output (undo: "make V=1")
   --disable-silent-rules  verbose build output (undo: "make V=0")
+  --enable-openmp         Specify whether to enable openmp.
+  --disable-openmp        do not use OpenMP
 
 Optional Packages:
   --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
@@ -6890,7 +6895,191 @@ FDEFINES="$psblas_cv_define_prepend-DIPK${pac_cv_ipk_size} $FDEFINES";
 FDEFINES="$psblas_cv_define_prepend-DLPK${pac_cv_lpk_size} $FDEFINES";
 CDEFINES="-DIPK${pac_cv_ipk_size} -DLPK${pac_cv_lpk_size} $CDEFINES"
 
+FLINK="$MPIFC"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we want openmp " >&5
+$as_echo_n "checking whether we want openmp ... " >&6; }
+# Check whether --enable-openmp was given.
+if test "${enable_openmp+set}" = set; then :
+  enableval=$enable_openmp;
+pac_cv_openmp="yes";
 
+
+fi
+
+if test x"$pac_cv_openmp" == x"yes" ; then
+   { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes." >&5
+$as_echo "yes." >&6; }
+   ac_ext=${ac_fc_srcext-f}
+ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
+ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_fc_compiler_gnu
+
+
+  OPENMP_FCFLAGS=
+  # Check whether --enable-openmp was given.
+if test "${enable_openmp+set}" = set; then :
+  enableval=$enable_openmp;
+fi
+
+  if test "$enable_openmp" != no; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $FC option to support OpenMP" >&5
+$as_echo_n "checking for $FC option to support OpenMP... " >&6; }
+if ${ac_cv_prog_fc_openmp+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat > conftest.$ac_ext <<_ACEOF
+
+      program main
+      implicit none
+!$    integer tid
+      tid = 42
+      call omp_set_num_threads(2)
+      end
+
+_ACEOF
+if ac_fn_fc_try_link "$LINENO"; then :
+  ac_cv_prog_fc_openmp='none needed'
+else
+  ac_cv_prog_fc_openmp='unsupported'
+	  	  	  	  	  	  	                                	  	  	  	  	  	  for ac_option in -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \
+                           -Popenmp --openmp; do
+	    ac_save_FCFLAGS=$FCFLAGS
+	    FCFLAGS="$FCFLAGS $ac_option"
+	    cat > conftest.$ac_ext <<_ACEOF
+
+      program main
+      implicit none
+!$    integer tid
+      tid = 42
+      call omp_set_num_threads(2)
+      end
+
+_ACEOF
+if ac_fn_fc_try_link "$LINENO"; then :
+  ac_cv_prog_fc_openmp=$ac_option
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+	    FCFLAGS=$ac_save_FCFLAGS
+	    if test "$ac_cv_prog_fc_openmp" != unsupported; then
+	      break
+	    fi
+	  done
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_fc_openmp" >&5
+$as_echo "$ac_cv_prog_fc_openmp" >&6; }
+    case $ac_cv_prog_fc_openmp in #(
+      "none needed" | unsupported)
+	;; #(
+      *)
+	OPENMP_FCFLAGS=$ac_cv_prog_fc_openmp ;;
+    esac
+  fi
+
+
+   pac_cv_openmp_fcopt="$OPENMP_FCFLAGS";
+   ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+   ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+  OPENMP_CFLAGS=
+  # Check whether --enable-openmp was given.
+if test "${enable_openmp+set}" = set; then :
+  enableval=$enable_openmp;
+fi
+
+  if test "$enable_openmp" != no; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to support OpenMP" >&5
+$as_echo_n "checking for $CC option to support OpenMP... " >&6; }
+if ${ac_cv_prog_c_openmp+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#ifndef _OPENMP
+ choke me
+#endif
+#include <omp.h>
+int main () { return omp_get_num_threads (); }
+
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_prog_c_openmp='none needed'
+else
+  ac_cv_prog_c_openmp='unsupported'
+	  	  	  	  	  	  	                                	  	  	  	  	  	  for ac_option in -fopenmp -xopenmp -openmp -mp -omp -qsmp=omp -homp \
+                           -Popenmp --openmp; do
+	    ac_save_CFLAGS=$CFLAGS
+	    CFLAGS="$CFLAGS $ac_option"
+	    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#ifndef _OPENMP
+ choke me
+#endif
+#include <omp.h>
+int main () { return omp_get_num_threads (); }
+
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_prog_c_openmp=$ac_option
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+	    CFLAGS=$ac_save_CFLAGS
+	    if test "$ac_cv_prog_c_openmp" != unsupported; then
+	      break
+	    fi
+	  done
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_c_openmp" >&5
+$as_echo "$ac_cv_prog_c_openmp" >&6; }
+    case $ac_cv_prog_c_openmp in #(
+      "none needed" | unsupported)
+	;; #(
+      *)
+	OPENMP_CFLAGS=$ac_cv_prog_c_openmp ;;
+    esac
+  fi
+
+
+   pac_cv_openmp_ccopt="$OPENMP_CFLAGS";
+   ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+else
+ pac_cv_openmp="no";
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no." >&5
+$as_echo "no." >&6; }
+fi
+
+
+if test x"$pac_cv_openmp" == x"yes" ; then
+   FDEFINES="$psblas_cv_define_prepend-DOPENMP $FDEFINES";
+   CDEFINES="-DOPENMP $CDEFINES";
+   FCOPT="$FCOPT $pac_cv_openmp_fcopt";
+   CCOPT="$CCOPT $pac_cv_openmp_ccopt";
+   FLINK="$FLINK $pac_cv_openmp_fcopt";
+fi
 #
 # Tests for support of various Fortran features; some of them are critical,
 # some optional
diff --git a/configure.ac b/configure.ac
index a2650489..cc354ba4 100755
--- a/configure.ac
+++ b/configure.ac
@@ -498,7 +498,15 @@ FDEFINES="$psblas_cv_define_prepend-DIPK${pac_cv_ipk_size} $FDEFINES";
 FDEFINES="$psblas_cv_define_prepend-DLPK${pac_cv_lpk_size} $FDEFINES";	
 CDEFINES="-DIPK${pac_cv_ipk_size} -DLPK${pac_cv_lpk_size} $CDEFINES"
 
-
+FLINK="$MPIFC"
+PAC_ARG_OPENMP()
+if test x"$pac_cv_openmp" == x"yes" ; then
+   FDEFINES="$psblas_cv_define_prepend-DOPENMP $FDEFINES";
+   CDEFINES="-DOPENMP $CDEFINES";	
+   FCOPT="$FCOPT $pac_cv_openmp_fcopt";
+   CCOPT="$CCOPT $pac_cv_openmp_ccopt";
+   FLINK="$FLINK $pac_cv_openmp_fcopt";
+fi
 #
 # Tests for support of various Fortran features; some of them are critical,
 # some optional
diff --git a/docs/src/datastruct.tex b/docs/src/datastruct.tex
index 613b54ee..16e02f2f 100644
--- a/docs/src/datastruct.tex
+++ b/docs/src/datastruct.tex
@@ -64,7 +64,7 @@ First we describe the \verb|psb_indx_map| type. This is a data
 structure that keeps track of a certain number of basic issues such
 as:
 \begin{itemize}
-\item The value of the communication/MPI context;
+\item The value of the communication context;
 \item The number of indices in the index space, i.e. global number of
   rows and columns of a sparse matrix;
 \item The local set of indices, including:
@@ -309,7 +309,7 @@ Type: {\bf optional}; default: \verb|.true.|.\\
 \subsubsection{get\_context --- Get communication context}
 
 \begin{verbatim}
-ictxt = desc%get_context()
+ctxt = desc%get_context()
 \end{verbatim}
 
 \begin{description}
diff --git a/docs/src/penv.tex b/docs/src/penv.tex
index 9465c8b3..9b019187 100644
--- a/docs/src/penv.tex
+++ b/docs/src/penv.tex
@@ -7,7 +7,7 @@
   environment}
 
 \begin{verbatim}
-call psb_init(icontxt, np, basectxt, ids)
+call psb_init(ctxt, np, basectxt, ids)
 \end{verbatim}
 
 This subroutine initializes the PSBLAS parallel environment, defining
@@ -41,8 +41,8 @@ Default: use the indices $(0\dots np-1)$.
 
 \begin{description}
 \item[\bf On Return]
-\item[icontxt] the communication context identifying the virtual
-  parallel machine. Note that this is always a duplicate of
+\item[ctxt] the communication context identifying the virtual
+  parallel machine, type \verb|psb_ctxt_type|. Note that this is always a duplicate of
   \verb|basectxt|, so that library communications are completely
   separated from other communication operations.\\
 Scope: {\bf global}.\\
@@ -65,7 +65,7 @@ Specified as: an integer variable.
   environment}
 
 \begin{verbatim}
-call psb_info(icontxt, iam, np)
+call psb_info(ctxt, iam, np)
 \end{verbatim}
 
 This subroutine returns information about  the PSBLAS parallel environment, defining
@@ -73,7 +73,7 @@ a virtual parallel machine.
 \begin{description}
 \item[Type:] Asynchronous.
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
@@ -103,7 +103,7 @@ Specified as: an integer variable. \
 \item If the user has requested on \verb|psb_init| a number of
   processes less than the total available in the parallel execution
   environment, the remaining processes will have on return $iam=-1$;
-  the only call involving \verb|icontxt| that any such process may
+  the only call involving \verb|ctxt| that any such process may
   execute is  to \verb|psb_exit|. 
 \end{enumerate}
 
@@ -112,22 +112,22 @@ Specified as: an integer variable. \
   environment}
 
 \begin{verbatim}
-call psb_exit(icontxt)
-call psb_exit(icontxt,close)
+call psb_exit(ctxt)
+call psb_exit(ctxt,close)
 \end{verbatim}
 
 This subroutine exits from the  PSBLAS parallel virtual  machine.
 \begin{description}
 \item[Type:] Synchronous.
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
 Intent: {\bf in}.\\
 Specified as: an integer variable.
 \item[close] Whether to close all data structures related to the
-  virtual parallel machine, besides those associated with icontxt.\\
+  virtual parallel machine, besides those associated with ctxt.\\
 Scope: {\bf global}.\\
 Type: {\bf optional}.\\
 Intent: {\bf in}.\\
@@ -138,7 +138,7 @@ Specified as: a logical  variable, default value: true.
 \begin{enumerate}
 \item This routine may be called even if a previous call to
   \verb|psb_info| has returned with $iam=-1$; indeed, it it is the only
-  routine that may be called with argument \verb|icontxt| in this
+  routine that may be called with argument \verb|ctxt| in this
   situation.
 \item A call to this routine with \verb|close=.true.| implies a call
   to \verb|MPI_Finalize|, after which no parallel routine may be called.
@@ -154,14 +154,14 @@ Specified as: a logical  variable, default value: true.
 \clearpage\subsection{psb\_get\_mpi\_comm --- Get the MPI communicator}
 
 \begin{verbatim}
-icomm = psb_get_mpi_comm(icontxt)
+icomm = psb_get_mpi_comm(ctxt)
 \end{verbatim}
 
 This function returns the MPI communicator associated with a PSBLAS context
 \begin{description}
 \item[Type:] Asynchronous.
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
@@ -184,14 +184,14 @@ is deprecated.
 \clearpage\subsection{psb\_get\_mpi\_rank --- Get the MPI rank}
 
 \begin{verbatim}
-rank = psb_get_mpi_rank(icontxt, id)
+rank = psb_get_mpi_rank(ctxt, id)
 \end{verbatim}
 
 This function returns the MPI rank of the  PSBLAS process $id$
 \begin{description}
 \item[Type:] Asynchronous.
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
@@ -238,7 +238,7 @@ Returned  as: a  \verb|real(psb_dpk_)|  variable.
   environment}
 
 \begin{verbatim}
-call psb_barrier(icontxt)
+call psb_barrier(ctxt)
 \end{verbatim}
 
 This subroutine acts as an explicit synchronization point for  the  PSBLAS
@@ -246,7 +246,7 @@ parallel virtual  machine.
 \begin{description}
 \item[Type:] Synchronous.
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
@@ -258,14 +258,14 @@ Specified as: an integer variable.
 \clearpage\subsection{psb\_abort --- Abort a computation}
 
 \begin{verbatim}
-call psb_abort(icontxt)
+call psb_abort(ctxt)
 \end{verbatim}
 
 This subroutine aborts computation on the parallel virtual machine. 
 \begin{description}
 \item[Type:] Asynchronous.
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
@@ -280,7 +280,7 @@ Specified as: an integer variable.
 \clearpage\subsection{psb\_bcast --- Broadcast data}
 
 \begin{verbatim}
-call psb_bcast(icontxt, dat, root)
+call psb_bcast(ctxt, dat, root)
 \end{verbatim}
 
 This subroutine implements a broadcast operation based on the
@@ -288,7 +288,7 @@ underlying communication library.
 \begin{description}
 \item[Type:] Synchronous.
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
@@ -325,7 +325,7 @@ Type, kind, rank and size must agree on all processes.
 \clearpage\subsection{psb\_sum --- Global sum}
 
 \begin{verbatim}
-call psb_sum(icontxt, dat, root)
+call psb_sum(ctxt, dat, root)
 \end{verbatim}
 
 This subroutine implements a sum reduction  operation based on the
@@ -333,7 +333,7 @@ underlying communication library.
 \begin{description}
 \item[Type:] Synchronous.
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
@@ -379,7 +379,7 @@ Type, kind, rank and size must agree on all processes.
 \clearpage\subsection{psb\_max --- Global maximum}
 
 \begin{verbatim}
-call psb_max(icontxt, dat, root)
+call psb_max(ctxt, dat, root)
 \end{verbatim}
 
 This subroutine implements a maximum valuereduction
@@ -387,7 +387,7 @@ operation based on the underlying communication library.
 \begin{description}
 \item[Type:] Synchronous.
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
@@ -432,7 +432,7 @@ Type, kind, rank and size must agree on all processes.
 \clearpage\subsection{psb\_min --- Global minimum}
 
 \begin{verbatim}
-call psb_min(icontxt, dat, root)
+call psb_min(ctxt, dat, root)
 \end{verbatim}
 
 This subroutine implements a minimum value reduction
@@ -440,7 +440,7 @@ operation based on the underlying communication library.
 \begin{description}
 \item[Type:] Synchronous.
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
@@ -485,7 +485,7 @@ Type, kind, rank and size must agree on all processes.
 \clearpage\subsection{psb\_amx --- Global maximum absolute value}
 
 \begin{verbatim}
-call psb_amx(icontxt, dat, root)
+call psb_amx(ctxt, dat, root)
 \end{verbatim}
 
 This subroutine implements a maximum absolute value reduction
@@ -493,7 +493,7 @@ operation based on the underlying communication library.
 \begin{description}
 \item[Type:] Synchronous.
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
@@ -538,7 +538,7 @@ Type, kind, rank and size must agree on all processes.
 \clearpage\subsection{psb\_amn --- Global minimum absolute value}
 
 \begin{verbatim}
-call psb_amn(icontxt, dat, root)
+call psb_amn(ctxt, dat, root)
 \end{verbatim}
 
 This subroutine implements a minimum absolute value reduction
@@ -546,7 +546,7 @@ operation based on the underlying communication library.
 \begin{description}
 \item[Type:] Synchronous.
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
@@ -591,7 +591,7 @@ Type, kind, rank and size must agree on all processes.
 \clearpage\subsection{psb\_nrm2 --- Global 2-norm reduction}
 
 \begin{verbatim}
-call psb_nrm2(icontxt, dat, root)
+call psb_nrm2(ctxt, dat, root)
 \end{verbatim}
 
 This subroutine implements a 2-norm value reduction 
@@ -599,7 +599,7 @@ operation based on the underlying communication library.
 \begin{description}
 \item[Type:] Synchronous.
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
@@ -651,14 +651,14 @@ Kind, rank and size must agree on all processes.
 \clearpage\subsection{psb\_snd --- Send data}
 
 \begin{verbatim}
-call psb_snd(icontxt, dat, dst, m)
+call psb_snd(ctxt, dat, dst, m)
 \end{verbatim}
 
 This subroutine sends a packet of data to a destination.
 \begin{description}
 \item[Type:] Synchronous: see usage notes. 
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
@@ -702,14 +702,14 @@ same value on sending and receiving processes.
 \clearpage\subsection{psb\_rcv --- Receive data}
 
 \begin{verbatim}
-call psb_rcv(icontxt, dat, src, m)
+call psb_rcv(ctxt, dat, src, m)
 \end{verbatim}
 
 This subroutine receives a packet of data to a destination.
 \begin{description}
 \item[Type:] Synchronous: see usage notes.
 \item[\bf  On Entry ]
-\item[icontxt] the communication context identifying the virtual
+\item[ctxt] the communication context identifying the virtual
   parallel machine.\\
 Scope: {\bf global}.\\
 Type: {\bf required}.\\
diff --git a/docs/src/psbrout.tex b/docs/src/psbrout.tex
index fbe0d3da..37f84ec8 100644
--- a/docs/src/psbrout.tex
+++ b/docs/src/psbrout.tex
@@ -215,7 +215,7 @@ An integer value; 0 means no error has been detected.
     vres(1) = psb_gedot(x1,y1,desc_a,info,global=.false.)
     vres(2) = psb_gedot(x2,y2,desc_a,info,global=.false.)
     vres(3) = psb_gedot(x3,y3,desc_a,info,global=.false.)
-    call psb_sum(ictxt,vres(1:3))
+    call psb_sum(ctxt,vres(1:3))
   \end{lstlisting}
   In this way the global communication, which for small sizes is a
   latency-bound operation, is invoked only once.
@@ -391,7 +391,7 @@ An integer value; 0 means no error has been detected.
     vres(1) = psb_geamax(x1,desc_a,info,global=.false.)
     vres(2) = psb_geamax(x2,desc_a,info,global=.false.)
     vres(3) = psb_geamax(x3,desc_a,info,global=.false.)
-    call psb_amx(ictxt,vres(1:3))
+    call psb_amx(ctxt,vres(1:3))
   \end{lstlisting}
   In this way the global communication, which for small sizes is a
   latency-bound operation, is invoked only once.
@@ -544,7 +544,7 @@ An integer value; 0 means no error has been detected.
     vres(1) = psb_geasum(x1,desc_a,info,global=.false.)
     vres(2) = psb_geasum(x2,desc_a,info,global=.false.)
     vres(3) = psb_geasum(x3,desc_a,info,global=.false.)
-    call psb_sum(ictxt,vres(1:3))
+    call psb_sum(ctxt,vres(1:3))
   \end{lstlisting}
   In this way the global communication, which for small sizes is a
   latency-bound operation, is invoked only once.
@@ -714,7 +714,7 @@ An integer value; 0 means no error has been detected.
     vres(1) = psb_genrm2(x1,desc_a,info,global=.false.)
     vres(2) = psb_genrm2(x2,desc_a,info,global=.false.)
     vres(3) = psb_genrm2(x3,desc_a,info,global=.false.)
-    call psb_nrm2(ictxt,vres(1:3))
+    call psb_nrm2(ctxt,vres(1:3))
   \end{lstlisting}
   In this way the global communication, which for small sizes is a
   latency-bound operation, is invoked only once.
diff --git a/prec/Makefile b/prec/Makefile
index e3b727b7..ec5892fe 100644
--- a/prec/Makefile
+++ b/prec/Makefile
@@ -12,12 +12,20 @@ MODOBJS=psb_prec_const_mod.o\
 	psb_d_diagprec.o psb_d_nullprec.o psb_d_bjacprec.o psb_s_ilu_fact_mod.o \
 	psb_s_diagprec.o psb_s_nullprec.o psb_s_bjacprec.o psb_d_ilu_fact_mod.o \
 	psb_c_diagprec.o psb_c_nullprec.o psb_c_bjacprec.o psb_c_ilu_fact_mod.o \
-	psb_z_diagprec.o psb_z_nullprec.o psb_z_bjacprec.o psb_z_ilu_fact_mod.o 
-
+	psb_z_diagprec.o psb_z_nullprec.o psb_z_bjacprec.o psb_z_ilu_fact_mod.o \
+	psb_c_ainv_fact_mod.o psb_d_ainv_fact_mod.o psb_s_ainv_fact_mod.o psb_z_ainv_fact_mod.o \
+	psb_c_ainv_tools_mod.o psb_d_ainv_tools_mod.o psb_s_ainv_tools_mod.o psb_z_ainv_tools_mod.o \
+	psb_ainv_tools_mod.o \
+	psb_biconjg_mod.o psb_c_biconjg_mod.o psb_d_biconjg_mod.o psb_s_biconjg_mod.o \
+	psb_z_biconjg_mod.o \
+	psb_c_invt_fact_mod.o psb_d_invt_fact_mod.o psb_s_invt_fact_mod.o \
+	psb_z_invt_fact_mod.o\
+	psb_c_invk_fact_mod.o psb_d_invk_fact_mod.o psb_s_invk_fact_mod.o \
+	psb_z_invk_fact_mod.o
 
 LIBNAME=$(PRECLIBNAME)
 COBJS=
-FINCLUDES=$(FMFLAG). $(FMFLAG)$(MODDIR) 
+FINCLUDES=$(FMFLAG). $(FMFLAG)$(MODDIR)
 OBJS=$(F90OBJS) $(COBJS) $(MPFOBJS) $(MODOBJS)
 
 lib:  $(OBJS) impld
@@ -43,15 +51,30 @@ psb_d_prec_mod.o:  psb_prec_type.o
 psb_c_prec_mod.o:  psb_prec_type.o
 psb_z_prec_mod.o:  psb_prec_type.o
 psb_prec_type.o: psb_s_prec_type.o psb_d_prec_type.o psb_c_prec_type.o psb_z_prec_type.o
-psb_prec_mod.o: psb_s_prec_mod.o psb_d_prec_mod.o psb_c_prec_mod.o psb_z_prec_mod.o 
-psb_s_bjacprec.o psb_s_diagprec.o psb_s_nullprec.o: psb_prec_mod.o psb_s_base_prec_mod.o 
-psb_d_bjacprec.o psb_d_diagprec.o psb_d_nullprec.o: psb_prec_mod.o psb_d_base_prec_mod.o 
-psb_c_bjacprec.o psb_c_diagprec.o psb_c_nullprec.o: psb_prec_mod.o psb_c_base_prec_mod.o 
+psb_prec_mod.o: psb_s_prec_mod.o psb_d_prec_mod.o psb_c_prec_mod.o psb_z_prec_mod.o
+psb_s_bjacprec.o psb_s_diagprec.o psb_s_nullprec.o: psb_prec_mod.o psb_s_base_prec_mod.o
+psb_d_bjacprec.o psb_d_diagprec.o psb_d_nullprec.o: psb_prec_mod.o psb_d_base_prec_mod.o
+psb_c_bjacprec.o psb_c_diagprec.o psb_c_nullprec.o: psb_prec_mod.o psb_c_base_prec_mod.o
 psb_z_bjacprec.o psb_z_diagprec.o psb_z_nullprec.o: psb_prec_mod.o psb_z_base_prec_mod.o
-psb_s_bjacprec.o: psb_s_ilu_fact_mod.o 
-psb_d_bjacprec.o: psb_d_ilu_fact_mod.o 
-psb_c_bjacprec.o: psb_c_ilu_fact_mod.o 
-psb_z_bjacprec.o: psb_z_ilu_fact_mod.o 
+psb_s_bjacprec.o: psb_s_ilu_fact_mod.o psb_s_ainv_fact_mod.o psb_s_invk_fact_mod.o psb_s_invt_fact_mod.o
+psb_d_bjacprec.o: psb_d_ilu_fact_mod.o psb_d_ainv_fact_mod.o psb_d_invk_fact_mod.o psb_d_invt_fact_mod.o
+psb_c_bjacprec.o: psb_c_ilu_fact_mod.o psb_c_ainv_fact_mod.o psb_c_invk_fact_mod.o psb_c_invt_fact_mod.o
+psb_z_bjacprec.o: psb_z_ilu_fact_mod.o psb_z_ainv_fact_mod.o psb_z_invk_fact_mod.o psb_z_invt_fact_mod.o
+psb_d_ainv_fact_mod.o: psb_prec_const_mod.o psb_ainv_tools_mod.o
+psb_s_ainv_fact_mod.o: psb_prec_const_mod.o psb_ainv_tools_mod.o
+psb_c_ainv_fact_mod.o: psb_prec_const_mod.o psb_ainv_tools_mod.o
+psb_z_ainv_fact_mod.o: psb_prec_const_mod.o psb_ainv_tools_mod.o
+psb_ainv_tools_mod.o: psb_c_ainv_tools_mod.o psb_d_ainv_tools_mod.o psb_s_ainv_tools_mod.o psb_z_ainv_tools_mod.o
+psb_biconjg_mod.o: psb_prec_const_mod.o psb_c_biconjg_mod.o \
+	psb_d_biconjg_mod.o psb_s_biconjg_mod.o psb_z_biconjg_mod.o
+psb_c_invt_fact_mod.o: psb_prec_const_mod.o psb_c_ilu_fact_mod.o
+psb_d_invt_fact_mod.o: psb_prec_const_mod.o psb_d_ilu_fact_mod.o
+psb_s_invt_fact_mod.o: psb_prec_const_mod.o psb_s_ilu_fact_mod.o
+psb_z_invt_fact_mod.o: psb_prec_const_mod.o psb_z_ilu_fact_mod.o
+psb_c_invk_fact_mod.o: psb_prec_const_mod.o psb_c_ilu_fact_mod.o
+psb_d_invk_fact_mod.o: psb_prec_const_mod.o psb_d_ilu_fact_mod.o
+psb_s_invk_fact_mod.o: psb_prec_const_mod.o psb_s_ilu_fact_mod.o
+psb_z_invk_fact_mod.o: psb_prec_const_mod.o psb_z_ilu_fact_mod.o
 
 veryclean: clean
 	/bin/rm -f $(LIBNAME) *$(.mod)
@@ -60,4 +83,3 @@ iclean:
 	cd impl && $(MAKE) clean
 clean: iclean
 	/bin/rm -f $(OBJS) $(LOCAL_MODS)
-
diff --git a/prec/impl/Makefile b/prec/impl/Makefile
index 80e87a54..57d7c304 100644
--- a/prec/impl/Makefile
+++ b/prec/impl/Makefile
@@ -17,13 +17,32 @@ OBJS=psb_s_prec_type_impl.o psb_d_prec_type_impl.o \
      psb_cprecbld.o psb_cprecset.o psb_cprecinit.o \
      psb_z_diagprec_impl.o psb_z_bjacprec_impl.o psb_z_nullprec_impl.o \
      psb_zilu_fct.o psb_z_ilu0_fact.o psb_z_iluk_fact.o psb_z_ilut_fact.o \
-     psb_zprecbld.o psb_zprecset.o psb_zprecinit.o 
+     psb_zprecbld.o psb_zprecset.o psb_zprecinit.o \
+		 psb_c_sparsify.o psb_d_sparsify.o psb_s_sparsify.o psb_z_sparsify.o \
+		 psb_crwclip.o psb_drwclip.o psb_srwclip.o psb_zrwclip.o \
+		 psb_c_sp_drop.o psb_d_sp_drop.o psb_s_sp_drop.o psb_z_sp_drop.o \
+		 psb_dsparse_biconjg_llk_noth.o psb_dsparse_biconjg_llk.o \
+		 psb_dsparse_biconjg_mlk.o psb_dsparse_biconjg_s_ft_llk.o \
+		 psb_dsparse_biconjg_s_llk.o \
+		 psb_csparse_biconjg_llk_noth.o psb_csparse_biconjg_llk.o \
+		 psb_csparse_biconjg_mlk.o psb_csparse_biconjg_s_ft_llk.o \
+		 psb_csparse_biconjg_s_llk.o \
+		 psb_zsparse_biconjg_llk_noth.o psb_zsparse_biconjg_llk.o \
+		 psb_zsparse_biconjg_mlk.o psb_zsparse_biconjg_s_ft_llk.o \
+		 psb_zsparse_biconjg_s_llk.o \
+		 psb_ssparse_biconjg_llk_noth.o psb_ssparse_biconjg_llk.o \
+		 psb_ssparse_biconjg_mlk.o psb_ssparse_biconjg_s_ft_llk.o \
+		 psb_ssparse_biconjg_s_llk.o \
+		 psb_d_ainv_bld.o psb_c_ainv_bld.o psb_s_ainv_bld.o \
+		 psb_z_ainv_bld.o \
+		 psb_c_invt_fact.o psb_d_invt_fact.o psb_s_invt_fact.o psb_z_invt_fact.o\
+		 psb_c_invk_fact.o psb_d_invk_fact.o psb_s_invk_fact.o psb_z_invk_fact.o
 
 LIBNAME=$(PRECLIBNAME)
 COBJS=
-FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) 
+FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR)
 
-lib:  $(OBJS) 
+lib:  $(OBJS)
 	$(AR) $(HERE)/$(LIBNAME) $(OBJS)
 	$(RANLIB) $(HERE)/$(LIBNAME)
 
@@ -31,4 +50,3 @@ veryclean: clean
 
 clean:
 	/bin/rm -f $(OBJS) $(LOCAL_MODS)
-
diff --git a/prec/impl/psb_c_ainv_bld.f90 b/prec/impl/psb_c_ainv_bld.f90
new file mode 100644
index 00000000..5cf2a45e
--- /dev/null
+++ b/prec/impl/psb_c_ainv_bld.f90
@@ -0,0 +1,225 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+subroutine psb_c_ainv_bld(a,alg,fillin,thresh,wmat,d,zmat,desc,info,blck,iscale)
+
+  use psb_base_mod
+  use psb_prec_const_mod
+  use psb_c_biconjg_mod
+
+  implicit none
+
+  ! Arguments
+  type(psb_cspmat_type), intent(in), target   :: a
+  integer(psb_ipk_), intent(in)                 :: fillin,alg
+  real(psb_spk_), intent(in)                  :: thresh
+  type(psb_cspmat_type), intent(inout)        :: wmat, zmat
+  complex(psb_spk_), allocatable                  :: d(:)
+  Type(psb_desc_type), Intent(in)               :: desc
+  integer(psb_ipk_), intent(out)                :: info
+  type(psb_cspmat_type), intent(in), optional :: blck
+  integer(psb_ipk_), intent(in), optional       :: iscale
+  !
+  integer(psb_ipk_)             :: i, nztota, err_act, n_row, nrow_a
+  type(psb_c_coo_sparse_mat)  :: acoo
+  type(psb_c_csr_sparse_mat)  :: acsr
+  type(psb_cspmat_type)       :: atmp
+  real(psb_spk_), allocatable :: arws(:), acls(:)
+  complex(psb_spk_), allocatable  :: pq(:), ad(:)
+  integer(psb_ipk_)             :: debug_level, debug_unit
+  type(psb_ctxt_type)           :: ctxt
+  integer(psb_ipk_)             :: np,me
+  integer(psb_ipk_)             :: nzrmax, iscale_
+  real(psb_spk_)              :: sp_thresh
+  complex(psb_spk_)               :: weight
+  character(len=20)             :: name, ch_err
+
+
+  info = psb_success_
+  name = 'psb_cainv_bld'
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  ctxt        = psb_cd_get_context(desc)
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' start'
+
+  iscale_ = psb_ilu_scale_none_
+  if (present(iscale)) iscale_ = iscale
+  weight = cone
+  !
+  ! Check the memory available to hold the W and Z factors
+  ! and allocate it if needed
+  !
+  nrow_a = a%get_nrows()
+  nztota = a%get_nzeros()
+
+  if (present(blck)) then
+    nztota = nztota + blck%get_nzeros()
+  end if
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),&
+       & ': out get_nnzeros',nrow_a,nztota,&
+       & a%get_nrows(),a%get_ncols(),a%get_nzeros()
+
+
+  n_row  = desc%get_local_rows()
+  allocate(pq(n_row),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  nzrmax    = fillin
+  sp_thresh = thresh
+
+  !
+  ! Ok, let's start first with Z (i.e. Upper)
+  !
+  call a%csclip(acoo,info,imax=n_row,jmax=n_row)
+  call acsr%mv_from_coo(acoo,info)
+  select case(iscale_)
+  case(psb_ilu_scale_none_)
+    ! Ok, do nothing.
+
+  case(psb_ilu_scale_maxval_)
+    weight = acsr%maxval()
+    weight = cone/weight
+    call acsr%scal(weight,info)
+
+  case(psb_ilu_scale_arcsum_)
+    allocate(arws(n_row),acls(n_row),ad(n_row),stat=info)
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    end if
+    call acsr%arwsum(arws)
+    call acsr%aclsum(acls)
+    ad(1:n_row) = sqrt(sqrt(arws(1:n_row)*acls(1:n_row)))
+    ad(1:n_row) = cone/ad(1:n_row)
+    call acsr%scal(ad,info,side='L')
+    call acsr%scal(ad,info,side='R')
+  case default
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale')
+    goto 9999
+  end select
+
+  !
+  ! Here for the actual workhorses.
+  ! Only biconjg is surviving for now....
+  !
+    call psb_sparse_biconjg(alg,n_row,acsr,pq,&
+         &   zmat,wmat,nzrmax,sp_thresh,info)
+  ! Done. Hopefully....
+
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='sparse_biconjg')
+    goto 9999
+  end if
+  call atmp%mv_from(acsr)
+
+  !
+  ! Is this right???
+  !
+  do i=1, n_row
+    if (abs(pq(i)) < d_epstol) then
+      pq(i) = cone
+    else
+      pq(i) = cone/pq(i)
+    end if
+  end do
+
+  select case(iscale_)
+  case(psb_ilu_scale_none_)
+    ! Ok, do nothing.
+  case(psb_ilu_scale_maxval_)
+    pq(:) = pq(:)*weight
+
+  case(psb_ilu_scale_arcsum_)
+    call zmat%scal(ad,info,side='L')
+    call wmat%scal(ad,info,side='R')
+
+  case default
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale')
+    goto 9999
+  end select
+
+  call psb_move_alloc(pq,d,info)
+
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' end'
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_c_ainv_bld
diff --git a/prec/impl/psb_c_bjacprec_impl.f90 b/prec/impl/psb_c_bjacprec_impl.f90
index dbd890c7..32baf385 100644
--- a/prec/impl/psb_c_bjacprec_impl.f90
+++ b/prec/impl/psb_c_bjacprec_impl.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,13 +27,13 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 
 subroutine psb_c_bjac_dump(prec,info,prefix,head)
   use psb_base_mod
   use psb_c_bjacprec, psb_protect_name => psb_c_bjac_dump
-  implicit none 
+  implicit none
   class(psb_c_bjac_prec_type), intent(in) :: prec
   integer(psb_ipk_), intent(out)                    :: info
   character(len=*), intent(in), optional  :: prefix,head
@@ -43,13 +43,13 @@ subroutine psb_c_bjac_dump(prec,info,prefix,head)
   character(len=80)  :: prefix_
   character(len=120) :: fname ! len should be at least 20 more than
 
-  !  len of prefix_ 
+  !  len of prefix_
 
   info = 0
   ctxt = prec%get_ctxt()
   call psb_info(ctxt,iam,np)
 
-  if (present(prefix)) then 
+  if (present(prefix)) then
     prefix_ = trim(prefix(1:min(len(prefix),len(prefix_))))
   else
     prefix_ = "dump_fact_c"
@@ -74,7 +74,7 @@ end subroutine psb_c_bjac_dump
 subroutine psb_c_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_c_bjacprec, psb_protect_name => psb_c_bjac_apply_vect
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_c_bjac_prec_type), intent(inout)  :: prec
   complex(psb_spk_),intent(in)         :: alpha,beta
@@ -118,12 +118,12 @@ subroutine psb_c_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
   n_row = desc_data%get_local_rows()
   n_col = desc_data%get_local_cols()
 
-  if (x%get_nrows() < n_row) then 
+  if (x%get_nrows() < n_row) then
     info = 36; ierr(1) = 2; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
   end if
-  if (y%get_nrows() < n_row) then 
+  if (y%get_nrows() < n_row) then
     info = 36; ierr(1) = 3; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
@@ -140,9 +140,9 @@ subroutine psb_c_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
   end if
 
 
-  if (n_col <= size(work)) then 
+  if (n_col <= size(work)) then
     ww => work(1:n_col)
-    if ((4*n_col+n_col) <= size(work)) then 
+    if ((4*n_col+n_col) <= size(work)) then
       aux => work(n_col+1:)
     else
       allocate(aux(4*n_col),stat=info)
@@ -152,19 +152,19 @@ subroutine psb_c_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
     allocate(ww(n_col),aux(4*n_col),stat=info)
   endif
 
-  if (info /= psb_success_) then 
+  if (info /= psb_success_) then
     call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-    goto 9999      
+    goto 9999
   end if
 
   do_alloc_wrk = .not.prec%is_allocated_wrk()
   if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v)
 
   associate (wv => prec%wrk(1), wv1 => prec%wrk(2))
-    
+
     select case(prec%iprcparm(psb_f_type_))
-    case(psb_f_ilu_n_) 
-      
+    case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_)
+
       select case(trans_)
       case('N')
         call psb_spsm(cone,prec%av(psb_l_pr_),x,czero,wv,desc_data,info,&
@@ -172,31 +172,58 @@ subroutine psb_c_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
         if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_u_pr_),wv,&
              & beta,y,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_, work=aux)
-        
+
       case('T')
         call psb_spsm(cone,prec%av(psb_u_pr_),x,czero,wv,desc_data,info,&
              & trans=trans_,scale='L',diag=prec%dv,choice=psb_none_, work=aux)
         if(info == psb_success_)  call psb_spsm(alpha,prec%av(psb_l_pr_),wv,&
              & beta,y,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_,work=aux)
-        
+
       case('C')
-        
+
         call psb_spsm(cone,prec%av(psb_u_pr_),x,czero,wv,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_, work=aux)
-        
+
         call wv1%mlt(cone,prec%dv,wv,czero,info,conjgx=trans_)
-        
+
         if(info == psb_success_)  call psb_spsm(alpha,prec%av(psb_l_pr_),wv1,&
              & beta,y,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_,work=aux)
-        
+
+      end select
+      if (info /= psb_success_) then
+        ch_err="psb_spsm"
+        goto 9999
+      end if
+
+    case(psb_f_ainv_,psb_f_invt_,psb_f_invk_)
+      ! Application of approximate inverse preconditioner, just some spmm
+
+      select case(trans_)
+      case('N')
+        call psb_spmm(cone,prec%av(psb_l_pr_),x,czero,wv,desc_data,info,&
+             & trans=trans_,work=aux,doswap=.false.)
+
+        if (info == psb_success_) call wv1%mlt(cone,prec%dv,wv,czero,info)
+        if(info == psb_success_) &
+             & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1,&
+             & beta,y,desc_data,info, trans=trans_, work=aux,doswap=.false.)
+
+       case('T','C')
+         call psb_spmm(cone,prec%av(psb_l_pr_),x,czero,wv,desc_data,info,&
+              & trans=trans_,work=aux,doswap=.false.)
+         if (info == psb_success_) call wv1%mlt(cone,prec%dv,wv,czero,info)
+         if (info == psb_success_) &
+              & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1, &
+              & beta,y,desc_data,info,trans=trans_,work=aux,doswap=.false.)
+
       end select
-      if (info /= psb_success_) then 
+      if (info /= psb_success_) then
         ch_err="psb_spsm"
         goto 9999
       end if
-      
+
     case default
       info = psb_err_internal_error_
       call psb_errpush(info,name,a_err='Invalid factorization')
@@ -204,12 +231,12 @@ subroutine psb_c_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
     end select
 
   end associate
-  
+
   call psb_halo(y,desc_data,info,data=psb_comm_mov_)
 
   if (do_alloc_wrk) call prec%free_wrk(info)
-  if (n_col <= size(work)) then 
-    if ((4*n_col+n_col) <= size(work)) then 
+  if (n_col <= size(work)) then
+    if ((4*n_col+n_col) <= size(work)) then
     else
       deallocate(aux)
     endif
@@ -231,7 +258,7 @@ end subroutine psb_c_bjac_apply_vect
 subroutine psb_c_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_c_bjacprec, psb_protect_name => psb_c_bjac_apply
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_c_bjac_prec_type), intent(inout)  :: prec
   complex(psb_spk_),intent(in)         :: alpha,beta
@@ -244,6 +271,7 @@ subroutine psb_c_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   ! Local variables
   integer(psb_ipk_) :: n_row,n_col
   complex(psb_spk_), pointer :: ww(:), aux(:)
+  type(psb_d_vect_type) :: tx,ty
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
   integer(psb_ipk_) :: err_act, ierr(5)
@@ -273,12 +301,12 @@ subroutine psb_c_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   n_row = desc_data%get_local_rows()
   n_col = desc_data%get_local_cols()
 
-  if (size(x) < n_row) then 
+  if (size(x) < n_row) then
     info = 36; ierr(1) = 2; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
   end if
-  if (size(y) < n_row) then 
+  if (size(y) < n_row) then
     info = 36; ierr(1) = 3; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
@@ -295,29 +323,29 @@ subroutine psb_c_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   end if
 
 
-  if (n_col <= size(work)) then 
+  if (n_col <= size(work)) then
     ww => work(1:n_col)
-    if ((4*n_col+n_col) <= size(work)) then 
+    if ((4*n_col+n_col) <= size(work)) then
       aux => work(n_col+1:)
     else
       allocate(aux(4*n_col),stat=info)
-      if (info /= psb_success_) then 
+      if (info /= psb_success_) then
         call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-        goto 9999      
+        goto 9999
       end if
 
     endif
   else
     allocate(ww(n_col),aux(4*n_col),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
   endif
 
 
   select case(prec%iprcparm(psb_f_type_))
-  case(psb_f_ilu_n_) 
+  case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_)
 
     select case(trans_)
     case('N')
@@ -344,11 +372,42 @@ subroutine psb_c_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
            & trans=trans_,scale='U',choice=psb_none_,work=aux)
 
     end select
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       ch_err="psb_spsm"
       goto 9999
     end if
 
+  case(psb_f_ainv_,psb_f_invt_,psb_f_invk_)
+    ! Application of approximate inverse preconditioner, just some spmm
+
+    select case(trans_)
+
+      case('N')
+        call psb_spmm(cone,prec%av(psb_l_pr_),x,czero,ww,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+        ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row)
+        if (info == psb_success_) &
+            & call psb_spmm(alpha,prec%av(psb_u_pr_),ww,beta,y,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+
+      case('T')
+        call psb_spmm(cone,prec%av(psb_u_pr_),x,czero,ww,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+        ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row)
+        if (info == psb_success_) &
+            & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+
+      case('C')
+        call psb_spmm(cone,prec%av(psb_u_pr_),x,czero,ww,desc_data,info,&
+             & trans=trans_,work=aux,doswap=.false.)
+        ww(1:n_row) = ww(1:n_row) * conjg(prec%dv%v%v(1:n_row))
+        if (info == psb_success_) &
+             & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,&
+             & trans=trans_,work=aux,doswap=.false.)
+
+    end select
+
 
   case default
     info = psb_err_internal_error_
@@ -358,8 +417,8 @@ subroutine psb_c_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
 
   call psb_halo(y,desc_data,info,data=psb_comm_mov_)
 
-  if (n_col <= size(work)) then 
-    if ((4*n_col+n_col) <= size(work)) then 
+  if (n_col <= size(work)) then
+    if ((4*n_col+n_col) <= size(work)) then
     else
       deallocate(aux)
     endif
@@ -397,11 +456,26 @@ subroutine psb_c_bjac_precinit(prec,info)
     call psb_Errpush(info,name)
     goto 9999
   end if
+  call psb_realloc(psb_rfpsz,prec%rprcparm,info)
+  if (info /= psb_success_) then
+    info = psb_err_alloc_dealloc_
+    call psb_Errpush(info,name)
+    goto 9999
+  end if
 
   prec%iprcparm(:)                = 0
   prec%iprcparm(psb_p_type_)      = psb_bjac_
   prec%iprcparm(psb_f_type_)      = psb_f_ilu_n_
   prec%iprcparm(psb_ilu_fill_in_) = 0
+  prec%iprcparm(psb_ilu_ialg_)    = psb_ilu_n_
+  prec%iprcparm(psb_ilu_scale_)   = psb_ilu_scale_none_
+  prec%iprcparm(psb_inv_fillin_)  = 0
+  prec%iprcparm(psb_ainv_alg_)    = psb_ainv_llk_
+
+
+  prec%rprcparm(:)                = 0
+  prec%rprcparm(psb_fact_eps_)    = 1E-1_psb_spk_
+  prec%rprcparm(psb_inv_thresh_)  = 1E-1_psb_spk_
 
 
   call psb_erractionrestore(err_act)
@@ -416,7 +490,8 @@ end subroutine psb_c_bjac_precinit
 subroutine psb_c_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
 
   use psb_base_mod
-  use psb_prec_mod, only : psb_ilu_fct
+  use psb_c_ilu_fact_mod
+  use psb_c_ainv_fact_mod
   use psb_c_bjacprec, psb_protect_name => psb_c_bjac_precbld
   Implicit None
 
@@ -428,12 +503,13 @@ subroutine psb_c_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
   class(psb_c_base_vect_type), intent(in), optional  :: vmold
   class(psb_i_base_vect_type), intent(in), optional  :: imold
 
-  !     .. Local Scalars ..                                                       
-  integer(psb_ipk_) ::    i, m
+  !     .. Local Scalars ..
+  integer(psb_ipk_) ::    i, m, ialg, fill_in, iscale, inv_fill, iinvalg
   integer(psb_ipk_) ::    ierr(5)
   character ::        trans, unitd
-  type(psb_c_csr_sparse_mat), allocatable  :: lf, uf
+  type(psb_cspmat_type), allocatable :: lf, uf
   complex(psb_spk_), allocatable :: dd(:)
+  real(psb_spk_) :: fact_eps, inv_thresh
   integer(psb_ipk_) :: nztota,  err_act, n_row, nrow_a,n_col, nhalo
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -462,19 +538,114 @@ subroutine psb_c_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
   trans = 'N'
   unitd = 'U'
 
+  ! We check if all the information contained in the preconditioner structure
+  ! are meaningful, otherwise we give an error and get out of the build
+  ! procedure
+  ialg    = prec%iprcparm(psb_ilu_ialg_)  ! Integer for ILU type algorithm
+  iinvalg = prec%iprcparm(psb_ainv_alg_)  ! Integer for AINV type algorithm
+  iscale = prec%iprcparm(psb_ilu_scale_)  ! Integer for scaling of matrix
+  fact_eps = prec%rprcparm(psb_fact_eps_) ! Drop-tolerance for factorization
+  inv_thresh = prec%rprcparm(psb_inv_thresh_) ! Drop-tolerance for inverse
+  fill_in = prec%iprcparm(psb_ilu_fill_in_) ! Fill-In for factorization
+  inv_fill = prec%iprcparm(psb_inv_fillin_) ! Fill-In for inverse factorization
+
+  ! Check if the type of scaling is known, pay attention that not all the
+  ! scalings make sense for all the factorization, if something that does not
+  ! make sense is required the factorization routine will fail in an
+  ! unnrecoverable way.
+  if ((iscale == psb_ilu_scale_none_).or.&
+      (iscale == psb_ilu_scale_maxval_).or.&
+      (iscale == psb_ilu_scale_diag_).or.&
+      (iscale == psb_ilu_scale_arwsum_).or.&
+      (iscale == psb_ilu_scale_aclsum_).or.&
+      (iscale == psb_ilu_scale_arcsum_)) then
+  ! Do nothing: admissible request
+  else
+    info=psb_err_from_subroutine_
+    ch_err='psb_ilu_scale_'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+  select case (prec%iprcparm(psb_f_type_))
+    case (psb_f_ainv_)
+      ! Check if the variant for the AINV is known to the library
+      select case (iinvalg)
+      case(psb_ainv_llk_,psb_ainv_s_llk_,psb_ainv_s_ft_llk_,psb_ainv_llk_noth_,&
+        & psb_ainv_mlk_)
+        ! Do nothing these are okay
+      case default
+        info=psb_err_from_subroutine_
+        ch_err='psb_ainv_alg_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end select ! AINV Variant
+      ! Check if the drop-tolerance make sense
+      if( inv_thresh > 1) then
+        info=psb_err_from_subroutine_
+        ch_err='psb_inv_thresh_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+    case (psb_f_ilu_t_)
+      if (fact_eps > 1) then
+        ! Check if the drop-tolerance make sense
+        info=psb_err_from_subroutine_
+        ch_err='psb_fact_eps_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+    case (psb_f_invt_)
+      ! Check both tolerances
+      if (fact_eps > 1) then
+        info=psb_err_from_subroutine_
+        ch_err='psb_fact_eps_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+      if( inv_thresh > 1) then
+        info=psb_err_from_subroutine_
+        ch_err='psb_inv_thresh_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+    case default
+  end select
+
+
+  ! Checks relative to the fill-in parameters
+  if (prec%iprcparm(psb_f_type_) == psb_f_ilu_n_) then
+    if(fill_in < 0) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilu_fill_in_'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    else if (fill_in == 0) then
+      ! If the requested level of fill is equal to zero, we default to the
+      ! specialized ILU(0) routine
+      prec%iprcparm(psb_f_type_) = psb_f_ilu_n_
+    end if
+  end if
+  ! If no limit on the fill_in is required we allow every fill, this is needed
+  ! since this quantity is used to allocate the auxiliary vectors for the
+  ! factorization
+  if (inv_fill <= 0) inv_fill = m
+
+  ! Select on the type of factorization to be used
   select case(prec%iprcparm(psb_f_type_))
 
-  case(psb_f_ilu_n_) 
+  case(psb_f_ilu_n_)
+  ! ILU(0) Factorization: the total number of nonzeros of the factorized matrix
+  ! is equal to the one of the input matrix
 
-    if (allocated(prec%av)) then 
-      if (size(prec%av) < psb_bp_ilu_avsz) then 
-        do i=1,size(prec%av) 
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
           call prec%av(i)%free()
         enddo
         deallocate(prec%av,stat=info)
       endif
     end if
-    if (.not.allocated(prec%av)) then 
+    if (.not.allocated(prec%av)) then
       allocate(prec%av(psb_max_avsz),stat=info)
       if (info /= psb_success_) then
         call psb_errpush(psb_err_alloc_dealloc_,name)
@@ -501,27 +672,27 @@ subroutine psb_c_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
     end if
 
     allocate(dd(n_row),stat=info)
-    if (info == psb_success_) then 
+    if (info == psb_success_) then
       allocate(prec%dv, stat=info)
-      if (info == 0) then 
-        if (present(vmold)) then 
-          allocate(prec%dv%v,mold=vmold,stat=info) 
-        else       
-          allocate(psb_c_base_vect_type :: prec%dv%v,stat=info) 
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_c_base_vect_type :: prec%dv%v,stat=info)
         end if
       end if
     end if
 
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     endif
-    ! This is where we have no renumbering, thus no need 
-    call psb_ilu_fct(a,lf,uf,dd,info)
+    ! This is where we have no renumbering, thus no need
+    call psb_ilu0_fact(ialg,a,lf,uf,dd,info)
 
     if(info == psb_success_) then
-      call prec%av(psb_l_pr_)%mv_from(lf)
-      call prec%av(psb_u_pr_)%mv_from(uf)
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
       call prec%av(psb_l_pr_)%set_asb()
       call prec%av(psb_u_pr_)%set_asb()
       call prec%av(psb_l_pr_)%trim()
@@ -530,12 +701,375 @@ subroutine psb_c_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
       ! call move_alloc(dd,prec%d)
     else
       info=psb_err_from_subroutine_
-      ch_err='psb_ilu_fct'
+      ch_err='psb_ilu0_fact'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
 
-  case(psb_f_none_) 
+  case(psb_f_ilu_k_)
+  ! ILU(N) Incomplete LU-factorization with N levels of fill-in. Depending on
+  ! the type of the variant of the algorithm the may be forgotten or added to
+  ! the diagonal (MILU)
+
+  if (allocated(prec%av)) then
+    if (size(prec%av) < psb_bp_ilu_avsz) then
+      do i=1,size(prec%av)
+        call prec%av(i)%free()
+      enddo
+      deallocate(prec%av,stat=info)
+    endif
+  end if
+  if (.not.allocated(prec%av)) then
+    allocate(prec%av(psb_max_avsz),stat=info)
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_alloc_dealloc_,name)
+      goto 9999
+    end if
+  endif
+
+  nrow_a = desc_a%get_local_rows()
+  nztota = a%get_nzeros()
+
+  n_col  = desc_a%get_local_cols()
+  nhalo  = n_col-nrow_a
+  n_row  = nrow_a
+
+  allocate(lf,uf,stat=info)
+  if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+  if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+  if(info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    ch_err='psb_sp_all'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+  allocate(dd(n_row),stat=info)
+  if (info == psb_success_) then
+    allocate(prec%dv, stat=info)
+    if (info == 0) then
+      if (present(vmold)) then
+        allocate(prec%dv%v,mold=vmold,stat=info)
+      else
+        allocate(psb_c_base_vect_type :: prec%dv%v,stat=info)
+      end if
+    end if
+  end if
+
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  endif
+  ! This is where we have no renumbering, thus no need
+  call psb_iluk_fact(fill_in,ialg,a,lf,uf,dd,info)
+
+  if(info == psb_success_) then
+    call prec%av(psb_l_pr_)%mv_from(lf%a)
+    call prec%av(psb_u_pr_)%mv_from(uf%a)
+    call prec%av(psb_l_pr_)%set_asb()
+    call prec%av(psb_u_pr_)%set_asb()
+    call prec%av(psb_l_pr_)%trim()
+    call prec%av(psb_u_pr_)%trim()
+    call prec%dv%bld(dd)
+    ! call move_alloc(dd,prec%d)
+  else
+    info=psb_err_from_subroutine_
+    ch_err='psb_iluk_fact'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+  case(psb_f_ilu_t_)
+  ! ILU(N,E) Incomplete LU factorization with thresholding and level of fill
+
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_c_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! This is where we have no renumbering, thus no need
+    call psb_ilut_fact(fill_in,fact_eps,a,lf,uf,dd,info,iscale=iscale)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_ainv_)
+    ! Approximate Inverse Factorizations based on variants of the incomplete
+    ! biconjugation algorithms
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_c_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! Computing the factorization
+    call psb_ainv_fact(a,iinvalg,inv_fill,inv_thresh,lf,dd,uf,desc_a,info,iscale=iscale)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_invk_)
+    ! Approximate Inverse Factorizations based on the sparse inversion of
+    ! triangular factors of an ILU(n) factorization
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_c_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! Computing the factorization
+    call psb_invk_fact(a,fill_in, inv_fill,lf,dd,uf,desc_a,info)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_invt_)
+    ! Approximate Inverse Factorizations based on the sparse inversion of
+    ! triangular factors of an ILU(eps) factorization
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_c_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! Computing the factorization
+    call psb_invt_fact(a,fill_in,inv_fill,fact_eps,inv_thresh,lf,dd,uf,&
+      & desc_a,info)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_none_)
     info=psb_err_from_subroutine_
     ch_err='Inconsistent prec  psb_f_none_'
     call psb_errpush(info,name,a_err=ch_err)
@@ -548,7 +1082,7 @@ subroutine psb_c_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
     goto 9999
   end select
 
-  if (present(amold)) then 
+  if (present(amold)) then
     call prec%av(psb_l_pr_)%cscnv(info,mold=amold)
     call prec%av(psb_u_pr_)%cscnv(info,mold=amold)
   end if
@@ -567,8 +1101,8 @@ subroutine psb_c_bjac_precseti(prec,what,val,info)
   Implicit None
 
   class(psb_c_bjac_prec_type),intent(inout) :: prec
-  integer(psb_ipk_), intent(in)                      :: what 
-  integer(psb_ipk_), intent(in)                      :: val 
+  integer(psb_ipk_), intent(in)                      :: what
+  integer(psb_ipk_), intent(in)                      :: val
   integer(psb_ipk_), intent(out)                     :: info
   integer(psb_ipk_) :: err_act, nrow
   character(len=20)  :: name='c_bjac_precset'
@@ -576,34 +1110,33 @@ subroutine psb_c_bjac_precseti(prec,what,val,info)
   call psb_erractionsave(err_act)
 
   info = psb_success_
-  if (.not.allocated(prec%iprcparm)) then 
+  if (.not.allocated(prec%iprcparm)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
   end if
 
   select case(what)
-  case (psb_f_type_) 
-    if (prec%iprcparm(psb_p_type_) /= psb_bjac_) then 
-      write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',&
-           & prec%iprcparm(psb_p_type_),&
-           & 'ignoring user specification'
-      return
-    endif
+  case (psb_f_type_)
     prec%iprcparm(psb_f_type_)     = val
 
-  case (psb_ilu_fill_in_) 
-    if ((prec%iprcparm(psb_p_type_) /= psb_bjac_).or.&
-         & (prec%iprcparm(psb_f_type_) /= psb_f_ilu_n_)) then 
-      write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',&
-           & prec%iprcparm(psb_p_type_),&
-           & 'ignoring user specification'
-      return
-    endif
+  case (psb_ilu_fill_in_)
     prec%iprcparm(psb_ilu_fill_in_) = val
 
+  case (psb_ilu_ialg_)
+    prec%iprcparm(psb_ilu_ialg_) = val
+
+  case (psb_ilu_scale_)
+    prec%iprcparm(psb_ilu_scale_) = val
+
+  case (psb_ainv_alg_)
+    prec%iprcparm(psb_ainv_alg_) = val
+
+  case(psb_inv_fillin_)
+    prec%iprcparm(psb_inv_fillin_) = val
+
   case default
-    write(psb_err_unit,*) 'WHAT is invalid, ignoring user specification'
+    write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what
 
   end select
 
@@ -613,3 +1146,45 @@ subroutine psb_c_bjac_precseti(prec,what,val,info)
 9999 call psb_error_handler(err_act)
   return
 end subroutine psb_c_bjac_precseti
+
+subroutine psb_c_bjac_precsetr(prec,what,val,info)
+
+  use psb_base_mod
+  use psb_c_bjacprec, psb_protect_name => psb_c_bjac_precsetr
+  Implicit None
+
+  class(psb_c_bjac_prec_type),intent(inout) :: prec
+  integer(psb_ipk_), intent(in)                      :: what
+  real(psb_spk_), intent(in)                       :: val
+  integer(psb_ipk_), intent(out)                     :: info
+  integer(psb_ipk_) :: err_act, nrow
+  character(len=20)  :: name='c_bjac_precset'
+
+  call psb_erractionsave(err_act)
+
+  info = psb_success_
+  if (.not.allocated(prec%rprcparm)) then
+    info = 1124
+    call psb_errpush(info,name,a_err="preconditioner")
+    goto 9999
+  end if
+
+  select case(what)
+
+  case (psb_fact_eps_)
+    prec%rprcparm(psb_fact_eps_) = val
+
+  case (psb_inv_thresh_)
+    prec%rprcparm(psb_inv_thresh_) = val
+
+  case default
+    write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what
+
+  end select
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_c_bjac_precsetr
diff --git a/prec/impl/psb_c_ilu0_fact.f90 b/prec/impl/psb_c_ilu0_fact.f90
index c4097dea..1a3e1046 100644
--- a/prec/impl/psb_c_ilu0_fact.f90
+++ b/prec/impl/psb_c_ilu0_fact.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,21 +27,21 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
+!
 !    Moved here from MLD2P4, original copyright below.
-!  
-!  
-!   
+!
+!
+!
 !                             MLD2P4  version 2.2
 !    MultiLevel Domain Decomposition Parallel Preconditioners Package
 !               based on PSBLAS (Parallel Sparse BLAS version 3.5)
-!    
-!    (C) Copyright 2008-2018 
-!  
-!        Salvatore Filippone  
-!        Pasqua D'Ambra   
-!        Daniela di Serafino   
-!   
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -53,7 +53,7 @@
 !      3. The name of the MLD2P4 group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -65,8 +65,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!  
+!
+!
 ! File: psb_cilu0_fact.f90
 !
 ! Subroutine: psb_cilu0_fact
@@ -75,7 +75,7 @@
 !
 !  This routine computes either the ILU(0) or the MILU(0) factorization of
 !  the diagonal blocks of a distributed matrix. These factorizations are used
-!  to build the 'base preconditioner' (block-Jacobi preconditioner/solver, 
+!  to build the 'base preconditioner' (block-Jacobi preconditioner/solver,
 !  Additive Schwarz preconditioner) corresponding to a given level of a
 !  multilevel preconditioner.
 !
@@ -83,10 +83,10 @@
 !    Y. Saad, Iterative Methods for Sparse Linear Systems, Second Edition,
 !    SIAM, 2003, Chapter 10.
 !
-!  The local matrix is stored into a and blck, as specified in the description 
-!  of the arguments below. The storage format for both the L and U factors is CSR. 
-!  The diagonal of the U factor is stored separately (actually, the inverse of the 
-!  diagonal entries is stored; this is then managed in the solve stage associated 
+!  The local matrix is stored into a and blck, as specified in the description
+!  of the arguments below. The storage format for both the L and U factors is CSR.
+!  The diagonal of the U factor is stored separately (actually, the inverse of the
+!  diagonal entries is stored; this is then managed in the solve stage associated
 !  to the ILU(0)/MILU(0) factorization).
 !
 !  The routine copies and factors "on the fly" from a and blck into l (L factor),
@@ -94,7 +94,7 @@
 !
 !  This implementation of ILU(0)/MILU(0) is faster than the implementation in
 !  psb_ziluk_fct (the latter routine performs the more general ILU(k)/MILU(k)).
-!  
+!
 !
 ! Arguments:
 !    ialg    -  integer, input.
@@ -121,7 +121,7 @@
 !               factorization.
 !               Note: its allocation is managed by the calling routine psb_ilu_bld,
 !               hence it cannot be only intent(out).
-!    info    -  integer, output.                         
+!    info    -  integer, output.
 !               Error code.
 !    blck    -  type(psb_cspmat_type), input, optional, target.
 !               The sparse matrix structure containing the remote rows of the
@@ -129,7 +129,7 @@
 !               to build an Additive Schwarz base preconditioner with overlap
 !               greater than 0. If the overlap is 0 or the matrix has been reordered
 !               (see psb_fact_bld), then blck is empty.
-!  
+!
 subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck, upd)
 
   use psb_base_mod
@@ -157,30 +157,30 @@ subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck, upd)
   info = psb_success_
   call psb_erractionsave(err_act)
 
-  ! 
+  !
   ! Point to / allocate memory for the incomplete factorization
   !
-  if (present(blck)) then 
+  if (present(blck)) then
     blck_ => blck
   else
-    allocate(blck_,stat=info) 
-    if (info == psb_success_) call blck_%csall(izero,izero,info,ione) 
+    allocate(blck_,stat=info)
+    if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR')
     if (info /= psb_success_) then
       info=psb_err_from_subroutine_
-      ch_err='csall'
+      ch_err='allocate'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
   endif
-  if (present(upd)) then 
-    upd_ = psb_toupper(upd) 
+  if (present(upd)) then
+    upd_ = psb_toupper(upd)
   else
     upd_ = 'F'
   end if
-  
+
   m = a%get_nrows() + blck_%get_nrows()
   if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.&
-       & (m > size(d))    ) then 
+       & (m > size(d))    ) then
     write(0,*) 'Wrong allocation status for L,D,U? ',&
          & l%get_nrows(),size(d),u%get_nrows()
     info = -1
@@ -212,12 +212,12 @@ subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck, upd)
   call u%set_triangle()
   call u%set_unit()
   call u%set_upper()
-  
+
   !
   ! Nullify pointer / deallocate memory
   !
-  if (present(blck)) then 
-    blck_ => null() 
+  if (present(blck)) then
+    blck_ => null()
   else
     call blck_%free()
     if(info.ne.0) then
@@ -226,7 +226,7 @@ subroutine psb_cilu0_fact(ialg,a,l,u,d,info,blck, upd)
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
-    deallocate(blck_) 
+    deallocate(blck_)
   endif
 
   call psb_erractionrestore(err_act)
@@ -243,9 +243,9 @@ contains
   ! Version:    complex
   ! Note: internal subroutine of psb_cilu0_fact.
   !
-  !  This routine computes either the ILU(0) or the MILU(0) factorization of the 
-  !  diagonal blocks of a distributed matrix. 
-  !  These factorizations are used to build the 'base preconditioner' 
+  !  This routine computes either the ILU(0) or the MILU(0) factorization of the
+  !  diagonal blocks of a distributed matrix.
+  !  These factorizations are used to build the 'base preconditioner'
   !  (block-Jacobi preconditioner/solver, Additive Schwarz
   !  preconditioner) corresponding to a given level of a multilevel preconditioner.
   !
@@ -257,7 +257,7 @@ contains
   !
   !  The routine copies and factors "on the fly" from the sparse matrix structures a
   !  and b into the arrays lval, uval, d (L, U without its diagonal, diagonal of U).
-  !  
+  !
   !
   ! Arguments:
   !    ialg    -  integer, input.
@@ -296,7 +296,7 @@ contains
   !               according to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor in lval, according to the CSR storage format. 
+  !               of the L factor in lval, according to the CSR storage format.
   !    uval   -   complex(psb_spk_), dimension(:), input/output.
   !               The U factor in the incomplete factorization.
   !               The entries of U are stored according to the CSR format.
@@ -305,18 +305,18 @@ contains
   !               according to the CSR storage format.
   !    uirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the U factor in uval, according to the CSR storage format. 
+  !               of the U factor in uval, according to the CSR storage format.
   !    l1      -  integer, output.
   !               The number of nonzero entries in lval.
   !    l2      -  integer, output.
   !               The number of nonzero entries in uval.
-  !    info    -  integer, output.           
+  !    info    -  integer, output.
   !               Error code.
   !
   subroutine psb_cilu0_factint(ialg,a,b,&
        & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,info)
 
-    implicit none 
+    implicit none
 
     ! Arguments
     integer(psb_ipk_), intent(in)     :: ialg
@@ -332,7 +332,7 @@ contains
     complex(psb_spk_) :: dia,temp
     integer(psb_ipk_), parameter :: nrb=16
     type(psb_c_coo_sparse_mat) :: trw
-    integer(psb_ipk_)   :: int_err(5) 
+    integer(psb_ipk_)   :: int_err(5)
     character(len=20)   :: name, ch_err
 
     name='psb_cilu0_factint'
@@ -346,10 +346,10 @@ contains
 
     select case(ialg)
     case(psb_ilu_n_,psb_milu_n_)
-      ! Ok 
+      ! Ok
     case default
       info=psb_err_input_asize_invalid_i_
-      call psb_errpush(info,name,& 
+      call psb_errpush(info,name,&
            & i_err=(/ione,ialg,izero,izero,izero/))
       goto 9999
     end select
@@ -363,7 +363,7 @@ contains
     end if
     m = ma+mb
 
-    if (psb_toupper(upd) == 'F' ) then 
+    if (psb_toupper(upd) == 'F' ) then
       lirp(1) = 1
       uirp(1) = 1
       l1      = 0
@@ -379,14 +379,14 @@ contains
         if (i <= ma) then
           !
           ! Copy the i-th local row of the matrix, stored in a,
-          ! into lval/d(i)/uval 
+          ! into lval/d(i)/uval
           !
           call ilu_copyin(i,ma,a,i,ione,m,l1,lja,lval,&
                & d(i),l2,uja,uval,ktrw,trw,upd)
         else
           !
           ! Copy the i-th local row of the matrix, stored in b
-          ! (as (i-ma)-th row), into lval/d(i)/uval 
+          ! (as (i-ma)-th row), into lval/d(i)/uval
           !
           call ilu_copyin(i-ma,mb,b,i,ione,m,l1,lja,lval,&
                & d(i),l2,uja,uval,ktrw,trw,upd)
@@ -437,7 +437,7 @@ contains
               !
               dia = dia - temp*uval(jj)
               cycle updateloop
-              !     
+              !
             else if (j > i) then
               !
               ! search u(i,*) (i-th row of U) for a matching index j
@@ -454,23 +454,23 @@ contains
                 end if
               enddo
             end if
-            !     
-            ! If we get here we missed the cycle updateloop, which means 
+            !
+            ! If we get here we missed the cycle updateloop, which means
             ! that this entry does not match; thus we accumulate on the
             ! diagonal for MILU(0).
             !
-            if (ialg == psb_milu_n_) then 
+            if (ialg == psb_milu_n_) then
               dia = dia - temp*uval(jj)
             end if
           enddo updateloop
         enddo
-        !     
+        !
         ! Check the pivot size
-        !     
+        !
         if (abs(dia) < s_epstol) then
           !
           ! Too small pivot: unstable factorization
-          !     
+          !
           info = psb_err_pivot_too_small_
           int_err(1) = i
           write(ch_err,'(g20.10)') abs(dia)
@@ -493,7 +493,7 @@ contains
     else
       write(0,*) 'Update not implemented '
       info = 31
-      call psb_errpush(info,name,& 
+      call psb_errpush(info,name,&
            & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd)
       goto 9999
 
@@ -515,7 +515,7 @@ contains
   ! Version:    complex
   ! Note: internal subroutine of psb_cilu0_fact
   !
-  !  This routine copies a row of a sparse matrix A, stored in the psb_cspmat_type 
+  !  This routine copies a row of a sparse matrix A, stored in the psb_cspmat_type
   !  data structure a, into the arrays lval and uval and into the scalar variable
   !  dia, corresponding to the lower and upper triangles of A and to the diagonal
   !  entry of the row, respectively. The entries in lval and uval are stored
@@ -529,14 +529,14 @@ contains
   !
   !  The routine is used by psb_cilu0_factint in the computation of the ILU(0)/MILU(0)
   !  factorization of a local sparse matrix.
-  !  
+  !
   !  TODO: modify the routine to allow copying into output L and U that are
   !  already filled with indices; this would allow computing an ILU(k) pattern,
-  !  then use the ILU(0) internal for subsequent calls with the same pattern. 
+  !  then use the ILU(0) internal for subsequent calls with the same pattern.
   !
   ! Arguments:
   !    i       -  integer, input.
-  !               The local index of the row to be extracted from  the 
+  !               The local index of the row to be extracted from  the
   !               sparse matrix structure a.
   !    m       -  integer, input.
   !               The number of rows of the local matrix stored into a.
@@ -570,13 +570,13 @@ contains
   !               to the CSR storage format.
   !    uval   -  complex(psb_spk_), dimension(:), input/output.
   !               The array where the entries of the row corresponding to the
-  !               upper triangle are copied. 
+  !               upper triangle are copied.
   !    ktrw    -  integer, input/output.
   !               The index identifying the last entry taken from the
   !               staging buffer trw. See below.
   !    trw     -  type(psb_cspmat_type), input/output.
   !               A staging buffer. If the matrix A is not in CSR format, we use
-  !               the psb_sp_getblk routine and store its output in trw; when we 
+  !               the psb_sp_getblk routine and store its output in trw; when we
   !               need to call psb_sp_getblk we do it for a block of rows, and then
   !               we consume them from trw in successive calls to this routine,
   !               until we empty the buffer. Thus we will make a call to psb_sp_getblk
@@ -608,10 +608,10 @@ contains
     if (psb_errstatus_fatal()) then
       info = psb_err_internal_error_; goto 9999
     end if
-    if (psb_toupper(upd) == 'F') then 
+    if (psb_toupper(upd) == 'F') then
 
-      select type(aa => a%a) 
-      type is (psb_c_csr_sparse_mat) 
+      select type(aa => a%a)
+      type is (psb_c_csr_sparse_mat)
 
         !
         ! Take a fast shortcut if the matrix is stored in CSR format
@@ -633,19 +633,19 @@ contains
           end if
         enddo
 
-      class default 
+      class default
 
         !
-        ! Otherwise use psb_sp_getblk, slower but able (in principle) of 
+        ! Otherwise use psb_sp_getblk, slower but able (in principle) of
         ! handling any format. In this case, a block of rows is extracted
         ! instead of a single row, for performance reasons, and these
         ! rows are copied one by one into lval, dia, uval, through
         ! successive calls to ilu_copyin.
         !
 
-        if ((mod(i,nrb) == 1).or.(nrb == 1)) then 
+        if ((mod(i,nrb) == 1).or.(nrb == 1)) then
           irb = min(m-i+1,nrb)
-          call aa%csget(i,i+irb-1,trw,info) 
+          call aa%csget(i,i+irb-1,trw,info)
           if(info /= psb_success_) then
             info=psb_err_from_subroutine_
             ch_err='csget'
@@ -656,7 +656,7 @@ contains
         end if
 
         nz = trw%get_nzeros()
-        do 
+        do
           if (ktrw > nz) exit
           if (trw%ia(ktrw) > i) exit
           k = trw%ja(ktrw)
@@ -680,7 +680,7 @@ contains
 
       write(0,*) 'Update not implemented '
       info = 31
-      call psb_errpush(info,name,& 
+      call psb_errpush(info,name,&
            & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd)
       goto 9999
 
diff --git a/prec/impl/psb_c_iluk_fact.f90 b/prec/impl/psb_c_iluk_fact.f90
index 8748816d..c4ebc678 100644
--- a/prec/impl/psb_c_iluk_fact.f90
+++ b/prec/impl/psb_c_iluk_fact.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,21 +27,21 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
+!
 !    Moved here from MLD2P4, original copyright below.
-!  
-!  
-!   
+!
+!
+!
 !                             MLD2P4  version 2.2
 !    MultiLevel Domain Decomposition Parallel Preconditioners Package
 !               based on PSBLAS (Parallel Sparse BLAS version 3.5)
-!    
-!    (C) Copyright 2008-2018 
-!  
-!        Salvatore Filippone  
-!        Pasqua D'Ambra   
-!        Daniela di Serafino   
-!   
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -53,7 +53,7 @@
 !      3. The name of the MLD2P4 group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -65,8 +65,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!  
+!
+!
 ! File: psb_ciluk_fact.f90
 !
 ! Subroutine: psb_ciluk_fact
@@ -74,7 +74,7 @@
 ! Contains:   psb_ciluk_factint, iluk_copyin, iluk_fact, iluk_copyout.
 !
 !  This routine computes either the ILU(k) or the MILU(k) factorization of the
-!  diagonal blocks of a distributed matrix. These factorizations are used to 
+!  diagonal blocks of a distributed matrix. These factorizations are used to
 !  build the 'base preconditioner' (block-Jacobi preconditioner/solver,
 !  Additive Schwarz preconditioner) corresponding to a certain level of a
 !  multilevel preconditioner.
@@ -88,7 +88,7 @@
 !  U factors is CSR. The diagonal of the U factor is stored separately (actually,
 !  the inverse of the diagonal entries is stored; this is then managed in the solve
 !  stage associated to the ILU(k)/MILU(k) factorization).
-!  
+!
 !
 ! Arguments:
 !    fill_in -  integer, input.
@@ -118,7 +118,7 @@
 !               factorization.
 !               Note: its allocation is managed by the calling routine psb_ilu_bld,
 !               hence it cannot be only intent(out).
-!    info    -  integer, output.                    
+!    info    -  integer, output.
 !               Error code.
 !    blck    -  type(psb_cspmat_type), input, optional, target.
 !               The sparse matrix structure containing the remote rows of the
@@ -126,7 +126,7 @@
 !               to build an Additive Schwarz base preconditioner with overlap
 !               greater than 0. If the overlap is 0 or the matrix has been reordered
 !               (see psb_fact_bld), then blck does not contain any row.
-!  
+!
 subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck)
 
   use psb_base_mod
@@ -143,7 +143,7 @@ subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck)
   complex(psb_spk_), intent(inout)    ::  d(:)
   !     Local Variables
   integer(psb_ipk_)   :: l1, l2, m, err_act
-  
+
   type(psb_cspmat_type), pointer  :: blck_
   type(psb_c_csr_sparse_mat)      :: ll, uu
   character(len=20)   :: name, ch_err
@@ -152,17 +152,17 @@ subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck)
   info = psb_success_
   call psb_erractionsave(err_act)
 
-  ! 
+  !
   ! Point to / allocate memory for the incomplete factorization
   !
-  if (present(blck)) then 
+  if (present(blck)) then
     blck_ => blck
   else
-    allocate(blck_,stat=info) 
-    if (info == psb_success_) call blck_%csall(izero,izero,info,ione) 
+    allocate(blck_,stat=info)
+    if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') 
     if (info /= psb_success_) then
       info=psb_err_from_subroutine_
-      ch_err='csall'
+      ch_err='allocate'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
@@ -170,7 +170,7 @@ subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck)
 
   m = a%get_nrows() + blck_%get_nrows()
   if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.&
-       & (m > size(d))    ) then 
+       & (m > size(d))    ) then
     write(0,*) 'Wrong allocation status for L,D,U? ',&
          & l%get_nrows(),size(d),u%get_nrows()
     info = -1
@@ -203,15 +203,15 @@ subroutine psb_ciluk_fact(fill_in,ialg,a,l,u,d,info,blck)
   call u%set_triangle()
   call u%set_unit()
   call u%set_upper()
-  
+
   !
   ! Nullify pointer / deallocate memory
   !
-  if (present(blck)) then 
-    blck_ => null() 
+  if (present(blck)) then
+    blck_ => null()
   else
     call blck_%free()
-    deallocate(blck_,stat=info) 
+    deallocate(blck_,stat=info)
     if(info.ne.0) then
       info=psb_err_from_subroutine_
       ch_err='psb_sp_free'
@@ -280,7 +280,7 @@ contains
   !               according to the CSR storage format.
   !    lia2    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor in laspk, according to the CSR storage format. 
+  !               of the L factor in laspk, according to the CSR storage format.
   !    uval   -   complex(psb_spk_), dimension(:), input/output.
   !               The U factor in the incomplete factorization.
   !               The entries of U are stored according to the CSR format.
@@ -289,12 +289,12 @@ contains
   !               according to the CSR storage format.
   !    uirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the U factor in uval, according to the CSR storage format. 
+  !               of the U factor in uval, according to the CSR storage format.
   !    l1      -  integer, output
   !               The number of nonzero entries in laspk.
   !    l2      -  integer, output
   !               The number of nonzero entries in uval.
-  !    info    -  integer, output.           
+  !    info    -  integer, output.
   !               Error code.
   !
   subroutine psb_ciluk_factint(fill_in,ialg,a,b,&
@@ -304,7 +304,7 @@ contains
 
     implicit none
 
-  ! Arguments 
+  ! Arguments
     integer(psb_ipk_), intent(in)                 :: fill_in, ialg
     type(psb_cspmat_type),intent(in)              :: a,b
     integer(psb_ipk_),intent(inout)               :: l1,l2,info
@@ -330,14 +330,14 @@ contains
 
     select case(ialg)
     case(psb_ilu_n_,psb_milu_n_)
-      ! Ok 
+      ! Ok
     case default
       info=psb_err_input_asize_invalid_i_
       call psb_errpush(info,name,&
            & i_err=(/itwo,ialg,izero,izero,izero/))
       goto 9999
     end select
-    if (fill_in < 0) then 
+    if (fill_in < 0) then
       info=psb_err_input_asize_invalid_i_
       call psb_errpush(info,name, &
            & i_err=(/ione,fill_in,izero,izero,izero/))
@@ -349,7 +349,7 @@ contains
     m  = ma+mb
 
     !
-    ! Allocate a temporary buffer for the iluk_copyin function 
+    ! Allocate a temporary buffer for the iluk_copyin function
     !
 
     call trw%allocate(izero,izero,ione)
@@ -361,7 +361,7 @@ contains
       call psb_errpush(info,name,a_err='psb_sp_all')
       goto 9999
     end if
-    
+
     l1=0
     l2=0
     lirp(1) = 1
@@ -381,34 +381,34 @@ contains
     uplevs(:)  = m+1
     row(:)     = czero
     rowlevs(:) = -(m+1)
-    
+
     !
     ! Cycle over the matrix rows
     !
     do i = 1, m
-      
+
       !
       ! At each iteration of the loop we keep in a heap the column indices
       ! affected by the factorization. The heap is initialized and filled
       ! in the iluk_copyin routine, and updated during the elimination, in
       ! the iluk_fact routine. The heap is ideal because at each step we need
       ! the lowest index, but we also need to insert new items, and the heap
-      ! allows to do both in log time. 
+      ! allows to do both in log time.
       !
       d(i) = czero
       if (i<=ma) then
         !
-        ! Copy into trw the i-th local row of the matrix, stored in a 
-        ! 
+        ! Copy into trw the i-th local row of the matrix, stored in a
+        !
         call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info)
       else
         !
         ! Copy into trw the i-th local row of the matrix, stored in b
-        ! (as (i-ma)-th row) 
-        ! 
+        ! (as (i-ma)-th row)
+        !
         call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info)
       endif
-      
+
       ! Do an elimination step on the current row. It turns out we only
       ! need to keep track of fill levels for the upper triangle, hence we
       ! do not have a lowlevs variable.
@@ -417,7 +417,7 @@ contains
            & d,uja,uirp,uval,uplevs,nidx,idxs,info)
       !
       ! Copy the row into lval/d(i)/uval
-      ! 
+      !
       if (info == psb_success_) call iluk_copyout(fill_in,ialg,i,m,row,rowlevs,nidx,idxs,&
            & l1,l2,lja,lirp,lval,d,uja,uirp,uval,uplevs,info)
       if (info /= psb_success_) then
@@ -474,11 +474,11 @@ contains
   !
   !  This routine is used by psb_ciluk_factint in the computation of the
   !  ILU(k)/MILU(k) factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    i       -  integer, input.
-  !               The local index of the row to be extracted from the 
+  !               The local index of the row to be extracted from the
   !               sparse matrix structure a.
   !    m       -  integer, input.
   !               The number of rows of the local matrix stored into a.
@@ -510,7 +510,7 @@ contains
   !               staging buffer trw. See below.
   !    trw     -  type(psb_cspmat_type), input/output.
   !               A staging buffer. If the matrix A is not in CSR format, we use
-  !               the psb_sp_getblk routine and store its output in trw; when we 
+  !               the psb_sp_getblk routine and store its output in trw; when we
   !               need to call psb_sp_getblk we do it for a block of rows, and then
   !               we consume them from trw in successive calls to this routine,
   !               until we empty the buffer. Thus we will make a call to psb_sp_getblk
@@ -521,8 +521,8 @@ contains
     use psb_base_mod
 
     implicit none
-  
-  ! Arguments 
+
+  ! Arguments
     type(psb_cspmat_type), intent(in)         :: a
     type(psb_c_coo_sparse_mat), intent(inout) :: trw
     integer(psb_ipk_), intent(in)           :: i,m,jmin,jmax
@@ -542,17 +542,17 @@ contains
     if (psb_errstatus_fatal()) then
       info = psb_err_internal_error_; goto 9999
     end if
-    call heap%init(info) 
+    call heap%init(info)
 
-    select type (aa=> a%a) 
-    type is (psb_c_csr_sparse_mat) 
+    select type (aa=> a%a)
+    type is (psb_c_csr_sparse_mat)
       !
       ! Take a fast shortcut if the matrix is stored in CSR format
       !
-      
+
       do j = aa%irp(i), aa%irp(i+1) - 1
         k          = aa%ja(j)
-        if ((jmin<=k).and.(k<=jmax)) then 
+        if ((jmin<=k).and.(k<=jmax)) then
           row(k)     = aa%val(j)
           rowlevs(k) = 0
           call heap%insert(k,info)
@@ -562,14 +562,14 @@ contains
     class default
 
       !
-      ! Otherwise use psb_sp_getblk, slower but able (in principle) of 
+      ! Otherwise use psb_sp_getblk, slower but able (in principle) of
       ! handling any format. In this case, a block of rows is extracted
       ! instead of a single row, for performance reasons, and these
       ! rows are copied one by one into the array row, through successive
       ! calls to iluk_copyin.
       !
 
-      if ((mod(i,nrb) == 1).or.(nrb == 1)) then 
+      if ((mod(i,nrb) == 1).or.(nrb == 1)) then
         irb = min(m-i+1,nrb)
         call aa%csget(i,i+irb-1,trw,info)
         if (info /= psb_success_) then
@@ -581,11 +581,11 @@ contains
         ktrw=1
       end if
       nz = trw%get_nzeros()
-      do 
+      do
         if (ktrw > nz) exit
         if (trw%ia(ktrw) > i) exit
         k          = trw%ja(ktrw)
-        if ((jmin<=k).and.(k<=jmax)) then 
+        if ((jmin<=k).and.(k<=jmax)) then
           row(k)     = trw%val(ktrw)
           rowlevs(k) = 0
           call heap%insert(k,info)
@@ -674,10 +674,10 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
   ! Arguments
-    type(psb_i_heap), intent(inout)               :: heap 
+    type(psb_i_heap), intent(inout)               :: heap
     integer(psb_ipk_), intent(in)                 :: i, fill_in
     integer(psb_ipk_), intent(inout)              :: nidx,info
     integer(psb_ipk_), intent(inout)              :: rowlevs(:)
@@ -690,7 +690,7 @@ contains
     complex(psb_spk_)   :: rwk
 
     info = psb_success_
-    if (.not.allocated(idxs)) then 
+    if (.not.allocated(idxs)) then
       allocate(idxs(200),stat=info)
       if (info /= psb_success_) return
     endif
@@ -702,34 +702,34 @@ contains
     !
     do
       ! Beware: (iret < 0) means that the heap is empty, not an error.
-      call heap%get_first(k,iret) 
+      call heap%get_first(k,iret)
       if (iret < 0) return
 
-      ! 
+      !
       ! Just in case an index has been put on the heap more than once.
       !
       if (k == lastk) cycle
 
-      lastk = k 
+      lastk = k
       nidx = nidx + 1
-      if (nidx>size(idxs)) then 
+      if (nidx>size(idxs)) then
         call psb_realloc(nidx+psb_heap_resize,idxs,info)
         if (info /= psb_success_) return
       end if
       idxs(nidx) = k
-      
-      if ((row(k) /= czero).and.(rowlevs(k) <= fill_in).and.(k<i)) then 
+
+      if ((row(k) /= czero).and.(rowlevs(k) <= fill_in).and.(k<i)) then
         !
         ! Note: since U is scaled while copying it out (see iluk_copyout),
         ! we can use rwk in the update below
-        ! 
+        !
         rwk    = row(k)
-        row(k) = row(k) * d(k)    ! d(k) == 1/a(k,k)          
+        row(k) = row(k) * d(k)    ! d(k) == 1/a(k,k)
         lrwk   = rowlevs(k)
-          
+
         do jj=uirp(k),uirp(k+1)-1
           j = uja(jj)
-          if (j<=k) then 
+          if (j<=k) then
             info = -i
             return
           endif
@@ -738,9 +738,9 @@ contains
           ! The fill levels are initialized to a negative value. If we find
           ! one, it means that it is an as yet untouched index, so we need
           ! to insert it; otherwise it is already on the heap, there is no
-          ! need to insert it more than once. 
+          ! need to insert it more than once.
           !
-          if (rowlevs(j)<0) then 
+          if (rowlevs(j)<0) then
             call heap%insert(j,info)
             if (info /= psb_success_) return
             rowlevs(j) = abs(rowlevs(j))
@@ -781,7 +781,7 @@ contains
   !
   !  This routine is used by psb_ciluk_factint in the computation of the
   !  ILU(k)/MILU(k) factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    fill_in -  integer, input.
@@ -822,14 +822,14 @@ contains
   !               to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor, copied in lval row by row (see 
+  !               of the L factor, copied in lval row by row (see
   !               psb_ciluk_factint), according to the CSR storage format.
   !    lval   -  complex(psb_spk_), dimension(:), input/output.
   !               The array where the entries of the row corresponding to the
   !               L factor are copied.
   !    d       -  complex(psb_spk_), dimension(:), input/output.
   !               The array where the inverse of the diagonal entry of the
-  !               row is copied (only d(i) is used by the routine). 
+  !               row is copied (only d(i) is used by the routine).
   !    uja    -  integer, dimension(:), input/output.
   !               The column indices of the nonzero entries of the U factor
   !               copied in uval row by row (see psb_ciluk_factint), according
@@ -850,7 +850,7 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
     ! Arguments
     integer(psb_ipk_), intent(in)                  :: fill_in, ialg, i, m, nidx
@@ -880,17 +880,17 @@ contains
       if (j<i) then
         !
         ! Copy the lower part of the row
-        !  
-        if (rowlevs(j) <= fill_in) then 
-          l1     = l1 + 1 
+        !
+        if (rowlevs(j) <= fill_in) then
+          l1     = l1 + 1
           if (size(lval) < l1) then
-            ! 
+            !
             ! Figure out a good reallocation size!
-            ! 
+            !
             isz  = (max((l1/i)*m,int(1.2*l1),l1+100))
-            call psb_realloc(isz,lval,info) 
-            if (info == psb_success_) call psb_realloc(isz,lja,info) 
-            if (info /= psb_success_) then 
+            call psb_realloc(isz,lval,info)
+            if (info == psb_success_) call psb_realloc(isz,lja,info)
+            if (info /= psb_success_) then
               info=psb_err_from_subroutine_
               call psb_errpush(info,name,a_err='Allocate')
               goto 9999
@@ -914,26 +914,26 @@ contains
         !
         ! Copy the diagonal entry of the row and re-initialize
         ! row(j) and rowlevs(j)
-        !  
-        d(i)       = d(i) + row(i) 
+        !
+        d(i)       = d(i) + row(i)
         row(i)     = czero
         rowlevs(i) = -(m+1)
 
-      else if (j>i) then 
+      else if (j>i) then
         !
         ! Copy the upper part of the row
-        ! 
-        if (rowlevs(j) <= fill_in) then 
-          l2     = l2 + 1 
-          if (size(uval) < l2) then 
-            ! 
+        !
+        if (rowlevs(j) <= fill_in) then
+          l2     = l2 + 1
+          if (size(uval) < l2) then
+            !
             ! Figure out a good reallocation size!
             !
             isz  = max((l2/i)*m,int(1.2*l2),l2+100)
-            call psb_realloc(isz,uval,info) 
-            if (info == psb_success_) call psb_realloc(isz,uja,info) 
+            call psb_realloc(isz,uval,info)
+            if (info == psb_success_) call psb_realloc(isz,uja,info)
             if (info == psb_success_) call psb_realloc(isz,uplevs,info,pad=(m+1))
-            if (info /= psb_success_) then 
+            if (info /= psb_success_) then
               info=psb_err_from_subroutine_
               call psb_errpush(info,name,a_err='Allocate')
               goto 9999
@@ -941,11 +941,11 @@ contains
           end if
           uja(l2)   = j
           uval(l2)  = row(j)
-          uplevs(l2) = rowlevs(j) 
+          uplevs(l2) = rowlevs(j)
         else if (ialg == psb_milu_n_) then
           !
           ! MILU(k): add discarded entries to the diagonal one
-          ! 
+          !
           d(i) = d(i) + row(j)
         end if
         !
@@ -963,13 +963,13 @@ contains
     lirp(i+1) = l1 + 1
     uirp(i+1) = l2 + 1
 
-    !     
+    !
     ! Check the pivot size
     !
     if (abs(d(i)) < s_epstol) then
       !
       ! Too small pivot: unstable factorization
-      !     
+      !
       info = psb_err_pivot_too_small_
       int_err(1) = i
       write(ch_err,'(g20.10)') d(i)
diff --git a/prec/impl/psb_c_ilut_fact.f90 b/prec/impl/psb_c_ilut_fact.f90
index 06b8b477..633899de 100644
--- a/prec/impl/psb_c_ilut_fact.f90
+++ b/prec/impl/psb_c_ilut_fact.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,21 +27,21 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
+!
 !    Moved here from MLD2P4, original copyright below.
-!  
-!  
-!   
+!
+!
+!
 !                             MLD2P4  version 2.2
 !    MultiLevel Domain Decomposition Parallel Preconditioners Package
 !               based on PSBLAS (Parallel Sparse BLAS version 3.5)
-!    
-!    (C) Copyright 2008-2018 
-!  
-!        Salvatore Filippone  
-!        Pasqua D'Ambra   
-!        Daniela di Serafino   
-!   
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -53,7 +53,7 @@
 !      3. The name of the MLD2P4 group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -65,8 +65,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!  
+!
+!
 ! File: psb_cilut_fact.f90
 !
 ! Subroutine: psb_cilut_fact
@@ -87,7 +87,7 @@
 !  CSR. The diagonal of the U factor is stored separately (actually, the
 !  inverse of the diagonal entries is stored; this is then managed in the
 !  solve stage associated to the ILU(k,t) factorization).
-!  
+!
 !
 ! Arguments:
 !    fill_in -  integer, input.
@@ -114,7 +114,7 @@
 !               factorization.
 !               Note: its allocation is managed by the calling routine psb_ilu_bld,
 !               hence it cannot be only intent(out).
-!    info    -  integer, output.                    
+!    info    -  integer, output.
 !               Error code.
 !    blck    -  type(psb_cspmat_type), input, optional, target.
 !               The sparse matrix structure containing the remote rows of the
@@ -122,9 +122,9 @@
 !               to build an Additive Schwarz base preconditioner with overlap
 !               greater than 0. If the overlap is 0 or the matrix has been reordered
 !               (see psb_fact_bld), then blck does not contain any row.
-!  
+!
 subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
-  
+
   use psb_base_mod
   use psb_c_ilu_fact_mod, psb_protect_name => psb_cilut_fact
 
@@ -141,7 +141,7 @@ subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
   integer(psb_ipk_), intent(in), optional       :: iscale
   !     Local Variables
   integer(psb_ipk_)   ::  l1, l2, m, err_act, iscale_
-  
+
   type(psb_cspmat_type), pointer  :: blck_
   type(psb_c_csr_sparse_mat)       :: ll, uu
   real(psb_spk_)      :: scale
@@ -151,34 +151,34 @@ subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
   info = psb_success_
   call psb_erractionsave(err_act)
 
-  if (fill_in < 0) then 
+  if (fill_in < 0) then
     info=psb_err_input_asize_invalid_i_
     call psb_errpush(info,name, &
          & i_err=(/ione,fill_in,izero,izero,izero/))
     goto 9999
   end if
-  ! 
+  !
   ! Point to / allocate memory for the incomplete factorization
   !
-  if (present(blck)) then 
+  if (present(blck)) then
     blck_ => blck
   else
-    allocate(blck_,stat=info) 
-    if (info == psb_success_) call blck_%csall(izero,izero,info,ione) 
+    allocate(blck_,stat=info)
+    if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR')
     if (info /= psb_success_) then
       info=psb_err_from_subroutine_
-      ch_err='csall'
+      ch_err='allocate'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
   endif
-  if (present(iscale)) then 
+  if (present(iscale)) then
     iscale_ = iscale
   else
     iscale_ = psb_ilu_scale_none_
   end if
-  
-  select case(iscale_) 
+
+  select case(iscale_)
   case(psb_ilu_scale_none_)
     scale = sone
   case(psb_ilu_scale_maxval_)
@@ -189,10 +189,10 @@ subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
     call psb_errpush(info,name,i_err=(/ione*9,iscale_,izero,izero,izero/))
     goto 9999
   end select
-  
+
   m = a%get_nrows() + blck_%get_nrows()
   if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.&
-       & (m > size(d))    ) then 
+       & (m > size(d))    ) then
     write(0,*) 'Wrong allocation status for L,D,U? ',&
          & l%get_nrows(),size(d),u%get_nrows()
     info = -1
@@ -225,15 +225,15 @@ subroutine psb_cilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
   call u%set_triangle()
   call u%set_unit()
   call u%set_upper()
-  
+
   !
   ! Nullify pointer / deallocate memory
   !
-  if (present(blck)) then 
-    blck_ => null() 
+  if (present(blck)) then
+    blck_ => null()
   else
     call blck_%free()
-    deallocate(blck_,stat=info) 
+    deallocate(blck_,stat=info)
     if(info.ne.0) then
       info=psb_err_from_subroutine_
       ch_err='psb_sp_free'
@@ -298,7 +298,7 @@ contains
   !               according to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor in lval, according to the CSR storage format. 
+  !               of the L factor in lval, according to the CSR storage format.
   !    uval   -  complex(psb_spk_), dimension(:), input/output.
   !               The U factor in the incomplete factorization.
   !               The entries of U are stored according to the CSR format.
@@ -307,12 +307,12 @@ contains
   !               according to the CSR storage format.
   !    uirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the U factor in uval, according to the CSR storage format. 
+  !               of the U factor in uval, according to the CSR storage format.
   !    l1      -  integer, output
   !               The number of nonzero entries in lval.
   !    l2      -  integer, output
   !               The number of nonzero entries in uval.
-  !    info    -  integer, output.           
+  !    info    -  integer, output.
   !               Error code.
   !
   subroutine psb_cilut_factint(fill_in,thres,a,b,&
@@ -320,7 +320,7 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
   ! Arguments
     integer(psb_ipk_), intent(in)                 :: fill_in
@@ -355,7 +355,7 @@ contains
     m  = ma+mb
 
     !
-    ! Allocate a temporary buffer for the ilut_copyin function 
+    ! Allocate a temporary buffer for the ilut_copyin function
     !
     call trw%allocate(izero,izero,ione)
     if (info == psb_success_) call psb_ensure_size(m+1,lirp,info)
@@ -366,7 +366,7 @@ contains
       call psb_errpush(info,name,a_err='psb_sp_all')
       goto 9999
     end if
-    
+
     l1=0
     l2=0
     lirp(1) = 1
@@ -396,10 +396,10 @@ contains
       ! in the ilut_copyin function, and updated during the elimination, in
       ! the ilut_fact routine. The heap is ideal because at each step we need
       ! the lowest index, but we also need to insert new items, and the heap
-      ! allows to do both in log time. 
+      ! allows to do both in log time.
       !
       d(i) = czero
-      if (i<=ma) then 
+      if (i<=ma) then
         call ilut_copyin(i,ma,a,i,ione,m,nlw,nup,jmaxup,nrmi,weight,&
              & row,heap,ktrw,trw,info)
       else
@@ -414,7 +414,7 @@ contains
            & d,uja,uirp,uval,nidx,idxs,info)
       !
       ! Copy the row into lval/d(i)/uval
-      ! 
+      !
       if (info == psb_success_) call ilut_copyout(fill_in,thres,i,m,&
            & nlw,nup,jmaxup,nrmi,row,nidx,idxs,&
            & l1,l2,lja,lirp,lval,d,uja,uirp,uval,info)
@@ -429,7 +429,7 @@ contains
     !
     ! Adjust diagonal accounting for scale factor
     !
-    if (weight /= sone) then 
+    if (weight /= sone) then
       d(1:m) = d(1:m)*weight
     end if
 
@@ -469,7 +469,7 @@ contains
   !  - storing into a heap the column indices of the nonzero entries of the copied
   !    row;
   !  - computing the column index of the first entry with maximum absolute value
-  !    in the part of the row belonging to the upper triangle;            
+  !    in the part of the row belonging to the upper triangle;
   !  - computing the 2-norm of the row.
   !  The output array row is such that it contains a full row of A, i.e. it contains
   !  also the zero entries of the row. This is useful for the elimination step
@@ -482,11 +482,11 @@ contains
   !
   !  This routine is used by psb_cilut_factint in the computation of the ILU(k,t)
   !  factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    i       -  integer, input.
-  !               The local index of the row to be extracted from the 
+  !               The local index of the row to be extracted from the
   !               sparse matrix structure a.
   !    m       -  integer, input.
   !               The number of rows of the local matrix stored into a.
@@ -533,7 +533,7 @@ contains
   !               staging buffer trw. See below.
   !    trw     -  type(psb_cspmat_type), input/output.
   !               A staging buffer. If the matrix A is not in CSR format, we use
-  !               the psb_sp_getblk routine and store its output in trw; when we 
+  !               the psb_sp_getblk routine and store its output in trw; when we
   !               need to call psb_sp_getblk we do it for a block of rows, and then
   !               we consume them from trw in successive calls to this routine,
   !               until we empty the buffer. Thus we will make a call to psb_sp_getblk
@@ -542,7 +542,7 @@ contains
   subroutine ilut_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,&
        & nrmi,weight,row,heap,ktrw,trw,info)
     use psb_base_mod
-    implicit none 
+    implicit none
     type(psb_cspmat_type), intent(in)         :: a
     type(psb_c_coo_sparse_mat), intent(inout) :: trw
     integer(psb_ipk_), intent(in)               :: i, m,jmin,jmax,jd
@@ -584,29 +584,29 @@ contains
     dmaxup = szero
     nrmi   = szero
 
-    select type (aa=> a%a) 
-    type is (psb_c_csr_sparse_mat) 
+    select type (aa=> a%a)
+    type is (psb_c_csr_sparse_mat)
       !
       ! Take a fast shortcut if the matrix is stored in CSR format
       !
 
       do j = aa%irp(i), aa%irp(i+1) - 1
         k          = aa%ja(j)
-        if ((jmin<=k).and.(k<=jmax)) then 
-          row(k)     = aa%val(j)*weight 
+        if ((jmin<=k).and.(k<=jmax)) then
+          row(k)     = aa%val(j)*weight
           call heap%insert(k,info)
           if (info /= psb_success_) exit
-          if (k<jd) nlw = nlw + 1 
-          if (k>jd) then 
+          if (k<jd) nlw = nlw + 1
+          if (k>jd) then
             nup = nup + 1
-            if (abs(row(k))>dmaxup) then 
+            if (abs(row(k))>dmaxup) then
               jmaxup = k
               dmaxup = abs(row(k))
             end if
           end if
         end if
       end do
-      if (info /= psb_success_) then 
+      if (info /= psb_success_) then
         info=psb_err_from_subroutine_
         call psb_errpush(info,name,a_err='psb_insert_heap')
         goto 9999
@@ -617,16 +617,16 @@ contains
 
 
     class default
-      
+
       !
-      ! Otherwise use psb_sp_getblk, slower but able (in principle) of 
+      ! Otherwise use psb_sp_getblk, slower but able (in principle) of
       ! handling any format. In this case, a block of rows is extracted
       ! instead of a single row, for performance reasons, and these
       ! rows are copied one by one into the array row, through successive
       ! calls to ilut_copyin.
       !
 
-      if ((mod(i,nrb) == 1).or.(nrb == 1)) then 
+      if ((mod(i,nrb) == 1).or.(nrb == 1)) then
         irb = min(m-i+1,nrb)
         call aa%csget(i,i+irb-1,trw,info)
         if (info /= psb_success_) then
@@ -639,18 +639,18 @@ contains
 
       kin = ktrw
       nz = trw%get_nzeros()
-      do 
+      do
         if (ktrw > nz) exit
         if (trw%ia(ktrw) > i) exit
         k          = trw%ja(ktrw)
-        if ((jmin<=k).and.(k<=jmax)) then 
+        if ((jmin<=k).and.(k<=jmax)) then
           row(k)     = trw%val(ktrw)*weight
           call heap%insert(k,info)
           if (info /= psb_success_) exit
-          if (k<jd) nlw = nlw + 1 
-          if (k>jd) then 
+          if (k<jd) nlw = nlw + 1
+          if (k>jd) then
             nup = nup + 1
-            if (abs(row(k))>dmaxup) then 
+            if (abs(row(k))>dmaxup) then
               jmaxup = k
               dmaxup = abs(row(k))
             end if
@@ -734,10 +734,10 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
   ! Arguments
-    type(psb_i_heap), intent(inout)               :: heap 
+    type(psb_i_heap), intent(inout)               :: heap
     integer(psb_ipk_), intent(in)                 :: i
     integer(psb_ipk_), intent(inout)              :: nidx,info
     real(psb_spk_), intent(in)                     :: thres,nrmi
@@ -753,23 +753,23 @@ contains
     call psb_ensure_size(200*ione,idxs,info)
     if (info /= psb_success_) return
     nidx  = 0
-    lastk = -1 
+    lastk = -1
     !
     ! Do while there are indices to be processed
     !
     do
 
-      call heap%get_first(k,iret) 
+      call heap%get_first(k,iret)
       if (iret < 0) exit
 
-      ! 
+      !
       ! An index may have been put on the heap more than once.
       !
       if (k == lastk) cycle
 
-      lastk = k 
+      lastk = k
       lowert: if (k<i)  then
- 
+
         !
         ! Dropping rule based on the threshold: compare the absolute
         ! value of each updated entry of row with thres * 2-norm of row.
@@ -777,7 +777,7 @@ contains
         rwk    = row(k)
         row(k) = row(k) * d(k)
         if (abs(row(k)) < thres*nrmi) then
-          ! 
+          !
           ! Drop the entry.
           !
           row(k) = czero
@@ -786,11 +786,11 @@ contains
           !
           ! Note: since U is scaled while copying it out (see ilut_copyout),
           ! we can use rwk in the update below.
-          !           
+          !
           do jj=uirp(k),uirp(k+1)-1
             j = uja(jj)
-            if (j<=k) then 
-              info = -i 
+            if (j<=k) then
+              info = -i
               return
             endif
             !
@@ -799,7 +799,7 @@ contains
             !
             row(j)     = row(j) - rwk * uval(jj)
             if (abs(row(j)) < thres*nrmi) then
-              ! 
+              !
               ! Drop the entry.
               !
               row(j) = czero
@@ -818,7 +818,7 @@ contains
       ! If we get here it is an index we need to keep on copyout.
       !
       nidx       = nidx + 1
-      call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize)      
+      call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize)
       if (info /= psb_success_) return
       idxs(nidx) = k
 
@@ -852,7 +852,7 @@ contains
   !
   !  This routine is used by psb_cilut_factint in the computation of the ILU(k,t)
   !  factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    fill_in -  integer, input.
@@ -895,14 +895,14 @@ contains
   !               to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor, copied in lval row by row (see 
+  !               of the L factor, copied in lval row by row (see
   !               psb_cilut_factint), according to the CSR storage format.
   !    lval   -  complex(psb_spk_), dimension(:), input/output.
   !               The array where the entries of the row corresponding to the
   !               L factor are copied.
   !    d       -  complex(psb_spk_), dimension(:), input/output.
   !               The array where the inverse of the diagonal entry of the
-  !               row is copied (only d(i) is used by the routine). 
+  !               row is copied (only d(i) is used by the routine).
   !    uja    -  integer, dimension(:), input/output.
   !               The column indices of the nonzero entries of the U factor
   !               copied in uval row by row (see psb_cilut_factint), according
@@ -921,7 +921,7 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
     ! Arguments
     integer(psb_ipk_), intent(in)                       :: fill_in,i,m,nidx,nlw,nup,jmaxup
@@ -950,22 +950,22 @@ contains
     end if
 
     !
-    ! Here we need to apply also the dropping rule base on the fill-in. 
+    ! Here we need to apply also the dropping rule base on the fill-in.
     ! We do it by putting into a heap the elements that are not dropped
-    ! by using the 2-norm rule, and then copying them out. 
+    ! by using the 2-norm rule, and then copying them out.
     !
     ! The heap goes down on the entry absolute value, so the first item
-    ! is the largest absolute value. 
+    ! is the largest absolute value.
     !
 
     call heap%init(info,dir=psb_asort_down_)
 
     if (info == psb_success_) allocate(xwid(nidx),xw(nidx),indx(nidx),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info=psb_err_alloc_request_
       call psb_errpush(info,name,i_err=(/3*nidx,izero,izero,izero,izero/),&
            & a_err='complex(psb_spk_)')
-      goto 9999      
+      goto 9999
     end if
 
     !
@@ -976,7 +976,7 @@ contains
     idxp = 0
 
     do
- 
+
       idxp = idxp + 1
       if (idxp > nidx) exit
       if (idxs(idxp) >= i) exit
@@ -987,8 +987,8 @@ contains
       !
       if (abs(witem) < thres*nrmi) cycle
 
-      nz       = nz + 1 
-      xw(nz)   = witem 
+      nz       = nz + 1
+      xw(nz)   = witem
       xwid(nz) = widx
       call heap%insert(witem,widx,info)
       if (info /= psb_success_) then
@@ -1000,9 +1000,9 @@ contains
 
     !
     ! Now we have to take out the first nlw+fill_in entries
-    ! 
+    !
     if (nz <= nlw+fill_in) then
-      ! 
+      !
       ! Just copy everything from xw, and it is already ordered
       !
     else
@@ -1014,7 +1014,7 @@ contains
           call psb_errpush(info,name,a_err='psb_heap_get_first')
           goto 9999
         end if
-      
+
         xw(k)   = witem
         xwid(k) = widx
       end do
@@ -1029,15 +1029,15 @@ contains
     ! Copy out the lower part of the row
     !
     do k=1,nz
-      l1     = l1 + 1 
+      l1     = l1 + 1
       if (size(lval) < l1) then
-        ! 
+        !
         ! Figure out a good reallocation size!
-        ! 
+        !
         isz  = (max((l1/i)*m,int(1.2*l1),l1+100))
-        call psb_realloc(isz,lval,info) 
-        if (info == psb_success_) call psb_realloc(isz,lja,info) 
-        if (info /= psb_success_) then 
+        call psb_realloc(isz,lval,info)
+        if (info == psb_success_) call psb_realloc(isz,lja,info)
+        if (info /= psb_success_) then
           info=psb_err_from_subroutine_
           call psb_errpush(info,name,a_err='Allocate')
           goto 9999
@@ -1046,25 +1046,25 @@ contains
       lja(l1)   = xwid(k)
       lval(l1)  = xw(indx(k))
     end do
-    
+
     !
     ! Make sure idxp points to the diagonal entry
     !
-    if (idxp <= size(idxs)) then 
-      if (idxs(idxp) < i) then 
-        do 
+    if (idxp <= size(idxs)) then
+      if (idxs(idxp) < i) then
+        do
           idxp = idxp + 1
           if (idxp > nidx) exit
           if (idxs(idxp) >= i) exit
         end do
       end if
     end if
-    if (idxp > size(idxs)) then 
+    if (idxp > size(idxs)) then
 !!$      write(0,*) 'Warning: missing diagonal element in the row '
     else
-      if (idxs(idxp) > i) then 
+      if (idxs(idxp) > i) then
 !!$        write(0,*) 'Warning: missing diagonal element in the row '
-      else if (idxs(idxp) /= i) then 
+      else if (idxs(idxp) /= i) then
 !!$        write(0,*) 'Warning: impossible error: diagonal has vanished'
       else
         !
@@ -1076,7 +1076,7 @@ contains
         if (abs(d(i)) < s_epstol) then
           !
           ! Too small pivot: unstable factorization
-          !     
+          !
           info = psb_err_pivot_too_small_
           int_err(1) = i
           write(ch_err,'(g20.10)') d(i)
@@ -1092,7 +1092,7 @@ contains
     end if
 
     !
-    ! Now the upper part 
+    ! Now the upper part
     !
 
     call heap%init(info,dir=psb_asort_down_)
@@ -1104,15 +1104,15 @@ contains
 
     nz       = 0
     do
- 
+
       idxp = idxp + 1
       if (idxp > nidx) exit
       widx      = idxs(idxp)
-      if (widx <= i) then 
+      if (widx <= i) then
 !!$        write(0,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp)
         cycle
       end if
-      if (widx > m) then 
+      if (widx > m) then
 !!$        write(0,*) 'Warning: impossible value',widx,i,idxp,idxs(idxp)
         cycle
       end if
@@ -1120,12 +1120,12 @@ contains
       !
       ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway.
       !
-      if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then 
-        cycle 
+      if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then
+        cycle
       end if
 
       nz       = nz + 1
-      xw(nz)   = witem 
+      xw(nz)   = witem
       xwid(nz) = widx
       call heap%insert(witem,widx,info)
       if (info /= psb_success_) then
@@ -1133,7 +1133,7 @@ contains
         call psb_errpush(info,name,a_err='psb_insert_heap')
         goto 9999
       end if
-      
+
     end do
 
     !
@@ -1141,7 +1141,7 @@ contains
     ! we include entry jmaxup.
     !
     if (nz <= nup+fill_in) then
-      ! 
+      !
       ! Just copy everything from xw
       !
       fndmaxup=.true.
@@ -1155,13 +1155,13 @@ contains
         if (widx == jmaxup) fndmaxup=.true.
       end do
     end if
-    if ((i<jmaxup).and.(jmaxup<=m)) then 
-      if (.not.fndmaxup) then 
-        ! 
+    if ((i<jmaxup).and.(jmaxup<=m)) then
+      if (.not.fndmaxup) then
+        !
         ! Include entry jmaxup, if it is not already there.
-        ! Put it in the place of the smallest coefficient. 
+        ! Put it in the place of the smallest coefficient.
         !
-        xw(nz)   = row(jmaxup) 
+        xw(nz)   = row(jmaxup)
         xwid(nz) = jmaxup
       endif
     end if
@@ -1175,15 +1175,15 @@ contains
     ! Copy out the upper part of the row
     !
     do k=1,nz
-      l2     = l2 + 1 
+      l2     = l2 + 1
       if (size(uval) < l2) then
-        ! 
+        !
         ! Figure out a good reallocation size!
-        ! 
+        !
         isz  = max((l2/i)*m,int(1.2*l2),l2+100)
-        call psb_realloc(isz,uval,info) 
-        if (info == psb_success_) call psb_realloc(isz,uja,info) 
-        if (info /= psb_success_) then 
+        call psb_realloc(isz,uval,info)
+        if (info == psb_success_) call psb_realloc(isz,uja,info)
+        if (info /= psb_success_) then
           info=psb_err_from_subroutine_
           call psb_errpush(info,name,a_err='Allocate')
           goto 9999
diff --git a/prec/impl/psb_c_invk_fact.f90 b/prec/impl/psb_c_invk_fact.f90
new file mode 100644
index 00000000..5c047b18
--- /dev/null
+++ b/prec/impl/psb_c_invk_fact.f90
@@ -0,0 +1,495 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+
+subroutine psb_c_invk_bld(a,fill1, fill2,lmat,d,umat,desc,info,blck)
+
+  use psb_base_mod
+  use psb_c_invk_fact_mod, psb_protect_name =>  psb_c_invk_bld
+  use psb_c_ilu_fact_mod
+  implicit none
+
+  ! Arguments
+  type(psb_cspmat_type), intent(in), target   :: a
+  integer(psb_ipk_), intent(in)               :: fill1, fill2
+  type(psb_cspmat_type), intent(inout)        :: lmat, umat
+  complex(psb_spk_), allocatable                 :: d(:)
+  Type(psb_desc_type), Intent(inout)          :: desc
+  integer(psb_ipk_), intent(out)              :: info
+  type(psb_cspmat_type), intent(in), optional :: blck
+  !
+  integer(psb_ipk_)      :: i, nztota, err_act, n_row, nrow_a, n_col
+  type(psb_cspmat_type)  :: atmp
+  complex(psb_spk_), allocatable :: pq(:), pd(:)
+  integer(psb_ipk_), allocatable :: uplevs(:)
+  integer(psb_ipk_)   :: debug_level, debug_unit
+  type(psb_ctxt_type) :: ctxt
+  integer(psb_ipk_)   :: np,me
+  integer(psb_ipk_)   :: nzrmax
+  character(len=20)   :: name, ch_err
+
+
+  info = psb_success_
+  name='psb_cinvk_bld'
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  ctxt        = psb_cd_get_context(desc)
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' start'
+
+  !
+  ! Check the memory available to hold the incomplete L and U factors
+  ! and allocate it if needed
+  !
+  nrow_a = a%get_nrows()
+  nztota = a%get_nzeros()
+
+  if (present(blck)) then
+    nztota = nztota + blck%get_nzeros()
+  end if
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),&
+       & ': out get_nnzeros',nrow_a,nztota,&
+       & a%get_nrows(),a%get_ncols(),a%get_nzeros()
+
+
+  n_row  = psb_cd_get_local_rows(desc)
+  n_col  = psb_cd_get_local_cols(desc)
+  allocate(pd(n_row),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+
+  call lmat%allocate(n_row,n_row,info,nz=nztota)
+  if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota)
+
+
+  call psb_iluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck)
+       !,uplevs=uplevs)
+  !call psb_ciluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck)
+
+  if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota)
+  if(info/=0) then
+    info=psb_err_from_subroutine_
+    ch_err='psb_sp_all'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+
+  !
+  ! Compute the aprox U^-1  and L^-1
+  !
+  call psb_sparse_invk(n_row,umat,atmp,fill2,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,umat,info)
+  if (info == psb_success_) call lmat%transp()
+  if (info == psb_success_) call psb_sparse_invk(n_row,lmat,atmp,fill2,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,lmat,info)
+  if (info == psb_success_) call lmat%transp()
+  ! Done. Hopefully....
+
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='invt')
+    goto 9999
+  end if
+
+  call psb_move_alloc(pd,d,info)
+  call lmat%set_asb()
+  call lmat%trim()
+  call umat%set_asb()
+  call umat%trim()
+
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' end'
+
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_c_invk_bld
+
+subroutine psb_csparse_invk(n,a,z,fill_in,info,inlevs)
+
+  use psb_base_mod
+  use psb_c_invk_fact_mod, psb_protect_name => psb_csparse_invk
+
+  integer(psb_ipk_), intent(in)           :: n
+  type(psb_cspmat_type), intent(in)       :: a
+  type(psb_cspmat_type), intent(inout)    :: z
+  integer(psb_ipk_), intent(in)           :: fill_in
+  integer(psb_ipk_), intent(out)          :: info
+  integer(psb_ipk_), intent(in), optional :: inlevs(:)
+  !
+  integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2
+  integer(psb_ipk_), allocatable        :: ia(:), ja(:), iz(:), jz(:)
+  complex(psb_spk_), allocatable :: zw(:), val(:), valz(:)
+  integer(psb_ipk_), allocatable        :: uplevs(:), rowlevs(:), idxs(:)
+  complex(psb_spk_), allocatable :: row(:)
+  type(psb_c_coo_sparse_mat)  :: trw
+  type(psb_c_csr_sparse_mat)  :: acsr, zcsr
+  integer(psb_ipk_)           :: ktrw, nidx
+  type(psb_i_heap)            :: heap
+
+  real(psb_dpk_)     :: alpha
+  character(len=20)  :: name='psb_sp_invk'
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then
+    write(psb_err_unit,*) 'Wrong A '
+    info = psb_err_internal_error_
+    call psb_errpush(psb_err_internal_error_,name,a_err='wrong A')
+    goto 9999
+  end if
+  call a%cp_to(acsr)
+  call trw%allocate(izero,izero,ione)
+  if (info == psb_success_) allocate(zw(n),iz(n),valz(n),&
+       & row(n),rowlevs(n),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  allocate(uplevs(acsr%get_nzeros()),stat=info)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='Allocate')
+    goto 9999
+  end if
+  uplevs(:)  = 0
+  row(:)     = czero
+  rowlevs(:) = -(n+1)
+
+  call zcsr%allocate(n,n,n*fill_in)
+  call zcsr%set_triangle()
+  call zcsr%set_unit(.false.)
+  call zcsr%set_upper()
+  call psb_ensure_size(n+1, idxs,  info)
+
+
+  !
+  !
+  zcsr%irp(1)  = 1
+  nzz          = 0
+
+  l2 = 0
+  outer: do i = 1, n-1
+    ! ZW = e_i
+    call psb_invk_copyin(i,n,acsr,ione,n,row,rowlevs,heap,ktrw,trw,info,&
+         & sign=-sone,inlevs=inlevs)
+    row(i)     = cone
+    rowlevs(i) = 0
+
+    ! Update loop
+    call psb_invk_inv(fill_in,i,row,rowlevs,heap,&
+         & acsr%ja,acsr%irp,acsr%val,uplevs,nidx,idxs,info)
+
+    call psb_invk_copyout(fill_in,i,n,row,rowlevs,nidx,idxs,&
+         & l2,zcsr%ja,zcsr%irp,zcsr%val,info)
+
+    nzz = l2
+  end do outer
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='mainloop')
+    goto 9999
+  end if
+  ipz1 = nzz+1
+  call psb_ensure_size(ipz1,zcsr%val,info)
+  call psb_ensure_size(ipz1,zcsr%ja,info)
+  zcsr%val(ipz1) = cone
+  zcsr%ja(ipz1)  = n
+  zcsr%irp(n+1)  = ipz1+1
+  call zcsr%set_sorted()
+  call z%mv_from(zcsr)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_csparse_invk
+
+subroutine psb_c_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info,sign,inlevs)
+
+  use psb_base_mod
+  use psb_c_invk_fact_mod, psb_protect_name => psb_c_invk_copyin
+
+  implicit none
+
+  ! Arguments
+  type(psb_c_csr_sparse_mat), intent(in)    :: a
+  type(psb_c_coo_sparse_mat), intent(inout) :: trw
+  integer(psb_ipk_), intent(in)             :: i,m,jmin,jmax
+  integer(psb_ipk_), intent(inout)          :: ktrw,info
+  integer(psb_ipk_), intent(inout)          :: rowlevs(:)
+  complex(psb_spk_), intent(inout)             :: row(:)
+  type(psb_i_heap), intent(inout)           :: heap
+  real(psb_spk_), optional, intent(in)      :: sign
+  integer(psb_ipk_), intent(in), optional   :: inlevs(:)
+
+  ! Local variables
+  integer(psb_ipk_)             :: k,j,irb,err_act, nz
+  integer(psb_ipk_), parameter  :: nrb=16
+  real(psb_spk_)                :: sign_
+  character(len=20), parameter  :: name='invk_copyin'
+  character(len=20)             :: ch_err
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  call heap%init(info)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='psb_init_heap')
+    goto 9999
+  end if
+
+  if (present(sign)) then
+    sign_ = sign
+  else
+    sign_ = sone
+  end if
+
+
+  !
+  ! Take a fast shortcut if the matrix is stored in CSR format
+  !
+  if (present(inlevs)) then
+    do j = a%irp(i), a%irp(i+1) - 1
+      k          = a%ja(j)
+      if ((jmin<=k).and.(k<=jmax)) then
+        row(k)     = sign_ * a%val(j)
+        rowlevs(k) = inlevs(j)
+        call heap%insert(k,info)
+      end if
+    end do
+  else
+    do j = a%irp(i), a%irp(i+1) - 1
+      k          = a%ja(j)
+      if ((jmin<=k).and.(k<=jmax)) then
+        row(k)     = sign_ * a%val(j)
+        rowlevs(k) = 0
+        call heap%insert(k,info)
+      end if
+    end do
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_c_invk_copyin
+
+
+subroutine psb_c_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,&
+     &  l2,uia1,uia2,uaspk,info)
+
+  use psb_base_mod
+  use psb_c_invk_fact_mod, psb_protect_name => psb_c_invk_copyout
+
+  implicit none
+
+  ! Arguments
+  integer(psb_ipk_), intent(in)                 :: fill_in, i, m, nidx
+  integer(psb_ipk_), intent(inout)              :: l2, info
+  integer(psb_ipk_), intent(inout)              :: rowlevs(:), idxs(:)
+  integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:)
+  complex(psb_spk_), allocatable, intent(inout)    :: uaspk(:)
+  complex(psb_spk_), intent(inout)                 :: row(:)
+
+  ! Local variables
+  integer(psb_ipk_)              :: j,isz,err_act,int_err(5),idxp
+  character(len=20), parameter  :: name='psb_ciluk_factint'
+  character(len=20)             :: ch_err
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  do idxp=1,nidx
+
+    j = idxs(idxp)
+
+    if (j>=i) then
+      !
+      ! Copy the upper part of the row
+      !
+      if (rowlevs(j) <= fill_in) then
+        l2     = l2 + 1
+        if (size(uaspk) < l2) then
+          !
+          ! Figure out a good reallocation size!
+          !
+          isz  = max(int(1.2*l2),l2+100)
+          call psb_realloc(isz,uaspk,info)
+          if (info == psb_success_) call psb_realloc(isz,uia1,info)
+          if (info /= psb_success_) then
+            info=psb_err_from_subroutine_
+            call psb_errpush(info,name,a_err='Allocate')
+            goto 9999
+          end if
+        end if
+        uia1(l2)   = j
+        uaspk(l2)  = row(j)
+      end if
+      !
+      ! Re-initialize row(j) and rowlevs(j)
+      !
+      row(j)     = czero
+      rowlevs(j) = -(m+1)
+    end if
+  end do
+
+  uia2(i+1) = l2 + 1
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_c_invk_copyout
+
+subroutine psb_cinvk_inv(fill_in,i,row,rowlevs,heap,ja,irp,val,uplevs,&
+     & nidx,idxs,info)
+
+  use psb_base_mod
+  use psb_c_invk_fact_mod, psb_protect_name => psb_cinvk_inv
+
+  implicit none
+
+  ! Arguments
+  type(psb_i_heap), intent(inout)               :: heap
+  integer(psb_ipk_), intent(in)                 :: i, fill_in
+  integer(psb_ipk_), intent(inout)              :: nidx,info
+  integer(psb_ipk_), intent(inout)              :: rowlevs(:)
+  integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+  integer(psb_ipk_), intent(in)                 :: ja(:),irp(:),uplevs(:)
+  complex(psb_spk_), intent(in)                    :: val(:)
+  complex(psb_spk_), intent(inout)                 :: row(:)
+
+  ! Local variables
+  integer(psb_ipk_) :: k,j,lrwk,jj,lastk, iret
+  real(psb_dpk_)    :: rwk
+
+
+  info = psb_success_
+
+  call psb_ensure_size(200, idxs,  info)
+  if (info /= psb_success_) return
+  nidx    = 1
+  idxs(1) = i
+  lastk   = i
+
+  !
+  ! Do while there are indices to be processed
+  !
+  do
+    ! Beware: (iret < 0) means that the heap is empty, not an error.
+    call heap%get_first(k,iret)
+    if (iret < 0) then
+!!$        write(psb_err_unit,*) 'IINVK: ',i,' returning at ',lastk
+      return
+    end if
+
+    !
+    ! Just in case an index has been put on the heap more than once.
+    !
+    if (k == lastk) cycle
+
+    lastk = k
+    nidx = nidx + 1
+    if (nidx>size(idxs)) then
+      call psb_realloc(nidx+psb_heap_resize,idxs,info)
+      if (info /= psb_success_) return
+    end if
+    idxs(nidx) = k
+
+    if ((row(k) /= czero).and.(rowlevs(k) <= fill_in)) then
+      !
+      ! Note: since U is scaled while copying it out (see iluk_copyout),
+      ! we can use rwk in the update below
+      !
+      rwk    = row(k)
+      lrwk   = rowlevs(k)
+
+      do jj=irp(k),irp(k+1)-1
+        j = ja(jj)
+        if (j<=k) then
+          info = -i
+          return
+        endif
+        !
+        ! Insert the index into the heap for further processing.
+        ! The fill levels are initialized to a negative value. If we find
+        ! one, it means that it is an as yet untouched index, so we need
+        ! to insert it; otherwise it is already on the heap, there is no
+        ! need to insert it more than once.
+        !
+        if (rowlevs(j)<0) then
+          call heap%insert(j,info)
+          if (info /= psb_success_) return
+          rowlevs(j) = abs(rowlevs(j))
+        end if
+        !
+        ! Update row(j) and the corresponding fill level
+        !
+        row(j)     = row(j) - rwk * val(jj)
+        rowlevs(j) = min(rowlevs(j),lrwk+uplevs(jj)+1)
+      end do
+
+    end if
+  end do
+
+end subroutine psb_cinvk_inv
diff --git a/prec/impl/psb_c_invt_fact.f90 b/prec/impl/psb_c_invt_fact.f90
new file mode 100644
index 00000000..73e242f6
--- /dev/null
+++ b/prec/impl/psb_c_invt_fact.f90
@@ -0,0 +1,739 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG4PSBLAS, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+
+subroutine psb_c_invt_bld(a,fillin,invfill,thresh,invthresh,&
+     & lmat,d,umat,desc,info,blck)
+
+  use psb_base_mod
+  use psb_c_invt_fact_mod, psb_protect_name =>  psb_c_invt_bld
+  use psb_c_ilu_fact_mod
+  implicit none
+
+  ! Arguments
+  type(psb_cspmat_type), intent(in), target   :: a
+  integer(psb_ipk_), intent(in)               :: fillin,invfill
+  real(psb_spk_), intent(in)                :: thresh
+  real(psb_spk_), intent(in)                  :: invthresh
+  type(psb_cspmat_type), intent(inout)        :: lmat, umat
+  complex(psb_spk_), allocatable                 :: d(:)
+  Type(psb_desc_type), Intent(inout)          :: desc
+  integer(psb_ipk_), intent(out)              :: info
+  type(psb_cspmat_type), intent(in), optional :: blck
+  !
+  integer(psb_ipk_) :: i, nztota, err_act, n_row, nrow_a, n_col
+  type(psb_cspmat_type)       :: atmp
+  complex(psb_spk_), allocatable :: pq(:), pd(:), w(:)
+  integer(psb_ipk_) :: debug_level, debug_unit
+  type(psb_ctxt_type) :: ctxt
+  integer(psb_ipk_)   :: np,me
+  integer(psb_ipk_)   :: nzrmax
+  real(psb_spk_)    :: sp_thresh
+  character(len=20) :: name, ch_err, fname
+
+
+  info = psb_success_
+  name='psb_cinvt_fact'
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  ctxt        = psb_cd_get_context(desc)
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' start'
+
+  !
+  ! Check the memory available to hold the incomplete L and U factors
+  ! and allocate it if needed
+  !
+  nrow_a = a%get_nrows()
+  nztota = a%get_nzeros()
+
+  if (present(blck)) then
+    nztota = nztota + blck%get_nzeros()
+  end if
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),&
+       & ': out get_nnzeros',nrow_a,nztota,&
+       & a%get_nrows(),a%get_ncols(),a%get_nzeros()
+
+
+  n_row  = psb_cd_get_local_rows(desc)
+  n_col  = psb_cd_get_local_cols(desc)
+  allocate(pd(n_row),w(n_row),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  nzrmax    = fillin
+  sp_thresh = thresh
+
+  call lmat%allocate(n_row,n_row,info,nz=nztota)
+  if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota)
+
+  if (info == 0) call psb_ilut_fact(nzrmax,sp_thresh,&
+       & a,lmat,umat,pd,info,blck=blck,iscale=psb_ilu_scale_maxval_)
+
+  if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota)
+  if(info/=0) then
+    info=psb_err_from_subroutine_
+    ch_err='psb_sp_all'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+  if (.false.) then
+!!$    if (debug_level >= psb_debug_inner_) then
+    write(fname,'(a,i0,a)') 'invt-lo-',me,'.mtx'
+    call lmat%print(fname,head="INVTLOW")
+    write(fname,'(a,i0,a)') 'invt-up-',me,'.mtx'
+    call umat%print(fname,head="INVTUPP")
+  end if
+
+  !
+  ! Compute the approx U^-1  and L^-1
+  !
+  nzrmax    = invfill
+  call psb_csparse_invt(n_row,umat,atmp,nzrmax,invthresh,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,umat,info)
+  if (info == psb_success_) call lmat%transp()
+  if (info == psb_success_) call psb_csparse_invt(n_row,lmat,atmp,nzrmax,invthresh,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,lmat,info)
+  if (info == psb_success_) call lmat%transp()
+  ! Done. Hopefully....
+
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='invt')
+    goto 9999
+  end if
+
+  call psb_move_alloc(pd,d,info)
+  call lmat%set_asb()
+  call lmat%trim()
+  call umat%set_asb()
+  call umat%trim()
+
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' end'
+
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_c_invt_bld
+
+subroutine psb_csparse_invt(n,a,z,nzrmax,sp_thresh,info)
+
+  use psb_base_mod
+  use psb_c_invt_fact_mod, psb_protect_name => psb_csparse_invt
+
+  implicit none
+  integer(psb_ipk_), intent(in)        :: n
+  type(psb_cspmat_type), intent(in)    :: a
+  type(psb_cspmat_type), intent(inout) :: z
+  integer(psb_ipk_), intent(in)        :: nzrmax
+  real(psb_spk_), intent(in)           :: sp_thresh
+  integer(psb_ipk_), intent(out)       :: info
+  !
+  integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), iz(:),jz(:)
+  complex(psb_spk_), allocatable    :: zw(:), val(:), valz(:)
+  integer(psb_ipk_), allocatable :: uplevs(:), rowlevs(:),idxs(:)
+  complex(psb_spk_), allocatable :: row(:)
+  type(psb_c_coo_sparse_mat)  :: trw
+  type(psb_c_csr_sparse_mat)  :: acsr, zcsr
+  integer(psb_ipk_)           :: ktrw, nidx, nlw,nup,jmaxup
+  type(psb_i_heap)            :: heap
+  real(psb_spk_)     :: alpha, nrmi
+  character(len=20)  :: name='psb_sp_invt'
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then
+    write(psb_err_unit,*) 'Wrong A '
+    info = psb_err_internal_error_
+    call psb_errpush(psb_err_internal_error_,name,a_err='wrong A')
+    goto 9999
+  end if
+  call a%cp_to(acsr)
+  call trw%allocate(izero,izero,ione)
+  if (info == psb_success_) allocate(zw(n),iz(n),valz(n),&
+       & row(n),rowlevs(n),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  call zcsr%allocate(n,n,n*nzrmax)
+  call zcsr%set_triangle()
+  call zcsr%set_unit(.false.)
+  call zcsr%set_upper()
+  !
+  !
+  nzz        = 0
+  row(:)     = czero
+  rowlevs(:) = 0
+  l2         = 0
+  zcsr%irp(1) = 1
+
+  outer: do i = 1, n-1
+    ! ZW = e_i
+    call psb_c_invt_copyin(i,n,acsr,i,ione,n,nlw,nup,jmaxup,nrmi,row,&
+         & heap,rowlevs,ktrw,trw,info,sign=-sone)
+    if (info /= 0) exit
+    row(i) = cone
+    ! Adjust norm
+    if (nrmi < sone) then
+      nrmi = sqrt(sone + nrmi**2)
+    else
+      nrmi = nrmi*sqrt(cone+cone/(nrmi**2))
+    end if
+
+    call psb_invt_inv(sp_thresh,i,nrmi,row,heap,rowlevs,&
+         & acsr%ja,acsr%irp,acsr%val,nidx,idxs,info)
+    if (info /= 0) exit
+!!$    write(0,*) 'Calling copyout ',nzrmax,nlw,nup,nidx,l2
+    call psb_c_invt_copyout(nzrmax,sp_thresh,i,n,nlw,nup,jmaxup,nrmi,row,&
+         & nidx,idxs,l2,zcsr%ja,zcsr%irp,zcsr%val,info)
+    if (info /= 0) exit
+    nzz = l2
+  end do outer
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='mainloop')
+    goto 9999
+  end if
+
+  ipz1 = nzz+1
+  call psb_ensure_size(ipz1,zcsr%val,info)
+  call psb_ensure_size(ipz1,zcsr%ja,info)
+  zcsr%val(ipz1) = cone
+  zcsr%ja(ipz1)  = n
+  zcsr%irp(n+1)  = ipz1+1
+
+  call z%mv_from(zcsr)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_csparse_invt
+
+subroutine psb_c_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,&
+     & irwt,ktrw,trw,info,sign)
+  use psb_base_mod
+  use psb_d_invt_fact_mod, psb_protect_name => psb_d_invt_copyin
+  implicit none
+  type(psb_c_csr_sparse_mat), intent(in)    :: a
+  type(psb_c_coo_sparse_mat), intent(inout) :: trw
+  integer(psb_ipk_), intent(in)             :: i, m,jmin,jmax,jd
+  integer(psb_ipk_), intent(inout)          :: ktrw,nlw,nup,jmaxup,info
+  integer(psb_ipk_), intent(inout)          :: irwt(:)
+  real(psb_spk_), intent(inout)          :: nrmi
+  complex(psb_spk_), intent(inout)            :: row(:)
+  type(psb_i_heap), intent(inout)           :: heap
+  real(psb_spk_), intent(in), optional    :: sign
+  !
+  integer(psb_ipk_)            :: k,j,irb,kin,nz, err_act
+  integer(psb_ipk_), parameter :: nrb=16
+  real(psb_dpk_)               :: dmaxup, sign_
+  real(psb_dpk_), external     :: dnrm2
+  character(len=20), parameter :: name='invt_copyin'
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  call heap%init(info)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='psb_init_heap')
+    goto 9999
+  end if
+  sign_ = sone
+  if (present(sign)) sign_ = sign
+  !
+  ! nrmi is the norm of the current sparse row (for the time being,
+  ! we use the 2-norm).
+  ! NOTE: the 2-norm below includes also elements that are outside
+  ! [jmin:jmax] strictly. Is this really important? TO BE CHECKED.
+  !
+
+  nlw    = 0
+  nup    = 0
+  jmaxup = 0
+  dmaxup = czero
+  nrmi   = czero
+
+  do j = a%irp(i), a%irp(i+1) - 1
+    k = a%ja(j)
+    if ((jmin<=k).and.(k<=jmax)) then
+      row(k)     = sign_ * a%val(j)
+      call heap%insert(k,info)
+      irwt(k) = 1
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='psb_insert_heap')
+        goto 9999
+      end if
+    end if
+    if (k<jd) nlw = nlw + 1
+    if (k>jd) then
+      nup = nup + 1
+      if (abs(row(k))>dmaxup) then
+        jmaxup = k
+        dmaxup = abs(row(k))
+      end if
+    end if
+  end do
+  nz   = a%irp(i+1) - a%irp(i)
+  nrmi = dnrm2(nz,a%val(a%irp(i):),ione)
+
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_c_invt_copyin
+
+subroutine psb_c_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, &
+     & nidx,idxs,l2,ja,irp,val,info)
+
+  use psb_base_mod
+  use psb_c_invt_fact_mod, psb_protect_name => psb_c_invt_copyout
+
+  implicit none
+
+  ! Arguments
+  integer(psb_ipk_), intent(in)                 :: fill_in,i,m,nidx,nlw,nup,jmaxup
+  integer(psb_ipk_), intent(in)                 :: idxs(:)
+  integer(psb_ipk_), intent(inout)              :: l2, info
+  integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:)
+  real(psb_spk_), intent(in)                    :: thres,nrmi
+  complex(psb_spk_),allocatable, intent(inout)     :: val(:)
+  complex(psb_spk_), intent(inout)                 :: row(:)
+
+  ! Local variables
+  real(psb_dpk_),allocatable     :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+  real(psb_dpk_)                 :: witem, wmin
+  integer(psb_ipk_)              :: widx
+  integer(psb_ipk_)              :: k,isz,err_act,int_err(5),idxp, nz
+  type(psb_d_idx_heap)           :: heap
+  character(len=20), parameter   :: name='invt_copyout'
+  character(len=20)              :: ch_err
+  logical                        :: fndmaxup
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  !
+  ! Here we need to apply also the dropping rule base on the fill-in.
+  ! We do it by putting into a heap the elements that are not dropped
+  ! by using the 2-norm rule, and then copying them out.
+  !
+  ! The heap goes down on the entry absolute value, so the first item
+  ! is the largest absolute value.
+  !
+!!$  write(0,*) 'invt_copyout ',nidx,nup+fill_in
+  call heap%init(info,dir=psb_asort_down_)
+
+  if (info == psb_success_) allocate(xwid(nidx),xw(nidx),indx(nidx),stat=info)
+  if (info /= psb_success_) then
+    info=psb_err_alloc_request_
+    call psb_errpush(info,name,i_err=(/3*nidx/),&
+         & a_err='real(psb_dpk_)')
+    goto 9999
+  end if
+
+  !
+  ! First the lower part
+  !
+
+  nz   = 0
+  idxp = 0
+
+  do
+
+    idxp = idxp + 1
+    if (idxp > nidx) exit
+    if (idxs(idxp) >= i) exit
+    widx      = idxs(idxp)
+    witem     = row(widx)
+    !
+    ! Dropping rule based on the 2-norm
+    !
+    if (abs(witem) < thres*nrmi) cycle
+
+    nz       = nz + 1
+    xw(nz)   = witem
+    xwid(nz) = widx
+    call heap%insert(witem,widx,info)
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_insert_heap')
+      goto 9999
+    end if
+  end do
+
+  if (nz > 1) then
+    write(psb_err_unit,*) 'Warning: lower triangle from invt???? '
+  end if
+
+
+  if (idxp <= size(idxs)) then
+    if (idxs(idxp) < i) then
+      do
+        idxp = idxp + 1
+        if (idxp > nidx) exit
+        if (idxs(idxp) >= i) exit
+      end do
+    end if
+  end if
+  idxp = idxp - 1
+  nz   = 0
+  wmin=HUGE(wmin)
+  if (.false.) then
+    do
+
+      idxp = idxp + 1
+      if (idxp > nidx) exit
+      widx      = idxs(idxp)
+      if (widx < i) then
+        write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp)
+        cycle
+      end if
+      if (widx > m) then
+        cycle
+      end if
+      witem     = row(widx)
+      !
+      ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway.
+      !
+      if ((widx /= jmaxup) .and. (widx /= i) .and. (abs(witem) < thres*nrmi)) then
+        cycle
+      end if
+      if ((widx/=jmaxup).and.(nz > nup+fill_in)) then
+        if (abs(witem) < wmin) cycle
+      endif
+      wmin = min(abs(witem),wmin)
+      nz       = nz + 1
+      xw(nz)   = witem
+      xwid(nz) = widx
+      call heap%insert(witem,widx,info)
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='psb_insert_heap')
+        goto 9999
+      end if
+
+    end do
+
+    !
+    ! Now we have to take out the first nup-fill_in entries. But make sure
+    ! we include entry jmaxup.
+    !
+    if (nz <= nup+fill_in) then
+      !
+      ! Just copy everything from xw
+      !
+      fndmaxup=.true.
+    else
+      fndmaxup = .false.
+      nz = nup+fill_in
+      do k=1,nz
+        call heap%get_first(witem,widx,info)
+        xw(k)   = witem
+        xwid(k) = widx
+        if (widx == jmaxup) fndmaxup=.true.
+      end do
+    end if
+    if ((i<jmaxup).and.(jmaxup<=m)) then
+      if (.not.fndmaxup) then
+        !
+        ! Include entry jmaxup, if it is not already there.
+        ! Put it in the place of the smallest coefficient.
+        !
+        xw(nz)   = row(jmaxup)
+        xwid(nz) = jmaxup
+      endif
+    end if
+
+  else if (.true.) then
+
+    do
+
+      idxp = idxp + 1
+      if (idxp > nidx) exit
+      widx      = idxs(idxp)
+      if (widx < i) then
+        write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp)
+        cycle
+      end if
+      if (widx > m) then
+        cycle
+      end if
+      witem     = row(widx)
+      !
+      ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway.
+      !
+      if ((widx /= i) .and. (abs(witem) < thres*nrmi)) then
+        cycle
+      end if
+      if (nz > nup+fill_in) then
+        if (abs(witem) < wmin) cycle
+      endif
+      wmin = min(abs(witem),wmin)
+      nz       = nz + 1
+      xw(nz)   = witem
+      xwid(nz) = widx
+      call heap%insert(witem,widx,info)
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='psb_insert_heap')
+        goto 9999
+      end if
+
+    end do
+
+    !
+    ! Now we have to take out the first nup-fill_in entries. But make sure
+    ! we include entry jmaxup.
+    !
+    if (nz >  nup+fill_in) then
+      nz = nup+fill_in
+      do k=1,nz
+        call heap%get_first(witem,widx,info)
+        xw(k)   = witem
+        xwid(k) = widx
+      end do
+    end if
+  end if
+
+  !
+  ! Now we put things back into ascending column order
+  !
+  call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_)
+
+  !
+  ! Copy out the upper part of the row
+  !
+  do k=1,nz
+    l2     = l2 + 1
+    if (size(val) < l2) then
+      !
+      ! Figure out a good reallocation size!
+      !
+      isz  = max(int(1.2*l2),l2+100)
+      call psb_realloc(isz,val,info)
+      if (info == psb_success_) call psb_realloc(isz,ja,info)
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='Allocate')
+        goto 9999
+      end if
+    end if
+    ja(l2)   = xwid(k)
+    val(l2)  = xw(indx(k))
+  end do
+
+  !
+  ! Set row to zero
+  !
+  do idxp=1,nidx
+    row(idxs(idxp)) = czero
+  end do
+
+  irp(i+1) = l2 + 1
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_c_invt_copyout
+
+subroutine psb_c_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val,nidx,idxs,info)
+
+  use psb_base_mod
+  use psb_c_invt_fact_mod, psb_protect_name => psb_c_invt_inv
+
+  implicit none
+
+  ! Arguments
+  type(psb_i_heap), intent(inout)     :: heap
+  integer(psb_ipk_), intent(in)       :: i
+  integer(psb_ipk_), intent(inout)    :: nidx,info
+  integer(psb_ipk_), intent(inout)    :: irwt(:)
+  real(psb_spk_), intent(in)          :: thres,nrmi
+  integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+  integer(psb_ipk_), intent(in)       :: ja(:),irp(:)
+  complex(psb_spk_), intent(in)          :: val(:)
+  complex(psb_spk_), intent(inout)       :: row(:)
+
+  ! Local Variables
+  integer(psb_ipk_) :: k,j,jj,lastk,iret
+  real(psb_dpk_)    :: rwk, alpha
+
+  info  = psb_success_
+
+  call psb_ensure_size(200, idxs,  info)
+  if (info /= psb_success_) return
+  nidx    = 1
+  idxs(1) = i
+  lastk   = i
+  irwt(i) = 1
+!!$  write(0,*) 'Drop Threshold ',thres*nrmi
+  !
+  ! Do while there are indices to be processed
+  !
+  do
+
+    call heap%get_first(k,iret)
+    if (iret < 0)  exit
+
+    !
+    ! An index may have been put on the heap more than once.
+    ! Should not happen, but just in case.
+    !
+    if (k == lastk)  cycle
+    lastk = k
+
+    !
+    ! Dropping rule based on the threshold: compare the absolute
+    ! value of each updated entry of row with thres * 2-norm of row.
+    !
+    rwk    = row(k)
+
+    if (abs(rwk) < thres*nrmi) then
+      !
+      ! Drop the entry.
+      !
+      row(k)  = dzero
+      irwt(k) = 0
+      cycle
+    else
+      !
+      ! Note: since U is scaled while copying it out (see ilut_copyout),
+      ! we can use rwk in the update below.
+      !
+      do jj=irp(k),irp(k+1)-1
+        j = ja(jj)
+        if (j<=k) then
+          info = -i
+          return
+        endif
+        !
+        ! Update row(j) and, if it is not to be discarded, insert
+        ! its index into the heap for further processing.
+        !
+        row(j)     = row(j) - rwk * val(jj)
+        if (irwt(j) == 0) then
+          if (abs(row(j)) < thres*nrmi) then
+            !
+            ! Drop the entry.
+            !
+            row(j)  = dzero
+          else
+            !
+            ! Do the insertion.
+            !
+            call heap%insert(j,info)
+            if (info /= psb_success_) return
+            irwt(j) = 1
+          end if
+        end if
+      end do
+    end if
+
+    !
+    ! If we get here it is an index we need to keep on copyout.
+    !
+
+    nidx       = nidx + 1
+    call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize)
+    if (info /= psb_success_) return
+    idxs(nidx) = k
+    irwt(k)    = 0
+  end do
+
+  irwt(i) = 0
+end subroutine psb_c_invt_inv
diff --git a/prec/impl/psb_c_prec_type_impl.f90 b/prec/impl/psb_c_prec_type_impl.f90
index 81a139ea..e82d7dca 100644
--- a/prec/impl/psb_c_prec_type_impl.f90
+++ b/prec/impl/psb_c_prec_type_impl.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,14 +27,14 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
-!   
+!
+!
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -46,7 +46,7 @@
 !!$    3. The name of the PSBLAS group or the names of its contributors may
 !!$       not be used to endorse or promote products derived from this
 !!$       software without specific written permission.
-!!$ 
+!!$
 !!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -58,14 +58,14 @@
 !!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !!$  POSSIBILITY OF SUCH DAMAGE.
-!!$ 
-!!$  
+!!$
+!!$
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
 subroutine psb_c_apply2_vect(prec,x,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_c_prec_type, psb_protect_name => psb_c_apply2_vect
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)       :: desc_data
   class(psb_cprec_type), intent(inout) :: prec
   type(psb_c_vect_type),intent(inout)  :: x
@@ -74,7 +74,7 @@ subroutine psb_c_apply2_vect(prec,x,y,desc_data,info,trans,work)
   character(len=1), optional           :: trans
   complex(psb_spk_),intent(inout), optional, target :: work(:)
 
-  character     :: trans_ 
+  character     :: trans_
   complex(psb_spk_), pointer :: work_(:)
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -88,25 +88,25 @@ subroutine psb_c_apply2_vect(prec,x,y,desc_data,info,trans,work)
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
 
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=psb_toupper(trans)
   else
     trans_='N'
   end if
 
-  if (present(work)) then 
+  if (present(work)) then
     work_ => work
   else
     allocate(work_(4*desc_data%get_local_cols()),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
 
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
@@ -115,13 +115,13 @@ subroutine psb_c_apply2_vect(prec,x,y,desc_data,info,trans,work)
   call prec%prec%apply(cone,x,czero,y,desc_data,info,&
        & trans=trans_,work=work_)
 
-  if (present(work)) then 
+  if (present(work)) then
   else
     deallocate(work_,stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='DeAllocate')
-      goto 9999      
+      goto 9999
     end if
   end if
 
@@ -136,7 +136,7 @@ end subroutine psb_c_apply2_vect
 subroutine psb_c_apply1_vect(prec,x,desc_data,info,trans,work)
   use psb_base_mod
   use psb_c_prec_type, psb_protect_name => psb_c_apply1_vect
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)       :: desc_data
   class(psb_cprec_type), intent(inout) :: prec
   type(psb_c_vect_type),intent(inout)  :: x
@@ -145,7 +145,7 @@ subroutine psb_c_apply1_vect(prec,x,desc_data,info,trans,work)
   complex(psb_spk_),intent(inout), optional, target :: work(:)
 
   type(psb_c_vect_type)       :: ww
-  character     :: trans_ 
+  character     :: trans_
   complex(psb_spk_), pointer :: work_(:)
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -159,25 +159,25 @@ subroutine psb_c_apply1_vect(prec,x,desc_data,info,trans,work)
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
 
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=psb_toupper(trans)
   else
     trans_='N'
   end if
 
-  if (present(work)) then 
+  if (present(work)) then
     work_ => work
   else
     allocate(work_(4*desc_data%get_local_cols()),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
 
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
@@ -188,13 +188,13 @@ subroutine psb_c_apply1_vect(prec,x,desc_data,info,trans,work)
        & trans=trans_,work=work_)
   if (info == 0) call psb_geaxpby(cone,ww,czero,x,desc_data,info)
   call psb_gefree(ww,desc_data,info)
-  if (present(work)) then 
+  if (present(work)) then
   else
     deallocate(work_,stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='DeAllocate')
-      goto 9999      
+      goto 9999
     end if
   end if
 
@@ -209,7 +209,7 @@ end subroutine psb_c_apply1_vect
 subroutine psb_c_apply2v(prec,x,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_c_prec_type, psb_protect_name => psb_c_apply2v
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_cprec_type), intent(inout) :: prec
   complex(psb_spk_),intent(inout)   :: x(:)
@@ -218,7 +218,7 @@ subroutine psb_c_apply2v(prec,x,y,desc_data,info,trans,work)
   character(len=1), optional        :: trans
   complex(psb_spk_),intent(inout), optional, target :: work(:)
 
-  character     :: trans_ 
+  character     :: trans_
   complex(psb_spk_), pointer :: work_(:)
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -232,37 +232,37 @@ subroutine psb_c_apply2v(prec,x,y,desc_data,info,trans,work)
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
 
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=trans
   else
     trans_='N'
   end if
 
-  if (present(work)) then 
+  if (present(work)) then
     work_ => work
   else
     allocate(work_(4*desc_data%get_local_cols()),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
 
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
   end if
   call prec%prec%apply(cone,x,czero,y,desc_data,info,trans_,work=work_)
-  if (present(work)) then 
+  if (present(work)) then
   else
     deallocate(work_,stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='DeAllocate')
-      goto 9999      
+      goto 9999
     end if
   end if
 
@@ -277,7 +277,7 @@ end subroutine psb_c_apply2v
 subroutine psb_c_apply1v(prec,x,desc_data,info,trans)
   use psb_base_mod
   use psb_c_prec_type, psb_protect_name => psb_c_apply1v
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_cprec_type), intent(inout) :: prec
   complex(psb_spk_),intent(inout)   :: x(:)
@@ -293,36 +293,34 @@ subroutine psb_c_apply1v(prec,x,desc_data,info,trans)
   name='psb_c_apply1v'
   info = psb_success_
   call psb_erractionsave(err_act)
-
-
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=psb_toupper(trans)
   else
     trans_='N'
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
   end if
   allocate(ww(size(x)),w1(size(x)),stat=info)
-  if (info /= psb_success_) then 
+  if (info /= psb_success_) then
     info = psb_err_from_subroutine_
     call psb_errpush(info,name,a_err='Allocate')
-    goto 9999      
+    goto 9999
   end if
   call prec%prec%apply(cone,x,czero,ww,desc_data,info,&
        & trans_,work=w1)
   if(info /= psb_success_) goto 9999
   x(:) = ww(:)
   deallocate(ww,W1,stat=info)
-  if (info /= psb_success_) then 
+  if (info /= psb_success_) then
     info = psb_err_from_subroutine_
     call psb_errpush(info,name,a_err='DeAllocate')
-    goto 9999      
+    goto 9999
   end if
 
 
@@ -336,3 +334,159 @@ subroutine psb_c_apply1v(prec,x,desc_data,info,trans)
 
 end subroutine psb_c_apply1v
 
+subroutine psb_ccprecseti(prec,what,val,info,ilev,ilmax,pos,idx)
+  use psb_base_mod
+  use psb_c_prec_type, psb_protect_name => psb_ccprecseti
+  implicit none
+
+  class(psb_cprec_type), intent(inout)   :: prec
+  character(len=*), intent(in)             :: what
+  integer(psb_ipk_), intent(in)            :: val
+  integer(psb_ipk_), intent(out)           :: info
+  ! This optional inputs are backport from the inputs available in AMG4PSBLAS,
+  ! they are of no actual use here a part from compatibility reasons.
+  integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+  character(len=*), optional, intent(in)   :: pos
+
+  ! Local variables
+  character(len=*), parameter            :: name='psb_precseti'
+
+  info = psb_success_
+
+  ! We need to convert from the 'what' string to the corresponding integer
+  ! value befor passing the call to the set of the inner method.
+  select case (psb_toupper(trim(what)))
+    case ('SUB_FILLIN')
+      call prec%prec%precset(psb_ilu_fill_in_,val,info)
+    case ('INV_FILLIN')
+      call prec%prec%precset(psb_inv_fillin_,val,info)
+    case default
+      info = psb_err_invalid_args_combination_
+      write(psb_err_unit,*) name,&
+           & ': Error: uninitialized preconditioner,',&
+           &' should call prec%init'
+      return
+  end select
+
+end subroutine psb_ccprecseti
+
+subroutine psb_ccprecsetr(prec,what,val,info,ilev,ilmax,pos,idx)
+  use psb_base_mod
+  use psb_c_prec_type, psb_protect_name => psb_ccprecsetr
+  implicit none
+
+  class(psb_cprec_type), intent(inout)   :: prec
+  character(len=*), intent(in)             :: what
+  real(psb_spk_), intent(in)             :: val
+  integer(psb_ipk_), intent(out)           :: info
+  ! This optional inputs are backport from the inputs available in AMG4PSBLAS,
+  ! they are of no actual use here a part from compatibility reasons.
+  integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+  character(len=*), optional, intent(in)   :: pos
+
+  ! Local variables
+  character(len=*), parameter            :: name='psb_precsetr'
+
+  info = psb_success_
+
+  ! We need to convert from the 'what' string to the corresponding integer
+  ! value befor passing the call to the set of the inner method.
+  select case (psb_toupper(trim(what)))
+  case('SUB_ILUTHRS')
+    call prec%prec%precset(psb_fact_eps_,val,info)
+  case('INV_THRESH')
+    call prec%prec%precset(psb_inv_thresh_,val,info)
+  case default
+    info = psb_err_invalid_args_combination_
+    write(psb_err_unit,*) name,&
+         & ': Error: uninitialized preconditioner,',&
+         &' should call prec%init'
+    return
+  end select
+
+end subroutine psb_ccprecsetr
+
+subroutine psb_ccprecsetc(prec,what,string,info,ilev,ilmax,pos,idx)
+  use psb_base_mod
+  use psb_c_prec_type, psb_protect_name => psb_ccprecsetc
+  implicit none
+
+  class(psb_cprec_type), intent(inout)   :: prec
+  character(len=*), intent(in)             :: what
+  character(len=*), intent(in)             :: string
+  integer(psb_ipk_), intent(out)           :: info
+  ! This optional inputs are backport from the inputs available in AMG4PSBLAS,
+  ! they are of no actual use here a part from compatibility reasons.
+  integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+  character(len=*), optional, intent(in)   :: pos
+
+  ! Local variables
+  character(len=*), parameter            :: name='psb_precsetc'
+
+  info = psb_success_
+
+  ! We need to convert from the 'what' string to the corresponding integer
+  ! value befor passing the call to the set of the inner method.
+  select case (psb_toupper(trim(what)))
+    case ('SUB_SOLVE')
+      ! We select here the type of solver on the block
+      select case (psb_toupper(trim(string)))
+        case("ILU")
+            call prec%prec%precset(psb_f_type_,psb_f_ilu_k_,info)
+            call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info)
+        case("ILUT")
+            call prec%prec%precset(psb_f_type_,psb_f_ilu_t_,info)
+            call prec%prec%precset(psb_ilu_ialg_,psb_ilu_t_,info)
+        case("AINV")
+            call prec%prec%precset(psb_f_type_,psb_f_ainv_,info)
+        case("INVK")
+            call prec%prec%precset(psb_f_type_,psb_f_invk_,info)
+        case("INVT")
+            call prec%prec%precset(psb_f_type_,psb_f_invt_,info)
+        case default
+          ! Default to ILU(0) factorization
+          call prec%prec%precset(psb_f_type_,psb_f_ilu_n_,info)
+          call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info)
+      end select
+    case ("ILU_ALG")
+      select case (psb_toupper(trim(string)))
+        case ("MILU")
+          call prec%prec%precset(psb_ilu_ialg_,psb_milu_n_,info)
+        case default
+          ! Do nothing
+      end select
+    case ("ILUT_SCALE")
+      select case (psb_toupper(trim(string)))
+      case ("MAXVAL")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_maxval_,info)
+      case ("DIAG")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_diag_,info)
+      case ("ARWSUM")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arwsum_,info)
+      case ("ARCSUM")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arcsum_,info)
+      case ("ACLSUM")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_aclsum_,info)
+      case ("NONE")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info)
+      case default
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info)
+      end select
+    case ("AINV_ALG")
+      select case (psb_toupper(trim(string)))
+      case("LLK")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info)
+      case("SYM-LLK")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_llk_,info)
+      case("STAB-LLK")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_ft_llk_,info)
+      case("MLK","LMX")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_mlk_,info)
+      case default
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info)
+      end select
+    case default
+
+  end select
+
+end subroutine psb_ccprecsetc
diff --git a/prec/impl/psb_c_sp_drop.f90 b/prec/impl/psb_c_sp_drop.f90
new file mode 100644
index 00000000..fda59cc7
--- /dev/null
+++ b/prec/impl/psb_c_sp_drop.f90
@@ -0,0 +1,168 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_c_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info)
+  use psb_base_mod
+  implicit none
+  real(psb_spk_), intent(in)       :: sp_thresh
+  integer(psb_ipk_), intent(in)    :: idiag, nzrmax
+  integer(psb_ipk_), intent(inout) :: nz
+  integer(psb_ipk_), intent(inout) :: iz(:)
+  complex(psb_spk_), intent(inout)    :: valz(:)
+  integer(psb_ipk_), intent(out)   :: info
+  !
+  integer(psb_ipk_)              :: i, j, idf, nw
+  complex(psb_spk_)                 :: witem
+  integer(psb_ipk_)              :: widx
+  complex(psb_spk_), allocatable    :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+
+
+  info = psb_success_
+
+  if (nz > min(size(iz),size(valz))) then
+    write(0,*) 'Serious size problem ',nz,size(iz),size(valz)
+    info = -2
+    return
+  end if
+  allocate(xw(nz),xwid(nz),indx(nz),stat=info)
+  if (info /= psb_success_) then
+    write(psb_err_unit,*) ' Memory allocation failure in sp_drop',nz,info
+    return
+  endif
+
+  ! Always keep the diagonal element
+  idf = -1
+  do i=1, nz
+    if (iz(i) == idiag) then
+      idf     = i
+      witem   = valz(i)
+      widx    = iz(i)
+      valz(i) = valz(1)
+      iz(i)   = iz(1)
+      valz(1) = witem
+      iz(1)   = widx
+      exit
+    end if
+  end do
+
+  if (idf == -1) then
+
+    xw(1:nz) = valz(1:nz)
+    call psb_qsort(xw(1:nz),indx(1:nz),dir=psb_asort_down_)
+    do i=1, nz
+      xwid(i) = iz(indx(i))
+    end do
+    nw = min(nz,nzrmax)
+    do
+      if (nw <= 1) exit
+      if (abs(xw(nw)) < sp_thresh) then
+        nw = nw - 1
+      else
+        exit
+      end if
+    end do
+    nw = max(nw, 1)
+
+  else
+
+    nw = nz-1
+
+    xw(1:nw) = valz(2:nz)
+
+    call psb_qsort(xw(1:nw),indx(1:nw),dir=psb_asort_down_)
+    nw = min(nw,nzrmax-1)
+    do
+      if (nw <= 1) exit
+      if (abs(xw(nw)) < sp_thresh) then
+        nw = nw - 1
+      else
+        exit
+      end if
+    end do
+
+    do i=1, nw
+      xwid(i) = iz(1+indx(i))
+    end do
+    nw       = nw + 1
+    xw(nw)   = valz(1)
+    xwid(nw) = iz(1)
+  end if
+
+  call psb_msort(xwid(1:nw),indx(1:nw),dir=psb_sort_up_)
+
+  do i=1, nw
+    valz(i) = xw(indx(i))
+    iz(i)   = xwid(i)
+  end do
+  nz = nw
+  if (nz>nzrmax) write(0,*) 'in sp_drop: ',nw,nzrmax,nz
+  deallocate(xw,xwid,indx,stat=info)
+  if (info /= psb_success_) then
+    write(psb_err_unit,*) ' Memory deallocation failure in sp_drop',info
+    return
+  endif
+  return
+end subroutine psb_c_sp_drop
diff --git a/prec/impl/psb_c_sparsify.f90 b/prec/impl/psb_c_sparsify.f90
new file mode 100644
index 00000000..4962f337
--- /dev/null
+++ b/prec/impl/psb_c_sparsify.f90
@@ -0,0 +1,260 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_c_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info,istart,iheap,ikr)
+  use psb_base_mod
+  implicit none
+
+  real(psb_spk_), intent(in)              :: sp_thresh
+  integer(psb_ipk_), intent(in)           :: idiag, n, nzrmax
+  complex(psb_spk_), intent(inout)           :: zw(:)
+  integer(psb_ipk_), intent(out)          :: nz
+  integer(psb_ipk_), intent(out)          :: iz(:)
+  complex(psb_spk_), intent(out)             :: valz(:)
+  integer(psb_ipk_), intent(out)          :: info
+  integer(psb_ipk_), intent(in), optional :: istart
+  type(psb_i_heap), optional              :: iheap
+  integer(psb_ipk_), optional             :: ikr(:)
+  !
+  integer(psb_ipk_)              :: i, istart_, last_i, iret,k
+  complex(psb_spk_)                :: witem
+  integer(psb_ipk_)              :: widx
+  complex(psb_spk_), allocatable   :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+  type(psb_c_idx_heap)         :: heap
+
+
+  info = psb_success_
+  istart_ = 1
+  if (present(istart)) istart_ = max(1,istart)
+  if (.false.) then
+    nz = 0
+    do i=istart_, n
+      if ((i == idiag).or.(abs(zw(i)) >= sp_thresh)) then
+        nz       = nz + 1
+        iz(nz)   = i
+        valz(nz) = zw(i)
+      end if
+    end do
+
+  else
+    allocate(xw(nzrmax),xwid(nzrmax),indx(nzrmax),stat=info)
+    if (info /= psb_success_) then
+      return
+    end if
+
+    call heap%init(info,dir=psb_asort_down_)
+
+    ! Keep at least the diagonal
+    nz = 0
+
+    if (present(iheap)) then
+      if (.not.(present(ikr))) then
+        write(psb_err_unit,*) 'Error: if IHEAP then also IKR'
+        info = -1
+        return
+      end if
+      last_i = -1
+      do
+        call iheap%get_first(i,iret)
+        if (iret < 0) exit
+        ! An index may have been put on the heap more than once.
+        if (i == last_i) cycle
+        last_i = i
+        if (i == idiag) then
+          xw(1)   = zw(i)
+          xwid(1) = i
+        else if (abs(zw(i)) >= sp_thresh) then
+          call heap%insert(zw(i),i,info)
+        end if
+        zw(i)  = dzero
+        ikr(i) = 0
+      end do
+
+    else
+
+      do i=istart_, n
+        if (i == idiag) then
+          xw(1)   = zw(i)
+          xwid(1) = i
+        else if (abs(zw(i)) >= sp_thresh) then
+          call heap%insert(zw(i),i,info)
+        end if
+        zw(i) = dzero
+      end do
+    end if
+
+    k = 1
+    do
+      if (k == nzrmax) exit
+      call heap%get_first(witem,widx,info)
+      if (info == -1) then
+        info = psb_success_
+        exit
+      endif
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        return
+      end if
+      k = k + 1
+      xw(k)   = witem
+      xwid(k) = widx
+    end do
+    call heap%free(info)
+    nz = k
+    call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_)
+!!$    write(0,*) 'sparsify output for idiag ',idiag,' :',nz,sp_thresh
+    do i=1, nz
+      valz(i) = xw(indx(i))
+      iz(i)   = xwid(i)
+!!$      write(0,*) '         ',iz(i),valz(i)
+    end do
+
+  end if
+
+  return
+
+end subroutine psb_c_sparsify
+
+
+subroutine psb_c_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info)
+  use psb_base_mod
+  implicit none
+
+  real(psb_spk_), intent(in)      :: sp_thresh
+  integer(psb_ipk_), intent(in)     :: idiag, n, nzrmax
+  complex(psb_spk_), intent(inout)    :: zw(:)
+  integer(psb_ipk_), intent(out)    :: nz
+  integer(psb_ipk_), intent(out)    :: iz(:)
+  complex(psb_spk_), intent(out)       :: valz(:)
+  integer(psb_ipk_), intent(out)    :: info
+  integer(psb_ipk_), intent(inout)  :: lhead, listv(:)
+  integer(psb_ipk_)                 :: ikr(:)
+  !
+  integer(psb_ipk_)              :: i, istart_, last_i, iret,k,current, next
+  complex(psb_spk_)                :: witem
+  integer(psb_ipk_)              :: widx
+  complex(psb_spk_), allocatable    :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+
+
+  info = psb_success_
+  istart_ = 1
+  allocate(xw(n),xwid(n),indx(n),stat=info)
+
+  current = lhead
+  lhead = -1
+  i = 0
+  do while (current >0)
+    i = i + 1
+    xw(i)   = zw(current)
+    xwid(i) = current
+
+    if (current == idiag) then
+      ! Bring the diagona into first position
+      witem = xw(1)
+      widx  = xwid(1)
+      xw(1)   = xw(i)
+      xwid(1) = xwid(i)
+      xw(i)   = witem
+      xwid(i) = widx
+    end if
+
+    zw(current)  = dzero
+    ikr(current) = 0
+
+    next = listv(current)
+    listv(current) = -1
+    current = next
+  end do
+  nz = i
+  if (nz > 2) call psb_hsort(xw(2:nz),ix=xwid(2:nz),&
+       & dir=psb_asort_down_,flag=psb_sort_keep_idx_)
+!!$  write(0,*) 'Done first msort '
+!!$  write(0,*) '   after first msort for idiag ',idiag,' :',nz,sp_thresh
+!!$  do i=1, nz
+!!$    write(0,*) '         ',xwid(i),xw(i)
+!!$  end do
+
+  i = 2
+  do while (i<=nz)
+    if (abs(xw(i)) < sp_thresh) exit
+    i = i + 1
+  end do
+!!$  write(0,*) 'NZ ',nz, i, nzrmax
+  nz = max(1,min(i-1,nzrmax))
+  call psb_msort(xwid(1:nz),ix=indx(1:nz),dir=psb_sort_up_)
+!!$  write(0,*) 'Done second msort '
+
+!!$  write(0,*) 'sparsify output for idiag ',idiag,' :',nz,i,sp_thresh
+  do i=1, nz
+    valz(i) = xw(indx(i))
+    iz(i)   = xwid(i)
+!!$      write(0,*) '         ',iz(i),valz(i)
+  end do
+
+  return
+
+end subroutine psb_c_sparsify_list
diff --git a/prec/impl/psb_crwclip.f90 b/prec/impl/psb_crwclip.f90
new file mode 100644
index 00000000..ade1171f
--- /dev/null
+++ b/prec/impl/psb_crwclip.f90
@@ -0,0 +1,90 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_c_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax)
+  use psb_base_mod
+
+  implicit none
+  integer(psb_ipk_), intent(inout)  :: nz
+  integer(psb_ipk_), intent(inout)  :: ia(*), ja(*)
+  complex(psb_spk_), intent(inout)    :: val(*)
+  integer(psb_ipk_), intent(in)     :: imin,imax,jmin,jmax
+
+  integer(psb_ipk_) :: i,j
+
+  j = 0
+  do i=1, nz
+    if ((imin <= ia(i)).and.&
+         & (ia(i) <= imax).and.&
+         & (jmin <= ja(i)).and.&
+         & (ja(i) <= jmax) ) then
+      j = j + 1
+      ia(j) = ia(i)
+      ja(j) = ja(i)
+      val(j) = val(i)
+    end if
+  end do
+  nz = j
+end subroutine psb_c_rwclip
diff --git a/prec/impl/psb_csparse_biconjg_llk.F90 b/prec/impl/psb_csparse_biconjg_llk.F90
new file mode 100644
index 00000000..98110cd1
--- /dev/null
+++ b/prec/impl/psb_csparse_biconjg_llk.F90
@@ -0,0 +1,366 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_csparse_biconjg_llk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_c_ainv_tools_mod
+  use psb_c_biconjg_mod, psb_protect_name => psb_csparse_biconjg_llk
+  !
+  ! Left-looking variant
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_c_csr_sparse_mat), intent(in)    :: a
+  type(psb_c_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_spk_), intent(in)                :: sp_thresh
+  complex(psb_spk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable        :: ia(:), ja(:), izkr(:), izcr(:)
+  complex(psb_spk_), allocatable :: zval(:),val(:), q(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       & ipj, lastj, nextj, nzw
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_c_csc_sparse_mat) :: ac
+  complex(psb_spk_)     :: alpha
+  character(len=20)  :: name='psb_orth_llk'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = czero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = czero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < s_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = cone
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = cone
+  nzw       = 1
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = czero
+    ! !$        end do
+    zval(i)  = cone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j) < i) then
+!!$        write(0,*) i, ' Outer inserting ',ac%ia(j)
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+      if (debug) write(0,*) 'update loop, using row: ',j,i
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! ! write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+!!$      write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+!!$        write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, &
+!!$             & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1)
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+          if (izkr(kr) == 0) then
+!!$            write(0,*) ' main inner Inserting ',kr
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+!!$                write(0,*) j,i,' Inner inserting ',nextj
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+    if (abs(p(i)) < s_epstol) &
+         & p(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = czero
+    ! !$        end do
+    zval(i)  = cone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = a%irp(i), a%irp(i+1)-1
+      if (a%ja(j) < i) then
+        if (info == psb_success_) call rheap%insert(a%ja(j),info)
+        izcr(a%ja(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      innerw: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outerw ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit innerw
+        end if
+      end do innerw
+      izcr(j) = 0
+      if (j>=i) cycle outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = ac%icp(j)
+      ip2 = ac%icp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (ac%ia(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-q(i)/q(j))
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+
+        do k=w%icp(j), w%icp(j+1)-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              nextj=a%ja(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outerw
+    ip1 = ac%icp(i)
+    ip2 = ac%icp(i+1) - 1
+    do
+      if (ip2 < ip1 ) exit
+      if (ac%ia(ip2) <= n) exit
+      ip2 = ip2 -1
+    end do
+    nzra = max(0,ip2 - ip1 + 1)
+    q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+    if (abs(q(i)) < s_epstol) &
+         & q(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrz, w%ia,  info)
+    call psb_ensure_size(nzw+nzrz, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrz
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrz
+    nzw        = nzw + nzrz
+
+  end do
+
+end subroutine psb_csparse_biconjg_llk
diff --git a/prec/impl/psb_csparse_biconjg_llk_noth.F90 b/prec/impl/psb_csparse_biconjg_llk_noth.F90
new file mode 100644
index 00000000..5fe472eb
--- /dev/null
+++ b/prec/impl/psb_csparse_biconjg_llk_noth.F90
@@ -0,0 +1,362 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_csparse_biconjg_llk_noth(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_c_ainv_tools_mod
+  use psb_c_biconjg_mod, psb_protect_name => psb_csparse_biconjg_llk_noth
+
+  !
+  ! Left-looking variant, with NO drop rule on p(i)/p(j)
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)               :: n
+  type(psb_c_csr_sparse_mat), intent(in)    :: a
+  type(psb_c_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)               :: nzrmax
+  real(psb_spk_), intent(in)                :: sp_thresh
+  complex(psb_spk_), intent(out)                :: p(:)
+  integer(psb_ipk_), intent(out)              :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable        :: ia(:), ja(:), izkr(:), izcr(:)
+  complex(psb_spk_), allocatable :: zval(:),val(:), q(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_c_csc_sparse_mat) :: ac
+  real(psb_dpk_)     :: alpha
+  character(len=20)  :: name='psb_orth_llk_noth'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = czero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = czero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < s_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = done
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = done
+  nzw       = 1
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = czero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j) < i) then
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+      if (debug) write(0,*) 'update loop, using row: ',j,i
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+
+      if (.true.) then
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+          if (izkr(kr) == 0) then
+
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+    if (abs(p(i)) < s_epstol) &
+         & p(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = czero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = a%irp(i), a%irp(i+1)-1
+      if (a%ja(j) < i) then
+        if (info == psb_success_) call rheap%insert(a%ja(j),info)
+        izcr(a%ja(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      innerw: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outerw ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit innerw
+        end if
+      end do innerw
+      izcr(j) = 0
+      if (j>=i) cycle outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = ac%icp(j)
+      ip2 = ac%icp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (ac%ia(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-q(i)/q(j))
+      if (.true.) then
+
+        do k=w%icp(j), w%icp(j+1)-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              nextj=a%ja(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outerw
+    ip1 = ac%icp(i)
+    ip2 = ac%icp(i+1) - 1
+    do
+      if (ip2 < ip1 ) exit
+      if (ac%ia(ip2) <= n) exit
+      ip2 = ip2 -1
+    end do
+    nzra = max(0,ip2 - ip1 + 1)
+    q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+    if (abs(q(i)) < s_epstol) &
+         & q(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrz, w%ia,  info)
+    call psb_ensure_size(nzw+nzrz, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrz
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrz
+    nzw        = nzw + nzrz
+
+  end do
+
+end subroutine psb_csparse_biconjg_llk_noth
diff --git a/prec/impl/psb_csparse_biconjg_mlk.F90 b/prec/impl/psb_csparse_biconjg_mlk.F90
new file mode 100644
index 00000000..58838c82
--- /dev/null
+++ b/prec/impl/psb_csparse_biconjg_mlk.F90
@@ -0,0 +1,501 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_csparse_biconjg_mlk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_c_ainv_tools_mod
+  use psb_c_biconjg_mod, psb_protect_name => psb_csparse_biconjg_mlk
+  !
+  ! Left-looking variant
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_c_csr_sparse_mat), intent(in)    :: a
+  type(psb_c_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_spk_), intent(in)                :: sp_thresh
+  complex(psb_spk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:), hlist(:), bfr(:), rwlist(:)
+  complex(psb_spk_), allocatable    :: zval(:),val(:), q(:)
+  integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw, hlhead, li, mj, kkc, ifrst, ilst, rwhead
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_c_csc_sparse_mat) :: ac
+  complex(psb_spk_)     :: alpha
+  character(len=20)  :: name='psb_biconjg_mlk'
+  logical, parameter :: debug=.false., test_merge=.true.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n), &
+       & hlist(n),rwlist(n),bfr(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i)  = 0
+    izcr(i)  = 0
+    zval(i)  = czero
+    hlist(i) = -1
+    rwlist(i) = -1
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = czero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < s_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = cone
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = cone
+  nzw       = 1
+
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = czero
+    ! !$        end do
+    zval(i)  = cone
+    izkr(i) = 1
+    rwhead  = i
+
+    hlhead = -1
+
+    kkc = 0
+    ilst  = ac%icp(i)-1
+    ifrst = ac%icp(i)
+    do j = ac%icp(i+1)-1, ac%icp(i), -1
+      if (ac%ia(j) < i) then
+        ilst = j
+        exit
+      end if
+    end do
+    kkc = ilst-ifrst+1
+
+    if (.true..or.debug) then
+!!$      write(0,*) 'Outer Before insertion  : ',hlhead
+      call printlist(hlhead,hlist)
+    end if
+    if (kkc > 0) then
+!!$      write(0,*) i,' Outer Inserting : ',kkc,':',ac%ia(ifrst:ilst)
+
+      !call hlmerge(hlhead,hlist,bfr(1:kkc))
+      call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst))
+    end if
+    if (debug) then
+      write(0,*) 'Outer After insertion: ',hlhead
+      call printlist(hlhead,hlist)
+    end if
+
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='init_lists')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      mj      = hlhead
+      if (mj > 0)  then
+        hlhead = hlist(mj)
+        hlist(mj) = -1
+      end if
+      j = mj
+      if (j < 0)  exit outer
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+
+      if (debug) write(0,*) 'update loop, using row: ',j,i,mj
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+!!$      write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha
+!!$      write(0,*) '   Current list is : ',hlhead
+!!$      call printlist(hlhead,hlist)
+!!$
+
+
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+        ifrst=z%icp(j)
+        ilst=z%icp(j+1)-1
+        call hlmerge(rwhead,rwlist,z%ia(ifrst:ilst))
+!!$        write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, &
+!!$             & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1)
+
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+
+          if (izkr(kr) == 0) then
+!!$            write(0,*) ' main inner Inserting ',kr
+!!$            call hlmerge(rwhead,rwlist,(/kr/))
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            ilst  = ac%icp(kr)-1
+            ifrst = ac%icp(kr+1)
+            kkc = 0
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              if ((ac%ia(kc) < i).and.(ac%ia(kc) >j)) then
+                ifrst = min(ifrst,kc )
+                ilst  = max(ilst,kc)
+              end if
+            end do
+            kkc = ilst-ifrst+1
+            if (debug) then
+              write(0,*) 'Inner Before insertion: '
+              call printlist(hlhead,hlist)
+              write(0,*) 'Inner Inserting : ',kkc,':',ac%ia(ifrst:ilst)
+            end if
+            if (ilst >= ifrst) then
+!!$              write(0,*) j,i,' Inner inserting ',ac%ia(ifrst:ilst)
+              call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst))
+            end if
+
+            if (debug) then
+              write(0,*) 'Inner After insertion: ',hlhead
+              call printlist(hlhead,hlist)
+            end if
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+    if (abs(p(i)) < s_epstol) &
+         & p(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info)
+
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = czero
+    ! !$        end do
+    zval(i)  = cone
+    izkr(i) = 1
+    rwhead  = i
+    hlhead = -1
+
+    kkc = 0
+    ilst  = a%irp(i)-1
+    ifrst = a%irp(i)
+    do j = a%irp(i+1)-1, a%irp(i), -1
+      if (a%ja(j) < i) then
+        ilst = j
+        exit
+      end if
+    end do
+    kkc = ilst-ifrst+1
+
+    if (debug) then
+      write(0,*) 'Outer Before insertion: '
+      call printlist(hlhead,hlist)
+      write(0,*) 'Outer Inserting : ',kkc,':',a%ja(ifrst:ilst)
+    end if
+    if (kkc > 0 ) then
+      !call hlmerge(hlhead,hlist,bfr(1:kkc))
+      call hlmerge(hlhead,hlist,a%ja(ifrst:ilst))
+    end if
+    if (debug) then
+      write(0,*) 'Outer After insertion: ',hlhead
+      call printlist(hlhead,hlist)
+    end if
+
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='init_lists')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      mj      = hlhead
+      if (hlhead > 0)  then
+        hlhead = hlist(mj)
+        hlist(mj) = -1
+      end if
+      j = mj
+      if (j < 0)  exit outerw
+
+      izcr(j) = 0
+      if (j>=i) cycle outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = ac%icp(j)
+      ip2 = ac%icp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (ac%ia(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-q(i)/q(j))
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+        ifrst=w%icp(j)
+        ilst=w%icp(j+1)-1
+        call hlmerge(rwhead,rwlist,w%ia(ifrst:ilst))
+
+        do k=w%icp(j), w%icp(j+1)-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            ilst  = a%irp(kr)-1
+            ifrst = a%irp(kr+1)
+            kkc = 0
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              if ((a%ja(kc) < i).and.(a%ja(kc) >j)) then
+                ifrst = min(ifrst,kc )
+                ilst  = max(ilst,kc)
+              end if
+            end do
+            kkc = ilst-ifrst+1
+            if (debug) then
+              write(0,*) 'Inner Before insertion: '
+              call printlist(hlhead,hlist)
+              write(0,*) 'Inner Inserting : ',kkc,':',a%ja(ifrst:ilst)
+            end if
+
+            call hlmerge(hlhead,hlist,a%ja(ifrst:ilst))
+
+            if (debug) then
+              write(0,*) 'Inner After insertion: ',hlhead
+              call printlist(hlhead,hlist)
+            end if
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outerw
+    ip1 = ac%icp(i)
+    ip2 = ac%icp(i+1) - 1
+    do
+      if (ip2 < ip1 ) exit
+      if (ac%ia(ip2) <= n) exit
+      ip2 = ip2 -1
+    end do
+    nzra = max(0,ip2 - ip1 + 1)
+    q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+    if (abs(q(i)) < s_epstol) &
+         & q(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrz, w%ia,  info)
+    call psb_ensure_size(nzw+nzrz, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrz
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrz
+    nzw        = nzw + nzrz
+
+  end do
+
+contains
+
+  subroutine hlmerge(head,listv,vals)
+    integer(psb_ipk_), intent(inout) :: head, listv(:)
+    integer(psb_ipk_), intent(in)    :: vals(:)
+    integer(psb_ipk_) :: i,j,k, lh, lv, nv, vv, flh, ph
+
+    nv  = size(vals)
+    lh  = head
+    flh = -1
+    lv  = 1
+    if ((head < 0).and.(nv > 0)) then
+      ! Adjust head if empty
+      head = vals(lv)
+      lv = lv + 1
+    else if ((head > 0) .and. (nv >0)) then
+      ! Adjust head if first item less than it
+      if (head > vals(lv)) then
+        listv(vals(lv)) = head
+        head = vals(lv)
+        lv   = lv + 1
+      end if
+    end if
+
+    lh = head
+    ph = lh
+    do while ((lh > 0) .and. (lv <= nv))
+      if (lh == vals(lv)) then
+        lv = lv + 1
+      else if (lh > vals(lv)) then
+        listv(vals(lv)) = lh
+        listv(ph) = vals(lv)
+        lh = vals(lv)
+        lv = lv + 1
+      else
+        ph = lh
+        lh = listv(lh)
+      end if
+    end do
+    lh = ph
+    do while (lv <= nv)
+      listv(lh) = vals(lv)
+      lh = listv(lh)
+      lv = lv + 1
+    end do
+  end subroutine hlmerge
+
+
+  subroutine printlist(head,listv)
+    integer(psb_ipk_), intent(in) :: head, listv(:)
+    integer(psb_ipk_) :: li
+
+    li = head
+    do while (li > 0)
+      write(0,*) 'Item: ', li
+      li = listv(li)
+    end do
+  end subroutine printlist
+
+end subroutine psb_csparse_biconjg_mlk
diff --git a/prec/impl/psb_csparse_biconjg_s_ft_llk.F90 b/prec/impl/psb_csparse_biconjg_s_ft_llk.F90
new file mode 100644
index 00000000..64af2ed5
--- /dev/null
+++ b/prec/impl/psb_csparse_biconjg_s_ft_llk.F90
@@ -0,0 +1,414 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_csparse_biconjg_s_ft_llk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_c_ainv_tools_mod
+  use psb_c_biconjg_mod, psb_protect_name => psb_csparse_biconjg_s_ft_llk
+
+  !
+  ! Left-looking variant, stabilized i.e. product by A is applied
+  ! to compute the diagonal elements.
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_c_csr_sparse_mat), intent(in)    :: a
+  type(psb_c_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_spk_), intent(in)                :: sp_thresh
+  complex(psb_spk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:),iww(:)
+  complex(psb_spk_), allocatable    :: zval(:),val(:), q(:),  ww(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj, nzww,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw, nzrw
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_c_csc_sparse_mat) :: ac
+  complex(psb_spk_)     :: alpha, tmpq,tmpq2
+  character(len=20)  :: name='psb_orth_llk'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),iww(n),ww(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! Init pointers to:
+  !  ljr(i): last occupied column index within row  I
+  !  izcr(i): first occupied row index within column I
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = czero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = czero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < s_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = cone
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = cone
+  nzw       = 1
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = czero
+    ! !$        end do
+    zval(i)  = cone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+    if (info == psb_success_) call rheap%init(info)
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j)<i) then
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+      izcr(j) = 0
+      if (j>=i) exit outer
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = w%icp(j)
+      ip2 = w%icp(j+1) - 1
+      nzra = max(0,ip2 - ip1 + 1)
+      nzww = 0
+      call psb_c_spvspm(cone,a,nzra,w%ia(ip1:ip2),w%val(ip1:ip2),&
+           & czero,nzww,iww,ww,info)
+
+      p(i) =  psb_spge_dot(nzww,iww,ww,zval)
+
+      ipz1 = z%icp(j)
+      ipz2 = z%icp(j+1)
+      nzrz = ipz2-ipz1
+      alpha = (-p(i)/p(j))
+!!$        write(0,*) ' p(i)/p(j) ',i,j,alpha,p(i),p(j)
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+
+        do k=ipz1, ipz2-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+!!$        izcr(j) = 0
+    end do outer
+
+    if (.false.) then
+      ! We can't do the proper thing until we have bot Z_i and W_i.
+      call a%csget(i,i,nzra,ia,ja,val,info)
+      call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+      p(i) = psb_spge_dot(nzra,ja,val,zval)
+      if (abs(p(i)) < s_epstol) &
+           & p(i) = 1.d-3
+    end if
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = czero
+    ! !$        end do
+    zval(i)  = cone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+!!$      write(0,*) 'Inserting into heap ',i
+    if (info == psb_success_) call rheap%init(info)
+    do j = a%irp(i), a%irp(i+1)-1
+      if (a%ja(j)<i) then
+        if (info == psb_success_) call rheap%insert(a%ja(j),info)
+        izcr(a%ja(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      innerw: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outerw ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit innerw
+        end if
+      end do innerw
+      izcr(j) = 0
+      if (j>=i) exit outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      if (.false.) then
+        ip1 = ac%icp(j)
+        ip2 = ac%icp(j+1) - 1
+        do
+          if (ip2 < ip1 ) exit
+          if (ac%ia(ip2) <= n) exit
+          ip2 = ip2 -1
+        end do
+        nzra = max(0,ip2 - ip1 + 1)
+        q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+        ! !$          write(psb_err_unit,*) j,i,p(i)
+      else
+        ip1 = z%icp(j)
+        ip2 = z%icp(j+1) - 1
+        nzra = max(0,ip2 - ip1 + 1)
+        nzww = 0
+        call psb_c_spmspv(cone,ac,nzra,z%ia(ip1:ip2),z%val(ip1:ip2),&
+             & czero,nzww,iww,ww,info)
+
+        q(i) =  psb_spge_dot(nzww,iww,ww,zval)
+      end if
+
+      ipz1 = w%icp(j)
+      ipz2 = w%icp(j+1)
+      nzrz = ipz2-ipz1
+      alpha = (-q(i)/q(j))
+!!$        write(0,*) ' q(i)/q(j) ',i,j,alpha,q(i),q(j)
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+
+        do k=ipz1, ipz2-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              nextj=a%ja(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+!!$        izcr(j) = 0
+    end do outerw
+
+!!$      ip1 = ac%icp(i)
+!!$      ip2 = ac%icp(i+1) - 1
+!!$      do
+!!$        if (ip2 < ip1 ) exit
+!!$        if (ac%ia(ip2) <= n) exit
+!!$        ip2 = ip2 -1
+!!$      end do
+!!$      nzra = max(0,ip2 - ip1 + 1)
+!!$
+!!$      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+!!$      if (abs(q(i)) < s_epstol) &
+!!$           & q(i) = 1.d-3
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrw,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrw, w%ia,  info)
+    call psb_ensure_size(nzw+nzrw, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrw
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrw
+    nzw        = nzw + nzrw
+
+!!$      !
+!!$      ! Ok, now compute w_i^T A z_i
+!!$      !
+    nzww = 0
+    nzrz = z%icp(i+1)-z%icp(i)
+    ipz1 = z%icp(i)
+    call psb_c_spmspv(cone,ac,&
+         & nzrz,z%ia(ipz1:ipz1+nzrz-1),z%val(ipz1:ipz1+nzrz-1),&
+         & czero,nzww,iww,ww,info)
+    tmpq  = psb_spdot_srtd(nzww,iww,ww,nzrw,ia,val)
+    q(i) = tmpq
+!    if (tmpq <0) then
+!!$        write(0,*) 'On negative dot prod at ',i
+!!$        write(0,*) 'On negative dot prod a ',ia(1:nzrw),val(1:nzrw)
+!!$        write(0,*) 'On negative dot prod w ',iww(1:nzww),ww(1:nzww)
+!!$        ip1 = ac%icp(i)
+!!$        ip2 = ac%icp(i+1) - 1
+!!$        do
+!!$          if (ip2 < ip1 ) exit
+!!$          if (ac%ia(ip2) <= n) exit
+!!$          ip2 = ip2 -1
+!!$        end do
+!!$        nzra = max(0,ip2 - ip1 + 1)
+!!$        write(0,*) 'On negative dot prod a ',ac%ia(ip1:ip2),ac%val(ip1:ip2)
+!
+!    end if
+
+    if (abs(q(i)) < s_epstol) &
+         & q(i) = 1.d-3
+    p(i) = q(i)
+
+  end do
+
+end subroutine psb_csparse_biconjg_s_ft_llk
diff --git a/prec/impl/psb_csparse_biconjg_s_llk.F90 b/prec/impl/psb_csparse_biconjg_s_llk.F90
new file mode 100644
index 00000000..b38b2a0c
--- /dev/null
+++ b/prec/impl/psb_csparse_biconjg_s_llk.F90
@@ -0,0 +1,248 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_csparse_biconjg_s_llk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_c_ainv_tools_mod
+  use psb_c_biconjg_mod, psb_protect_name => psb_csparse_biconjg_s_llk
+
+  !
+  ! Left-looking variant SYMMETRIC/HERMITIAN A. You have been warned!
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_c_csr_sparse_mat), intent(in)    :: a
+  type(psb_c_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_spk_), intent(in)                :: sp_thresh
+  complex(psb_spk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:)
+  complex(psb_spk_), allocatable    :: zval(:),val(:), q(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw,kk
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_c_csc_sparse_mat) :: ac
+  complex(psb_spk_)     :: alpha, zvalmax
+  character(len=20)  :: name='psb_orth_llk'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = czero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = czero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < s_epstol) &
+       & p(1) = 1.d-3
+
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1) = 1
+  z%icp(2) = 2
+  z%ia(1)  = 1
+  z%val(1) = cone
+  nzz      = 1
+  zvalmax  = cone
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = czero
+    ! !$        end do
+    zval(i)  = cone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j) < i) then
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+      if (debug) write(0,*) 'update loop, using row: ',j,i
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+!!$          if (abs(zval(kr)) > 1e16) then
+!!$            write(0,*) i,j,p(i),p(j),alpha,z%val(k),alpha*z%val(k),kr,zval(kr)
+!!$          end if
+          if (izkr(kr) == 0) then
+
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+!!$    if ((1761<=i).and.(i<=1780)) then
+!!$      write(0,*) 'Dot product terms at ',i,nzra
+!!$      do kk=1,nzra
+!!$        write(0,*) kk,ja(kk),val(kk),zval(ja(kk))
+!!$      end do
+!!$    end if
+
+    if (abs(p(i)) < s_epstol) &
+         & p(i) = 1.d-3
+
+    ! !$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+!!$      zvalmax = max(zvalmax,abs(val(j)))
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+!!$    write(0,*) ' Dot: ',i,p(i),zvalmax
+
+  end do
+
+  call z%cp_to_fmt(w,info)
+
+end subroutine psb_csparse_biconjg_s_llk
diff --git a/prec/impl/psb_d_ainv_bld.f90 b/prec/impl/psb_d_ainv_bld.f90
new file mode 100644
index 00000000..6c175d25
--- /dev/null
+++ b/prec/impl/psb_d_ainv_bld.f90
@@ -0,0 +1,225 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+subroutine psb_d_ainv_bld(a,alg,fillin,thresh,wmat,d,zmat,desc,info,blck,iscale)
+
+  use psb_base_mod
+  use psb_prec_const_mod
+  use psb_d_biconjg_mod
+
+  implicit none
+
+  ! Arguments
+  type(psb_dspmat_type), intent(in), target   :: a
+  integer(psb_ipk_), intent(in)                 :: fillin,alg
+  real(psb_dpk_), intent(in)                  :: thresh
+  type(psb_dspmat_type), intent(inout)        :: wmat, zmat
+  real(psb_dpk_), allocatable                  :: d(:)
+  Type(psb_desc_type), Intent(in)               :: desc
+  integer(psb_ipk_), intent(out)                :: info
+  type(psb_dspmat_type), intent(in), optional :: blck
+  integer(psb_ipk_), intent(in), optional       :: iscale
+  !
+  integer(psb_ipk_)             :: i, nztota, err_act, n_row, nrow_a
+  type(psb_d_coo_sparse_mat)  :: acoo
+  type(psb_d_csr_sparse_mat)  :: acsr
+  type(psb_dspmat_type)       :: atmp
+  real(psb_dpk_), allocatable :: arws(:), acls(:)
+  real(psb_dpk_), allocatable  :: pq(:), ad(:)
+  integer(psb_ipk_)             :: debug_level, debug_unit
+  type(psb_ctxt_type)           :: ctxt
+  integer(psb_ipk_)             :: np,me
+  integer(psb_ipk_)             :: nzrmax, iscale_
+  real(psb_dpk_)              :: sp_thresh
+  real(psb_dpk_)               :: weight
+  character(len=20)             :: name, ch_err
+
+
+  info = psb_success_
+  name = 'psb_dainv_bld'
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  ctxt        = psb_cd_get_context(desc)
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' start'
+
+  iscale_ = psb_ilu_scale_none_
+  if (present(iscale)) iscale_ = iscale
+  weight = done
+  !
+  ! Check the memory available to hold the W and Z factors
+  ! and allocate it if needed
+  !
+  nrow_a = a%get_nrows()
+  nztota = a%get_nzeros()
+
+  if (present(blck)) then
+    nztota = nztota + blck%get_nzeros()
+  end if
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),&
+       & ': out get_nnzeros',nrow_a,nztota,&
+       & a%get_nrows(),a%get_ncols(),a%get_nzeros()
+
+
+  n_row  = desc%get_local_rows()
+  allocate(pq(n_row),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  nzrmax    = fillin
+  sp_thresh = thresh
+
+  !
+  ! Ok, let's start first with Z (i.e. Upper)
+  !
+  call a%csclip(acoo,info,imax=n_row,jmax=n_row)
+  call acsr%mv_from_coo(acoo,info)
+  select case(iscale_)
+  case(psb_ilu_scale_none_)
+    ! Ok, do nothing.
+
+  case(psb_ilu_scale_maxval_)
+    weight = acsr%maxval()
+    weight = done/weight
+    call acsr%scal(weight,info)
+
+  case(psb_ilu_scale_arcsum_)
+    allocate(arws(n_row),acls(n_row),ad(n_row),stat=info)
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    end if
+    call acsr%arwsum(arws)
+    call acsr%aclsum(acls)
+    ad(1:n_row) = sqrt(sqrt(arws(1:n_row)*acls(1:n_row)))
+    ad(1:n_row) = done/ad(1:n_row)
+    call acsr%scal(ad,info,side='L')
+    call acsr%scal(ad,info,side='R')
+  case default
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale')
+    goto 9999
+  end select
+
+  !
+  ! Here for the actual workhorses.
+  ! Only biconjg is surviving for now....
+  !
+    call psb_sparse_biconjg(alg,n_row,acsr,pq,&
+         &   zmat,wmat,nzrmax,sp_thresh,info)
+  ! Done. Hopefully....
+
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='sparse_biconjg')
+    goto 9999
+  end if
+  call atmp%mv_from(acsr)
+
+  !
+  ! Is this right???
+  !
+  do i=1, n_row
+    if (abs(pq(i)) < d_epstol) then
+      pq(i) = done
+    else
+      pq(i) = done/pq(i)
+    end if
+  end do
+
+  select case(iscale_)
+  case(psb_ilu_scale_none_)
+    ! Ok, do nothing.
+  case(psb_ilu_scale_maxval_)
+    pq(:) = pq(:)*weight
+
+  case(psb_ilu_scale_arcsum_)
+    call zmat%scal(ad,info,side='L')
+    call wmat%scal(ad,info,side='R')
+
+  case default
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale')
+    goto 9999
+  end select
+
+  call psb_move_alloc(pq,d,info)
+
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' end'
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_d_ainv_bld
diff --git a/prec/impl/psb_d_bjacprec_impl.f90 b/prec/impl/psb_d_bjacprec_impl.f90
index 08409346..ec866dbe 100644
--- a/prec/impl/psb_d_bjacprec_impl.f90
+++ b/prec/impl/psb_d_bjacprec_impl.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,13 +27,13 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 
 subroutine psb_d_bjac_dump(prec,info,prefix,head)
   use psb_base_mod
   use psb_d_bjacprec, psb_protect_name => psb_d_bjac_dump
-  implicit none 
+  implicit none
   class(psb_d_bjac_prec_type), intent(in) :: prec
   integer(psb_ipk_), intent(out)                    :: info
   character(len=*), intent(in), optional  :: prefix,head
@@ -43,13 +43,13 @@ subroutine psb_d_bjac_dump(prec,info,prefix,head)
   character(len=80)  :: prefix_
   character(len=120) :: fname ! len should be at least 20 more than
 
-  !  len of prefix_ 
+  !  len of prefix_
 
   info = 0
   ctxt = prec%get_ctxt()
   call psb_info(ctxt,iam,np)
 
-  if (present(prefix)) then 
+  if (present(prefix)) then
     prefix_ = trim(prefix(1:min(len(prefix),len(prefix_))))
   else
     prefix_ = "dump_fact_d"
@@ -74,7 +74,7 @@ end subroutine psb_d_bjac_dump
 subroutine psb_d_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_d_bjacprec, psb_protect_name => psb_d_bjac_apply_vect
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_d_bjac_prec_type), intent(inout)  :: prec
   real(psb_dpk_),intent(in)         :: alpha,beta
@@ -118,12 +118,12 @@ subroutine psb_d_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
   n_row = desc_data%get_local_rows()
   n_col = desc_data%get_local_cols()
 
-  if (x%get_nrows() < n_row) then 
+  if (x%get_nrows() < n_row) then
     info = 36; ierr(1) = 2; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
   end if
-  if (y%get_nrows() < n_row) then 
+  if (y%get_nrows() < n_row) then
     info = 36; ierr(1) = 3; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
@@ -140,9 +140,9 @@ subroutine psb_d_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
   end if
 
 
-  if (n_col <= size(work)) then 
+  if (n_col <= size(work)) then
     ww => work(1:n_col)
-    if ((4*n_col+n_col) <= size(work)) then 
+    if ((4*n_col+n_col) <= size(work)) then
       aux => work(n_col+1:)
     else
       allocate(aux(4*n_col),stat=info)
@@ -152,19 +152,19 @@ subroutine psb_d_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
     allocate(ww(n_col),aux(4*n_col),stat=info)
   endif
 
-  if (info /= psb_success_) then 
+  if (info /= psb_success_) then
     call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-    goto 9999      
+    goto 9999
   end if
 
   do_alloc_wrk = .not.prec%is_allocated_wrk()
   if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v)
 
   associate (wv => prec%wrk(1), wv1 => prec%wrk(2))
-    
+
     select case(prec%iprcparm(psb_f_type_))
-    case(psb_f_ilu_n_) 
-      
+    case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_)
+
       select case(trans_)
       case('N')
         call psb_spsm(done,prec%av(psb_l_pr_),x,dzero,wv,desc_data,info,&
@@ -172,31 +172,58 @@ subroutine psb_d_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
         if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_u_pr_),wv,&
              & beta,y,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_, work=aux)
-        
+
       case('T')
         call psb_spsm(done,prec%av(psb_u_pr_),x,dzero,wv,desc_data,info,&
              & trans=trans_,scale='L',diag=prec%dv,choice=psb_none_, work=aux)
         if(info == psb_success_)  call psb_spsm(alpha,prec%av(psb_l_pr_),wv,&
              & beta,y,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_,work=aux)
-        
+
       case('C')
-        
+
         call psb_spsm(done,prec%av(psb_u_pr_),x,dzero,wv,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_, work=aux)
-        
+
         call wv1%mlt(done,prec%dv,wv,dzero,info,conjgx=trans_)
-        
+
         if(info == psb_success_)  call psb_spsm(alpha,prec%av(psb_l_pr_),wv1,&
              & beta,y,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_,work=aux)
-        
+
+      end select
+      if (info /= psb_success_) then
+        ch_err="psb_spsm"
+        goto 9999
+      end if
+
+    case(psb_f_ainv_,psb_f_invt_,psb_f_invk_)
+      ! Application of approximate inverse preconditioner, just some spmm
+
+      select case(trans_)
+      case('N')
+        call psb_spmm(done,prec%av(psb_l_pr_),x,dzero,wv,desc_data,info,&
+             & trans=trans_,work=aux,doswap=.false.)
+
+        if (info == psb_success_) call wv1%mlt(done,prec%dv,wv,dzero,info)
+        if(info == psb_success_) &
+             & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1,&
+             & beta,y,desc_data,info, trans=trans_, work=aux,doswap=.false.)
+
+       case('T','C')
+         call psb_spmm(done,prec%av(psb_l_pr_),x,dzero,wv,desc_data,info,&
+              & trans=trans_,work=aux,doswap=.false.)
+         if (info == psb_success_) call wv1%mlt(done,prec%dv,wv,dzero,info)
+         if (info == psb_success_) &
+              & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1, &
+              & beta,y,desc_data,info,trans=trans_,work=aux,doswap=.false.)
+
       end select
-      if (info /= psb_success_) then 
+      if (info /= psb_success_) then
         ch_err="psb_spsm"
         goto 9999
       end if
-      
+
     case default
       info = psb_err_internal_error_
       call psb_errpush(info,name,a_err='Invalid factorization')
@@ -204,12 +231,12 @@ subroutine psb_d_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
     end select
 
   end associate
-  
+
   call psb_halo(y,desc_data,info,data=psb_comm_mov_)
 
   if (do_alloc_wrk) call prec%free_wrk(info)
-  if (n_col <= size(work)) then 
-    if ((4*n_col+n_col) <= size(work)) then 
+  if (n_col <= size(work)) then
+    if ((4*n_col+n_col) <= size(work)) then
     else
       deallocate(aux)
     endif
@@ -231,7 +258,7 @@ end subroutine psb_d_bjac_apply_vect
 subroutine psb_d_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_d_bjacprec, psb_protect_name => psb_d_bjac_apply
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_d_bjac_prec_type), intent(inout)  :: prec
   real(psb_dpk_),intent(in)         :: alpha,beta
@@ -244,6 +271,7 @@ subroutine psb_d_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   ! Local variables
   integer(psb_ipk_) :: n_row,n_col
   real(psb_dpk_), pointer :: ww(:), aux(:)
+  type(psb_d_vect_type) :: tx,ty
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
   integer(psb_ipk_) :: err_act, ierr(5)
@@ -273,12 +301,12 @@ subroutine psb_d_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   n_row = desc_data%get_local_rows()
   n_col = desc_data%get_local_cols()
 
-  if (size(x) < n_row) then 
+  if (size(x) < n_row) then
     info = 36; ierr(1) = 2; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
   end if
-  if (size(y) < n_row) then 
+  if (size(y) < n_row) then
     info = 36; ierr(1) = 3; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
@@ -295,29 +323,29 @@ subroutine psb_d_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   end if
 
 
-  if (n_col <= size(work)) then 
+  if (n_col <= size(work)) then
     ww => work(1:n_col)
-    if ((4*n_col+n_col) <= size(work)) then 
+    if ((4*n_col+n_col) <= size(work)) then
       aux => work(n_col+1:)
     else
       allocate(aux(4*n_col),stat=info)
-      if (info /= psb_success_) then 
+      if (info /= psb_success_) then
         call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-        goto 9999      
+        goto 9999
       end if
 
     endif
   else
     allocate(ww(n_col),aux(4*n_col),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
   endif
 
 
   select case(prec%iprcparm(psb_f_type_))
-  case(psb_f_ilu_n_) 
+  case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_)
 
     select case(trans_)
     case('N')
@@ -344,11 +372,42 @@ subroutine psb_d_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
            & trans=trans_,scale='U',choice=psb_none_,work=aux)
 
     end select
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       ch_err="psb_spsm"
       goto 9999
     end if
 
+  case(psb_f_ainv_,psb_f_invt_,psb_f_invk_)
+    ! Application of approximate inverse preconditioner, just some spmm
+
+    select case(trans_)
+
+      case('N')
+        call psb_spmm(done,prec%av(psb_l_pr_),x,dzero,ww,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+        ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row)
+        if (info == psb_success_) &
+            & call psb_spmm(alpha,prec%av(psb_u_pr_),ww,beta,y,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+
+      case('T')
+        call psb_spmm(done,prec%av(psb_u_pr_),x,dzero,ww,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+        ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row)
+        if (info == psb_success_) &
+            & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+
+      case('C')
+        call psb_spmm(done,prec%av(psb_u_pr_),x,dzero,ww,desc_data,info,&
+             & trans=trans_,work=aux,doswap=.false.)
+        ww(1:n_row) = ww(1:n_row) * (prec%dv%v%v(1:n_row))
+        if (info == psb_success_) &
+             & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,&
+             & trans=trans_,work=aux,doswap=.false.)
+
+    end select
+
 
   case default
     info = psb_err_internal_error_
@@ -358,8 +417,8 @@ subroutine psb_d_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
 
   call psb_halo(y,desc_data,info,data=psb_comm_mov_)
 
-  if (n_col <= size(work)) then 
-    if ((4*n_col+n_col) <= size(work)) then 
+  if (n_col <= size(work)) then
+    if ((4*n_col+n_col) <= size(work)) then
     else
       deallocate(aux)
     endif
@@ -397,11 +456,26 @@ subroutine psb_d_bjac_precinit(prec,info)
     call psb_Errpush(info,name)
     goto 9999
   end if
+  call psb_realloc(psb_rfpsz,prec%rprcparm,info)
+  if (info /= psb_success_) then
+    info = psb_err_alloc_dealloc_
+    call psb_Errpush(info,name)
+    goto 9999
+  end if
 
   prec%iprcparm(:)                = 0
   prec%iprcparm(psb_p_type_)      = psb_bjac_
   prec%iprcparm(psb_f_type_)      = psb_f_ilu_n_
   prec%iprcparm(psb_ilu_fill_in_) = 0
+  prec%iprcparm(psb_ilu_ialg_)    = psb_ilu_n_
+  prec%iprcparm(psb_ilu_scale_)   = psb_ilu_scale_none_
+  prec%iprcparm(psb_inv_fillin_)  = 0
+  prec%iprcparm(psb_ainv_alg_)    = psb_ainv_llk_
+
+
+  prec%rprcparm(:)                = 0
+  prec%rprcparm(psb_fact_eps_)    = 1E-1_psb_dpk_
+  prec%rprcparm(psb_inv_thresh_)  = 1E-1_psb_dpk_
 
 
   call psb_erractionrestore(err_act)
@@ -416,7 +490,8 @@ end subroutine psb_d_bjac_precinit
 subroutine psb_d_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
 
   use psb_base_mod
-  use psb_prec_mod, only : psb_ilu_fct
+  use psb_d_ilu_fact_mod
+  use psb_d_ainv_fact_mod
   use psb_d_bjacprec, psb_protect_name => psb_d_bjac_precbld
   Implicit None
 
@@ -428,12 +503,13 @@ subroutine psb_d_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
   class(psb_d_base_vect_type), intent(in), optional  :: vmold
   class(psb_i_base_vect_type), intent(in), optional  :: imold
 
-  !     .. Local Scalars ..                                                       
-  integer(psb_ipk_) ::    i, m
+  !     .. Local Scalars ..
+  integer(psb_ipk_) ::    i, m, ialg, fill_in, iscale, inv_fill, iinvalg
   integer(psb_ipk_) ::    ierr(5)
   character ::        trans, unitd
-  type(psb_d_csr_sparse_mat), allocatable  :: lf, uf
+  type(psb_dspmat_type), allocatable :: lf, uf
   real(psb_dpk_), allocatable :: dd(:)
+  real(psb_dpk_) :: fact_eps, inv_thresh
   integer(psb_ipk_) :: nztota,  err_act, n_row, nrow_a,n_col, nhalo
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -462,19 +538,114 @@ subroutine psb_d_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
   trans = 'N'
   unitd = 'U'
 
+  ! We check if all the information contained in the preconditioner structure
+  ! are meaningful, otherwise we give an error and get out of the build
+  ! procedure
+  ialg    = prec%iprcparm(psb_ilu_ialg_)  ! Integer for ILU type algorithm
+  iinvalg = prec%iprcparm(psb_ainv_alg_)  ! Integer for AINV type algorithm
+  iscale = prec%iprcparm(psb_ilu_scale_)  ! Integer for scaling of matrix
+  fact_eps = prec%rprcparm(psb_fact_eps_) ! Drop-tolerance for factorization
+  inv_thresh = prec%rprcparm(psb_inv_thresh_) ! Drop-tolerance for inverse
+  fill_in = prec%iprcparm(psb_ilu_fill_in_) ! Fill-In for factorization
+  inv_fill = prec%iprcparm(psb_inv_fillin_) ! Fill-In for inverse factorization
+
+  ! Check if the type of scaling is known, pay attention that not all the
+  ! scalings make sense for all the factorization, if something that does not
+  ! make sense is required the factorization routine will fail in an
+  ! unnrecoverable way.
+  if ((iscale == psb_ilu_scale_none_).or.&
+      (iscale == psb_ilu_scale_maxval_).or.&
+      (iscale == psb_ilu_scale_diag_).or.&
+      (iscale == psb_ilu_scale_arwsum_).or.&
+      (iscale == psb_ilu_scale_aclsum_).or.&
+      (iscale == psb_ilu_scale_arcsum_)) then
+  ! Do nothing: admissible request
+  else
+    info=psb_err_from_subroutine_
+    ch_err='psb_ilu_scale_'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+  select case (prec%iprcparm(psb_f_type_))
+    case (psb_f_ainv_)
+      ! Check if the variant for the AINV is known to the library
+      select case (iinvalg)
+      case(psb_ainv_llk_,psb_ainv_s_llk_,psb_ainv_s_ft_llk_,psb_ainv_llk_noth_,&
+        & psb_ainv_mlk_)
+        ! Do nothing these are okay
+      case default
+        info=psb_err_from_subroutine_
+        ch_err='psb_ainv_alg_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end select ! AINV Variant
+      ! Check if the drop-tolerance make sense
+      if( inv_thresh > 1) then
+        info=psb_err_from_subroutine_
+        ch_err='psb_inv_thresh_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+    case (psb_f_ilu_t_)
+      if (fact_eps > 1) then
+        ! Check if the drop-tolerance make sense
+        info=psb_err_from_subroutine_
+        ch_err='psb_fact_eps_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+    case (psb_f_invt_)
+      ! Check both tolerances
+      if (fact_eps > 1) then
+        info=psb_err_from_subroutine_
+        ch_err='psb_fact_eps_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+      if( inv_thresh > 1) then
+        info=psb_err_from_subroutine_
+        ch_err='psb_inv_thresh_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+    case default
+  end select
+
+
+  ! Checks relative to the fill-in parameters
+  if (prec%iprcparm(psb_f_type_) == psb_f_ilu_n_) then
+    if(fill_in < 0) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilu_fill_in_'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    else if (fill_in == 0) then
+      ! If the requested level of fill is equal to zero, we default to the
+      ! specialized ILU(0) routine
+      prec%iprcparm(psb_f_type_) = psb_f_ilu_n_
+    end if
+  end if
+  ! If no limit on the fill_in is required we allow every fill, this is needed
+  ! since this quantity is used to allocate the auxiliary vectors for the
+  ! factorization
+  if (inv_fill <= 0) inv_fill = m
+
+  ! Select on the type of factorization to be used
   select case(prec%iprcparm(psb_f_type_))
 
-  case(psb_f_ilu_n_) 
+  case(psb_f_ilu_n_)
+  ! ILU(0) Factorization: the total number of nonzeros of the factorized matrix
+  ! is equal to the one of the input matrix
 
-    if (allocated(prec%av)) then 
-      if (size(prec%av) < psb_bp_ilu_avsz) then 
-        do i=1,size(prec%av) 
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
           call prec%av(i)%free()
         enddo
         deallocate(prec%av,stat=info)
       endif
     end if
-    if (.not.allocated(prec%av)) then 
+    if (.not.allocated(prec%av)) then
       allocate(prec%av(psb_max_avsz),stat=info)
       if (info /= psb_success_) then
         call psb_errpush(psb_err_alloc_dealloc_,name)
@@ -501,27 +672,27 @@ subroutine psb_d_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
     end if
 
     allocate(dd(n_row),stat=info)
-    if (info == psb_success_) then 
+    if (info == psb_success_) then
       allocate(prec%dv, stat=info)
-      if (info == 0) then 
-        if (present(vmold)) then 
-          allocate(prec%dv%v,mold=vmold,stat=info) 
-        else       
-          allocate(psb_d_base_vect_type :: prec%dv%v,stat=info) 
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_d_base_vect_type :: prec%dv%v,stat=info)
         end if
       end if
     end if
 
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     endif
-    ! This is where we have no renumbering, thus no need 
-    call psb_ilu_fct(a,lf,uf,dd,info)
+    ! This is where we have no renumbering, thus no need
+    call psb_ilu0_fact(ialg,a,lf,uf,dd,info)
 
     if(info == psb_success_) then
-      call prec%av(psb_l_pr_)%mv_from(lf)
-      call prec%av(psb_u_pr_)%mv_from(uf)
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
       call prec%av(psb_l_pr_)%set_asb()
       call prec%av(psb_u_pr_)%set_asb()
       call prec%av(psb_l_pr_)%trim()
@@ -530,12 +701,375 @@ subroutine psb_d_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
       ! call move_alloc(dd,prec%d)
     else
       info=psb_err_from_subroutine_
-      ch_err='psb_ilu_fct'
+      ch_err='psb_ilu0_fact'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
 
-  case(psb_f_none_) 
+  case(psb_f_ilu_k_)
+  ! ILU(N) Incomplete LU-factorization with N levels of fill-in. Depending on
+  ! the type of the variant of the algorithm the may be forgotten or added to
+  ! the diagonal (MILU)
+
+  if (allocated(prec%av)) then
+    if (size(prec%av) < psb_bp_ilu_avsz) then
+      do i=1,size(prec%av)
+        call prec%av(i)%free()
+      enddo
+      deallocate(prec%av,stat=info)
+    endif
+  end if
+  if (.not.allocated(prec%av)) then
+    allocate(prec%av(psb_max_avsz),stat=info)
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_alloc_dealloc_,name)
+      goto 9999
+    end if
+  endif
+
+  nrow_a = desc_a%get_local_rows()
+  nztota = a%get_nzeros()
+
+  n_col  = desc_a%get_local_cols()
+  nhalo  = n_col-nrow_a
+  n_row  = nrow_a
+
+  allocate(lf,uf,stat=info)
+  if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+  if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+  if(info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    ch_err='psb_sp_all'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+  allocate(dd(n_row),stat=info)
+  if (info == psb_success_) then
+    allocate(prec%dv, stat=info)
+    if (info == 0) then
+      if (present(vmold)) then
+        allocate(prec%dv%v,mold=vmold,stat=info)
+      else
+        allocate(psb_d_base_vect_type :: prec%dv%v,stat=info)
+      end if
+    end if
+  end if
+
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  endif
+  ! This is where we have no renumbering, thus no need
+  call psb_iluk_fact(fill_in,ialg,a,lf,uf,dd,info)
+
+  if(info == psb_success_) then
+    call prec%av(psb_l_pr_)%mv_from(lf%a)
+    call prec%av(psb_u_pr_)%mv_from(uf%a)
+    call prec%av(psb_l_pr_)%set_asb()
+    call prec%av(psb_u_pr_)%set_asb()
+    call prec%av(psb_l_pr_)%trim()
+    call prec%av(psb_u_pr_)%trim()
+    call prec%dv%bld(dd)
+    ! call move_alloc(dd,prec%d)
+  else
+    info=psb_err_from_subroutine_
+    ch_err='psb_iluk_fact'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+  case(psb_f_ilu_t_)
+  ! ILU(N,E) Incomplete LU factorization with thresholding and level of fill
+
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_d_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! This is where we have no renumbering, thus no need
+    call psb_ilut_fact(fill_in,fact_eps,a,lf,uf,dd,info,iscale=iscale)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_ainv_)
+    ! Approximate Inverse Factorizations based on variants of the incomplete
+    ! biconjugation algorithms
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_d_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! Computing the factorization
+    call psb_ainv_fact(a,iinvalg,inv_fill,inv_thresh,lf,dd,uf,desc_a,info,iscale=iscale)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_invk_)
+    ! Approximate Inverse Factorizations based on the sparse inversion of
+    ! triangular factors of an ILU(n) factorization
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_d_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! Computing the factorization
+    call psb_invk_fact(a,fill_in, inv_fill,lf,dd,uf,desc_a,info)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_invt_)
+    ! Approximate Inverse Factorizations based on the sparse inversion of
+    ! triangular factors of an ILU(eps) factorization
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_d_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! Computing the factorization
+    call psb_invt_fact(a,fill_in,inv_fill,fact_eps,inv_thresh,lf,dd,uf,&
+      & desc_a,info)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_none_)
     info=psb_err_from_subroutine_
     ch_err='Inconsistent prec  psb_f_none_'
     call psb_errpush(info,name,a_err=ch_err)
@@ -548,7 +1082,7 @@ subroutine psb_d_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
     goto 9999
   end select
 
-  if (present(amold)) then 
+  if (present(amold)) then
     call prec%av(psb_l_pr_)%cscnv(info,mold=amold)
     call prec%av(psb_u_pr_)%cscnv(info,mold=amold)
   end if
@@ -567,8 +1101,8 @@ subroutine psb_d_bjac_precseti(prec,what,val,info)
   Implicit None
 
   class(psb_d_bjac_prec_type),intent(inout) :: prec
-  integer(psb_ipk_), intent(in)                      :: what 
-  integer(psb_ipk_), intent(in)                      :: val 
+  integer(psb_ipk_), intent(in)                      :: what
+  integer(psb_ipk_), intent(in)                      :: val
   integer(psb_ipk_), intent(out)                     :: info
   integer(psb_ipk_) :: err_act, nrow
   character(len=20)  :: name='d_bjac_precset'
@@ -576,34 +1110,33 @@ subroutine psb_d_bjac_precseti(prec,what,val,info)
   call psb_erractionsave(err_act)
 
   info = psb_success_
-  if (.not.allocated(prec%iprcparm)) then 
+  if (.not.allocated(prec%iprcparm)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
   end if
 
   select case(what)
-  case (psb_f_type_) 
-    if (prec%iprcparm(psb_p_type_) /= psb_bjac_) then 
-      write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',&
-           & prec%iprcparm(psb_p_type_),&
-           & 'ignoring user specification'
-      return
-    endif
+  case (psb_f_type_)
     prec%iprcparm(psb_f_type_)     = val
 
-  case (psb_ilu_fill_in_) 
-    if ((prec%iprcparm(psb_p_type_) /= psb_bjac_).or.&
-         & (prec%iprcparm(psb_f_type_) /= psb_f_ilu_n_)) then 
-      write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',&
-           & prec%iprcparm(psb_p_type_),&
-           & 'ignoring user specification'
-      return
-    endif
+  case (psb_ilu_fill_in_)
     prec%iprcparm(psb_ilu_fill_in_) = val
 
+  case (psb_ilu_ialg_)
+    prec%iprcparm(psb_ilu_ialg_) = val
+
+  case (psb_ilu_scale_)
+    prec%iprcparm(psb_ilu_scale_) = val
+
+  case (psb_ainv_alg_)
+    prec%iprcparm(psb_ainv_alg_) = val
+
+  case(psb_inv_fillin_)
+    prec%iprcparm(psb_inv_fillin_) = val
+
   case default
-    write(psb_err_unit,*) 'WHAT is invalid, ignoring user specification'
+    write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what
 
   end select
 
@@ -613,3 +1146,45 @@ subroutine psb_d_bjac_precseti(prec,what,val,info)
 9999 call psb_error_handler(err_act)
   return
 end subroutine psb_d_bjac_precseti
+
+subroutine psb_d_bjac_precsetr(prec,what,val,info)
+
+  use psb_base_mod
+  use psb_d_bjacprec, psb_protect_name => psb_d_bjac_precsetr
+  Implicit None
+
+  class(psb_d_bjac_prec_type),intent(inout) :: prec
+  integer(psb_ipk_), intent(in)                      :: what
+  real(psb_dpk_), intent(in)                       :: val
+  integer(psb_ipk_), intent(out)                     :: info
+  integer(psb_ipk_) :: err_act, nrow
+  character(len=20)  :: name='d_bjac_precset'
+
+  call psb_erractionsave(err_act)
+
+  info = psb_success_
+  if (.not.allocated(prec%rprcparm)) then
+    info = 1124
+    call psb_errpush(info,name,a_err="preconditioner")
+    goto 9999
+  end if
+
+  select case(what)
+
+  case (psb_fact_eps_)
+    prec%rprcparm(psb_fact_eps_) = val
+
+  case (psb_inv_thresh_)
+    prec%rprcparm(psb_inv_thresh_) = val
+
+  case default
+    write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what
+
+  end select
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_d_bjac_precsetr
diff --git a/prec/impl/psb_d_ilu0_fact.f90 b/prec/impl/psb_d_ilu0_fact.f90
index 4ec9ffb1..478eedfa 100644
--- a/prec/impl/psb_d_ilu0_fact.f90
+++ b/prec/impl/psb_d_ilu0_fact.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,21 +27,21 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
+!
 !    Moved here from MLD2P4, original copyright below.
-!  
-!  
-!   
+!
+!
+!
 !                             MLD2P4  version 2.2
 !    MultiLevel Domain Decomposition Parallel Preconditioners Package
 !               based on PSBLAS (Parallel Sparse BLAS version 3.5)
-!    
-!    (C) Copyright 2008-2018 
-!  
-!        Salvatore Filippone  
-!        Pasqua D'Ambra   
-!        Daniela di Serafino   
-!   
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -53,7 +53,7 @@
 !      3. The name of the MLD2P4 group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -65,8 +65,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!  
+!
+!
 ! File: psb_dilu0_fact.f90
 !
 ! Subroutine: psb_dilu0_fact
@@ -75,7 +75,7 @@
 !
 !  This routine computes either the ILU(0) or the MILU(0) factorization of
 !  the diagonal blocks of a distributed matrix. These factorizations are used
-!  to build the 'base preconditioner' (block-Jacobi preconditioner/solver, 
+!  to build the 'base preconditioner' (block-Jacobi preconditioner/solver,
 !  Additive Schwarz preconditioner) corresponding to a given level of a
 !  multilevel preconditioner.
 !
@@ -83,10 +83,10 @@
 !    Y. Saad, Iterative Methods for Sparse Linear Systems, Second Edition,
 !    SIAM, 2003, Chapter 10.
 !
-!  The local matrix is stored into a and blck, as specified in the description 
-!  of the arguments below. The storage format for both the L and U factors is CSR. 
-!  The diagonal of the U factor is stored separately (actually, the inverse of the 
-!  diagonal entries is stored; this is then managed in the solve stage associated 
+!  The local matrix is stored into a and blck, as specified in the description
+!  of the arguments below. The storage format for both the L and U factors is CSR.
+!  The diagonal of the U factor is stored separately (actually, the inverse of the
+!  diagonal entries is stored; this is then managed in the solve stage associated
 !  to the ILU(0)/MILU(0) factorization).
 !
 !  The routine copies and factors "on the fly" from a and blck into l (L factor),
@@ -94,7 +94,7 @@
 !
 !  This implementation of ILU(0)/MILU(0) is faster than the implementation in
 !  psb_ziluk_fct (the latter routine performs the more general ILU(k)/MILU(k)).
-!  
+!
 !
 ! Arguments:
 !    ialg    -  integer, input.
@@ -121,7 +121,7 @@
 !               factorization.
 !               Note: its allocation is managed by the calling routine psb_ilu_bld,
 !               hence it cannot be only intent(out).
-!    info    -  integer, output.                         
+!    info    -  integer, output.
 !               Error code.
 !    blck    -  type(psb_dspmat_type), input, optional, target.
 !               The sparse matrix structure containing the remote rows of the
@@ -129,7 +129,7 @@
 !               to build an Additive Schwarz base preconditioner with overlap
 !               greater than 0. If the overlap is 0 or the matrix has been reordered
 !               (see psb_fact_bld), then blck is empty.
-!  
+!
 subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck, upd)
 
   use psb_base_mod
@@ -157,30 +157,30 @@ subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck, upd)
   info = psb_success_
   call psb_erractionsave(err_act)
 
-  ! 
+  !
   ! Point to / allocate memory for the incomplete factorization
   !
-  if (present(blck)) then 
+  if (present(blck)) then
     blck_ => blck
   else
-    allocate(blck_,stat=info) 
-    if (info == psb_success_) call blck_%csall(izero,izero,info,ione) 
+    allocate(blck_,stat=info)
+    if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR')
     if (info /= psb_success_) then
       info=psb_err_from_subroutine_
-      ch_err='csall'
+      ch_err='allocate'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
   endif
-  if (present(upd)) then 
-    upd_ = psb_toupper(upd) 
+  if (present(upd)) then
+    upd_ = psb_toupper(upd)
   else
     upd_ = 'F'
   end if
-  
+
   m = a%get_nrows() + blck_%get_nrows()
   if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.&
-       & (m > size(d))    ) then 
+       & (m > size(d))    ) then
     write(0,*) 'Wrong allocation status for L,D,U? ',&
          & l%get_nrows(),size(d),u%get_nrows()
     info = -1
@@ -212,12 +212,12 @@ subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck, upd)
   call u%set_triangle()
   call u%set_unit()
   call u%set_upper()
-  
+
   !
   ! Nullify pointer / deallocate memory
   !
-  if (present(blck)) then 
-    blck_ => null() 
+  if (present(blck)) then
+    blck_ => null()
   else
     call blck_%free()
     if(info.ne.0) then
@@ -226,7 +226,7 @@ subroutine psb_dilu0_fact(ialg,a,l,u,d,info,blck, upd)
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
-    deallocate(blck_) 
+    deallocate(blck_)
   endif
 
   call psb_erractionrestore(err_act)
@@ -243,9 +243,9 @@ contains
   ! Version:    real
   ! Note: internal subroutine of psb_dilu0_fact.
   !
-  !  This routine computes either the ILU(0) or the MILU(0) factorization of the 
-  !  diagonal blocks of a distributed matrix. 
-  !  These factorizations are used to build the 'base preconditioner' 
+  !  This routine computes either the ILU(0) or the MILU(0) factorization of the
+  !  diagonal blocks of a distributed matrix.
+  !  These factorizations are used to build the 'base preconditioner'
   !  (block-Jacobi preconditioner/solver, Additive Schwarz
   !  preconditioner) corresponding to a given level of a multilevel preconditioner.
   !
@@ -257,7 +257,7 @@ contains
   !
   !  The routine copies and factors "on the fly" from the sparse matrix structures a
   !  and b into the arrays lval, uval, d (L, U without its diagonal, diagonal of U).
-  !  
+  !
   !
   ! Arguments:
   !    ialg    -  integer, input.
@@ -296,7 +296,7 @@ contains
   !               according to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor in lval, according to the CSR storage format. 
+  !               of the L factor in lval, according to the CSR storage format.
   !    uval   -   real(psb_dpk_), dimension(:), input/output.
   !               The U factor in the incomplete factorization.
   !               The entries of U are stored according to the CSR format.
@@ -305,18 +305,18 @@ contains
   !               according to the CSR storage format.
   !    uirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the U factor in uval, according to the CSR storage format. 
+  !               of the U factor in uval, according to the CSR storage format.
   !    l1      -  integer, output.
   !               The number of nonzero entries in lval.
   !    l2      -  integer, output.
   !               The number of nonzero entries in uval.
-  !    info    -  integer, output.           
+  !    info    -  integer, output.
   !               Error code.
   !
   subroutine psb_dilu0_factint(ialg,a,b,&
        & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,info)
 
-    implicit none 
+    implicit none
 
     ! Arguments
     integer(psb_ipk_), intent(in)     :: ialg
@@ -332,7 +332,7 @@ contains
     real(psb_dpk_) :: dia,temp
     integer(psb_ipk_), parameter :: nrb=16
     type(psb_d_coo_sparse_mat) :: trw
-    integer(psb_ipk_)   :: int_err(5) 
+    integer(psb_ipk_)   :: int_err(5)
     character(len=20)   :: name, ch_err
 
     name='psb_dilu0_factint'
@@ -346,10 +346,10 @@ contains
 
     select case(ialg)
     case(psb_ilu_n_,psb_milu_n_)
-      ! Ok 
+      ! Ok
     case default
       info=psb_err_input_asize_invalid_i_
-      call psb_errpush(info,name,& 
+      call psb_errpush(info,name,&
            & i_err=(/ione,ialg,izero,izero,izero/))
       goto 9999
     end select
@@ -363,7 +363,7 @@ contains
     end if
     m = ma+mb
 
-    if (psb_toupper(upd) == 'F' ) then 
+    if (psb_toupper(upd) == 'F' ) then
       lirp(1) = 1
       uirp(1) = 1
       l1      = 0
@@ -379,14 +379,14 @@ contains
         if (i <= ma) then
           !
           ! Copy the i-th local row of the matrix, stored in a,
-          ! into lval/d(i)/uval 
+          ! into lval/d(i)/uval
           !
           call ilu_copyin(i,ma,a,i,ione,m,l1,lja,lval,&
                & d(i),l2,uja,uval,ktrw,trw,upd)
         else
           !
           ! Copy the i-th local row of the matrix, stored in b
-          ! (as (i-ma)-th row), into lval/d(i)/uval 
+          ! (as (i-ma)-th row), into lval/d(i)/uval
           !
           call ilu_copyin(i-ma,mb,b,i,ione,m,l1,lja,lval,&
                & d(i),l2,uja,uval,ktrw,trw,upd)
@@ -437,7 +437,7 @@ contains
               !
               dia = dia - temp*uval(jj)
               cycle updateloop
-              !     
+              !
             else if (j > i) then
               !
               ! search u(i,*) (i-th row of U) for a matching index j
@@ -454,23 +454,23 @@ contains
                 end if
               enddo
             end if
-            !     
-            ! If we get here we missed the cycle updateloop, which means 
+            !
+            ! If we get here we missed the cycle updateloop, which means
             ! that this entry does not match; thus we accumulate on the
             ! diagonal for MILU(0).
             !
-            if (ialg == psb_milu_n_) then 
+            if (ialg == psb_milu_n_) then
               dia = dia - temp*uval(jj)
             end if
           enddo updateloop
         enddo
-        !     
+        !
         ! Check the pivot size
-        !     
+        !
         if (abs(dia) < d_epstol) then
           !
           ! Too small pivot: unstable factorization
-          !     
+          !
           info = psb_err_pivot_too_small_
           int_err(1) = i
           write(ch_err,'(g20.10)') abs(dia)
@@ -493,7 +493,7 @@ contains
     else
       write(0,*) 'Update not implemented '
       info = 31
-      call psb_errpush(info,name,& 
+      call psb_errpush(info,name,&
            & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd)
       goto 9999
 
@@ -515,7 +515,7 @@ contains
   ! Version:    real
   ! Note: internal subroutine of psb_dilu0_fact
   !
-  !  This routine copies a row of a sparse matrix A, stored in the psb_dspmat_type 
+  !  This routine copies a row of a sparse matrix A, stored in the psb_dspmat_type
   !  data structure a, into the arrays lval and uval and into the scalar variable
   !  dia, corresponding to the lower and upper triangles of A and to the diagonal
   !  entry of the row, respectively. The entries in lval and uval are stored
@@ -529,14 +529,14 @@ contains
   !
   !  The routine is used by psb_dilu0_factint in the computation of the ILU(0)/MILU(0)
   !  factorization of a local sparse matrix.
-  !  
+  !
   !  TODO: modify the routine to allow copying into output L and U that are
   !  already filled with indices; this would allow computing an ILU(k) pattern,
-  !  then use the ILU(0) internal for subsequent calls with the same pattern. 
+  !  then use the ILU(0) internal for subsequent calls with the same pattern.
   !
   ! Arguments:
   !    i       -  integer, input.
-  !               The local index of the row to be extracted from  the 
+  !               The local index of the row to be extracted from  the
   !               sparse matrix structure a.
   !    m       -  integer, input.
   !               The number of rows of the local matrix stored into a.
@@ -570,13 +570,13 @@ contains
   !               to the CSR storage format.
   !    uval   -  real(psb_dpk_), dimension(:), input/output.
   !               The array where the entries of the row corresponding to the
-  !               upper triangle are copied. 
+  !               upper triangle are copied.
   !    ktrw    -  integer, input/output.
   !               The index identifying the last entry taken from the
   !               staging buffer trw. See below.
   !    trw     -  type(psb_dspmat_type), input/output.
   !               A staging buffer. If the matrix A is not in CSR format, we use
-  !               the psb_sp_getblk routine and store its output in trw; when we 
+  !               the psb_sp_getblk routine and store its output in trw; when we
   !               need to call psb_sp_getblk we do it for a block of rows, and then
   !               we consume them from trw in successive calls to this routine,
   !               until we empty the buffer. Thus we will make a call to psb_sp_getblk
@@ -608,10 +608,10 @@ contains
     if (psb_errstatus_fatal()) then
       info = psb_err_internal_error_; goto 9999
     end if
-    if (psb_toupper(upd) == 'F') then 
+    if (psb_toupper(upd) == 'F') then
 
-      select type(aa => a%a) 
-      type is (psb_d_csr_sparse_mat) 
+      select type(aa => a%a)
+      type is (psb_d_csr_sparse_mat)
 
         !
         ! Take a fast shortcut if the matrix is stored in CSR format
@@ -633,19 +633,19 @@ contains
           end if
         enddo
 
-      class default 
+      class default
 
         !
-        ! Otherwise use psb_sp_getblk, slower but able (in principle) of 
+        ! Otherwise use psb_sp_getblk, slower but able (in principle) of
         ! handling any format. In this case, a block of rows is extracted
         ! instead of a single row, for performance reasons, and these
         ! rows are copied one by one into lval, dia, uval, through
         ! successive calls to ilu_copyin.
         !
 
-        if ((mod(i,nrb) == 1).or.(nrb == 1)) then 
+        if ((mod(i,nrb) == 1).or.(nrb == 1)) then
           irb = min(m-i+1,nrb)
-          call aa%csget(i,i+irb-1,trw,info) 
+          call aa%csget(i,i+irb-1,trw,info)
           if(info /= psb_success_) then
             info=psb_err_from_subroutine_
             ch_err='csget'
@@ -656,7 +656,7 @@ contains
         end if
 
         nz = trw%get_nzeros()
-        do 
+        do
           if (ktrw > nz) exit
           if (trw%ia(ktrw) > i) exit
           k = trw%ja(ktrw)
@@ -680,7 +680,7 @@ contains
 
       write(0,*) 'Update not implemented '
       info = 31
-      call psb_errpush(info,name,& 
+      call psb_errpush(info,name,&
            & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd)
       goto 9999
 
diff --git a/prec/impl/psb_d_iluk_fact.f90 b/prec/impl/psb_d_iluk_fact.f90
index 6d644e42..544ec987 100644
--- a/prec/impl/psb_d_iluk_fact.f90
+++ b/prec/impl/psb_d_iluk_fact.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,21 +27,21 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
+!
 !    Moved here from MLD2P4, original copyright below.
-!  
-!  
-!   
+!
+!
+!
 !                             MLD2P4  version 2.2
 !    MultiLevel Domain Decomposition Parallel Preconditioners Package
 !               based on PSBLAS (Parallel Sparse BLAS version 3.5)
-!    
-!    (C) Copyright 2008-2018 
-!  
-!        Salvatore Filippone  
-!        Pasqua D'Ambra   
-!        Daniela di Serafino   
-!   
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -53,7 +53,7 @@
 !      3. The name of the MLD2P4 group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -65,8 +65,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!  
+!
+!
 ! File: psb_diluk_fact.f90
 !
 ! Subroutine: psb_diluk_fact
@@ -74,7 +74,7 @@
 ! Contains:   psb_diluk_factint, iluk_copyin, iluk_fact, iluk_copyout.
 !
 !  This routine computes either the ILU(k) or the MILU(k) factorization of the
-!  diagonal blocks of a distributed matrix. These factorizations are used to 
+!  diagonal blocks of a distributed matrix. These factorizations are used to
 !  build the 'base preconditioner' (block-Jacobi preconditioner/solver,
 !  Additive Schwarz preconditioner) corresponding to a certain level of a
 !  multilevel preconditioner.
@@ -88,7 +88,7 @@
 !  U factors is CSR. The diagonal of the U factor is stored separately (actually,
 !  the inverse of the diagonal entries is stored; this is then managed in the solve
 !  stage associated to the ILU(k)/MILU(k) factorization).
-!  
+!
 !
 ! Arguments:
 !    fill_in -  integer, input.
@@ -118,7 +118,7 @@
 !               factorization.
 !               Note: its allocation is managed by the calling routine psb_ilu_bld,
 !               hence it cannot be only intent(out).
-!    info    -  integer, output.                    
+!    info    -  integer, output.
 !               Error code.
 !    blck    -  type(psb_dspmat_type), input, optional, target.
 !               The sparse matrix structure containing the remote rows of the
@@ -126,7 +126,7 @@
 !               to build an Additive Schwarz base preconditioner with overlap
 !               greater than 0. If the overlap is 0 or the matrix has been reordered
 !               (see psb_fact_bld), then blck does not contain any row.
-!  
+!
 subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck)
 
   use psb_base_mod
@@ -143,7 +143,7 @@ subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck)
   real(psb_dpk_), intent(inout)    ::  d(:)
   !     Local Variables
   integer(psb_ipk_)   :: l1, l2, m, err_act
-  
+
   type(psb_dspmat_type), pointer  :: blck_
   type(psb_d_csr_sparse_mat)      :: ll, uu
   character(len=20)   :: name, ch_err
@@ -152,17 +152,17 @@ subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck)
   info = psb_success_
   call psb_erractionsave(err_act)
 
-  ! 
+  !
   ! Point to / allocate memory for the incomplete factorization
   !
-  if (present(blck)) then 
+  if (present(blck)) then
     blck_ => blck
   else
-    allocate(blck_,stat=info) 
-    if (info == psb_success_) call blck_%csall(izero,izero,info,ione) 
+    allocate(blck_,stat=info)
+    if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') 
     if (info /= psb_success_) then
       info=psb_err_from_subroutine_
-      ch_err='csall'
+      ch_err='allocate'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
@@ -170,7 +170,7 @@ subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck)
 
   m = a%get_nrows() + blck_%get_nrows()
   if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.&
-       & (m > size(d))    ) then 
+       & (m > size(d))    ) then
     write(0,*) 'Wrong allocation status for L,D,U? ',&
          & l%get_nrows(),size(d),u%get_nrows()
     info = -1
@@ -203,15 +203,15 @@ subroutine psb_diluk_fact(fill_in,ialg,a,l,u,d,info,blck)
   call u%set_triangle()
   call u%set_unit()
   call u%set_upper()
-  
+
   !
   ! Nullify pointer / deallocate memory
   !
-  if (present(blck)) then 
-    blck_ => null() 
+  if (present(blck)) then
+    blck_ => null()
   else
     call blck_%free()
-    deallocate(blck_,stat=info) 
+    deallocate(blck_,stat=info)
     if(info.ne.0) then
       info=psb_err_from_subroutine_
       ch_err='psb_sp_free'
@@ -280,7 +280,7 @@ contains
   !               according to the CSR storage format.
   !    lia2    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor in laspk, according to the CSR storage format. 
+  !               of the L factor in laspk, according to the CSR storage format.
   !    uval   -   real(psb_dpk_), dimension(:), input/output.
   !               The U factor in the incomplete factorization.
   !               The entries of U are stored according to the CSR format.
@@ -289,12 +289,12 @@ contains
   !               according to the CSR storage format.
   !    uirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the U factor in uval, according to the CSR storage format. 
+  !               of the U factor in uval, according to the CSR storage format.
   !    l1      -  integer, output
   !               The number of nonzero entries in laspk.
   !    l2      -  integer, output
   !               The number of nonzero entries in uval.
-  !    info    -  integer, output.           
+  !    info    -  integer, output.
   !               Error code.
   !
   subroutine psb_diluk_factint(fill_in,ialg,a,b,&
@@ -304,7 +304,7 @@ contains
 
     implicit none
 
-  ! Arguments 
+  ! Arguments
     integer(psb_ipk_), intent(in)                 :: fill_in, ialg
     type(psb_dspmat_type),intent(in)              :: a,b
     integer(psb_ipk_),intent(inout)               :: l1,l2,info
@@ -330,14 +330,14 @@ contains
 
     select case(ialg)
     case(psb_ilu_n_,psb_milu_n_)
-      ! Ok 
+      ! Ok
     case default
       info=psb_err_input_asize_invalid_i_
       call psb_errpush(info,name,&
            & i_err=(/itwo,ialg,izero,izero,izero/))
       goto 9999
     end select
-    if (fill_in < 0) then 
+    if (fill_in < 0) then
       info=psb_err_input_asize_invalid_i_
       call psb_errpush(info,name, &
            & i_err=(/ione,fill_in,izero,izero,izero/))
@@ -349,7 +349,7 @@ contains
     m  = ma+mb
 
     !
-    ! Allocate a temporary buffer for the iluk_copyin function 
+    ! Allocate a temporary buffer for the iluk_copyin function
     !
 
     call trw%allocate(izero,izero,ione)
@@ -361,7 +361,7 @@ contains
       call psb_errpush(info,name,a_err='psb_sp_all')
       goto 9999
     end if
-    
+
     l1=0
     l2=0
     lirp(1) = 1
@@ -381,34 +381,34 @@ contains
     uplevs(:)  = m+1
     row(:)     = dzero
     rowlevs(:) = -(m+1)
-    
+
     !
     ! Cycle over the matrix rows
     !
     do i = 1, m
-      
+
       !
       ! At each iteration of the loop we keep in a heap the column indices
       ! affected by the factorization. The heap is initialized and filled
       ! in the iluk_copyin routine, and updated during the elimination, in
       ! the iluk_fact routine. The heap is ideal because at each step we need
       ! the lowest index, but we also need to insert new items, and the heap
-      ! allows to do both in log time. 
+      ! allows to do both in log time.
       !
       d(i) = dzero
       if (i<=ma) then
         !
-        ! Copy into trw the i-th local row of the matrix, stored in a 
-        ! 
+        ! Copy into trw the i-th local row of the matrix, stored in a
+        !
         call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info)
       else
         !
         ! Copy into trw the i-th local row of the matrix, stored in b
-        ! (as (i-ma)-th row) 
-        ! 
+        ! (as (i-ma)-th row)
+        !
         call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info)
       endif
-      
+
       ! Do an elimination step on the current row. It turns out we only
       ! need to keep track of fill levels for the upper triangle, hence we
       ! do not have a lowlevs variable.
@@ -417,7 +417,7 @@ contains
            & d,uja,uirp,uval,uplevs,nidx,idxs,info)
       !
       ! Copy the row into lval/d(i)/uval
-      ! 
+      !
       if (info == psb_success_) call iluk_copyout(fill_in,ialg,i,m,row,rowlevs,nidx,idxs,&
            & l1,l2,lja,lirp,lval,d,uja,uirp,uval,uplevs,info)
       if (info /= psb_success_) then
@@ -474,11 +474,11 @@ contains
   !
   !  This routine is used by psb_diluk_factint in the computation of the
   !  ILU(k)/MILU(k) factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    i       -  integer, input.
-  !               The local index of the row to be extracted from the 
+  !               The local index of the row to be extracted from the
   !               sparse matrix structure a.
   !    m       -  integer, input.
   !               The number of rows of the local matrix stored into a.
@@ -510,7 +510,7 @@ contains
   !               staging buffer trw. See below.
   !    trw     -  type(psb_dspmat_type), input/output.
   !               A staging buffer. If the matrix A is not in CSR format, we use
-  !               the psb_sp_getblk routine and store its output in trw; when we 
+  !               the psb_sp_getblk routine and store its output in trw; when we
   !               need to call psb_sp_getblk we do it for a block of rows, and then
   !               we consume them from trw in successive calls to this routine,
   !               until we empty the buffer. Thus we will make a call to psb_sp_getblk
@@ -521,8 +521,8 @@ contains
     use psb_base_mod
 
     implicit none
-  
-  ! Arguments 
+
+  ! Arguments
     type(psb_dspmat_type), intent(in)         :: a
     type(psb_d_coo_sparse_mat), intent(inout) :: trw
     integer(psb_ipk_), intent(in)           :: i,m,jmin,jmax
@@ -542,17 +542,17 @@ contains
     if (psb_errstatus_fatal()) then
       info = psb_err_internal_error_; goto 9999
     end if
-    call heap%init(info) 
+    call heap%init(info)
 
-    select type (aa=> a%a) 
-    type is (psb_d_csr_sparse_mat) 
+    select type (aa=> a%a)
+    type is (psb_d_csr_sparse_mat)
       !
       ! Take a fast shortcut if the matrix is stored in CSR format
       !
-      
+
       do j = aa%irp(i), aa%irp(i+1) - 1
         k          = aa%ja(j)
-        if ((jmin<=k).and.(k<=jmax)) then 
+        if ((jmin<=k).and.(k<=jmax)) then
           row(k)     = aa%val(j)
           rowlevs(k) = 0
           call heap%insert(k,info)
@@ -562,14 +562,14 @@ contains
     class default
 
       !
-      ! Otherwise use psb_sp_getblk, slower but able (in principle) of 
+      ! Otherwise use psb_sp_getblk, slower but able (in principle) of
       ! handling any format. In this case, a block of rows is extracted
       ! instead of a single row, for performance reasons, and these
       ! rows are copied one by one into the array row, through successive
       ! calls to iluk_copyin.
       !
 
-      if ((mod(i,nrb) == 1).or.(nrb == 1)) then 
+      if ((mod(i,nrb) == 1).or.(nrb == 1)) then
         irb = min(m-i+1,nrb)
         call aa%csget(i,i+irb-1,trw,info)
         if (info /= psb_success_) then
@@ -581,11 +581,11 @@ contains
         ktrw=1
       end if
       nz = trw%get_nzeros()
-      do 
+      do
         if (ktrw > nz) exit
         if (trw%ia(ktrw) > i) exit
         k          = trw%ja(ktrw)
-        if ((jmin<=k).and.(k<=jmax)) then 
+        if ((jmin<=k).and.(k<=jmax)) then
           row(k)     = trw%val(ktrw)
           rowlevs(k) = 0
           call heap%insert(k,info)
@@ -674,10 +674,10 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
   ! Arguments
-    type(psb_i_heap), intent(inout)               :: heap 
+    type(psb_i_heap), intent(inout)               :: heap
     integer(psb_ipk_), intent(in)                 :: i, fill_in
     integer(psb_ipk_), intent(inout)              :: nidx,info
     integer(psb_ipk_), intent(inout)              :: rowlevs(:)
@@ -690,7 +690,7 @@ contains
     real(psb_dpk_)   :: rwk
 
     info = psb_success_
-    if (.not.allocated(idxs)) then 
+    if (.not.allocated(idxs)) then
       allocate(idxs(200),stat=info)
       if (info /= psb_success_) return
     endif
@@ -702,34 +702,34 @@ contains
     !
     do
       ! Beware: (iret < 0) means that the heap is empty, not an error.
-      call heap%get_first(k,iret) 
+      call heap%get_first(k,iret)
       if (iret < 0) return
 
-      ! 
+      !
       ! Just in case an index has been put on the heap more than once.
       !
       if (k == lastk) cycle
 
-      lastk = k 
+      lastk = k
       nidx = nidx + 1
-      if (nidx>size(idxs)) then 
+      if (nidx>size(idxs)) then
         call psb_realloc(nidx+psb_heap_resize,idxs,info)
         if (info /= psb_success_) return
       end if
       idxs(nidx) = k
-      
-      if ((row(k) /= dzero).and.(rowlevs(k) <= fill_in).and.(k<i)) then 
+
+      if ((row(k) /= dzero).and.(rowlevs(k) <= fill_in).and.(k<i)) then
         !
         ! Note: since U is scaled while copying it out (see iluk_copyout),
         ! we can use rwk in the update below
-        ! 
+        !
         rwk    = row(k)
-        row(k) = row(k) * d(k)    ! d(k) == 1/a(k,k)          
+        row(k) = row(k) * d(k)    ! d(k) == 1/a(k,k)
         lrwk   = rowlevs(k)
-          
+
         do jj=uirp(k),uirp(k+1)-1
           j = uja(jj)
-          if (j<=k) then 
+          if (j<=k) then
             info = -i
             return
           endif
@@ -738,9 +738,9 @@ contains
           ! The fill levels are initialized to a negative value. If we find
           ! one, it means that it is an as yet untouched index, so we need
           ! to insert it; otherwise it is already on the heap, there is no
-          ! need to insert it more than once. 
+          ! need to insert it more than once.
           !
-          if (rowlevs(j)<0) then 
+          if (rowlevs(j)<0) then
             call heap%insert(j,info)
             if (info /= psb_success_) return
             rowlevs(j) = abs(rowlevs(j))
@@ -781,7 +781,7 @@ contains
   !
   !  This routine is used by psb_diluk_factint in the computation of the
   !  ILU(k)/MILU(k) factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    fill_in -  integer, input.
@@ -822,14 +822,14 @@ contains
   !               to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor, copied in lval row by row (see 
+  !               of the L factor, copied in lval row by row (see
   !               psb_diluk_factint), according to the CSR storage format.
   !    lval   -  real(psb_dpk_), dimension(:), input/output.
   !               The array where the entries of the row corresponding to the
   !               L factor are copied.
   !    d       -  real(psb_dpk_), dimension(:), input/output.
   !               The array where the inverse of the diagonal entry of the
-  !               row is copied (only d(i) is used by the routine). 
+  !               row is copied (only d(i) is used by the routine).
   !    uja    -  integer, dimension(:), input/output.
   !               The column indices of the nonzero entries of the U factor
   !               copied in uval row by row (see psb_diluk_factint), according
@@ -850,7 +850,7 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
     ! Arguments
     integer(psb_ipk_), intent(in)                  :: fill_in, ialg, i, m, nidx
@@ -880,17 +880,17 @@ contains
       if (j<i) then
         !
         ! Copy the lower part of the row
-        !  
-        if (rowlevs(j) <= fill_in) then 
-          l1     = l1 + 1 
+        !
+        if (rowlevs(j) <= fill_in) then
+          l1     = l1 + 1
           if (size(lval) < l1) then
-            ! 
+            !
             ! Figure out a good reallocation size!
-            ! 
+            !
             isz  = (max((l1/i)*m,int(1.2*l1),l1+100))
-            call psb_realloc(isz,lval,info) 
-            if (info == psb_success_) call psb_realloc(isz,lja,info) 
-            if (info /= psb_success_) then 
+            call psb_realloc(isz,lval,info)
+            if (info == psb_success_) call psb_realloc(isz,lja,info)
+            if (info /= psb_success_) then
               info=psb_err_from_subroutine_
               call psb_errpush(info,name,a_err='Allocate')
               goto 9999
@@ -914,26 +914,26 @@ contains
         !
         ! Copy the diagonal entry of the row and re-initialize
         ! row(j) and rowlevs(j)
-        !  
-        d(i)       = d(i) + row(i) 
+        !
+        d(i)       = d(i) + row(i)
         row(i)     = dzero
         rowlevs(i) = -(m+1)
 
-      else if (j>i) then 
+      else if (j>i) then
         !
         ! Copy the upper part of the row
-        ! 
-        if (rowlevs(j) <= fill_in) then 
-          l2     = l2 + 1 
-          if (size(uval) < l2) then 
-            ! 
+        !
+        if (rowlevs(j) <= fill_in) then
+          l2     = l2 + 1
+          if (size(uval) < l2) then
+            !
             ! Figure out a good reallocation size!
             !
             isz  = max((l2/i)*m,int(1.2*l2),l2+100)
-            call psb_realloc(isz,uval,info) 
-            if (info == psb_success_) call psb_realloc(isz,uja,info) 
+            call psb_realloc(isz,uval,info)
+            if (info == psb_success_) call psb_realloc(isz,uja,info)
             if (info == psb_success_) call psb_realloc(isz,uplevs,info,pad=(m+1))
-            if (info /= psb_success_) then 
+            if (info /= psb_success_) then
               info=psb_err_from_subroutine_
               call psb_errpush(info,name,a_err='Allocate')
               goto 9999
@@ -941,11 +941,11 @@ contains
           end if
           uja(l2)   = j
           uval(l2)  = row(j)
-          uplevs(l2) = rowlevs(j) 
+          uplevs(l2) = rowlevs(j)
         else if (ialg == psb_milu_n_) then
           !
           ! MILU(k): add discarded entries to the diagonal one
-          ! 
+          !
           d(i) = d(i) + row(j)
         end if
         !
@@ -963,13 +963,13 @@ contains
     lirp(i+1) = l1 + 1
     uirp(i+1) = l2 + 1
 
-    !     
+    !
     ! Check the pivot size
     !
     if (abs(d(i)) < d_epstol) then
       !
       ! Too small pivot: unstable factorization
-      !     
+      !
       info = psb_err_pivot_too_small_
       int_err(1) = i
       write(ch_err,'(g20.10)') d(i)
diff --git a/prec/impl/psb_d_ilut_fact.f90 b/prec/impl/psb_d_ilut_fact.f90
index bcd26396..6c2dc698 100644
--- a/prec/impl/psb_d_ilut_fact.f90
+++ b/prec/impl/psb_d_ilut_fact.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,21 +27,21 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
+!
 !    Moved here from MLD2P4, original copyright below.
-!  
-!  
-!   
+!
+!
+!
 !                             MLD2P4  version 2.2
 !    MultiLevel Domain Decomposition Parallel Preconditioners Package
 !               based on PSBLAS (Parallel Sparse BLAS version 3.5)
-!    
-!    (C) Copyright 2008-2018 
-!  
-!        Salvatore Filippone  
-!        Pasqua D'Ambra   
-!        Daniela di Serafino   
-!   
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -53,7 +53,7 @@
 !      3. The name of the MLD2P4 group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -65,8 +65,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!  
+!
+!
 ! File: psb_dilut_fact.f90
 !
 ! Subroutine: psb_dilut_fact
@@ -87,7 +87,7 @@
 !  CSR. The diagonal of the U factor is stored separately (actually, the
 !  inverse of the diagonal entries is stored; this is then managed in the
 !  solve stage associated to the ILU(k,t) factorization).
-!  
+!
 !
 ! Arguments:
 !    fill_in -  integer, input.
@@ -114,7 +114,7 @@
 !               factorization.
 !               Note: its allocation is managed by the calling routine psb_ilu_bld,
 !               hence it cannot be only intent(out).
-!    info    -  integer, output.                    
+!    info    -  integer, output.
 !               Error code.
 !    blck    -  type(psb_dspmat_type), input, optional, target.
 !               The sparse matrix structure containing the remote rows of the
@@ -122,9 +122,9 @@
 !               to build an Additive Schwarz base preconditioner with overlap
 !               greater than 0. If the overlap is 0 or the matrix has been reordered
 !               (see psb_fact_bld), then blck does not contain any row.
-!  
+!
 subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
-  
+
   use psb_base_mod
   use psb_d_ilu_fact_mod, psb_protect_name => psb_dilut_fact
 
@@ -141,7 +141,7 @@ subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
   integer(psb_ipk_), intent(in), optional       :: iscale
   !     Local Variables
   integer(psb_ipk_)   ::  l1, l2, m, err_act, iscale_
-  
+
   type(psb_dspmat_type), pointer  :: blck_
   type(psb_d_csr_sparse_mat)       :: ll, uu
   real(psb_dpk_)      :: scale
@@ -151,34 +151,34 @@ subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
   info = psb_success_
   call psb_erractionsave(err_act)
 
-  if (fill_in < 0) then 
+  if (fill_in < 0) then
     info=psb_err_input_asize_invalid_i_
     call psb_errpush(info,name, &
          & i_err=(/ione,fill_in,izero,izero,izero/))
     goto 9999
   end if
-  ! 
+  !
   ! Point to / allocate memory for the incomplete factorization
   !
-  if (present(blck)) then 
+  if (present(blck)) then
     blck_ => blck
   else
-    allocate(blck_,stat=info) 
-    if (info == psb_success_) call blck_%csall(izero,izero,info,ione) 
+    allocate(blck_,stat=info)
+    if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR')
     if (info /= psb_success_) then
       info=psb_err_from_subroutine_
-      ch_err='csall'
+      ch_err='allocate'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
   endif
-  if (present(iscale)) then 
+  if (present(iscale)) then
     iscale_ = iscale
   else
     iscale_ = psb_ilu_scale_none_
   end if
-  
-  select case(iscale_) 
+
+  select case(iscale_)
   case(psb_ilu_scale_none_)
     scale = sone
   case(psb_ilu_scale_maxval_)
@@ -189,10 +189,10 @@ subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
     call psb_errpush(info,name,i_err=(/ione*9,iscale_,izero,izero,izero/))
     goto 9999
   end select
-  
+
   m = a%get_nrows() + blck_%get_nrows()
   if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.&
-       & (m > size(d))    ) then 
+       & (m > size(d))    ) then
     write(0,*) 'Wrong allocation status for L,D,U? ',&
          & l%get_nrows(),size(d),u%get_nrows()
     info = -1
@@ -225,15 +225,15 @@ subroutine psb_dilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
   call u%set_triangle()
   call u%set_unit()
   call u%set_upper()
-  
+
   !
   ! Nullify pointer / deallocate memory
   !
-  if (present(blck)) then 
-    blck_ => null() 
+  if (present(blck)) then
+    blck_ => null()
   else
     call blck_%free()
-    deallocate(blck_,stat=info) 
+    deallocate(blck_,stat=info)
     if(info.ne.0) then
       info=psb_err_from_subroutine_
       ch_err='psb_sp_free'
@@ -298,7 +298,7 @@ contains
   !               according to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor in lval, according to the CSR storage format. 
+  !               of the L factor in lval, according to the CSR storage format.
   !    uval   -  real(psb_dpk_), dimension(:), input/output.
   !               The U factor in the incomplete factorization.
   !               The entries of U are stored according to the CSR format.
@@ -307,12 +307,12 @@ contains
   !               according to the CSR storage format.
   !    uirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the U factor in uval, according to the CSR storage format. 
+  !               of the U factor in uval, according to the CSR storage format.
   !    l1      -  integer, output
   !               The number of nonzero entries in lval.
   !    l2      -  integer, output
   !               The number of nonzero entries in uval.
-  !    info    -  integer, output.           
+  !    info    -  integer, output.
   !               Error code.
   !
   subroutine psb_dilut_factint(fill_in,thres,a,b,&
@@ -320,7 +320,7 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
   ! Arguments
     integer(psb_ipk_), intent(in)                 :: fill_in
@@ -355,7 +355,7 @@ contains
     m  = ma+mb
 
     !
-    ! Allocate a temporary buffer for the ilut_copyin function 
+    ! Allocate a temporary buffer for the ilut_copyin function
     !
     call trw%allocate(izero,izero,ione)
     if (info == psb_success_) call psb_ensure_size(m+1,lirp,info)
@@ -366,7 +366,7 @@ contains
       call psb_errpush(info,name,a_err='psb_sp_all')
       goto 9999
     end if
-    
+
     l1=0
     l2=0
     lirp(1) = 1
@@ -396,10 +396,10 @@ contains
       ! in the ilut_copyin function, and updated during the elimination, in
       ! the ilut_fact routine. The heap is ideal because at each step we need
       ! the lowest index, but we also need to insert new items, and the heap
-      ! allows to do both in log time. 
+      ! allows to do both in log time.
       !
       d(i) = czero
-      if (i<=ma) then 
+      if (i<=ma) then
         call ilut_copyin(i,ma,a,i,ione,m,nlw,nup,jmaxup,nrmi,weight,&
              & row,heap,ktrw,trw,info)
       else
@@ -414,7 +414,7 @@ contains
            & d,uja,uirp,uval,nidx,idxs,info)
       !
       ! Copy the row into lval/d(i)/uval
-      ! 
+      !
       if (info == psb_success_) call ilut_copyout(fill_in,thres,i,m,&
            & nlw,nup,jmaxup,nrmi,row,nidx,idxs,&
            & l1,l2,lja,lirp,lval,d,uja,uirp,uval,info)
@@ -429,7 +429,7 @@ contains
     !
     ! Adjust diagonal accounting for scale factor
     !
-    if (weight /= sone) then 
+    if (weight /= sone) then
       d(1:m) = d(1:m)*weight
     end if
 
@@ -469,7 +469,7 @@ contains
   !  - storing into a heap the column indices of the nonzero entries of the copied
   !    row;
   !  - computing the column index of the first entry with maximum absolute value
-  !    in the part of the row belonging to the upper triangle;            
+  !    in the part of the row belonging to the upper triangle;
   !  - computing the 2-norm of the row.
   !  The output array row is such that it contains a full row of A, i.e. it contains
   !  also the zero entries of the row. This is useful for the elimination step
@@ -482,11 +482,11 @@ contains
   !
   !  This routine is used by psb_dilut_factint in the computation of the ILU(k,t)
   !  factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    i       -  integer, input.
-  !               The local index of the row to be extracted from the 
+  !               The local index of the row to be extracted from the
   !               sparse matrix structure a.
   !    m       -  integer, input.
   !               The number of rows of the local matrix stored into a.
@@ -533,7 +533,7 @@ contains
   !               staging buffer trw. See below.
   !    trw     -  type(psb_dspmat_type), input/output.
   !               A staging buffer. If the matrix A is not in CSR format, we use
-  !               the psb_sp_getblk routine and store its output in trw; when we 
+  !               the psb_sp_getblk routine and store its output in trw; when we
   !               need to call psb_sp_getblk we do it for a block of rows, and then
   !               we consume them from trw in successive calls to this routine,
   !               until we empty the buffer. Thus we will make a call to psb_sp_getblk
@@ -542,7 +542,7 @@ contains
   subroutine ilut_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,&
        & nrmi,weight,row,heap,ktrw,trw,info)
     use psb_base_mod
-    implicit none 
+    implicit none
     type(psb_dspmat_type), intent(in)         :: a
     type(psb_d_coo_sparse_mat), intent(inout) :: trw
     integer(psb_ipk_), intent(in)               :: i, m,jmin,jmax,jd
@@ -584,29 +584,29 @@ contains
     dmaxup = szero
     nrmi   = szero
 
-    select type (aa=> a%a) 
-    type is (psb_d_csr_sparse_mat) 
+    select type (aa=> a%a)
+    type is (psb_d_csr_sparse_mat)
       !
       ! Take a fast shortcut if the matrix is stored in CSR format
       !
 
       do j = aa%irp(i), aa%irp(i+1) - 1
         k          = aa%ja(j)
-        if ((jmin<=k).and.(k<=jmax)) then 
-          row(k)     = aa%val(j)*weight 
+        if ((jmin<=k).and.(k<=jmax)) then
+          row(k)     = aa%val(j)*weight
           call heap%insert(k,info)
           if (info /= psb_success_) exit
-          if (k<jd) nlw = nlw + 1 
-          if (k>jd) then 
+          if (k<jd) nlw = nlw + 1
+          if (k>jd) then
             nup = nup + 1
-            if (abs(row(k))>dmaxup) then 
+            if (abs(row(k))>dmaxup) then
               jmaxup = k
               dmaxup = abs(row(k))
             end if
           end if
         end if
       end do
-      if (info /= psb_success_) then 
+      if (info /= psb_success_) then
         info=psb_err_from_subroutine_
         call psb_errpush(info,name,a_err='psb_insert_heap')
         goto 9999
@@ -617,16 +617,16 @@ contains
 
 
     class default
-      
+
       !
-      ! Otherwise use psb_sp_getblk, slower but able (in principle) of 
+      ! Otherwise use psb_sp_getblk, slower but able (in principle) of
       ! handling any format. In this case, a block of rows is extracted
       ! instead of a single row, for performance reasons, and these
       ! rows are copied one by one into the array row, through successive
       ! calls to ilut_copyin.
       !
 
-      if ((mod(i,nrb) == 1).or.(nrb == 1)) then 
+      if ((mod(i,nrb) == 1).or.(nrb == 1)) then
         irb = min(m-i+1,nrb)
         call aa%csget(i,i+irb-1,trw,info)
         if (info /= psb_success_) then
@@ -639,18 +639,18 @@ contains
 
       kin = ktrw
       nz = trw%get_nzeros()
-      do 
+      do
         if (ktrw > nz) exit
         if (trw%ia(ktrw) > i) exit
         k          = trw%ja(ktrw)
-        if ((jmin<=k).and.(k<=jmax)) then 
+        if ((jmin<=k).and.(k<=jmax)) then
           row(k)     = trw%val(ktrw)*weight
           call heap%insert(k,info)
           if (info /= psb_success_) exit
-          if (k<jd) nlw = nlw + 1 
-          if (k>jd) then 
+          if (k<jd) nlw = nlw + 1
+          if (k>jd) then
             nup = nup + 1
-            if (abs(row(k))>dmaxup) then 
+            if (abs(row(k))>dmaxup) then
               jmaxup = k
               dmaxup = abs(row(k))
             end if
@@ -734,10 +734,10 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
   ! Arguments
-    type(psb_i_heap), intent(inout)               :: heap 
+    type(psb_i_heap), intent(inout)               :: heap
     integer(psb_ipk_), intent(in)                 :: i
     integer(psb_ipk_), intent(inout)              :: nidx,info
     real(psb_dpk_), intent(in)                     :: thres,nrmi
@@ -753,23 +753,23 @@ contains
     call psb_ensure_size(200*ione,idxs,info)
     if (info /= psb_success_) return
     nidx  = 0
-    lastk = -1 
+    lastk = -1
     !
     ! Do while there are indices to be processed
     !
     do
 
-      call heap%get_first(k,iret) 
+      call heap%get_first(k,iret)
       if (iret < 0) exit
 
-      ! 
+      !
       ! An index may have been put on the heap more than once.
       !
       if (k == lastk) cycle
 
-      lastk = k 
+      lastk = k
       lowert: if (k<i)  then
- 
+
         !
         ! Dropping rule based on the threshold: compare the absolute
         ! value of each updated entry of row with thres * 2-norm of row.
@@ -777,7 +777,7 @@ contains
         rwk    = row(k)
         row(k) = row(k) * d(k)
         if (abs(row(k)) < thres*nrmi) then
-          ! 
+          !
           ! Drop the entry.
           !
           row(k) = czero
@@ -786,11 +786,11 @@ contains
           !
           ! Note: since U is scaled while copying it out (see ilut_copyout),
           ! we can use rwk in the update below.
-          !           
+          !
           do jj=uirp(k),uirp(k+1)-1
             j = uja(jj)
-            if (j<=k) then 
-              info = -i 
+            if (j<=k) then
+              info = -i
               return
             endif
             !
@@ -799,7 +799,7 @@ contains
             !
             row(j)     = row(j) - rwk * uval(jj)
             if (abs(row(j)) < thres*nrmi) then
-              ! 
+              !
               ! Drop the entry.
               !
               row(j) = czero
@@ -818,7 +818,7 @@ contains
       ! If we get here it is an index we need to keep on copyout.
       !
       nidx       = nidx + 1
-      call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize)      
+      call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize)
       if (info /= psb_success_) return
       idxs(nidx) = k
 
@@ -852,7 +852,7 @@ contains
   !
   !  This routine is used by psb_dilut_factint in the computation of the ILU(k,t)
   !  factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    fill_in -  integer, input.
@@ -895,14 +895,14 @@ contains
   !               to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor, copied in lval row by row (see 
+  !               of the L factor, copied in lval row by row (see
   !               psb_dilut_factint), according to the CSR storage format.
   !    lval   -  real(psb_dpk_), dimension(:), input/output.
   !               The array where the entries of the row corresponding to the
   !               L factor are copied.
   !    d       -  real(psb_dpk_), dimension(:), input/output.
   !               The array where the inverse of the diagonal entry of the
-  !               row is copied (only d(i) is used by the routine). 
+  !               row is copied (only d(i) is used by the routine).
   !    uja    -  integer, dimension(:), input/output.
   !               The column indices of the nonzero entries of the U factor
   !               copied in uval row by row (see psb_dilut_factint), according
@@ -921,7 +921,7 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
     ! Arguments
     integer(psb_ipk_), intent(in)                       :: fill_in,i,m,nidx,nlw,nup,jmaxup
@@ -950,22 +950,22 @@ contains
     end if
 
     !
-    ! Here we need to apply also the dropping rule base on the fill-in. 
+    ! Here we need to apply also the dropping rule base on the fill-in.
     ! We do it by putting into a heap the elements that are not dropped
-    ! by using the 2-norm rule, and then copying them out. 
+    ! by using the 2-norm rule, and then copying them out.
     !
     ! The heap goes down on the entry absolute value, so the first item
-    ! is the largest absolute value. 
+    ! is the largest absolute value.
     !
 
     call heap%init(info,dir=psb_asort_down_)
 
     if (info == psb_success_) allocate(xwid(nidx),xw(nidx),indx(nidx),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info=psb_err_alloc_request_
       call psb_errpush(info,name,i_err=(/3*nidx,izero,izero,izero,izero/),&
            & a_err='real(psb_dpk_)')
-      goto 9999      
+      goto 9999
     end if
 
     !
@@ -976,7 +976,7 @@ contains
     idxp = 0
 
     do
- 
+
       idxp = idxp + 1
       if (idxp > nidx) exit
       if (idxs(idxp) >= i) exit
@@ -987,8 +987,8 @@ contains
       !
       if (abs(witem) < thres*nrmi) cycle
 
-      nz       = nz + 1 
-      xw(nz)   = witem 
+      nz       = nz + 1
+      xw(nz)   = witem
       xwid(nz) = widx
       call heap%insert(witem,widx,info)
       if (info /= psb_success_) then
@@ -1000,9 +1000,9 @@ contains
 
     !
     ! Now we have to take out the first nlw+fill_in entries
-    ! 
+    !
     if (nz <= nlw+fill_in) then
-      ! 
+      !
       ! Just copy everything from xw, and it is already ordered
       !
     else
@@ -1014,7 +1014,7 @@ contains
           call psb_errpush(info,name,a_err='psb_heap_get_first')
           goto 9999
         end if
-      
+
         xw(k)   = witem
         xwid(k) = widx
       end do
@@ -1029,15 +1029,15 @@ contains
     ! Copy out the lower part of the row
     !
     do k=1,nz
-      l1     = l1 + 1 
+      l1     = l1 + 1
       if (size(lval) < l1) then
-        ! 
+        !
         ! Figure out a good reallocation size!
-        ! 
+        !
         isz  = (max((l1/i)*m,int(1.2*l1),l1+100))
-        call psb_realloc(isz,lval,info) 
-        if (info == psb_success_) call psb_realloc(isz,lja,info) 
-        if (info /= psb_success_) then 
+        call psb_realloc(isz,lval,info)
+        if (info == psb_success_) call psb_realloc(isz,lja,info)
+        if (info /= psb_success_) then
           info=psb_err_from_subroutine_
           call psb_errpush(info,name,a_err='Allocate')
           goto 9999
@@ -1046,25 +1046,25 @@ contains
       lja(l1)   = xwid(k)
       lval(l1)  = xw(indx(k))
     end do
-    
+
     !
     ! Make sure idxp points to the diagonal entry
     !
-    if (idxp <= size(idxs)) then 
-      if (idxs(idxp) < i) then 
-        do 
+    if (idxp <= size(idxs)) then
+      if (idxs(idxp) < i) then
+        do
           idxp = idxp + 1
           if (idxp > nidx) exit
           if (idxs(idxp) >= i) exit
         end do
       end if
     end if
-    if (idxp > size(idxs)) then 
+    if (idxp > size(idxs)) then
 !!$      write(0,*) 'Warning: missing diagonal element in the row '
     else
-      if (idxs(idxp) > i) then 
+      if (idxs(idxp) > i) then
 !!$        write(0,*) 'Warning: missing diagonal element in the row '
-      else if (idxs(idxp) /= i) then 
+      else if (idxs(idxp) /= i) then
 !!$        write(0,*) 'Warning: impossible error: diagonal has vanished'
       else
         !
@@ -1076,7 +1076,7 @@ contains
         if (abs(d(i)) < d_epstol) then
           !
           ! Too small pivot: unstable factorization
-          !     
+          !
           info = psb_err_pivot_too_small_
           int_err(1) = i
           write(ch_err,'(g20.10)') d(i)
@@ -1092,7 +1092,7 @@ contains
     end if
 
     !
-    ! Now the upper part 
+    ! Now the upper part
     !
 
     call heap%init(info,dir=psb_asort_down_)
@@ -1104,15 +1104,15 @@ contains
 
     nz       = 0
     do
- 
+
       idxp = idxp + 1
       if (idxp > nidx) exit
       widx      = idxs(idxp)
-      if (widx <= i) then 
+      if (widx <= i) then
 !!$        write(0,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp)
         cycle
       end if
-      if (widx > m) then 
+      if (widx > m) then
 !!$        write(0,*) 'Warning: impossible value',widx,i,idxp,idxs(idxp)
         cycle
       end if
@@ -1120,12 +1120,12 @@ contains
       !
       ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway.
       !
-      if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then 
-        cycle 
+      if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then
+        cycle
       end if
 
       nz       = nz + 1
-      xw(nz)   = witem 
+      xw(nz)   = witem
       xwid(nz) = widx
       call heap%insert(witem,widx,info)
       if (info /= psb_success_) then
@@ -1133,7 +1133,7 @@ contains
         call psb_errpush(info,name,a_err='psb_insert_heap')
         goto 9999
       end if
-      
+
     end do
 
     !
@@ -1141,7 +1141,7 @@ contains
     ! we include entry jmaxup.
     !
     if (nz <= nup+fill_in) then
-      ! 
+      !
       ! Just copy everything from xw
       !
       fndmaxup=.true.
@@ -1155,13 +1155,13 @@ contains
         if (widx == jmaxup) fndmaxup=.true.
       end do
     end if
-    if ((i<jmaxup).and.(jmaxup<=m)) then 
-      if (.not.fndmaxup) then 
-        ! 
+    if ((i<jmaxup).and.(jmaxup<=m)) then
+      if (.not.fndmaxup) then
+        !
         ! Include entry jmaxup, if it is not already there.
-        ! Put it in the place of the smallest coefficient. 
+        ! Put it in the place of the smallest coefficient.
         !
-        xw(nz)   = row(jmaxup) 
+        xw(nz)   = row(jmaxup)
         xwid(nz) = jmaxup
       endif
     end if
@@ -1175,15 +1175,15 @@ contains
     ! Copy out the upper part of the row
     !
     do k=1,nz
-      l2     = l2 + 1 
+      l2     = l2 + 1
       if (size(uval) < l2) then
-        ! 
+        !
         ! Figure out a good reallocation size!
-        ! 
+        !
         isz  = max((l2/i)*m,int(1.2*l2),l2+100)
-        call psb_realloc(isz,uval,info) 
-        if (info == psb_success_) call psb_realloc(isz,uja,info) 
-        if (info /= psb_success_) then 
+        call psb_realloc(isz,uval,info)
+        if (info == psb_success_) call psb_realloc(isz,uja,info)
+        if (info /= psb_success_) then
           info=psb_err_from_subroutine_
           call psb_errpush(info,name,a_err='Allocate')
           goto 9999
diff --git a/prec/impl/psb_d_invk_fact.f90 b/prec/impl/psb_d_invk_fact.f90
new file mode 100644
index 00000000..f1a09985
--- /dev/null
+++ b/prec/impl/psb_d_invk_fact.f90
@@ -0,0 +1,495 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+
+subroutine psb_d_invk_bld(a,fill1, fill2,lmat,d,umat,desc,info,blck)
+
+  use psb_base_mod
+  use psb_d_invk_fact_mod, psb_protect_name =>  psb_d_invk_bld
+  use psb_d_ilu_fact_mod
+  implicit none
+
+  ! Arguments
+  type(psb_dspmat_type), intent(in), target   :: a
+  integer(psb_ipk_), intent(in)               :: fill1, fill2
+  type(psb_dspmat_type), intent(inout)        :: lmat, umat
+  real(psb_dpk_), allocatable                 :: d(:)
+  Type(psb_desc_type), Intent(inout)          :: desc
+  integer(psb_ipk_), intent(out)              :: info
+  type(psb_dspmat_type), intent(in), optional :: blck
+  !
+  integer(psb_ipk_)      :: i, nztota, err_act, n_row, nrow_a, n_col
+  type(psb_dspmat_type)  :: atmp
+  real(psb_dpk_), allocatable :: pq(:), pd(:)
+  integer(psb_ipk_), allocatable :: uplevs(:)
+  integer(psb_ipk_)   :: debug_level, debug_unit
+  type(psb_ctxt_type) :: ctxt
+  integer(psb_ipk_)   :: np,me
+  integer(psb_ipk_)   :: nzrmax
+  character(len=20)   :: name, ch_err
+
+
+  info = psb_success_
+  name='psb_dinvk_bld'
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  ctxt        = psb_cd_get_context(desc)
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' start'
+
+  !
+  ! Check the memory available to hold the incomplete L and U factors
+  ! and allocate it if needed
+  !
+  nrow_a = a%get_nrows()
+  nztota = a%get_nzeros()
+
+  if (present(blck)) then
+    nztota = nztota + blck%get_nzeros()
+  end if
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),&
+       & ': out get_nnzeros',nrow_a,nztota,&
+       & a%get_nrows(),a%get_ncols(),a%get_nzeros()
+
+
+  n_row  = psb_cd_get_local_rows(desc)
+  n_col  = psb_cd_get_local_cols(desc)
+  allocate(pd(n_row),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+
+  call lmat%allocate(n_row,n_row,info,nz=nztota)
+  if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota)
+
+
+  call psb_iluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck)
+       !,uplevs=uplevs)
+  !call psb_diluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck)
+
+  if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota)
+  if(info/=0) then
+    info=psb_err_from_subroutine_
+    ch_err='psb_sp_all'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+
+  !
+  ! Compute the aprox U^-1  and L^-1
+  !
+  call psb_sparse_invk(n_row,umat,atmp,fill2,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,umat,info)
+  if (info == psb_success_) call lmat%transp()
+  if (info == psb_success_) call psb_sparse_invk(n_row,lmat,atmp,fill2,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,lmat,info)
+  if (info == psb_success_) call lmat%transp()
+  ! Done. Hopefully....
+
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='invt')
+    goto 9999
+  end if
+
+  call psb_move_alloc(pd,d,info)
+  call lmat%set_asb()
+  call lmat%trim()
+  call umat%set_asb()
+  call umat%trim()
+
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' end'
+
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_d_invk_bld
+
+subroutine psb_dsparse_invk(n,a,z,fill_in,info,inlevs)
+
+  use psb_base_mod
+  use psb_d_invk_fact_mod, psb_protect_name => psb_dsparse_invk
+
+  integer(psb_ipk_), intent(in)           :: n
+  type(psb_dspmat_type), intent(in)       :: a
+  type(psb_dspmat_type), intent(inout)    :: z
+  integer(psb_ipk_), intent(in)           :: fill_in
+  integer(psb_ipk_), intent(out)          :: info
+  integer(psb_ipk_), intent(in), optional :: inlevs(:)
+  !
+  integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2
+  integer(psb_ipk_), allocatable        :: ia(:), ja(:), iz(:), jz(:)
+  real(psb_dpk_), allocatable :: zw(:), val(:), valz(:)
+  integer(psb_ipk_), allocatable        :: uplevs(:), rowlevs(:), idxs(:)
+  real(psb_dpk_), allocatable :: row(:)
+  type(psb_d_coo_sparse_mat)  :: trw
+  type(psb_d_csr_sparse_mat)  :: acsr, zcsr
+  integer(psb_ipk_)           :: ktrw, nidx
+  type(psb_i_heap)            :: heap
+
+  real(psb_dpk_)     :: alpha
+  character(len=20)  :: name='psb_sp_invk'
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then
+    write(psb_err_unit,*) 'Wrong A '
+    info = psb_err_internal_error_
+    call psb_errpush(psb_err_internal_error_,name,a_err='wrong A')
+    goto 9999
+  end if
+  call a%cp_to(acsr)
+  call trw%allocate(izero,izero,ione)
+  if (info == psb_success_) allocate(zw(n),iz(n),valz(n),&
+       & row(n),rowlevs(n),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  allocate(uplevs(acsr%get_nzeros()),stat=info)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='Allocate')
+    goto 9999
+  end if
+  uplevs(:)  = 0
+  row(:)     = dzero
+  rowlevs(:) = -(n+1)
+
+  call zcsr%allocate(n,n,n*fill_in)
+  call zcsr%set_triangle()
+  call zcsr%set_unit(.false.)
+  call zcsr%set_upper()
+  call psb_ensure_size(n+1, idxs,  info)
+
+
+  !
+  !
+  zcsr%irp(1)  = 1
+  nzz          = 0
+
+  l2 = 0
+  outer: do i = 1, n-1
+    ! ZW = e_i
+    call psb_invk_copyin(i,n,acsr,ione,n,row,rowlevs,heap,ktrw,trw,info,&
+         & sign=-done,inlevs=inlevs)
+    row(i)     = done
+    rowlevs(i) = 0
+
+    ! Update loop
+    call psb_invk_inv(fill_in,i,row,rowlevs,heap,&
+         & acsr%ja,acsr%irp,acsr%val,uplevs,nidx,idxs,info)
+
+    call psb_invk_copyout(fill_in,i,n,row,rowlevs,nidx,idxs,&
+         & l2,zcsr%ja,zcsr%irp,zcsr%val,info)
+
+    nzz = l2
+  end do outer
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='mainloop')
+    goto 9999
+  end if
+  ipz1 = nzz+1
+  call psb_ensure_size(ipz1,zcsr%val,info)
+  call psb_ensure_size(ipz1,zcsr%ja,info)
+  zcsr%val(ipz1) = done
+  zcsr%ja(ipz1)  = n
+  zcsr%irp(n+1)  = ipz1+1
+  call zcsr%set_sorted()
+  call z%mv_from(zcsr)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_dsparse_invk
+
+subroutine psb_d_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info,sign,inlevs)
+
+  use psb_base_mod
+  use psb_d_invk_fact_mod, psb_protect_name => psb_d_invk_copyin
+
+  implicit none
+
+  ! Arguments
+  type(psb_d_csr_sparse_mat), intent(in)    :: a
+  type(psb_d_coo_sparse_mat), intent(inout) :: trw
+  integer(psb_ipk_), intent(in)             :: i,m,jmin,jmax
+  integer(psb_ipk_), intent(inout)          :: ktrw,info
+  integer(psb_ipk_), intent(inout)          :: rowlevs(:)
+  real(psb_dpk_), intent(inout)             :: row(:)
+  type(psb_i_heap), intent(inout)           :: heap
+  real(psb_dpk_), optional, intent(in)      :: sign
+  integer(psb_ipk_), intent(in), optional   :: inlevs(:)
+
+  ! Local variables
+  integer(psb_ipk_)             :: k,j,irb,err_act, nz
+  integer(psb_ipk_), parameter  :: nrb=16
+  real(psb_dpk_)                :: sign_
+  character(len=20), parameter  :: name='invk_copyin'
+  character(len=20)             :: ch_err
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  call heap%init(info)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='psb_init_heap')
+    goto 9999
+  end if
+
+  if (present(sign)) then
+    sign_ = sign
+  else
+    sign_ = done
+  end if
+
+
+  !
+  ! Take a fast shortcut if the matrix is stored in CSR format
+  !
+  if (present(inlevs)) then
+    do j = a%irp(i), a%irp(i+1) - 1
+      k          = a%ja(j)
+      if ((jmin<=k).and.(k<=jmax)) then
+        row(k)     = sign_ * a%val(j)
+        rowlevs(k) = inlevs(j)
+        call heap%insert(k,info)
+      end if
+    end do
+  else
+    do j = a%irp(i), a%irp(i+1) - 1
+      k          = a%ja(j)
+      if ((jmin<=k).and.(k<=jmax)) then
+        row(k)     = sign_ * a%val(j)
+        rowlevs(k) = 0
+        call heap%insert(k,info)
+      end if
+    end do
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_d_invk_copyin
+
+
+subroutine psb_d_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,&
+     &  l2,uia1,uia2,uaspk,info)
+
+  use psb_base_mod
+  use psb_d_invk_fact_mod, psb_protect_name => psb_d_invk_copyout
+
+  implicit none
+
+  ! Arguments
+  integer(psb_ipk_), intent(in)                 :: fill_in, i, m, nidx
+  integer(psb_ipk_), intent(inout)              :: l2, info
+  integer(psb_ipk_), intent(inout)              :: rowlevs(:), idxs(:)
+  integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:)
+  real(psb_dpk_), allocatable, intent(inout)    :: uaspk(:)
+  real(psb_dpk_), intent(inout)                 :: row(:)
+
+  ! Local variables
+  integer(psb_ipk_)              :: j,isz,err_act,int_err(5),idxp
+  character(len=20), parameter  :: name='psb_diluk_factint'
+  character(len=20)             :: ch_err
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  do idxp=1,nidx
+
+    j = idxs(idxp)
+
+    if (j>=i) then
+      !
+      ! Copy the upper part of the row
+      !
+      if (rowlevs(j) <= fill_in) then
+        l2     = l2 + 1
+        if (size(uaspk) < l2) then
+          !
+          ! Figure out a good reallocation size!
+          !
+          isz  = max(int(1.2*l2),l2+100)
+          call psb_realloc(isz,uaspk,info)
+          if (info == psb_success_) call psb_realloc(isz,uia1,info)
+          if (info /= psb_success_) then
+            info=psb_err_from_subroutine_
+            call psb_errpush(info,name,a_err='Allocate')
+            goto 9999
+          end if
+        end if
+        uia1(l2)   = j
+        uaspk(l2)  = row(j)
+      end if
+      !
+      ! Re-initialize row(j) and rowlevs(j)
+      !
+      row(j)     = dzero
+      rowlevs(j) = -(m+1)
+    end if
+  end do
+
+  uia2(i+1) = l2 + 1
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_d_invk_copyout
+
+subroutine psb_dinvk_inv(fill_in,i,row,rowlevs,heap,ja,irp,val,uplevs,&
+     & nidx,idxs,info)
+
+  use psb_base_mod
+  use psb_d_invk_fact_mod, psb_protect_name => psb_dinvk_inv
+
+  implicit none
+
+  ! Arguments
+  type(psb_i_heap), intent(inout)               :: heap
+  integer(psb_ipk_), intent(in)                 :: i, fill_in
+  integer(psb_ipk_), intent(inout)              :: nidx,info
+  integer(psb_ipk_), intent(inout)              :: rowlevs(:)
+  integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+  integer(psb_ipk_), intent(in)                 :: ja(:),irp(:),uplevs(:)
+  real(psb_dpk_), intent(in)                    :: val(:)
+  real(psb_dpk_), intent(inout)                 :: row(:)
+
+  ! Local variables
+  integer(psb_ipk_) :: k,j,lrwk,jj,lastk, iret
+  real(psb_dpk_)    :: rwk
+
+
+  info = psb_success_
+
+  call psb_ensure_size(200, idxs,  info)
+  if (info /= psb_success_) return
+  nidx    = 1
+  idxs(1) = i
+  lastk   = i
+
+  !
+  ! Do while there are indices to be processed
+  !
+  do
+    ! Beware: (iret < 0) means that the heap is empty, not an error.
+    call heap%get_first(k,iret)
+    if (iret < 0) then
+!!$        write(psb_err_unit,*) 'IINVK: ',i,' returning at ',lastk
+      return
+    end if
+
+    !
+    ! Just in case an index has been put on the heap more than once.
+    !
+    if (k == lastk) cycle
+
+    lastk = k
+    nidx = nidx + 1
+    if (nidx>size(idxs)) then
+      call psb_realloc(nidx+psb_heap_resize,idxs,info)
+      if (info /= psb_success_) return
+    end if
+    idxs(nidx) = k
+
+    if ((row(k) /= dzero).and.(rowlevs(k) <= fill_in)) then
+      !
+      ! Note: since U is scaled while copying it out (see iluk_copyout),
+      ! we can use rwk in the update below
+      !
+      rwk    = row(k)
+      lrwk   = rowlevs(k)
+
+      do jj=irp(k),irp(k+1)-1
+        j = ja(jj)
+        if (j<=k) then
+          info = -i
+          return
+        endif
+        !
+        ! Insert the index into the heap for further processing.
+        ! The fill levels are initialized to a negative value. If we find
+        ! one, it means that it is an as yet untouched index, so we need
+        ! to insert it; otherwise it is already on the heap, there is no
+        ! need to insert it more than once.
+        !
+        if (rowlevs(j)<0) then
+          call heap%insert(j,info)
+          if (info /= psb_success_) return
+          rowlevs(j) = abs(rowlevs(j))
+        end if
+        !
+        ! Update row(j) and the corresponding fill level
+        !
+        row(j)     = row(j) - rwk * val(jj)
+        rowlevs(j) = min(rowlevs(j),lrwk+uplevs(jj)+1)
+      end do
+
+    end if
+  end do
+
+end subroutine psb_dinvk_inv
diff --git a/prec/impl/psb_d_invt_fact.f90 b/prec/impl/psb_d_invt_fact.f90
new file mode 100644
index 00000000..83aa73ff
--- /dev/null
+++ b/prec/impl/psb_d_invt_fact.f90
@@ -0,0 +1,739 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG4PSBLAS, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+
+subroutine psb_d_invt_bld(a,fillin,invfill,thresh,invthresh,&
+     & lmat,d,umat,desc,info,blck)
+
+  use psb_base_mod
+  use psb_d_invt_fact_mod, psb_protect_name =>  psb_d_invt_bld
+  use psb_d_ilu_fact_mod
+  implicit none
+
+  ! Arguments
+  type(psb_dspmat_type), intent(in), target   :: a
+  integer(psb_ipk_), intent(in)               :: fillin,invfill
+  real(psb_dpk_), intent(in)                :: thresh
+  real(psb_dpk_), intent(in)                  :: invthresh
+  type(psb_dspmat_type), intent(inout)        :: lmat, umat
+  real(psb_dpk_), allocatable                 :: d(:)
+  Type(psb_desc_type), Intent(inout)          :: desc
+  integer(psb_ipk_), intent(out)              :: info
+  type(psb_dspmat_type), intent(in), optional :: blck
+  !
+  integer(psb_ipk_) :: i, nztota, err_act, n_row, nrow_a, n_col
+  type(psb_dspmat_type)       :: atmp
+  real(psb_dpk_), allocatable :: pq(:), pd(:), w(:)
+  integer(psb_ipk_) :: debug_level, debug_unit
+  type(psb_ctxt_type) :: ctxt
+  integer(psb_ipk_)   :: np,me
+  integer(psb_ipk_)   :: nzrmax
+  real(psb_dpk_)    :: sp_thresh
+  character(len=20) :: name, ch_err, fname
+
+
+  info = psb_success_
+  name='psb_dinvt_fact'
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  ctxt        = psb_cd_get_context(desc)
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' start'
+
+  !
+  ! Check the memory available to hold the incomplete L and U factors
+  ! and allocate it if needed
+  !
+  nrow_a = a%get_nrows()
+  nztota = a%get_nzeros()
+
+  if (present(blck)) then
+    nztota = nztota + blck%get_nzeros()
+  end if
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),&
+       & ': out get_nnzeros',nrow_a,nztota,&
+       & a%get_nrows(),a%get_ncols(),a%get_nzeros()
+
+
+  n_row  = psb_cd_get_local_rows(desc)
+  n_col  = psb_cd_get_local_cols(desc)
+  allocate(pd(n_row),w(n_row),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  nzrmax    = fillin
+  sp_thresh = thresh
+
+  call lmat%allocate(n_row,n_row,info,nz=nztota)
+  if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota)
+
+  if (info == 0) call psb_ilut_fact(nzrmax,sp_thresh,&
+       & a,lmat,umat,pd,info,blck=blck,iscale=psb_ilu_scale_maxval_)
+
+  if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota)
+  if(info/=0) then
+    info=psb_err_from_subroutine_
+    ch_err='psb_sp_all'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+  if (.false.) then
+!!$    if (debug_level >= psb_debug_inner_) then
+    write(fname,'(a,i0,a)') 'invt-lo-',me,'.mtx'
+    call lmat%print(fname,head="INVTLOW")
+    write(fname,'(a,i0,a)') 'invt-up-',me,'.mtx'
+    call umat%print(fname,head="INVTUPP")
+  end if
+
+  !
+  ! Compute the approx U^-1  and L^-1
+  !
+  nzrmax    = invfill
+  call psb_dsparse_invt(n_row,umat,atmp,nzrmax,invthresh,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,umat,info)
+  if (info == psb_success_) call lmat%transp()
+  if (info == psb_success_) call psb_dsparse_invt(n_row,lmat,atmp,nzrmax,invthresh,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,lmat,info)
+  if (info == psb_success_) call lmat%transp()
+  ! Done. Hopefully....
+
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='invt')
+    goto 9999
+  end if
+
+  call psb_move_alloc(pd,d,info)
+  call lmat%set_asb()
+  call lmat%trim()
+  call umat%set_asb()
+  call umat%trim()
+
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' end'
+
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_d_invt_bld
+
+subroutine psb_dsparse_invt(n,a,z,nzrmax,sp_thresh,info)
+
+  use psb_base_mod
+  use psb_d_invt_fact_mod, psb_protect_name => psb_dsparse_invt
+
+  implicit none
+  integer(psb_ipk_), intent(in)        :: n
+  type(psb_dspmat_type), intent(in)    :: a
+  type(psb_dspmat_type), intent(inout) :: z
+  integer(psb_ipk_), intent(in)        :: nzrmax
+  real(psb_dpk_), intent(in)           :: sp_thresh
+  integer(psb_ipk_), intent(out)       :: info
+  !
+  integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), iz(:),jz(:)
+  real(psb_dpk_), allocatable    :: zw(:), val(:), valz(:)
+  integer(psb_ipk_), allocatable :: uplevs(:), rowlevs(:),idxs(:)
+  real(psb_dpk_), allocatable :: row(:)
+  type(psb_d_coo_sparse_mat)  :: trw
+  type(psb_d_csr_sparse_mat)  :: acsr, zcsr
+  integer(psb_ipk_)           :: ktrw, nidx, nlw,nup,jmaxup
+  type(psb_i_heap)            :: heap
+  real(psb_dpk_)     :: alpha, nrmi
+  character(len=20)  :: name='psb_sp_invt'
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then
+    write(psb_err_unit,*) 'Wrong A '
+    info = psb_err_internal_error_
+    call psb_errpush(psb_err_internal_error_,name,a_err='wrong A')
+    goto 9999
+  end if
+  call a%cp_to(acsr)
+  call trw%allocate(izero,izero,ione)
+  if (info == psb_success_) allocate(zw(n),iz(n),valz(n),&
+       & row(n),rowlevs(n),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  call zcsr%allocate(n,n,n*nzrmax)
+  call zcsr%set_triangle()
+  call zcsr%set_unit(.false.)
+  call zcsr%set_upper()
+  !
+  !
+  nzz        = 0
+  row(:)     = dzero
+  rowlevs(:) = 0
+  l2         = 0
+  zcsr%irp(1) = 1
+
+  outer: do i = 1, n-1
+    ! ZW = e_i
+    call psb_d_invt_copyin(i,n,acsr,i,ione,n,nlw,nup,jmaxup,nrmi,row,&
+         & heap,rowlevs,ktrw,trw,info,sign=-done)
+    if (info /= 0) exit
+    row(i) = done
+    ! Adjust norm
+    if (nrmi < done) then
+      nrmi = sqrt(done + nrmi**2)
+    else
+      nrmi = nrmi*sqrt(done+done/(nrmi**2))
+    end if
+
+    call psb_invt_inv(sp_thresh,i,nrmi,row,heap,rowlevs,&
+         & acsr%ja,acsr%irp,acsr%val,nidx,idxs,info)
+    if (info /= 0) exit
+!!$    write(0,*) 'Calling copyout ',nzrmax,nlw,nup,nidx,l2
+    call psb_d_invt_copyout(nzrmax,sp_thresh,i,n,nlw,nup,jmaxup,nrmi,row,&
+         & nidx,idxs,l2,zcsr%ja,zcsr%irp,zcsr%val,info)
+    if (info /= 0) exit
+    nzz = l2
+  end do outer
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='mainloop')
+    goto 9999
+  end if
+
+  ipz1 = nzz+1
+  call psb_ensure_size(ipz1,zcsr%val,info)
+  call psb_ensure_size(ipz1,zcsr%ja,info)
+  zcsr%val(ipz1) = done
+  zcsr%ja(ipz1)  = n
+  zcsr%irp(n+1)  = ipz1+1
+
+  call z%mv_from(zcsr)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_dsparse_invt
+
+subroutine psb_d_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,&
+     & irwt,ktrw,trw,info,sign)
+  use psb_base_mod
+  use psb_d_invt_fact_mod, psb_protect_name => psb_d_invt_copyin
+  implicit none
+  type(psb_d_csr_sparse_mat), intent(in)    :: a
+  type(psb_d_coo_sparse_mat), intent(inout) :: trw
+  integer(psb_ipk_), intent(in)             :: i, m,jmin,jmax,jd
+  integer(psb_ipk_), intent(inout)          :: ktrw,nlw,nup,jmaxup,info
+  integer(psb_ipk_), intent(inout)          :: irwt(:)
+  real(psb_dpk_), intent(inout)          :: nrmi
+  real(psb_dpk_), intent(inout)            :: row(:)
+  type(psb_i_heap), intent(inout)           :: heap
+  real(psb_dpk_), intent(in), optional    :: sign
+  !
+  integer(psb_ipk_)            :: k,j,irb,kin,nz, err_act
+  integer(psb_ipk_), parameter :: nrb=16
+  real(psb_dpk_)               :: dmaxup, sign_
+  real(psb_dpk_), external     :: dnrm2
+  character(len=20), parameter :: name='invt_copyin'
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  call heap%init(info)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='psb_init_heap')
+    goto 9999
+  end if
+  sign_ = done
+  if (present(sign)) sign_ = sign
+  !
+  ! nrmi is the norm of the current sparse row (for the time being,
+  ! we use the 2-norm).
+  ! NOTE: the 2-norm below includes also elements that are outside
+  ! [jmin:jmax] strictly. Is this really important? TO BE CHECKED.
+  !
+
+  nlw    = 0
+  nup    = 0
+  jmaxup = 0
+  dmaxup = dzero
+  nrmi   = dzero
+
+  do j = a%irp(i), a%irp(i+1) - 1
+    k = a%ja(j)
+    if ((jmin<=k).and.(k<=jmax)) then
+      row(k)     = sign_ * a%val(j)
+      call heap%insert(k,info)
+      irwt(k) = 1
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='psb_insert_heap')
+        goto 9999
+      end if
+    end if
+    if (k<jd) nlw = nlw + 1
+    if (k>jd) then
+      nup = nup + 1
+      if (abs(row(k))>dmaxup) then
+        jmaxup = k
+        dmaxup = abs(row(k))
+      end if
+    end if
+  end do
+  nz   = a%irp(i+1) - a%irp(i)
+  nrmi = dnrm2(nz,a%val(a%irp(i):),ione)
+
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_d_invt_copyin
+
+subroutine psb_d_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, &
+     & nidx,idxs,l2,ja,irp,val,info)
+
+  use psb_base_mod
+  use psb_d_invt_fact_mod, psb_protect_name => psb_d_invt_copyout
+
+  implicit none
+
+  ! Arguments
+  integer(psb_ipk_), intent(in)                 :: fill_in,i,m,nidx,nlw,nup,jmaxup
+  integer(psb_ipk_), intent(in)                 :: idxs(:)
+  integer(psb_ipk_), intent(inout)              :: l2, info
+  integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:)
+  real(psb_dpk_), intent(in)                    :: thres,nrmi
+  real(psb_dpk_),allocatable, intent(inout)     :: val(:)
+  real(psb_dpk_), intent(inout)                 :: row(:)
+
+  ! Local variables
+  real(psb_dpk_),allocatable     :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+  real(psb_dpk_)                 :: witem, wmin
+  integer(psb_ipk_)              :: widx
+  integer(psb_ipk_)              :: k,isz,err_act,int_err(5),idxp, nz
+  type(psb_d_idx_heap)           :: heap
+  character(len=20), parameter   :: name='invt_copyout'
+  character(len=20)              :: ch_err
+  logical                        :: fndmaxup
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  !
+  ! Here we need to apply also the dropping rule base on the fill-in.
+  ! We do it by putting into a heap the elements that are not dropped
+  ! by using the 2-norm rule, and then copying them out.
+  !
+  ! The heap goes down on the entry absolute value, so the first item
+  ! is the largest absolute value.
+  !
+!!$  write(0,*) 'invt_copyout ',nidx,nup+fill_in
+  call heap%init(info,dir=psb_asort_down_)
+
+  if (info == psb_success_) allocate(xwid(nidx),xw(nidx),indx(nidx),stat=info)
+  if (info /= psb_success_) then
+    info=psb_err_alloc_request_
+    call psb_errpush(info,name,i_err=(/3*nidx/),&
+         & a_err='real(psb_dpk_)')
+    goto 9999
+  end if
+
+  !
+  ! First the lower part
+  !
+
+  nz   = 0
+  idxp = 0
+
+  do
+
+    idxp = idxp + 1
+    if (idxp > nidx) exit
+    if (idxs(idxp) >= i) exit
+    widx      = idxs(idxp)
+    witem     = row(widx)
+    !
+    ! Dropping rule based on the 2-norm
+    !
+    if (abs(witem) < thres*nrmi) cycle
+
+    nz       = nz + 1
+    xw(nz)   = witem
+    xwid(nz) = widx
+    call heap%insert(witem,widx,info)
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_insert_heap')
+      goto 9999
+    end if
+  end do
+
+  if (nz > 1) then
+    write(psb_err_unit,*) 'Warning: lower triangle from invt???? '
+  end if
+
+
+  if (idxp <= size(idxs)) then
+    if (idxs(idxp) < i) then
+      do
+        idxp = idxp + 1
+        if (idxp > nidx) exit
+        if (idxs(idxp) >= i) exit
+      end do
+    end if
+  end if
+  idxp = idxp - 1
+  nz   = 0
+  wmin=HUGE(wmin)
+  if (.false.) then
+    do
+
+      idxp = idxp + 1
+      if (idxp > nidx) exit
+      widx      = idxs(idxp)
+      if (widx < i) then
+        write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp)
+        cycle
+      end if
+      if (widx > m) then
+        cycle
+      end if
+      witem     = row(widx)
+      !
+      ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway.
+      !
+      if ((widx /= jmaxup) .and. (widx /= i) .and. (abs(witem) < thres*nrmi)) then
+        cycle
+      end if
+      if ((widx/=jmaxup).and.(nz > nup+fill_in)) then
+        if (abs(witem) < wmin) cycle
+      endif
+      wmin = min(abs(witem),wmin)
+      nz       = nz + 1
+      xw(nz)   = witem
+      xwid(nz) = widx
+      call heap%insert(witem,widx,info)
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='psb_insert_heap')
+        goto 9999
+      end if
+
+    end do
+
+    !
+    ! Now we have to take out the first nup-fill_in entries. But make sure
+    ! we include entry jmaxup.
+    !
+    if (nz <= nup+fill_in) then
+      !
+      ! Just copy everything from xw
+      !
+      fndmaxup=.true.
+    else
+      fndmaxup = .false.
+      nz = nup+fill_in
+      do k=1,nz
+        call heap%get_first(witem,widx,info)
+        xw(k)   = witem
+        xwid(k) = widx
+        if (widx == jmaxup) fndmaxup=.true.
+      end do
+    end if
+    if ((i<jmaxup).and.(jmaxup<=m)) then
+      if (.not.fndmaxup) then
+        !
+        ! Include entry jmaxup, if it is not already there.
+        ! Put it in the place of the smallest coefficient.
+        !
+        xw(nz)   = row(jmaxup)
+        xwid(nz) = jmaxup
+      endif
+    end if
+
+  else if (.true.) then
+
+    do
+
+      idxp = idxp + 1
+      if (idxp > nidx) exit
+      widx      = idxs(idxp)
+      if (widx < i) then
+        write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp)
+        cycle
+      end if
+      if (widx > m) then
+        cycle
+      end if
+      witem     = row(widx)
+      !
+      ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway.
+      !
+      if ((widx /= i) .and. (abs(witem) < thres*nrmi)) then
+        cycle
+      end if
+      if (nz > nup+fill_in) then
+        if (abs(witem) < wmin) cycle
+      endif
+      wmin = min(abs(witem),wmin)
+      nz       = nz + 1
+      xw(nz)   = witem
+      xwid(nz) = widx
+      call heap%insert(witem,widx,info)
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='psb_insert_heap')
+        goto 9999
+      end if
+
+    end do
+
+    !
+    ! Now we have to take out the first nup-fill_in entries. But make sure
+    ! we include entry jmaxup.
+    !
+    if (nz >  nup+fill_in) then
+      nz = nup+fill_in
+      do k=1,nz
+        call heap%get_first(witem,widx,info)
+        xw(k)   = witem
+        xwid(k) = widx
+      end do
+    end if
+  end if
+
+  !
+  ! Now we put things back into ascending column order
+  !
+  call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_)
+
+  !
+  ! Copy out the upper part of the row
+  !
+  do k=1,nz
+    l2     = l2 + 1
+    if (size(val) < l2) then
+      !
+      ! Figure out a good reallocation size!
+      !
+      isz  = max(int(1.2*l2),l2+100)
+      call psb_realloc(isz,val,info)
+      if (info == psb_success_) call psb_realloc(isz,ja,info)
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='Allocate')
+        goto 9999
+      end if
+    end if
+    ja(l2)   = xwid(k)
+    val(l2)  = xw(indx(k))
+  end do
+
+  !
+  ! Set row to zero
+  !
+  do idxp=1,nidx
+    row(idxs(idxp)) = dzero
+  end do
+
+  irp(i+1) = l2 + 1
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_d_invt_copyout
+
+subroutine psb_d_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val,nidx,idxs,info)
+
+  use psb_base_mod
+  use psb_d_invt_fact_mod, psb_protect_name => psb_d_invt_inv
+
+  implicit none
+
+  ! Arguments
+  type(psb_i_heap), intent(inout)     :: heap
+  integer(psb_ipk_), intent(in)       :: i
+  integer(psb_ipk_), intent(inout)    :: nidx,info
+  integer(psb_ipk_), intent(inout)    :: irwt(:)
+  real(psb_dpk_), intent(in)          :: thres,nrmi
+  integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+  integer(psb_ipk_), intent(in)       :: ja(:),irp(:)
+  real(psb_dpk_), intent(in)          :: val(:)
+  real(psb_dpk_), intent(inout)       :: row(:)
+
+  ! Local Variables
+  integer(psb_ipk_) :: k,j,jj,lastk,iret
+  real(psb_dpk_)    :: rwk, alpha
+
+  info  = psb_success_
+
+  call psb_ensure_size(200, idxs,  info)
+  if (info /= psb_success_) return
+  nidx    = 1
+  idxs(1) = i
+  lastk   = i
+  irwt(i) = 1
+!!$  write(0,*) 'Drop Threshold ',thres*nrmi
+  !
+  ! Do while there are indices to be processed
+  !
+  do
+
+    call heap%get_first(k,iret)
+    if (iret < 0)  exit
+
+    !
+    ! An index may have been put on the heap more than once.
+    ! Should not happen, but just in case.
+    !
+    if (k == lastk)  cycle
+    lastk = k
+
+    !
+    ! Dropping rule based on the threshold: compare the absolute
+    ! value of each updated entry of row with thres * 2-norm of row.
+    !
+    rwk    = row(k)
+
+    if (abs(rwk) < thres*nrmi) then
+      !
+      ! Drop the entry.
+      !
+      row(k)  = dzero
+      irwt(k) = 0
+      cycle
+    else
+      !
+      ! Note: since U is scaled while copying it out (see ilut_copyout),
+      ! we can use rwk in the update below.
+      !
+      do jj=irp(k),irp(k+1)-1
+        j = ja(jj)
+        if (j<=k) then
+          info = -i
+          return
+        endif
+        !
+        ! Update row(j) and, if it is not to be discarded, insert
+        ! its index into the heap for further processing.
+        !
+        row(j)     = row(j) - rwk * val(jj)
+        if (irwt(j) == 0) then
+          if (abs(row(j)) < thres*nrmi) then
+            !
+            ! Drop the entry.
+            !
+            row(j)  = dzero
+          else
+            !
+            ! Do the insertion.
+            !
+            call heap%insert(j,info)
+            if (info /= psb_success_) return
+            irwt(j) = 1
+          end if
+        end if
+      end do
+    end if
+
+    !
+    ! If we get here it is an index we need to keep on copyout.
+    !
+
+    nidx       = nidx + 1
+    call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize)
+    if (info /= psb_success_) return
+    idxs(nidx) = k
+    irwt(k)    = 0
+  end do
+
+  irwt(i) = 0
+end subroutine psb_d_invt_inv
diff --git a/prec/impl/psb_d_prec_type_impl.f90 b/prec/impl/psb_d_prec_type_impl.f90
index 49b5bcf2..69e48079 100644
--- a/prec/impl/psb_d_prec_type_impl.f90
+++ b/prec/impl/psb_d_prec_type_impl.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,14 +27,14 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
-!   
+!
+!
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -46,7 +46,7 @@
 !!$    3. The name of the PSBLAS group or the names of its contributors may
 !!$       not be used to endorse or promote products derived from this
 !!$       software without specific written permission.
-!!$ 
+!!$
 !!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -58,14 +58,14 @@
 !!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !!$  POSSIBILITY OF SUCH DAMAGE.
-!!$ 
-!!$  
+!!$
+!!$
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
 subroutine psb_d_apply2_vect(prec,x,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_d_prec_type, psb_protect_name => psb_d_apply2_vect
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)       :: desc_data
   class(psb_dprec_type), intent(inout) :: prec
   type(psb_d_vect_type),intent(inout)  :: x
@@ -74,7 +74,7 @@ subroutine psb_d_apply2_vect(prec,x,y,desc_data,info,trans,work)
   character(len=1), optional           :: trans
   real(psb_dpk_),intent(inout), optional, target :: work(:)
 
-  character     :: trans_ 
+  character     :: trans_
   real(psb_dpk_), pointer :: work_(:)
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -88,25 +88,25 @@ subroutine psb_d_apply2_vect(prec,x,y,desc_data,info,trans,work)
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
 
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=psb_toupper(trans)
   else
     trans_='N'
   end if
 
-  if (present(work)) then 
+  if (present(work)) then
     work_ => work
   else
     allocate(work_(4*desc_data%get_local_cols()),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
 
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
@@ -115,13 +115,13 @@ subroutine psb_d_apply2_vect(prec,x,y,desc_data,info,trans,work)
   call prec%prec%apply(done,x,dzero,y,desc_data,info,&
        & trans=trans_,work=work_)
 
-  if (present(work)) then 
+  if (present(work)) then
   else
     deallocate(work_,stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='DeAllocate')
-      goto 9999      
+      goto 9999
     end if
   end if
 
@@ -136,7 +136,7 @@ end subroutine psb_d_apply2_vect
 subroutine psb_d_apply1_vect(prec,x,desc_data,info,trans,work)
   use psb_base_mod
   use psb_d_prec_type, psb_protect_name => psb_d_apply1_vect
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)       :: desc_data
   class(psb_dprec_type), intent(inout) :: prec
   type(psb_d_vect_type),intent(inout)  :: x
@@ -145,7 +145,7 @@ subroutine psb_d_apply1_vect(prec,x,desc_data,info,trans,work)
   real(psb_dpk_),intent(inout), optional, target :: work(:)
 
   type(psb_d_vect_type)       :: ww
-  character     :: trans_ 
+  character     :: trans_
   real(psb_dpk_), pointer :: work_(:)
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -159,25 +159,25 @@ subroutine psb_d_apply1_vect(prec,x,desc_data,info,trans,work)
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
 
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=psb_toupper(trans)
   else
     trans_='N'
   end if
 
-  if (present(work)) then 
+  if (present(work)) then
     work_ => work
   else
     allocate(work_(4*desc_data%get_local_cols()),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
 
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
@@ -188,13 +188,13 @@ subroutine psb_d_apply1_vect(prec,x,desc_data,info,trans,work)
        & trans=trans_,work=work_)
   if (info == 0) call psb_geaxpby(done,ww,dzero,x,desc_data,info)
   call psb_gefree(ww,desc_data,info)
-  if (present(work)) then 
+  if (present(work)) then
   else
     deallocate(work_,stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='DeAllocate')
-      goto 9999      
+      goto 9999
     end if
   end if
 
@@ -209,7 +209,7 @@ end subroutine psb_d_apply1_vect
 subroutine psb_d_apply2v(prec,x,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_d_prec_type, psb_protect_name => psb_d_apply2v
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_dprec_type), intent(inout) :: prec
   real(psb_dpk_),intent(inout)   :: x(:)
@@ -218,7 +218,7 @@ subroutine psb_d_apply2v(prec,x,y,desc_data,info,trans,work)
   character(len=1), optional        :: trans
   real(psb_dpk_),intent(inout), optional, target :: work(:)
 
-  character     :: trans_ 
+  character     :: trans_
   real(psb_dpk_), pointer :: work_(:)
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -232,37 +232,37 @@ subroutine psb_d_apply2v(prec,x,y,desc_data,info,trans,work)
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
 
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=trans
   else
     trans_='N'
   end if
 
-  if (present(work)) then 
+  if (present(work)) then
     work_ => work
   else
     allocate(work_(4*desc_data%get_local_cols()),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
 
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
   end if
   call prec%prec%apply(done,x,dzero,y,desc_data,info,trans_,work=work_)
-  if (present(work)) then 
+  if (present(work)) then
   else
     deallocate(work_,stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='DeAllocate')
-      goto 9999      
+      goto 9999
     end if
   end if
 
@@ -277,7 +277,7 @@ end subroutine psb_d_apply2v
 subroutine psb_d_apply1v(prec,x,desc_data,info,trans)
   use psb_base_mod
   use psb_d_prec_type, psb_protect_name => psb_d_apply1v
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_dprec_type), intent(inout) :: prec
   real(psb_dpk_),intent(inout)   :: x(:)
@@ -293,36 +293,34 @@ subroutine psb_d_apply1v(prec,x,desc_data,info,trans)
   name='psb_d_apply1v'
   info = psb_success_
   call psb_erractionsave(err_act)
-
-
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=psb_toupper(trans)
   else
     trans_='N'
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
   end if
   allocate(ww(size(x)),w1(size(x)),stat=info)
-  if (info /= psb_success_) then 
+  if (info /= psb_success_) then
     info = psb_err_from_subroutine_
     call psb_errpush(info,name,a_err='Allocate')
-    goto 9999      
+    goto 9999
   end if
   call prec%prec%apply(done,x,dzero,ww,desc_data,info,&
        & trans_,work=w1)
   if(info /= psb_success_) goto 9999
   x(:) = ww(:)
   deallocate(ww,W1,stat=info)
-  if (info /= psb_success_) then 
+  if (info /= psb_success_) then
     info = psb_err_from_subroutine_
     call psb_errpush(info,name,a_err='DeAllocate')
-    goto 9999      
+    goto 9999
   end if
 
 
@@ -336,3 +334,159 @@ subroutine psb_d_apply1v(prec,x,desc_data,info,trans)
 
 end subroutine psb_d_apply1v
 
+subroutine psb_dcprecseti(prec,what,val,info,ilev,ilmax,pos,idx)
+  use psb_base_mod
+  use psb_d_prec_type, psb_protect_name => psb_dcprecseti
+  implicit none
+
+  class(psb_dprec_type), intent(inout)   :: prec
+  character(len=*), intent(in)             :: what
+  integer(psb_ipk_), intent(in)            :: val
+  integer(psb_ipk_), intent(out)           :: info
+  ! This optional inputs are backport from the inputs available in AMG4PSBLAS,
+  ! they are of no actual use here a part from compatibility reasons.
+  integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+  character(len=*), optional, intent(in)   :: pos
+
+  ! Local variables
+  character(len=*), parameter            :: name='psb_precseti'
+
+  info = psb_success_
+
+  ! We need to convert from the 'what' string to the corresponding integer
+  ! value befor passing the call to the set of the inner method.
+  select case (psb_toupper(trim(what)))
+    case ('SUB_FILLIN')
+      call prec%prec%precset(psb_ilu_fill_in_,val,info)
+    case ('INV_FILLIN')
+      call prec%prec%precset(psb_inv_fillin_,val,info)
+    case default
+      info = psb_err_invalid_args_combination_
+      write(psb_err_unit,*) name,&
+           & ': Error: uninitialized preconditioner,',&
+           &' should call prec%init'
+      return
+  end select
+
+end subroutine psb_dcprecseti
+
+subroutine psb_dcprecsetr(prec,what,val,info,ilev,ilmax,pos,idx)
+  use psb_base_mod
+  use psb_d_prec_type, psb_protect_name => psb_dcprecsetr
+  implicit none
+
+  class(psb_dprec_type), intent(inout)   :: prec
+  character(len=*), intent(in)             :: what
+  real(psb_dpk_), intent(in)             :: val
+  integer(psb_ipk_), intent(out)           :: info
+  ! This optional inputs are backport from the inputs available in AMG4PSBLAS,
+  ! they are of no actual use here a part from compatibility reasons.
+  integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+  character(len=*), optional, intent(in)   :: pos
+
+  ! Local variables
+  character(len=*), parameter            :: name='psb_precsetr'
+
+  info = psb_success_
+
+  ! We need to convert from the 'what' string to the corresponding integer
+  ! value befor passing the call to the set of the inner method.
+  select case (psb_toupper(trim(what)))
+  case('SUB_ILUTHRS')
+    call prec%prec%precset(psb_fact_eps_,val,info)
+  case('INV_THRESH')
+    call prec%prec%precset(psb_inv_thresh_,val,info)
+  case default
+    info = psb_err_invalid_args_combination_
+    write(psb_err_unit,*) name,&
+         & ': Error: uninitialized preconditioner,',&
+         &' should call prec%init'
+    return
+  end select
+
+end subroutine psb_dcprecsetr
+
+subroutine psb_dcprecsetc(prec,what,string,info,ilev,ilmax,pos,idx)
+  use psb_base_mod
+  use psb_d_prec_type, psb_protect_name => psb_dcprecsetc
+  implicit none
+
+  class(psb_dprec_type), intent(inout)   :: prec
+  character(len=*), intent(in)             :: what
+  character(len=*), intent(in)             :: string
+  integer(psb_ipk_), intent(out)           :: info
+  ! This optional inputs are backport from the inputs available in AMG4PSBLAS,
+  ! they are of no actual use here a part from compatibility reasons.
+  integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+  character(len=*), optional, intent(in)   :: pos
+
+  ! Local variables
+  character(len=*), parameter            :: name='psb_precsetc'
+
+  info = psb_success_
+
+  ! We need to convert from the 'what' string to the corresponding integer
+  ! value befor passing the call to the set of the inner method.
+  select case (psb_toupper(trim(what)))
+    case ('SUB_SOLVE')
+      ! We select here the type of solver on the block
+      select case (psb_toupper(trim(string)))
+        case("ILU")
+            call prec%prec%precset(psb_f_type_,psb_f_ilu_k_,info)
+            call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info)
+        case("ILUT")
+            call prec%prec%precset(psb_f_type_,psb_f_ilu_t_,info)
+            call prec%prec%precset(psb_ilu_ialg_,psb_ilu_t_,info)
+        case("AINV")
+            call prec%prec%precset(psb_f_type_,psb_f_ainv_,info)
+        case("INVK")
+            call prec%prec%precset(psb_f_type_,psb_f_invk_,info)
+        case("INVT")
+            call prec%prec%precset(psb_f_type_,psb_f_invt_,info)
+        case default
+          ! Default to ILU(0) factorization
+          call prec%prec%precset(psb_f_type_,psb_f_ilu_n_,info)
+          call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info)
+      end select
+    case ("ILU_ALG")
+      select case (psb_toupper(trim(string)))
+        case ("MILU")
+          call prec%prec%precset(psb_ilu_ialg_,psb_milu_n_,info)
+        case default
+          ! Do nothing
+      end select
+    case ("ILUT_SCALE")
+      select case (psb_toupper(trim(string)))
+      case ("MAXVAL")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_maxval_,info)
+      case ("DIAG")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_diag_,info)
+      case ("ARWSUM")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arwsum_,info)
+      case ("ARCSUM")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arcsum_,info)
+      case ("ACLSUM")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_aclsum_,info)
+      case ("NONE")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info)
+      case default
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info)
+      end select
+    case ("AINV_ALG")
+      select case (psb_toupper(trim(string)))
+      case("LLK")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info)
+      case("SYM-LLK")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_llk_,info)
+      case("STAB-LLK")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_ft_llk_,info)
+      case("MLK","LMX")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_mlk_,info)
+      case default
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info)
+      end select
+    case default
+
+  end select
+
+end subroutine psb_dcprecsetc
diff --git a/prec/impl/psb_d_sp_drop.f90 b/prec/impl/psb_d_sp_drop.f90
new file mode 100644
index 00000000..67c49b6f
--- /dev/null
+++ b/prec/impl/psb_d_sp_drop.f90
@@ -0,0 +1,168 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_d_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info)
+  use psb_base_mod
+  implicit none
+  real(psb_dpk_), intent(in)       :: sp_thresh
+  integer(psb_ipk_), intent(in)    :: idiag, nzrmax
+  integer(psb_ipk_), intent(inout) :: nz
+  integer(psb_ipk_), intent(inout) :: iz(:)
+  real(psb_dpk_), intent(inout)    :: valz(:)
+  integer(psb_ipk_), intent(out)   :: info
+  !
+  integer(psb_ipk_)              :: i, j, idf, nw
+  real(psb_dpk_)                 :: witem
+  integer(psb_ipk_)              :: widx
+  real(psb_dpk_), allocatable    :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+
+
+  info = psb_success_
+
+  if (nz > min(size(iz),size(valz))) then
+    write(0,*) 'Serious size problem ',nz,size(iz),size(valz)
+    info = -2
+    return
+  end if
+  allocate(xw(nz),xwid(nz),indx(nz),stat=info)
+  if (info /= psb_success_) then
+    write(psb_err_unit,*) ' Memory allocation failure in sp_drop',nz,info
+    return
+  endif
+
+  ! Always keep the diagonal element
+  idf = -1
+  do i=1, nz
+    if (iz(i) == idiag) then
+      idf     = i
+      witem   = valz(i)
+      widx    = iz(i)
+      valz(i) = valz(1)
+      iz(i)   = iz(1)
+      valz(1) = witem
+      iz(1)   = widx
+      exit
+    end if
+  end do
+
+  if (idf == -1) then
+
+    xw(1:nz) = valz(1:nz)
+    call psb_qsort(xw(1:nz),indx(1:nz),dir=psb_asort_down_)
+    do i=1, nz
+      xwid(i) = iz(indx(i))
+    end do
+    nw = min(nz,nzrmax)
+    do
+      if (nw <= 1) exit
+      if (abs(xw(nw)) < sp_thresh) then
+        nw = nw - 1
+      else
+        exit
+      end if
+    end do
+    nw = max(nw, 1)
+
+  else
+
+    nw = nz-1
+
+    xw(1:nw) = valz(2:nz)
+
+    call psb_qsort(xw(1:nw),indx(1:nw),dir=psb_asort_down_)
+    nw = min(nw,nzrmax-1)
+    do
+      if (nw <= 1) exit
+      if (abs(xw(nw)) < sp_thresh) then
+        nw = nw - 1
+      else
+        exit
+      end if
+    end do
+
+    do i=1, nw
+      xwid(i) = iz(1+indx(i))
+    end do
+    nw       = nw + 1
+    xw(nw)   = valz(1)
+    xwid(nw) = iz(1)
+  end if
+
+  call psb_msort(xwid(1:nw),indx(1:nw),dir=psb_sort_up_)
+
+  do i=1, nw
+    valz(i) = xw(indx(i))
+    iz(i)   = xwid(i)
+  end do
+  nz = nw
+  if (nz>nzrmax) write(0,*) 'in sp_drop: ',nw,nzrmax,nz
+  deallocate(xw,xwid,indx,stat=info)
+  if (info /= psb_success_) then
+    write(psb_err_unit,*) ' Memory deallocation failure in sp_drop',info
+    return
+  endif
+  return
+end subroutine psb_d_sp_drop
diff --git a/prec/impl/psb_d_sparsify.f90 b/prec/impl/psb_d_sparsify.f90
new file mode 100644
index 00000000..de4628ba
--- /dev/null
+++ b/prec/impl/psb_d_sparsify.f90
@@ -0,0 +1,260 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_d_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info,istart,iheap,ikr)
+  use psb_base_mod
+  implicit none
+
+  real(psb_dpk_), intent(in)              :: sp_thresh
+  integer(psb_ipk_), intent(in)           :: idiag, n, nzrmax
+  real(psb_dpk_), intent(inout)           :: zw(:)
+  integer(psb_ipk_), intent(out)          :: nz
+  integer(psb_ipk_), intent(out)          :: iz(:)
+  real(psb_dpk_), intent(out)             :: valz(:)
+  integer(psb_ipk_), intent(out)          :: info
+  integer(psb_ipk_), intent(in), optional :: istart
+  type(psb_i_heap), optional              :: iheap
+  integer(psb_ipk_), optional             :: ikr(:)
+  !
+  integer(psb_ipk_)              :: i, istart_, last_i, iret,k
+  real(psb_dpk_)                :: witem
+  integer(psb_ipk_)              :: widx
+  real(psb_dpk_), allocatable   :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+  type(psb_d_idx_heap)         :: heap
+
+
+  info = psb_success_
+  istart_ = 1
+  if (present(istart)) istart_ = max(1,istart)
+  if (.false.) then
+    nz = 0
+    do i=istart_, n
+      if ((i == idiag).or.(abs(zw(i)) >= sp_thresh)) then
+        nz       = nz + 1
+        iz(nz)   = i
+        valz(nz) = zw(i)
+      end if
+    end do
+
+  else
+    allocate(xw(nzrmax),xwid(nzrmax),indx(nzrmax),stat=info)
+    if (info /= psb_success_) then
+      return
+    end if
+
+    call heap%init(info,dir=psb_asort_down_)
+
+    ! Keep at least the diagonal
+    nz = 0
+
+    if (present(iheap)) then
+      if (.not.(present(ikr))) then
+        write(psb_err_unit,*) 'Error: if IHEAP then also IKR'
+        info = -1
+        return
+      end if
+      last_i = -1
+      do
+        call iheap%get_first(i,iret)
+        if (iret < 0) exit
+        ! An index may have been put on the heap more than once.
+        if (i == last_i) cycle
+        last_i = i
+        if (i == idiag) then
+          xw(1)   = zw(i)
+          xwid(1) = i
+        else if (abs(zw(i)) >= sp_thresh) then
+          call heap%insert(zw(i),i,info)
+        end if
+        zw(i)  = dzero
+        ikr(i) = 0
+      end do
+
+    else
+
+      do i=istart_, n
+        if (i == idiag) then
+          xw(1)   = zw(i)
+          xwid(1) = i
+        else if (abs(zw(i)) >= sp_thresh) then
+          call heap%insert(zw(i),i,info)
+        end if
+        zw(i) = dzero
+      end do
+    end if
+
+    k = 1
+    do
+      if (k == nzrmax) exit
+      call heap%get_first(witem,widx,info)
+      if (info == -1) then
+        info = psb_success_
+        exit
+      endif
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        return
+      end if
+      k = k + 1
+      xw(k)   = witem
+      xwid(k) = widx
+    end do
+    call heap%free(info)
+    nz = k
+    call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_)
+!!$    write(0,*) 'sparsify output for idiag ',idiag,' :',nz,sp_thresh
+    do i=1, nz
+      valz(i) = xw(indx(i))
+      iz(i)   = xwid(i)
+!!$      write(0,*) '         ',iz(i),valz(i)
+    end do
+
+  end if
+
+  return
+
+end subroutine psb_d_sparsify
+
+
+subroutine psb_d_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info)
+  use psb_base_mod
+  implicit none
+
+  real(psb_dpk_), intent(in)      :: sp_thresh
+  integer(psb_ipk_), intent(in)     :: idiag, n, nzrmax
+  real(psb_dpk_), intent(inout)    :: zw(:)
+  integer(psb_ipk_), intent(out)    :: nz
+  integer(psb_ipk_), intent(out)    :: iz(:)
+  real(psb_dpk_), intent(out)       :: valz(:)
+  integer(psb_ipk_), intent(out)    :: info
+  integer(psb_ipk_), intent(inout)  :: lhead, listv(:)
+  integer(psb_ipk_)                 :: ikr(:)
+  !
+  integer(psb_ipk_)              :: i, istart_, last_i, iret,k,current, next
+  real(psb_dpk_)                :: witem
+  integer(psb_ipk_)              :: widx
+  real(psb_dpk_), allocatable    :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+
+
+  info = psb_success_
+  istart_ = 1
+  allocate(xw(n),xwid(n),indx(n),stat=info)
+
+  current = lhead
+  lhead = -1
+  i = 0
+  do while (current >0)
+    i = i + 1
+    xw(i)   = zw(current)
+    xwid(i) = current
+
+    if (current == idiag) then
+      ! Bring the diagona into first position
+      witem = xw(1)
+      widx  = xwid(1)
+      xw(1)   = xw(i)
+      xwid(1) = xwid(i)
+      xw(i)   = witem
+      xwid(i) = widx
+    end if
+
+    zw(current)  = dzero
+    ikr(current) = 0
+
+    next = listv(current)
+    listv(current) = -1
+    current = next
+  end do
+  nz = i
+  if (nz > 2) call psb_hsort(xw(2:nz),ix=xwid(2:nz),&
+       & dir=psb_asort_down_,flag=psb_sort_keep_idx_)
+!!$  write(0,*) 'Done first msort '
+!!$  write(0,*) '   after first msort for idiag ',idiag,' :',nz,sp_thresh
+!!$  do i=1, nz
+!!$    write(0,*) '         ',xwid(i),xw(i)
+!!$  end do
+
+  i = 2
+  do while (i<=nz)
+    if (abs(xw(i)) < sp_thresh) exit
+    i = i + 1
+  end do
+!!$  write(0,*) 'NZ ',nz, i, nzrmax
+  nz = max(1,min(i-1,nzrmax))
+  call psb_msort(xwid(1:nz),ix=indx(1:nz),dir=psb_sort_up_)
+!!$  write(0,*) 'Done second msort '
+
+!!$  write(0,*) 'sparsify output for idiag ',idiag,' :',nz,i,sp_thresh
+  do i=1, nz
+    valz(i) = xw(indx(i))
+    iz(i)   = xwid(i)
+!!$      write(0,*) '         ',iz(i),valz(i)
+  end do
+
+  return
+
+end subroutine psb_d_sparsify_list
diff --git a/prec/impl/psb_drwclip.f90 b/prec/impl/psb_drwclip.f90
new file mode 100644
index 00000000..97aea428
--- /dev/null
+++ b/prec/impl/psb_drwclip.f90
@@ -0,0 +1,90 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_d_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax)
+  use psb_base_mod
+
+  implicit none
+  integer(psb_ipk_), intent(inout)  :: nz
+  integer(psb_ipk_), intent(inout)  :: ia(*), ja(*)
+  real(psb_dpk_), intent(inout)    :: val(*)
+  integer(psb_ipk_), intent(in)     :: imin,imax,jmin,jmax
+
+  integer(psb_ipk_) :: i,j
+
+  j = 0
+  do i=1, nz
+    if ((imin <= ia(i)).and.&
+         & (ia(i) <= imax).and.&
+         & (jmin <= ja(i)).and.&
+         & (ja(i) <= jmax) ) then
+      j = j + 1
+      ia(j) = ia(i)
+      ja(j) = ja(i)
+      val(j) = val(i)
+    end if
+  end do
+  nz = j
+end subroutine psb_d_rwclip
diff --git a/prec/impl/psb_dsparse_biconjg_llk.F90 b/prec/impl/psb_dsparse_biconjg_llk.F90
new file mode 100644
index 00000000..919f64ab
--- /dev/null
+++ b/prec/impl/psb_dsparse_biconjg_llk.F90
@@ -0,0 +1,366 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_dsparse_biconjg_llk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_d_ainv_tools_mod
+  use psb_d_biconjg_mod, psb_protect_name => psb_dsparse_biconjg_llk
+  !
+  ! Left-looking variant
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_d_csr_sparse_mat), intent(in)    :: a
+  type(psb_d_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_dpk_), intent(in)                :: sp_thresh
+  real(psb_dpk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable        :: ia(:), ja(:), izkr(:), izcr(:)
+  real(psb_dpk_), allocatable :: zval(:),val(:), q(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       & ipj, lastj, nextj, nzw
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_d_csc_sparse_mat) :: ac
+  real(psb_dpk_)     :: alpha
+  character(len=20)  :: name='psb_orth_llk'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = dzero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = dzero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < d_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = done
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = done
+  nzw       = 1
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = dzero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j) < i) then
+!!$        write(0,*) i, ' Outer inserting ',ac%ia(j)
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+      if (debug) write(0,*) 'update loop, using row: ',j,i
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! ! write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+!!$      write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+!!$        write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, &
+!!$             & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1)
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+          if (izkr(kr) == 0) then
+!!$            write(0,*) ' main inner Inserting ',kr
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+!!$                write(0,*) j,i,' Inner inserting ',nextj
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+    if (abs(p(i)) < d_epstol) &
+         & p(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = dzero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = a%irp(i), a%irp(i+1)-1
+      if (a%ja(j) < i) then
+        if (info == psb_success_) call rheap%insert(a%ja(j),info)
+        izcr(a%ja(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      innerw: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outerw ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit innerw
+        end if
+      end do innerw
+      izcr(j) = 0
+      if (j>=i) cycle outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = ac%icp(j)
+      ip2 = ac%icp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (ac%ia(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-q(i)/q(j))
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+
+        do k=w%icp(j), w%icp(j+1)-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              nextj=a%ja(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outerw
+    ip1 = ac%icp(i)
+    ip2 = ac%icp(i+1) - 1
+    do
+      if (ip2 < ip1 ) exit
+      if (ac%ia(ip2) <= n) exit
+      ip2 = ip2 -1
+    end do
+    nzra = max(0,ip2 - ip1 + 1)
+    q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+    if (abs(q(i)) < d_epstol) &
+         & q(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrz, w%ia,  info)
+    call psb_ensure_size(nzw+nzrz, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrz
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrz
+    nzw        = nzw + nzrz
+
+  end do
+
+end subroutine psb_dsparse_biconjg_llk
diff --git a/prec/impl/psb_dsparse_biconjg_llk_noth.F90 b/prec/impl/psb_dsparse_biconjg_llk_noth.F90
new file mode 100644
index 00000000..447cb68f
--- /dev/null
+++ b/prec/impl/psb_dsparse_biconjg_llk_noth.F90
@@ -0,0 +1,362 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_dsparse_biconjg_llk_noth(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_d_ainv_tools_mod
+  use psb_d_biconjg_mod, psb_protect_name => psb_dsparse_biconjg_llk_noth
+
+  !
+  ! Left-looking variant, with NO drop rule on p(i)/p(j)
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)               :: n
+  type(psb_d_csr_sparse_mat), intent(in)    :: a
+  type(psb_d_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)               :: nzrmax
+  real(psb_dpk_), intent(in)                :: sp_thresh
+  real(psb_dpk_), intent(out)                :: p(:)
+  integer(psb_ipk_), intent(out)              :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable        :: ia(:), ja(:), izkr(:), izcr(:)
+  real(psb_dpk_), allocatable :: zval(:),val(:), q(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_d_csc_sparse_mat) :: ac
+  real(psb_dpk_)     :: alpha
+  character(len=20)  :: name='psb_orth_llk_noth'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = dzero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = dzero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < d_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = done
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = done
+  nzw       = 1
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = dzero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j) < i) then
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+      if (debug) write(0,*) 'update loop, using row: ',j,i
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+
+      if (.true.) then
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+          if (izkr(kr) == 0) then
+
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+    if (abs(p(i)) < d_epstol) &
+         & p(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = dzero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = a%irp(i), a%irp(i+1)-1
+      if (a%ja(j) < i) then
+        if (info == psb_success_) call rheap%insert(a%ja(j),info)
+        izcr(a%ja(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      innerw: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outerw ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit innerw
+        end if
+      end do innerw
+      izcr(j) = 0
+      if (j>=i) cycle outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = ac%icp(j)
+      ip2 = ac%icp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (ac%ia(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-q(i)/q(j))
+      if (.true.) then
+
+        do k=w%icp(j), w%icp(j+1)-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              nextj=a%ja(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outerw
+    ip1 = ac%icp(i)
+    ip2 = ac%icp(i+1) - 1
+    do
+      if (ip2 < ip1 ) exit
+      if (ac%ia(ip2) <= n) exit
+      ip2 = ip2 -1
+    end do
+    nzra = max(0,ip2 - ip1 + 1)
+    q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+    if (abs(q(i)) < d_epstol) &
+         & q(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrz, w%ia,  info)
+    call psb_ensure_size(nzw+nzrz, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrz
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrz
+    nzw        = nzw + nzrz
+
+  end do
+
+end subroutine psb_dsparse_biconjg_llk_noth
diff --git a/prec/impl/psb_dsparse_biconjg_mlk.F90 b/prec/impl/psb_dsparse_biconjg_mlk.F90
new file mode 100644
index 00000000..aae56a5a
--- /dev/null
+++ b/prec/impl/psb_dsparse_biconjg_mlk.F90
@@ -0,0 +1,501 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_dsparse_biconjg_mlk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_d_ainv_tools_mod
+  use psb_d_biconjg_mod, psb_protect_name => psb_dsparse_biconjg_mlk
+  !
+  ! Left-looking variant
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_d_csr_sparse_mat), intent(in)    :: a
+  type(psb_d_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_dpk_), intent(in)                :: sp_thresh
+  real(psb_dpk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:), hlist(:), bfr(:), rwlist(:)
+  real(psb_dpk_), allocatable    :: zval(:),val(:), q(:)
+  integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw, hlhead, li, mj, kkc, ifrst, ilst, rwhead
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_d_csc_sparse_mat) :: ac
+  real(psb_dpk_)     :: alpha
+  character(len=20)  :: name='psb_biconjg_mlk'
+  logical, parameter :: debug=.false., test_merge=.true.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n), &
+       & hlist(n),rwlist(n),bfr(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i)  = 0
+    izcr(i)  = 0
+    zval(i)  = dzero
+    hlist(i) = -1
+    rwlist(i) = -1
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = dzero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < d_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = done
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = done
+  nzw       = 1
+
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = dzero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    rwhead  = i
+
+    hlhead = -1
+
+    kkc = 0
+    ilst  = ac%icp(i)-1
+    ifrst = ac%icp(i)
+    do j = ac%icp(i+1)-1, ac%icp(i), -1
+      if (ac%ia(j) < i) then
+        ilst = j
+        exit
+      end if
+    end do
+    kkc = ilst-ifrst+1
+
+    if (.true..or.debug) then
+!!$      write(0,*) 'Outer Before insertion  : ',hlhead
+      call printlist(hlhead,hlist)
+    end if
+    if (kkc > 0) then
+!!$      write(0,*) i,' Outer Inserting : ',kkc,':',ac%ia(ifrst:ilst)
+
+      !call hlmerge(hlhead,hlist,bfr(1:kkc))
+      call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst))
+    end if
+    if (debug) then
+      write(0,*) 'Outer After insertion: ',hlhead
+      call printlist(hlhead,hlist)
+    end if
+
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='init_lists')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      mj      = hlhead
+      if (mj > 0)  then
+        hlhead = hlist(mj)
+        hlist(mj) = -1
+      end if
+      j = mj
+      if (j < 0)  exit outer
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+
+      if (debug) write(0,*) 'update loop, using row: ',j,i,mj
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+!!$      write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha
+!!$      write(0,*) '   Current list is : ',hlhead
+!!$      call printlist(hlhead,hlist)
+!!$
+
+
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+        ifrst=z%icp(j)
+        ilst=z%icp(j+1)-1
+        call hlmerge(rwhead,rwlist,z%ia(ifrst:ilst))
+!!$        write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, &
+!!$             & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1)
+
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+
+          if (izkr(kr) == 0) then
+!!$            write(0,*) ' main inner Inserting ',kr
+!!$            call hlmerge(rwhead,rwlist,(/kr/))
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            ilst  = ac%icp(kr)-1
+            ifrst = ac%icp(kr+1)
+            kkc = 0
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              if ((ac%ia(kc) < i).and.(ac%ia(kc) >j)) then
+                ifrst = min(ifrst,kc )
+                ilst  = max(ilst,kc)
+              end if
+            end do
+            kkc = ilst-ifrst+1
+            if (debug) then
+              write(0,*) 'Inner Before insertion: '
+              call printlist(hlhead,hlist)
+              write(0,*) 'Inner Inserting : ',kkc,':',ac%ia(ifrst:ilst)
+            end if
+            if (ilst >= ifrst) then
+!!$              write(0,*) j,i,' Inner inserting ',ac%ia(ifrst:ilst)
+              call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst))
+            end if
+
+            if (debug) then
+              write(0,*) 'Inner After insertion: ',hlhead
+              call printlist(hlhead,hlist)
+            end if
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+    if (abs(p(i)) < d_epstol) &
+         & p(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info)
+
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = dzero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    rwhead  = i
+    hlhead = -1
+
+    kkc = 0
+    ilst  = a%irp(i)-1
+    ifrst = a%irp(i)
+    do j = a%irp(i+1)-1, a%irp(i), -1
+      if (a%ja(j) < i) then
+        ilst = j
+        exit
+      end if
+    end do
+    kkc = ilst-ifrst+1
+
+    if (debug) then
+      write(0,*) 'Outer Before insertion: '
+      call printlist(hlhead,hlist)
+      write(0,*) 'Outer Inserting : ',kkc,':',a%ja(ifrst:ilst)
+    end if
+    if (kkc > 0 ) then
+      !call hlmerge(hlhead,hlist,bfr(1:kkc))
+      call hlmerge(hlhead,hlist,a%ja(ifrst:ilst))
+    end if
+    if (debug) then
+      write(0,*) 'Outer After insertion: ',hlhead
+      call printlist(hlhead,hlist)
+    end if
+
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='init_lists')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      mj      = hlhead
+      if (hlhead > 0)  then
+        hlhead = hlist(mj)
+        hlist(mj) = -1
+      end if
+      j = mj
+      if (j < 0)  exit outerw
+
+      izcr(j) = 0
+      if (j>=i) cycle outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = ac%icp(j)
+      ip2 = ac%icp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (ac%ia(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-q(i)/q(j))
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+        ifrst=w%icp(j)
+        ilst=w%icp(j+1)-1
+        call hlmerge(rwhead,rwlist,w%ia(ifrst:ilst))
+
+        do k=w%icp(j), w%icp(j+1)-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            ilst  = a%irp(kr)-1
+            ifrst = a%irp(kr+1)
+            kkc = 0
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              if ((a%ja(kc) < i).and.(a%ja(kc) >j)) then
+                ifrst = min(ifrst,kc )
+                ilst  = max(ilst,kc)
+              end if
+            end do
+            kkc = ilst-ifrst+1
+            if (debug) then
+              write(0,*) 'Inner Before insertion: '
+              call printlist(hlhead,hlist)
+              write(0,*) 'Inner Inserting : ',kkc,':',a%ja(ifrst:ilst)
+            end if
+
+            call hlmerge(hlhead,hlist,a%ja(ifrst:ilst))
+
+            if (debug) then
+              write(0,*) 'Inner After insertion: ',hlhead
+              call printlist(hlhead,hlist)
+            end if
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outerw
+    ip1 = ac%icp(i)
+    ip2 = ac%icp(i+1) - 1
+    do
+      if (ip2 < ip1 ) exit
+      if (ac%ia(ip2) <= n) exit
+      ip2 = ip2 -1
+    end do
+    nzra = max(0,ip2 - ip1 + 1)
+    q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+    if (abs(q(i)) < d_epstol) &
+         & q(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrz, w%ia,  info)
+    call psb_ensure_size(nzw+nzrz, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrz
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrz
+    nzw        = nzw + nzrz
+
+  end do
+
+contains
+
+  subroutine hlmerge(head,listv,vals)
+    integer(psb_ipk_), intent(inout) :: head, listv(:)
+    integer(psb_ipk_), intent(in)    :: vals(:)
+    integer(psb_ipk_) :: i,j,k, lh, lv, nv, vv, flh, ph
+
+    nv  = size(vals)
+    lh  = head
+    flh = -1
+    lv  = 1
+    if ((head < 0).and.(nv > 0)) then
+      ! Adjust head if empty
+      head = vals(lv)
+      lv = lv + 1
+    else if ((head > 0) .and. (nv >0)) then
+      ! Adjust head if first item less than it
+      if (head > vals(lv)) then
+        listv(vals(lv)) = head
+        head = vals(lv)
+        lv   = lv + 1
+      end if
+    end if
+
+    lh = head
+    ph = lh
+    do while ((lh > 0) .and. (lv <= nv))
+      if (lh == vals(lv)) then
+        lv = lv + 1
+      else if (lh > vals(lv)) then
+        listv(vals(lv)) = lh
+        listv(ph) = vals(lv)
+        lh = vals(lv)
+        lv = lv + 1
+      else
+        ph = lh
+        lh = listv(lh)
+      end if
+    end do
+    lh = ph
+    do while (lv <= nv)
+      listv(lh) = vals(lv)
+      lh = listv(lh)
+      lv = lv + 1
+    end do
+  end subroutine hlmerge
+
+
+  subroutine printlist(head,listv)
+    integer(psb_ipk_), intent(in) :: head, listv(:)
+    integer(psb_ipk_) :: li
+
+    li = head
+    do while (li > 0)
+      write(0,*) 'Item: ', li
+      li = listv(li)
+    end do
+  end subroutine printlist
+
+end subroutine psb_dsparse_biconjg_mlk
diff --git a/prec/impl/psb_dsparse_biconjg_s_ft_llk.F90 b/prec/impl/psb_dsparse_biconjg_s_ft_llk.F90
new file mode 100644
index 00000000..fbf4bc02
--- /dev/null
+++ b/prec/impl/psb_dsparse_biconjg_s_ft_llk.F90
@@ -0,0 +1,414 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_dsparse_biconjg_s_ft_llk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_d_ainv_tools_mod
+  use psb_d_biconjg_mod, psb_protect_name => psb_dsparse_biconjg_s_ft_llk
+
+  !
+  ! Left-looking variant, stabilized i.e. product by A is applied
+  ! to compute the diagonal elements.
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_d_csr_sparse_mat), intent(in)    :: a
+  type(psb_d_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_dpk_), intent(in)                :: sp_thresh
+  real(psb_dpk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:),iww(:)
+  real(psb_dpk_), allocatable    :: zval(:),val(:), q(:),  ww(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj, nzww,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw, nzrw
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_d_csc_sparse_mat) :: ac
+  real(psb_dpk_)     :: alpha, tmpq,tmpq2
+  character(len=20)  :: name='psb_orth_llk'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),iww(n),ww(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! Init pointers to:
+  !  ljr(i): last occupied column index within row  I
+  !  izcr(i): first occupied row index within column I
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = dzero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = dzero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < d_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = done
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = done
+  nzw       = 1
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = dzero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+    if (info == psb_success_) call rheap%init(info)
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j)<i) then
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+      izcr(j) = 0
+      if (j>=i) exit outer
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = w%icp(j)
+      ip2 = w%icp(j+1) - 1
+      nzra = max(0,ip2 - ip1 + 1)
+      nzww = 0
+      call psb_d_spvspm(done,a,nzra,w%ia(ip1:ip2),w%val(ip1:ip2),&
+           & dzero,nzww,iww,ww,info)
+
+      p(i) =  psb_spge_dot(nzww,iww,ww,zval)
+
+      ipz1 = z%icp(j)
+      ipz2 = z%icp(j+1)
+      nzrz = ipz2-ipz1
+      alpha = (-p(i)/p(j))
+!!$        write(0,*) ' p(i)/p(j) ',i,j,alpha,p(i),p(j)
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+
+        do k=ipz1, ipz2-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+!!$        izcr(j) = 0
+    end do outer
+
+    if (.false.) then
+      ! We can't do the proper thing until we have bot Z_i and W_i.
+      call a%csget(i,i,nzra,ia,ja,val,info)
+      call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+      p(i) = psb_spge_dot(nzra,ja,val,zval)
+      if (abs(p(i)) < d_epstol) &
+           & p(i) = 1.d-3
+    end if
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = dzero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+!!$      write(0,*) 'Inserting into heap ',i
+    if (info == psb_success_) call rheap%init(info)
+    do j = a%irp(i), a%irp(i+1)-1
+      if (a%ja(j)<i) then
+        if (info == psb_success_) call rheap%insert(a%ja(j),info)
+        izcr(a%ja(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      innerw: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outerw ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit innerw
+        end if
+      end do innerw
+      izcr(j) = 0
+      if (j>=i) exit outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      if (.false.) then
+        ip1 = ac%icp(j)
+        ip2 = ac%icp(j+1) - 1
+        do
+          if (ip2 < ip1 ) exit
+          if (ac%ia(ip2) <= n) exit
+          ip2 = ip2 -1
+        end do
+        nzra = max(0,ip2 - ip1 + 1)
+        q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+        ! !$          write(psb_err_unit,*) j,i,p(i)
+      else
+        ip1 = z%icp(j)
+        ip2 = z%icp(j+1) - 1
+        nzra = max(0,ip2 - ip1 + 1)
+        nzww = 0
+        call psb_d_spmspv(done,ac,nzra,z%ia(ip1:ip2),z%val(ip1:ip2),&
+             & dzero,nzww,iww,ww,info)
+
+        q(i) =  psb_spge_dot(nzww,iww,ww,zval)
+      end if
+
+      ipz1 = w%icp(j)
+      ipz2 = w%icp(j+1)
+      nzrz = ipz2-ipz1
+      alpha = (-q(i)/q(j))
+!!$        write(0,*) ' q(i)/q(j) ',i,j,alpha,q(i),q(j)
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+
+        do k=ipz1, ipz2-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              nextj=a%ja(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+!!$        izcr(j) = 0
+    end do outerw
+
+!!$      ip1 = ac%icp(i)
+!!$      ip2 = ac%icp(i+1) - 1
+!!$      do
+!!$        if (ip2 < ip1 ) exit
+!!$        if (ac%ia(ip2) <= n) exit
+!!$        ip2 = ip2 -1
+!!$      end do
+!!$      nzra = max(0,ip2 - ip1 + 1)
+!!$
+!!$      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+!!$      if (abs(q(i)) < d_epstol) &
+!!$           & q(i) = 1.d-3
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrw,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrw, w%ia,  info)
+    call psb_ensure_size(nzw+nzrw, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrw
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrw
+    nzw        = nzw + nzrw
+
+!!$      !
+!!$      ! Ok, now compute w_i^T A z_i
+!!$      !
+    nzww = 0
+    nzrz = z%icp(i+1)-z%icp(i)
+    ipz1 = z%icp(i)
+    call psb_d_spmspv(done,ac,&
+         & nzrz,z%ia(ipz1:ipz1+nzrz-1),z%val(ipz1:ipz1+nzrz-1),&
+         & dzero,nzww,iww,ww,info)
+    tmpq  = psb_spdot_srtd(nzww,iww,ww,nzrw,ia,val)
+    q(i) = tmpq
+!    if (tmpq <0) then
+!!$        write(0,*) 'On negative dot prod at ',i
+!!$        write(0,*) 'On negative dot prod a ',ia(1:nzrw),val(1:nzrw)
+!!$        write(0,*) 'On negative dot prod w ',iww(1:nzww),ww(1:nzww)
+!!$        ip1 = ac%icp(i)
+!!$        ip2 = ac%icp(i+1) - 1
+!!$        do
+!!$          if (ip2 < ip1 ) exit
+!!$          if (ac%ia(ip2) <= n) exit
+!!$          ip2 = ip2 -1
+!!$        end do
+!!$        nzra = max(0,ip2 - ip1 + 1)
+!!$        write(0,*) 'On negative dot prod a ',ac%ia(ip1:ip2),ac%val(ip1:ip2)
+!
+!    end if
+
+    if (abs(q(i)) < d_epstol) &
+         & q(i) = 1.d-3
+    p(i) = q(i)
+
+  end do
+
+end subroutine psb_dsparse_biconjg_s_ft_llk
diff --git a/prec/impl/psb_dsparse_biconjg_s_llk.F90 b/prec/impl/psb_dsparse_biconjg_s_llk.F90
new file mode 100644
index 00000000..72257d44
--- /dev/null
+++ b/prec/impl/psb_dsparse_biconjg_s_llk.F90
@@ -0,0 +1,248 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_dsparse_biconjg_s_llk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_d_ainv_tools_mod
+  use psb_d_biconjg_mod, psb_protect_name => psb_dsparse_biconjg_s_llk
+
+  !
+  ! Left-looking variant SYMMETRIC/HERMITIAN A. You have been warned!
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_d_csr_sparse_mat), intent(in)    :: a
+  type(psb_d_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_dpk_), intent(in)                :: sp_thresh
+  real(psb_dpk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:)
+  real(psb_dpk_), allocatable    :: zval(:),val(:), q(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw,kk
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_d_csc_sparse_mat) :: ac
+  real(psb_dpk_)     :: alpha, zvalmax
+  character(len=20)  :: name='psb_orth_llk'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = dzero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = dzero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < d_epstol) &
+       & p(1) = 1.d-3
+
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1) = 1
+  z%icp(2) = 2
+  z%ia(1)  = 1
+  z%val(1) = done
+  nzz      = 1
+  zvalmax  = done
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = dzero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j) < i) then
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+      if (debug) write(0,*) 'update loop, using row: ',j,i
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+!!$          if (abs(zval(kr)) > 1e16) then
+!!$            write(0,*) i,j,p(i),p(j),alpha,z%val(k),alpha*z%val(k),kr,zval(kr)
+!!$          end if
+          if (izkr(kr) == 0) then
+
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+!!$    if ((1761<=i).and.(i<=1780)) then
+!!$      write(0,*) 'Dot product terms at ',i,nzra
+!!$      do kk=1,nzra
+!!$        write(0,*) kk,ja(kk),val(kk),zval(ja(kk))
+!!$      end do
+!!$    end if
+
+    if (abs(p(i)) < d_epstol) &
+         & p(i) = 1.d-3
+
+    ! !$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+!!$      zvalmax = max(zvalmax,abs(val(j)))
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+!!$    write(0,*) ' Dot: ',i,p(i),zvalmax
+
+  end do
+
+  call z%cp_to_fmt(w,info)
+
+end subroutine psb_dsparse_biconjg_s_llk
diff --git a/prec/impl/psb_s_ainv_bld.f90 b/prec/impl/psb_s_ainv_bld.f90
new file mode 100644
index 00000000..0ed9e9e8
--- /dev/null
+++ b/prec/impl/psb_s_ainv_bld.f90
@@ -0,0 +1,225 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+subroutine psb_s_ainv_bld(a,alg,fillin,thresh,wmat,d,zmat,desc,info,blck,iscale)
+
+  use psb_base_mod
+  use psb_prec_const_mod
+  use psb_s_biconjg_mod
+
+  implicit none
+
+  ! Arguments
+  type(psb_sspmat_type), intent(in), target   :: a
+  integer(psb_ipk_), intent(in)                 :: fillin,alg
+  real(psb_spk_), intent(in)                  :: thresh
+  type(psb_sspmat_type), intent(inout)        :: wmat, zmat
+  real(psb_spk_), allocatable                  :: d(:)
+  Type(psb_desc_type), Intent(in)               :: desc
+  integer(psb_ipk_), intent(out)                :: info
+  type(psb_sspmat_type), intent(in), optional :: blck
+  integer(psb_ipk_), intent(in), optional       :: iscale
+  !
+  integer(psb_ipk_)             :: i, nztota, err_act, n_row, nrow_a
+  type(psb_s_coo_sparse_mat)  :: acoo
+  type(psb_s_csr_sparse_mat)  :: acsr
+  type(psb_sspmat_type)       :: atmp
+  real(psb_spk_), allocatable :: arws(:), acls(:)
+  real(psb_spk_), allocatable  :: pq(:), ad(:)
+  integer(psb_ipk_)             :: debug_level, debug_unit
+  type(psb_ctxt_type)           :: ctxt
+  integer(psb_ipk_)             :: np,me
+  integer(psb_ipk_)             :: nzrmax, iscale_
+  real(psb_spk_)              :: sp_thresh
+  real(psb_spk_)               :: weight
+  character(len=20)             :: name, ch_err
+
+
+  info = psb_success_
+  name = 'psb_sainv_bld'
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  ctxt        = psb_cd_get_context(desc)
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' start'
+
+  iscale_ = psb_ilu_scale_none_
+  if (present(iscale)) iscale_ = iscale
+  weight = sone
+  !
+  ! Check the memory available to hold the W and Z factors
+  ! and allocate it if needed
+  !
+  nrow_a = a%get_nrows()
+  nztota = a%get_nzeros()
+
+  if (present(blck)) then
+    nztota = nztota + blck%get_nzeros()
+  end if
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),&
+       & ': out get_nnzeros',nrow_a,nztota,&
+       & a%get_nrows(),a%get_ncols(),a%get_nzeros()
+
+
+  n_row  = desc%get_local_rows()
+  allocate(pq(n_row),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  nzrmax    = fillin
+  sp_thresh = thresh
+
+  !
+  ! Ok, let's start first with Z (i.e. Upper)
+  !
+  call a%csclip(acoo,info,imax=n_row,jmax=n_row)
+  call acsr%mv_from_coo(acoo,info)
+  select case(iscale_)
+  case(psb_ilu_scale_none_)
+    ! Ok, do nothing.
+
+  case(psb_ilu_scale_maxval_)
+    weight = acsr%maxval()
+    weight = sone/weight
+    call acsr%scal(weight,info)
+
+  case(psb_ilu_scale_arcsum_)
+    allocate(arws(n_row),acls(n_row),ad(n_row),stat=info)
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    end if
+    call acsr%arwsum(arws)
+    call acsr%aclsum(acls)
+    ad(1:n_row) = sqrt(sqrt(arws(1:n_row)*acls(1:n_row)))
+    ad(1:n_row) = sone/ad(1:n_row)
+    call acsr%scal(ad,info,side='L')
+    call acsr%scal(ad,info,side='R')
+  case default
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale')
+    goto 9999
+  end select
+
+  !
+  ! Here for the actual workhorses.
+  ! Only biconjg is surviving for now....
+  !
+    call psb_sparse_biconjg(alg,n_row,acsr,pq,&
+         &   zmat,wmat,nzrmax,sp_thresh,info)
+  ! Done. Hopefully....
+
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='sparse_biconjg')
+    goto 9999
+  end if
+  call atmp%mv_from(acsr)
+
+  !
+  ! Is this right???
+  !
+  do i=1, n_row
+    if (abs(pq(i)) < d_epstol) then
+      pq(i) = sone
+    else
+      pq(i) = sone/pq(i)
+    end if
+  end do
+
+  select case(iscale_)
+  case(psb_ilu_scale_none_)
+    ! Ok, do nothing.
+  case(psb_ilu_scale_maxval_)
+    pq(:) = pq(:)*weight
+
+  case(psb_ilu_scale_arcsum_)
+    call zmat%scal(ad,info,side='L')
+    call wmat%scal(ad,info,side='R')
+
+  case default
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale')
+    goto 9999
+  end select
+
+  call psb_move_alloc(pq,d,info)
+
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' end'
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_s_ainv_bld
diff --git a/prec/impl/psb_s_bjacprec_impl.f90 b/prec/impl/psb_s_bjacprec_impl.f90
index 72ac6048..d4a4fd17 100644
--- a/prec/impl/psb_s_bjacprec_impl.f90
+++ b/prec/impl/psb_s_bjacprec_impl.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,13 +27,13 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 
 subroutine psb_s_bjac_dump(prec,info,prefix,head)
   use psb_base_mod
   use psb_s_bjacprec, psb_protect_name => psb_s_bjac_dump
-  implicit none 
+  implicit none
   class(psb_s_bjac_prec_type), intent(in) :: prec
   integer(psb_ipk_), intent(out)                    :: info
   character(len=*), intent(in), optional  :: prefix,head
@@ -43,13 +43,13 @@ subroutine psb_s_bjac_dump(prec,info,prefix,head)
   character(len=80)  :: prefix_
   character(len=120) :: fname ! len should be at least 20 more than
 
-  !  len of prefix_ 
+  !  len of prefix_
 
   info = 0
   ctxt = prec%get_ctxt()
   call psb_info(ctxt,iam,np)
 
-  if (present(prefix)) then 
+  if (present(prefix)) then
     prefix_ = trim(prefix(1:min(len(prefix),len(prefix_))))
   else
     prefix_ = "dump_fact_s"
@@ -74,7 +74,7 @@ end subroutine psb_s_bjac_dump
 subroutine psb_s_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_s_bjacprec, psb_protect_name => psb_s_bjac_apply_vect
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_s_bjac_prec_type), intent(inout)  :: prec
   real(psb_spk_),intent(in)         :: alpha,beta
@@ -118,12 +118,12 @@ subroutine psb_s_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
   n_row = desc_data%get_local_rows()
   n_col = desc_data%get_local_cols()
 
-  if (x%get_nrows() < n_row) then 
+  if (x%get_nrows() < n_row) then
     info = 36; ierr(1) = 2; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
   end if
-  if (y%get_nrows() < n_row) then 
+  if (y%get_nrows() < n_row) then
     info = 36; ierr(1) = 3; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
@@ -140,9 +140,9 @@ subroutine psb_s_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
   end if
 
 
-  if (n_col <= size(work)) then 
+  if (n_col <= size(work)) then
     ww => work(1:n_col)
-    if ((4*n_col+n_col) <= size(work)) then 
+    if ((4*n_col+n_col) <= size(work)) then
       aux => work(n_col+1:)
     else
       allocate(aux(4*n_col),stat=info)
@@ -152,19 +152,19 @@ subroutine psb_s_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
     allocate(ww(n_col),aux(4*n_col),stat=info)
   endif
 
-  if (info /= psb_success_) then 
+  if (info /= psb_success_) then
     call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-    goto 9999      
+    goto 9999
   end if
 
   do_alloc_wrk = .not.prec%is_allocated_wrk()
   if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v)
 
   associate (wv => prec%wrk(1), wv1 => prec%wrk(2))
-    
+
     select case(prec%iprcparm(psb_f_type_))
-    case(psb_f_ilu_n_) 
-      
+    case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_)
+
       select case(trans_)
       case('N')
         call psb_spsm(sone,prec%av(psb_l_pr_),x,szero,wv,desc_data,info,&
@@ -172,31 +172,58 @@ subroutine psb_s_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
         if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_u_pr_),wv,&
              & beta,y,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_, work=aux)
-        
+
       case('T')
         call psb_spsm(sone,prec%av(psb_u_pr_),x,szero,wv,desc_data,info,&
              & trans=trans_,scale='L',diag=prec%dv,choice=psb_none_, work=aux)
         if(info == psb_success_)  call psb_spsm(alpha,prec%av(psb_l_pr_),wv,&
              & beta,y,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_,work=aux)
-        
+
       case('C')
-        
+
         call psb_spsm(sone,prec%av(psb_u_pr_),x,szero,wv,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_, work=aux)
-        
+
         call wv1%mlt(sone,prec%dv,wv,szero,info,conjgx=trans_)
-        
+
         if(info == psb_success_)  call psb_spsm(alpha,prec%av(psb_l_pr_),wv1,&
              & beta,y,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_,work=aux)
-        
+
+      end select
+      if (info /= psb_success_) then
+        ch_err="psb_spsm"
+        goto 9999
+      end if
+
+    case(psb_f_ainv_,psb_f_invt_,psb_f_invk_)
+      ! Application of approximate inverse preconditioner, just some spmm
+
+      select case(trans_)
+      case('N')
+        call psb_spmm(sone,prec%av(psb_l_pr_),x,szero,wv,desc_data,info,&
+             & trans=trans_,work=aux,doswap=.false.)
+
+        if (info == psb_success_) call wv1%mlt(sone,prec%dv,wv,szero,info)
+        if(info == psb_success_) &
+             & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1,&
+             & beta,y,desc_data,info, trans=trans_, work=aux,doswap=.false.)
+
+       case('T','C')
+         call psb_spmm(sone,prec%av(psb_l_pr_),x,szero,wv,desc_data,info,&
+              & trans=trans_,work=aux,doswap=.false.)
+         if (info == psb_success_) call wv1%mlt(sone,prec%dv,wv,szero,info)
+         if (info == psb_success_) &
+              & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1, &
+              & beta,y,desc_data,info,trans=trans_,work=aux,doswap=.false.)
+
       end select
-      if (info /= psb_success_) then 
+      if (info /= psb_success_) then
         ch_err="psb_spsm"
         goto 9999
       end if
-      
+
     case default
       info = psb_err_internal_error_
       call psb_errpush(info,name,a_err='Invalid factorization')
@@ -204,12 +231,12 @@ subroutine psb_s_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
     end select
 
   end associate
-  
+
   call psb_halo(y,desc_data,info,data=psb_comm_mov_)
 
   if (do_alloc_wrk) call prec%free_wrk(info)
-  if (n_col <= size(work)) then 
-    if ((4*n_col+n_col) <= size(work)) then 
+  if (n_col <= size(work)) then
+    if ((4*n_col+n_col) <= size(work)) then
     else
       deallocate(aux)
     endif
@@ -231,7 +258,7 @@ end subroutine psb_s_bjac_apply_vect
 subroutine psb_s_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_s_bjacprec, psb_protect_name => psb_s_bjac_apply
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_s_bjac_prec_type), intent(inout)  :: prec
   real(psb_spk_),intent(in)         :: alpha,beta
@@ -244,6 +271,7 @@ subroutine psb_s_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   ! Local variables
   integer(psb_ipk_) :: n_row,n_col
   real(psb_spk_), pointer :: ww(:), aux(:)
+  type(psb_d_vect_type) :: tx,ty
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
   integer(psb_ipk_) :: err_act, ierr(5)
@@ -273,12 +301,12 @@ subroutine psb_s_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   n_row = desc_data%get_local_rows()
   n_col = desc_data%get_local_cols()
 
-  if (size(x) < n_row) then 
+  if (size(x) < n_row) then
     info = 36; ierr(1) = 2; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
   end if
-  if (size(y) < n_row) then 
+  if (size(y) < n_row) then
     info = 36; ierr(1) = 3; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
@@ -295,29 +323,29 @@ subroutine psb_s_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   end if
 
 
-  if (n_col <= size(work)) then 
+  if (n_col <= size(work)) then
     ww => work(1:n_col)
-    if ((4*n_col+n_col) <= size(work)) then 
+    if ((4*n_col+n_col) <= size(work)) then
       aux => work(n_col+1:)
     else
       allocate(aux(4*n_col),stat=info)
-      if (info /= psb_success_) then 
+      if (info /= psb_success_) then
         call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-        goto 9999      
+        goto 9999
       end if
 
     endif
   else
     allocate(ww(n_col),aux(4*n_col),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
   endif
 
 
   select case(prec%iprcparm(psb_f_type_))
-  case(psb_f_ilu_n_) 
+  case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_)
 
     select case(trans_)
     case('N')
@@ -344,11 +372,42 @@ subroutine psb_s_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
            & trans=trans_,scale='U',choice=psb_none_,work=aux)
 
     end select
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       ch_err="psb_spsm"
       goto 9999
     end if
 
+  case(psb_f_ainv_,psb_f_invt_,psb_f_invk_)
+    ! Application of approximate inverse preconditioner, just some spmm
+
+    select case(trans_)
+
+      case('N')
+        call psb_spmm(sone,prec%av(psb_l_pr_),x,szero,ww,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+        ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row)
+        if (info == psb_success_) &
+            & call psb_spmm(alpha,prec%av(psb_u_pr_),ww,beta,y,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+
+      case('T')
+        call psb_spmm(sone,prec%av(psb_u_pr_),x,szero,ww,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+        ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row)
+        if (info == psb_success_) &
+            & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+
+      case('C')
+        call psb_spmm(sone,prec%av(psb_u_pr_),x,szero,ww,desc_data,info,&
+             & trans=trans_,work=aux,doswap=.false.)
+        ww(1:n_row) = ww(1:n_row) * (prec%dv%v%v(1:n_row))
+        if (info == psb_success_) &
+             & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,&
+             & trans=trans_,work=aux,doswap=.false.)
+
+    end select
+
 
   case default
     info = psb_err_internal_error_
@@ -358,8 +417,8 @@ subroutine psb_s_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
 
   call psb_halo(y,desc_data,info,data=psb_comm_mov_)
 
-  if (n_col <= size(work)) then 
-    if ((4*n_col+n_col) <= size(work)) then 
+  if (n_col <= size(work)) then
+    if ((4*n_col+n_col) <= size(work)) then
     else
       deallocate(aux)
     endif
@@ -397,11 +456,26 @@ subroutine psb_s_bjac_precinit(prec,info)
     call psb_Errpush(info,name)
     goto 9999
   end if
+  call psb_realloc(psb_rfpsz,prec%rprcparm,info)
+  if (info /= psb_success_) then
+    info = psb_err_alloc_dealloc_
+    call psb_Errpush(info,name)
+    goto 9999
+  end if
 
   prec%iprcparm(:)                = 0
   prec%iprcparm(psb_p_type_)      = psb_bjac_
   prec%iprcparm(psb_f_type_)      = psb_f_ilu_n_
   prec%iprcparm(psb_ilu_fill_in_) = 0
+  prec%iprcparm(psb_ilu_ialg_)    = psb_ilu_n_
+  prec%iprcparm(psb_ilu_scale_)   = psb_ilu_scale_none_
+  prec%iprcparm(psb_inv_fillin_)  = 0
+  prec%iprcparm(psb_ainv_alg_)    = psb_ainv_llk_
+
+
+  prec%rprcparm(:)                = 0
+  prec%rprcparm(psb_fact_eps_)    = 1E-1_psb_spk_
+  prec%rprcparm(psb_inv_thresh_)  = 1E-1_psb_spk_
 
 
   call psb_erractionrestore(err_act)
@@ -416,7 +490,8 @@ end subroutine psb_s_bjac_precinit
 subroutine psb_s_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
 
   use psb_base_mod
-  use psb_prec_mod, only : psb_ilu_fct
+  use psb_s_ilu_fact_mod
+  use psb_s_ainv_fact_mod
   use psb_s_bjacprec, psb_protect_name => psb_s_bjac_precbld
   Implicit None
 
@@ -428,12 +503,13 @@ subroutine psb_s_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
   class(psb_s_base_vect_type), intent(in), optional  :: vmold
   class(psb_i_base_vect_type), intent(in), optional  :: imold
 
-  !     .. Local Scalars ..                                                       
-  integer(psb_ipk_) ::    i, m
+  !     .. Local Scalars ..
+  integer(psb_ipk_) ::    i, m, ialg, fill_in, iscale, inv_fill, iinvalg
   integer(psb_ipk_) ::    ierr(5)
   character ::        trans, unitd
-  type(psb_s_csr_sparse_mat), allocatable  :: lf, uf
+  type(psb_sspmat_type), allocatable :: lf, uf
   real(psb_spk_), allocatable :: dd(:)
+  real(psb_spk_) :: fact_eps, inv_thresh
   integer(psb_ipk_) :: nztota,  err_act, n_row, nrow_a,n_col, nhalo
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -462,19 +538,114 @@ subroutine psb_s_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
   trans = 'N'
   unitd = 'U'
 
+  ! We check if all the information contained in the preconditioner structure
+  ! are meaningful, otherwise we give an error and get out of the build
+  ! procedure
+  ialg    = prec%iprcparm(psb_ilu_ialg_)  ! Integer for ILU type algorithm
+  iinvalg = prec%iprcparm(psb_ainv_alg_)  ! Integer for AINV type algorithm
+  iscale = prec%iprcparm(psb_ilu_scale_)  ! Integer for scaling of matrix
+  fact_eps = prec%rprcparm(psb_fact_eps_) ! Drop-tolerance for factorization
+  inv_thresh = prec%rprcparm(psb_inv_thresh_) ! Drop-tolerance for inverse
+  fill_in = prec%iprcparm(psb_ilu_fill_in_) ! Fill-In for factorization
+  inv_fill = prec%iprcparm(psb_inv_fillin_) ! Fill-In for inverse factorization
+
+  ! Check if the type of scaling is known, pay attention that not all the
+  ! scalings make sense for all the factorization, if something that does not
+  ! make sense is required the factorization routine will fail in an
+  ! unnrecoverable way.
+  if ((iscale == psb_ilu_scale_none_).or.&
+      (iscale == psb_ilu_scale_maxval_).or.&
+      (iscale == psb_ilu_scale_diag_).or.&
+      (iscale == psb_ilu_scale_arwsum_).or.&
+      (iscale == psb_ilu_scale_aclsum_).or.&
+      (iscale == psb_ilu_scale_arcsum_)) then
+  ! Do nothing: admissible request
+  else
+    info=psb_err_from_subroutine_
+    ch_err='psb_ilu_scale_'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+  select case (prec%iprcparm(psb_f_type_))
+    case (psb_f_ainv_)
+      ! Check if the variant for the AINV is known to the library
+      select case (iinvalg)
+      case(psb_ainv_llk_,psb_ainv_s_llk_,psb_ainv_s_ft_llk_,psb_ainv_llk_noth_,&
+        & psb_ainv_mlk_)
+        ! Do nothing these are okay
+      case default
+        info=psb_err_from_subroutine_
+        ch_err='psb_ainv_alg_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end select ! AINV Variant
+      ! Check if the drop-tolerance make sense
+      if( inv_thresh > 1) then
+        info=psb_err_from_subroutine_
+        ch_err='psb_inv_thresh_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+    case (psb_f_ilu_t_)
+      if (fact_eps > 1) then
+        ! Check if the drop-tolerance make sense
+        info=psb_err_from_subroutine_
+        ch_err='psb_fact_eps_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+    case (psb_f_invt_)
+      ! Check both tolerances
+      if (fact_eps > 1) then
+        info=psb_err_from_subroutine_
+        ch_err='psb_fact_eps_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+      if( inv_thresh > 1) then
+        info=psb_err_from_subroutine_
+        ch_err='psb_inv_thresh_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+    case default
+  end select
+
+
+  ! Checks relative to the fill-in parameters
+  if (prec%iprcparm(psb_f_type_) == psb_f_ilu_n_) then
+    if(fill_in < 0) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilu_fill_in_'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    else if (fill_in == 0) then
+      ! If the requested level of fill is equal to zero, we default to the
+      ! specialized ILU(0) routine
+      prec%iprcparm(psb_f_type_) = psb_f_ilu_n_
+    end if
+  end if
+  ! If no limit on the fill_in is required we allow every fill, this is needed
+  ! since this quantity is used to allocate the auxiliary vectors for the
+  ! factorization
+  if (inv_fill <= 0) inv_fill = m
+
+  ! Select on the type of factorization to be used
   select case(prec%iprcparm(psb_f_type_))
 
-  case(psb_f_ilu_n_) 
+  case(psb_f_ilu_n_)
+  ! ILU(0) Factorization: the total number of nonzeros of the factorized matrix
+  ! is equal to the one of the input matrix
 
-    if (allocated(prec%av)) then 
-      if (size(prec%av) < psb_bp_ilu_avsz) then 
-        do i=1,size(prec%av) 
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
           call prec%av(i)%free()
         enddo
         deallocate(prec%av,stat=info)
       endif
     end if
-    if (.not.allocated(prec%av)) then 
+    if (.not.allocated(prec%av)) then
       allocate(prec%av(psb_max_avsz),stat=info)
       if (info /= psb_success_) then
         call psb_errpush(psb_err_alloc_dealloc_,name)
@@ -501,27 +672,27 @@ subroutine psb_s_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
     end if
 
     allocate(dd(n_row),stat=info)
-    if (info == psb_success_) then 
+    if (info == psb_success_) then
       allocate(prec%dv, stat=info)
-      if (info == 0) then 
-        if (present(vmold)) then 
-          allocate(prec%dv%v,mold=vmold,stat=info) 
-        else       
-          allocate(psb_s_base_vect_type :: prec%dv%v,stat=info) 
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_s_base_vect_type :: prec%dv%v,stat=info)
         end if
       end if
     end if
 
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     endif
-    ! This is where we have no renumbering, thus no need 
-    call psb_ilu_fct(a,lf,uf,dd,info)
+    ! This is where we have no renumbering, thus no need
+    call psb_ilu0_fact(ialg,a,lf,uf,dd,info)
 
     if(info == psb_success_) then
-      call prec%av(psb_l_pr_)%mv_from(lf)
-      call prec%av(psb_u_pr_)%mv_from(uf)
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
       call prec%av(psb_l_pr_)%set_asb()
       call prec%av(psb_u_pr_)%set_asb()
       call prec%av(psb_l_pr_)%trim()
@@ -530,12 +701,375 @@ subroutine psb_s_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
       ! call move_alloc(dd,prec%d)
     else
       info=psb_err_from_subroutine_
-      ch_err='psb_ilu_fct'
+      ch_err='psb_ilu0_fact'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
 
-  case(psb_f_none_) 
+  case(psb_f_ilu_k_)
+  ! ILU(N) Incomplete LU-factorization with N levels of fill-in. Depending on
+  ! the type of the variant of the algorithm the may be forgotten or added to
+  ! the diagonal (MILU)
+
+  if (allocated(prec%av)) then
+    if (size(prec%av) < psb_bp_ilu_avsz) then
+      do i=1,size(prec%av)
+        call prec%av(i)%free()
+      enddo
+      deallocate(prec%av,stat=info)
+    endif
+  end if
+  if (.not.allocated(prec%av)) then
+    allocate(prec%av(psb_max_avsz),stat=info)
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_alloc_dealloc_,name)
+      goto 9999
+    end if
+  endif
+
+  nrow_a = desc_a%get_local_rows()
+  nztota = a%get_nzeros()
+
+  n_col  = desc_a%get_local_cols()
+  nhalo  = n_col-nrow_a
+  n_row  = nrow_a
+
+  allocate(lf,uf,stat=info)
+  if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+  if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+  if(info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    ch_err='psb_sp_all'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+  allocate(dd(n_row),stat=info)
+  if (info == psb_success_) then
+    allocate(prec%dv, stat=info)
+    if (info == 0) then
+      if (present(vmold)) then
+        allocate(prec%dv%v,mold=vmold,stat=info)
+      else
+        allocate(psb_s_base_vect_type :: prec%dv%v,stat=info)
+      end if
+    end if
+  end if
+
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  endif
+  ! This is where we have no renumbering, thus no need
+  call psb_iluk_fact(fill_in,ialg,a,lf,uf,dd,info)
+
+  if(info == psb_success_) then
+    call prec%av(psb_l_pr_)%mv_from(lf%a)
+    call prec%av(psb_u_pr_)%mv_from(uf%a)
+    call prec%av(psb_l_pr_)%set_asb()
+    call prec%av(psb_u_pr_)%set_asb()
+    call prec%av(psb_l_pr_)%trim()
+    call prec%av(psb_u_pr_)%trim()
+    call prec%dv%bld(dd)
+    ! call move_alloc(dd,prec%d)
+  else
+    info=psb_err_from_subroutine_
+    ch_err='psb_iluk_fact'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+  case(psb_f_ilu_t_)
+  ! ILU(N,E) Incomplete LU factorization with thresholding and level of fill
+
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_s_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! This is where we have no renumbering, thus no need
+    call psb_ilut_fact(fill_in,fact_eps,a,lf,uf,dd,info,iscale=iscale)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_ainv_)
+    ! Approximate Inverse Factorizations based on variants of the incomplete
+    ! biconjugation algorithms
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_s_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! Computing the factorization
+    call psb_ainv_fact(a,iinvalg,inv_fill,inv_thresh,lf,dd,uf,desc_a,info,iscale=iscale)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_invk_)
+    ! Approximate Inverse Factorizations based on the sparse inversion of
+    ! triangular factors of an ILU(n) factorization
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_s_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! Computing the factorization
+    call psb_invk_fact(a,fill_in, inv_fill,lf,dd,uf,desc_a,info)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_invt_)
+    ! Approximate Inverse Factorizations based on the sparse inversion of
+    ! triangular factors of an ILU(eps) factorization
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_s_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! Computing the factorization
+    call psb_invt_fact(a,fill_in,inv_fill,fact_eps,inv_thresh,lf,dd,uf,&
+      & desc_a,info)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_none_)
     info=psb_err_from_subroutine_
     ch_err='Inconsistent prec  psb_f_none_'
     call psb_errpush(info,name,a_err=ch_err)
@@ -548,7 +1082,7 @@ subroutine psb_s_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
     goto 9999
   end select
 
-  if (present(amold)) then 
+  if (present(amold)) then
     call prec%av(psb_l_pr_)%cscnv(info,mold=amold)
     call prec%av(psb_u_pr_)%cscnv(info,mold=amold)
   end if
@@ -567,8 +1101,8 @@ subroutine psb_s_bjac_precseti(prec,what,val,info)
   Implicit None
 
   class(psb_s_bjac_prec_type),intent(inout) :: prec
-  integer(psb_ipk_), intent(in)                      :: what 
-  integer(psb_ipk_), intent(in)                      :: val 
+  integer(psb_ipk_), intent(in)                      :: what
+  integer(psb_ipk_), intent(in)                      :: val
   integer(psb_ipk_), intent(out)                     :: info
   integer(psb_ipk_) :: err_act, nrow
   character(len=20)  :: name='s_bjac_precset'
@@ -576,34 +1110,33 @@ subroutine psb_s_bjac_precseti(prec,what,val,info)
   call psb_erractionsave(err_act)
 
   info = psb_success_
-  if (.not.allocated(prec%iprcparm)) then 
+  if (.not.allocated(prec%iprcparm)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
   end if
 
   select case(what)
-  case (psb_f_type_) 
-    if (prec%iprcparm(psb_p_type_) /= psb_bjac_) then 
-      write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',&
-           & prec%iprcparm(psb_p_type_),&
-           & 'ignoring user specification'
-      return
-    endif
+  case (psb_f_type_)
     prec%iprcparm(psb_f_type_)     = val
 
-  case (psb_ilu_fill_in_) 
-    if ((prec%iprcparm(psb_p_type_) /= psb_bjac_).or.&
-         & (prec%iprcparm(psb_f_type_) /= psb_f_ilu_n_)) then 
-      write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',&
-           & prec%iprcparm(psb_p_type_),&
-           & 'ignoring user specification'
-      return
-    endif
+  case (psb_ilu_fill_in_)
     prec%iprcparm(psb_ilu_fill_in_) = val
 
+  case (psb_ilu_ialg_)
+    prec%iprcparm(psb_ilu_ialg_) = val
+
+  case (psb_ilu_scale_)
+    prec%iprcparm(psb_ilu_scale_) = val
+
+  case (psb_ainv_alg_)
+    prec%iprcparm(psb_ainv_alg_) = val
+
+  case(psb_inv_fillin_)
+    prec%iprcparm(psb_inv_fillin_) = val
+
   case default
-    write(psb_err_unit,*) 'WHAT is invalid, ignoring user specification'
+    write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what
 
   end select
 
@@ -613,3 +1146,45 @@ subroutine psb_s_bjac_precseti(prec,what,val,info)
 9999 call psb_error_handler(err_act)
   return
 end subroutine psb_s_bjac_precseti
+
+subroutine psb_s_bjac_precsetr(prec,what,val,info)
+
+  use psb_base_mod
+  use psb_s_bjacprec, psb_protect_name => psb_s_bjac_precsetr
+  Implicit None
+
+  class(psb_s_bjac_prec_type),intent(inout) :: prec
+  integer(psb_ipk_), intent(in)                      :: what
+  real(psb_spk_), intent(in)                       :: val
+  integer(psb_ipk_), intent(out)                     :: info
+  integer(psb_ipk_) :: err_act, nrow
+  character(len=20)  :: name='s_bjac_precset'
+
+  call psb_erractionsave(err_act)
+
+  info = psb_success_
+  if (.not.allocated(prec%rprcparm)) then
+    info = 1124
+    call psb_errpush(info,name,a_err="preconditioner")
+    goto 9999
+  end if
+
+  select case(what)
+
+  case (psb_fact_eps_)
+    prec%rprcparm(psb_fact_eps_) = val
+
+  case (psb_inv_thresh_)
+    prec%rprcparm(psb_inv_thresh_) = val
+
+  case default
+    write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what
+
+  end select
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_s_bjac_precsetr
diff --git a/prec/impl/psb_s_ilu0_fact.f90 b/prec/impl/psb_s_ilu0_fact.f90
index bc2dd5ab..b6f442e9 100644
--- a/prec/impl/psb_s_ilu0_fact.f90
+++ b/prec/impl/psb_s_ilu0_fact.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,21 +27,21 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
+!
 !    Moved here from MLD2P4, original copyright below.
-!  
-!  
-!   
+!
+!
+!
 !                             MLD2P4  version 2.2
 !    MultiLevel Domain Decomposition Parallel Preconditioners Package
 !               based on PSBLAS (Parallel Sparse BLAS version 3.5)
-!    
-!    (C) Copyright 2008-2018 
-!  
-!        Salvatore Filippone  
-!        Pasqua D'Ambra   
-!        Daniela di Serafino   
-!   
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -53,7 +53,7 @@
 !      3. The name of the MLD2P4 group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -65,8 +65,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!  
+!
+!
 ! File: psb_silu0_fact.f90
 !
 ! Subroutine: psb_silu0_fact
@@ -75,7 +75,7 @@
 !
 !  This routine computes either the ILU(0) or the MILU(0) factorization of
 !  the diagonal blocks of a distributed matrix. These factorizations are used
-!  to build the 'base preconditioner' (block-Jacobi preconditioner/solver, 
+!  to build the 'base preconditioner' (block-Jacobi preconditioner/solver,
 !  Additive Schwarz preconditioner) corresponding to a given level of a
 !  multilevel preconditioner.
 !
@@ -83,10 +83,10 @@
 !    Y. Saad, Iterative Methods for Sparse Linear Systems, Second Edition,
 !    SIAM, 2003, Chapter 10.
 !
-!  The local matrix is stored into a and blck, as specified in the description 
-!  of the arguments below. The storage format for both the L and U factors is CSR. 
-!  The diagonal of the U factor is stored separately (actually, the inverse of the 
-!  diagonal entries is stored; this is then managed in the solve stage associated 
+!  The local matrix is stored into a and blck, as specified in the description
+!  of the arguments below. The storage format for both the L and U factors is CSR.
+!  The diagonal of the U factor is stored separately (actually, the inverse of the
+!  diagonal entries is stored; this is then managed in the solve stage associated
 !  to the ILU(0)/MILU(0) factorization).
 !
 !  The routine copies and factors "on the fly" from a and blck into l (L factor),
@@ -94,7 +94,7 @@
 !
 !  This implementation of ILU(0)/MILU(0) is faster than the implementation in
 !  psb_ziluk_fct (the latter routine performs the more general ILU(k)/MILU(k)).
-!  
+!
 !
 ! Arguments:
 !    ialg    -  integer, input.
@@ -121,7 +121,7 @@
 !               factorization.
 !               Note: its allocation is managed by the calling routine psb_ilu_bld,
 !               hence it cannot be only intent(out).
-!    info    -  integer, output.                         
+!    info    -  integer, output.
 !               Error code.
 !    blck    -  type(psb_sspmat_type), input, optional, target.
 !               The sparse matrix structure containing the remote rows of the
@@ -129,7 +129,7 @@
 !               to build an Additive Schwarz base preconditioner with overlap
 !               greater than 0. If the overlap is 0 or the matrix has been reordered
 !               (see psb_fact_bld), then blck is empty.
-!  
+!
 subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck, upd)
 
   use psb_base_mod
@@ -157,30 +157,30 @@ subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck, upd)
   info = psb_success_
   call psb_erractionsave(err_act)
 
-  ! 
+  !
   ! Point to / allocate memory for the incomplete factorization
   !
-  if (present(blck)) then 
+  if (present(blck)) then
     blck_ => blck
   else
-    allocate(blck_,stat=info) 
-    if (info == psb_success_) call blck_%csall(izero,izero,info,ione) 
+    allocate(blck_,stat=info)
+    if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR')
     if (info /= psb_success_) then
       info=psb_err_from_subroutine_
-      ch_err='csall'
+      ch_err='allocate'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
   endif
-  if (present(upd)) then 
-    upd_ = psb_toupper(upd) 
+  if (present(upd)) then
+    upd_ = psb_toupper(upd)
   else
     upd_ = 'F'
   end if
-  
+
   m = a%get_nrows() + blck_%get_nrows()
   if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.&
-       & (m > size(d))    ) then 
+       & (m > size(d))    ) then
     write(0,*) 'Wrong allocation status for L,D,U? ',&
          & l%get_nrows(),size(d),u%get_nrows()
     info = -1
@@ -212,12 +212,12 @@ subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck, upd)
   call u%set_triangle()
   call u%set_unit()
   call u%set_upper()
-  
+
   !
   ! Nullify pointer / deallocate memory
   !
-  if (present(blck)) then 
-    blck_ => null() 
+  if (present(blck)) then
+    blck_ => null()
   else
     call blck_%free()
     if(info.ne.0) then
@@ -226,7 +226,7 @@ subroutine psb_silu0_fact(ialg,a,l,u,d,info,blck, upd)
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
-    deallocate(blck_) 
+    deallocate(blck_)
   endif
 
   call psb_erractionrestore(err_act)
@@ -243,9 +243,9 @@ contains
   ! Version:    real
   ! Note: internal subroutine of psb_silu0_fact.
   !
-  !  This routine computes either the ILU(0) or the MILU(0) factorization of the 
-  !  diagonal blocks of a distributed matrix. 
-  !  These factorizations are used to build the 'base preconditioner' 
+  !  This routine computes either the ILU(0) or the MILU(0) factorization of the
+  !  diagonal blocks of a distributed matrix.
+  !  These factorizations are used to build the 'base preconditioner'
   !  (block-Jacobi preconditioner/solver, Additive Schwarz
   !  preconditioner) corresponding to a given level of a multilevel preconditioner.
   !
@@ -257,7 +257,7 @@ contains
   !
   !  The routine copies and factors "on the fly" from the sparse matrix structures a
   !  and b into the arrays lval, uval, d (L, U without its diagonal, diagonal of U).
-  !  
+  !
   !
   ! Arguments:
   !    ialg    -  integer, input.
@@ -296,7 +296,7 @@ contains
   !               according to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor in lval, according to the CSR storage format. 
+  !               of the L factor in lval, according to the CSR storage format.
   !    uval   -   real(psb_spk_), dimension(:), input/output.
   !               The U factor in the incomplete factorization.
   !               The entries of U are stored according to the CSR format.
@@ -305,18 +305,18 @@ contains
   !               according to the CSR storage format.
   !    uirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the U factor in uval, according to the CSR storage format. 
+  !               of the U factor in uval, according to the CSR storage format.
   !    l1      -  integer, output.
   !               The number of nonzero entries in lval.
   !    l2      -  integer, output.
   !               The number of nonzero entries in uval.
-  !    info    -  integer, output.           
+  !    info    -  integer, output.
   !               Error code.
   !
   subroutine psb_silu0_factint(ialg,a,b,&
        & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,info)
 
-    implicit none 
+    implicit none
 
     ! Arguments
     integer(psb_ipk_), intent(in)     :: ialg
@@ -332,7 +332,7 @@ contains
     real(psb_spk_) :: dia,temp
     integer(psb_ipk_), parameter :: nrb=16
     type(psb_s_coo_sparse_mat) :: trw
-    integer(psb_ipk_)   :: int_err(5) 
+    integer(psb_ipk_)   :: int_err(5)
     character(len=20)   :: name, ch_err
 
     name='psb_silu0_factint'
@@ -346,10 +346,10 @@ contains
 
     select case(ialg)
     case(psb_ilu_n_,psb_milu_n_)
-      ! Ok 
+      ! Ok
     case default
       info=psb_err_input_asize_invalid_i_
-      call psb_errpush(info,name,& 
+      call psb_errpush(info,name,&
            & i_err=(/ione,ialg,izero,izero,izero/))
       goto 9999
     end select
@@ -363,7 +363,7 @@ contains
     end if
     m = ma+mb
 
-    if (psb_toupper(upd) == 'F' ) then 
+    if (psb_toupper(upd) == 'F' ) then
       lirp(1) = 1
       uirp(1) = 1
       l1      = 0
@@ -379,14 +379,14 @@ contains
         if (i <= ma) then
           !
           ! Copy the i-th local row of the matrix, stored in a,
-          ! into lval/d(i)/uval 
+          ! into lval/d(i)/uval
           !
           call ilu_copyin(i,ma,a,i,ione,m,l1,lja,lval,&
                & d(i),l2,uja,uval,ktrw,trw,upd)
         else
           !
           ! Copy the i-th local row of the matrix, stored in b
-          ! (as (i-ma)-th row), into lval/d(i)/uval 
+          ! (as (i-ma)-th row), into lval/d(i)/uval
           !
           call ilu_copyin(i-ma,mb,b,i,ione,m,l1,lja,lval,&
                & d(i),l2,uja,uval,ktrw,trw,upd)
@@ -437,7 +437,7 @@ contains
               !
               dia = dia - temp*uval(jj)
               cycle updateloop
-              !     
+              !
             else if (j > i) then
               !
               ! search u(i,*) (i-th row of U) for a matching index j
@@ -454,23 +454,23 @@ contains
                 end if
               enddo
             end if
-            !     
-            ! If we get here we missed the cycle updateloop, which means 
+            !
+            ! If we get here we missed the cycle updateloop, which means
             ! that this entry does not match; thus we accumulate on the
             ! diagonal for MILU(0).
             !
-            if (ialg == psb_milu_n_) then 
+            if (ialg == psb_milu_n_) then
               dia = dia - temp*uval(jj)
             end if
           enddo updateloop
         enddo
-        !     
+        !
         ! Check the pivot size
-        !     
+        !
         if (abs(dia) < s_epstol) then
           !
           ! Too small pivot: unstable factorization
-          !     
+          !
           info = psb_err_pivot_too_small_
           int_err(1) = i
           write(ch_err,'(g20.10)') abs(dia)
@@ -493,7 +493,7 @@ contains
     else
       write(0,*) 'Update not implemented '
       info = 31
-      call psb_errpush(info,name,& 
+      call psb_errpush(info,name,&
            & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd)
       goto 9999
 
@@ -515,7 +515,7 @@ contains
   ! Version:    real
   ! Note: internal subroutine of psb_silu0_fact
   !
-  !  This routine copies a row of a sparse matrix A, stored in the psb_sspmat_type 
+  !  This routine copies a row of a sparse matrix A, stored in the psb_sspmat_type
   !  data structure a, into the arrays lval and uval and into the scalar variable
   !  dia, corresponding to the lower and upper triangles of A and to the diagonal
   !  entry of the row, respectively. The entries in lval and uval are stored
@@ -529,14 +529,14 @@ contains
   !
   !  The routine is used by psb_silu0_factint in the computation of the ILU(0)/MILU(0)
   !  factorization of a local sparse matrix.
-  !  
+  !
   !  TODO: modify the routine to allow copying into output L and U that are
   !  already filled with indices; this would allow computing an ILU(k) pattern,
-  !  then use the ILU(0) internal for subsequent calls with the same pattern. 
+  !  then use the ILU(0) internal for subsequent calls with the same pattern.
   !
   ! Arguments:
   !    i       -  integer, input.
-  !               The local index of the row to be extracted from  the 
+  !               The local index of the row to be extracted from  the
   !               sparse matrix structure a.
   !    m       -  integer, input.
   !               The number of rows of the local matrix stored into a.
@@ -570,13 +570,13 @@ contains
   !               to the CSR storage format.
   !    uval   -  real(psb_spk_), dimension(:), input/output.
   !               The array where the entries of the row corresponding to the
-  !               upper triangle are copied. 
+  !               upper triangle are copied.
   !    ktrw    -  integer, input/output.
   !               The index identifying the last entry taken from the
   !               staging buffer trw. See below.
   !    trw     -  type(psb_sspmat_type), input/output.
   !               A staging buffer. If the matrix A is not in CSR format, we use
-  !               the psb_sp_getblk routine and store its output in trw; when we 
+  !               the psb_sp_getblk routine and store its output in trw; when we
   !               need to call psb_sp_getblk we do it for a block of rows, and then
   !               we consume them from trw in successive calls to this routine,
   !               until we empty the buffer. Thus we will make a call to psb_sp_getblk
@@ -608,10 +608,10 @@ contains
     if (psb_errstatus_fatal()) then
       info = psb_err_internal_error_; goto 9999
     end if
-    if (psb_toupper(upd) == 'F') then 
+    if (psb_toupper(upd) == 'F') then
 
-      select type(aa => a%a) 
-      type is (psb_s_csr_sparse_mat) 
+      select type(aa => a%a)
+      type is (psb_s_csr_sparse_mat)
 
         !
         ! Take a fast shortcut if the matrix is stored in CSR format
@@ -633,19 +633,19 @@ contains
           end if
         enddo
 
-      class default 
+      class default
 
         !
-        ! Otherwise use psb_sp_getblk, slower but able (in principle) of 
+        ! Otherwise use psb_sp_getblk, slower but able (in principle) of
         ! handling any format. In this case, a block of rows is extracted
         ! instead of a single row, for performance reasons, and these
         ! rows are copied one by one into lval, dia, uval, through
         ! successive calls to ilu_copyin.
         !
 
-        if ((mod(i,nrb) == 1).or.(nrb == 1)) then 
+        if ((mod(i,nrb) == 1).or.(nrb == 1)) then
           irb = min(m-i+1,nrb)
-          call aa%csget(i,i+irb-1,trw,info) 
+          call aa%csget(i,i+irb-1,trw,info)
           if(info /= psb_success_) then
             info=psb_err_from_subroutine_
             ch_err='csget'
@@ -656,7 +656,7 @@ contains
         end if
 
         nz = trw%get_nzeros()
-        do 
+        do
           if (ktrw > nz) exit
           if (trw%ia(ktrw) > i) exit
           k = trw%ja(ktrw)
@@ -680,7 +680,7 @@ contains
 
       write(0,*) 'Update not implemented '
       info = 31
-      call psb_errpush(info,name,& 
+      call psb_errpush(info,name,&
            & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd)
       goto 9999
 
diff --git a/prec/impl/psb_s_iluk_fact.f90 b/prec/impl/psb_s_iluk_fact.f90
index 4b9f1f3f..6129663b 100644
--- a/prec/impl/psb_s_iluk_fact.f90
+++ b/prec/impl/psb_s_iluk_fact.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,21 +27,21 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
+!
 !    Moved here from MLD2P4, original copyright below.
-!  
-!  
-!   
+!
+!
+!
 !                             MLD2P4  version 2.2
 !    MultiLevel Domain Decomposition Parallel Preconditioners Package
 !               based on PSBLAS (Parallel Sparse BLAS version 3.5)
-!    
-!    (C) Copyright 2008-2018 
-!  
-!        Salvatore Filippone  
-!        Pasqua D'Ambra   
-!        Daniela di Serafino   
-!   
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -53,7 +53,7 @@
 !      3. The name of the MLD2P4 group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -65,8 +65,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!  
+!
+!
 ! File: psb_siluk_fact.f90
 !
 ! Subroutine: psb_siluk_fact
@@ -74,7 +74,7 @@
 ! Contains:   psb_siluk_factint, iluk_copyin, iluk_fact, iluk_copyout.
 !
 !  This routine computes either the ILU(k) or the MILU(k) factorization of the
-!  diagonal blocks of a distributed matrix. These factorizations are used to 
+!  diagonal blocks of a distributed matrix. These factorizations are used to
 !  build the 'base preconditioner' (block-Jacobi preconditioner/solver,
 !  Additive Schwarz preconditioner) corresponding to a certain level of a
 !  multilevel preconditioner.
@@ -88,7 +88,7 @@
 !  U factors is CSR. The diagonal of the U factor is stored separately (actually,
 !  the inverse of the diagonal entries is stored; this is then managed in the solve
 !  stage associated to the ILU(k)/MILU(k) factorization).
-!  
+!
 !
 ! Arguments:
 !    fill_in -  integer, input.
@@ -118,7 +118,7 @@
 !               factorization.
 !               Note: its allocation is managed by the calling routine psb_ilu_bld,
 !               hence it cannot be only intent(out).
-!    info    -  integer, output.                    
+!    info    -  integer, output.
 !               Error code.
 !    blck    -  type(psb_sspmat_type), input, optional, target.
 !               The sparse matrix structure containing the remote rows of the
@@ -126,7 +126,7 @@
 !               to build an Additive Schwarz base preconditioner with overlap
 !               greater than 0. If the overlap is 0 or the matrix has been reordered
 !               (see psb_fact_bld), then blck does not contain any row.
-!  
+!
 subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck)
 
   use psb_base_mod
@@ -143,7 +143,7 @@ subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck)
   real(psb_spk_), intent(inout)    ::  d(:)
   !     Local Variables
   integer(psb_ipk_)   :: l1, l2, m, err_act
-  
+
   type(psb_sspmat_type), pointer  :: blck_
   type(psb_s_csr_sparse_mat)      :: ll, uu
   character(len=20)   :: name, ch_err
@@ -152,17 +152,17 @@ subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck)
   info = psb_success_
   call psb_erractionsave(err_act)
 
-  ! 
+  !
   ! Point to / allocate memory for the incomplete factorization
   !
-  if (present(blck)) then 
+  if (present(blck)) then
     blck_ => blck
   else
-    allocate(blck_,stat=info) 
-    if (info == psb_success_) call blck_%csall(izero,izero,info,ione) 
+    allocate(blck_,stat=info)
+    if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') 
     if (info /= psb_success_) then
       info=psb_err_from_subroutine_
-      ch_err='csall'
+      ch_err='allocate'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
@@ -170,7 +170,7 @@ subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck)
 
   m = a%get_nrows() + blck_%get_nrows()
   if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.&
-       & (m > size(d))    ) then 
+       & (m > size(d))    ) then
     write(0,*) 'Wrong allocation status for L,D,U? ',&
          & l%get_nrows(),size(d),u%get_nrows()
     info = -1
@@ -203,15 +203,15 @@ subroutine psb_siluk_fact(fill_in,ialg,a,l,u,d,info,blck)
   call u%set_triangle()
   call u%set_unit()
   call u%set_upper()
-  
+
   !
   ! Nullify pointer / deallocate memory
   !
-  if (present(blck)) then 
-    blck_ => null() 
+  if (present(blck)) then
+    blck_ => null()
   else
     call blck_%free()
-    deallocate(blck_,stat=info) 
+    deallocate(blck_,stat=info)
     if(info.ne.0) then
       info=psb_err_from_subroutine_
       ch_err='psb_sp_free'
@@ -280,7 +280,7 @@ contains
   !               according to the CSR storage format.
   !    lia2    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor in laspk, according to the CSR storage format. 
+  !               of the L factor in laspk, according to the CSR storage format.
   !    uval   -   real(psb_spk_), dimension(:), input/output.
   !               The U factor in the incomplete factorization.
   !               The entries of U are stored according to the CSR format.
@@ -289,12 +289,12 @@ contains
   !               according to the CSR storage format.
   !    uirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the U factor in uval, according to the CSR storage format. 
+  !               of the U factor in uval, according to the CSR storage format.
   !    l1      -  integer, output
   !               The number of nonzero entries in laspk.
   !    l2      -  integer, output
   !               The number of nonzero entries in uval.
-  !    info    -  integer, output.           
+  !    info    -  integer, output.
   !               Error code.
   !
   subroutine psb_siluk_factint(fill_in,ialg,a,b,&
@@ -304,7 +304,7 @@ contains
 
     implicit none
 
-  ! Arguments 
+  ! Arguments
     integer(psb_ipk_), intent(in)                 :: fill_in, ialg
     type(psb_sspmat_type),intent(in)              :: a,b
     integer(psb_ipk_),intent(inout)               :: l1,l2,info
@@ -330,14 +330,14 @@ contains
 
     select case(ialg)
     case(psb_ilu_n_,psb_milu_n_)
-      ! Ok 
+      ! Ok
     case default
       info=psb_err_input_asize_invalid_i_
       call psb_errpush(info,name,&
            & i_err=(/itwo,ialg,izero,izero,izero/))
       goto 9999
     end select
-    if (fill_in < 0) then 
+    if (fill_in < 0) then
       info=psb_err_input_asize_invalid_i_
       call psb_errpush(info,name, &
            & i_err=(/ione,fill_in,izero,izero,izero/))
@@ -349,7 +349,7 @@ contains
     m  = ma+mb
 
     !
-    ! Allocate a temporary buffer for the iluk_copyin function 
+    ! Allocate a temporary buffer for the iluk_copyin function
     !
 
     call trw%allocate(izero,izero,ione)
@@ -361,7 +361,7 @@ contains
       call psb_errpush(info,name,a_err='psb_sp_all')
       goto 9999
     end if
-    
+
     l1=0
     l2=0
     lirp(1) = 1
@@ -381,34 +381,34 @@ contains
     uplevs(:)  = m+1
     row(:)     = szero
     rowlevs(:) = -(m+1)
-    
+
     !
     ! Cycle over the matrix rows
     !
     do i = 1, m
-      
+
       !
       ! At each iteration of the loop we keep in a heap the column indices
       ! affected by the factorization. The heap is initialized and filled
       ! in the iluk_copyin routine, and updated during the elimination, in
       ! the iluk_fact routine. The heap is ideal because at each step we need
       ! the lowest index, but we also need to insert new items, and the heap
-      ! allows to do both in log time. 
+      ! allows to do both in log time.
       !
       d(i) = szero
       if (i<=ma) then
         !
-        ! Copy into trw the i-th local row of the matrix, stored in a 
-        ! 
+        ! Copy into trw the i-th local row of the matrix, stored in a
+        !
         call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info)
       else
         !
         ! Copy into trw the i-th local row of the matrix, stored in b
-        ! (as (i-ma)-th row) 
-        ! 
+        ! (as (i-ma)-th row)
+        !
         call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info)
       endif
-      
+
       ! Do an elimination step on the current row. It turns out we only
       ! need to keep track of fill levels for the upper triangle, hence we
       ! do not have a lowlevs variable.
@@ -417,7 +417,7 @@ contains
            & d,uja,uirp,uval,uplevs,nidx,idxs,info)
       !
       ! Copy the row into lval/d(i)/uval
-      ! 
+      !
       if (info == psb_success_) call iluk_copyout(fill_in,ialg,i,m,row,rowlevs,nidx,idxs,&
            & l1,l2,lja,lirp,lval,d,uja,uirp,uval,uplevs,info)
       if (info /= psb_success_) then
@@ -474,11 +474,11 @@ contains
   !
   !  This routine is used by psb_siluk_factint in the computation of the
   !  ILU(k)/MILU(k) factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    i       -  integer, input.
-  !               The local index of the row to be extracted from the 
+  !               The local index of the row to be extracted from the
   !               sparse matrix structure a.
   !    m       -  integer, input.
   !               The number of rows of the local matrix stored into a.
@@ -510,7 +510,7 @@ contains
   !               staging buffer trw. See below.
   !    trw     -  type(psb_sspmat_type), input/output.
   !               A staging buffer. If the matrix A is not in CSR format, we use
-  !               the psb_sp_getblk routine and store its output in trw; when we 
+  !               the psb_sp_getblk routine and store its output in trw; when we
   !               need to call psb_sp_getblk we do it for a block of rows, and then
   !               we consume them from trw in successive calls to this routine,
   !               until we empty the buffer. Thus we will make a call to psb_sp_getblk
@@ -521,8 +521,8 @@ contains
     use psb_base_mod
 
     implicit none
-  
-  ! Arguments 
+
+  ! Arguments
     type(psb_sspmat_type), intent(in)         :: a
     type(psb_s_coo_sparse_mat), intent(inout) :: trw
     integer(psb_ipk_), intent(in)           :: i,m,jmin,jmax
@@ -542,17 +542,17 @@ contains
     if (psb_errstatus_fatal()) then
       info = psb_err_internal_error_; goto 9999
     end if
-    call heap%init(info) 
+    call heap%init(info)
 
-    select type (aa=> a%a) 
-    type is (psb_s_csr_sparse_mat) 
+    select type (aa=> a%a)
+    type is (psb_s_csr_sparse_mat)
       !
       ! Take a fast shortcut if the matrix is stored in CSR format
       !
-      
+
       do j = aa%irp(i), aa%irp(i+1) - 1
         k          = aa%ja(j)
-        if ((jmin<=k).and.(k<=jmax)) then 
+        if ((jmin<=k).and.(k<=jmax)) then
           row(k)     = aa%val(j)
           rowlevs(k) = 0
           call heap%insert(k,info)
@@ -562,14 +562,14 @@ contains
     class default
 
       !
-      ! Otherwise use psb_sp_getblk, slower but able (in principle) of 
+      ! Otherwise use psb_sp_getblk, slower but able (in principle) of
       ! handling any format. In this case, a block of rows is extracted
       ! instead of a single row, for performance reasons, and these
       ! rows are copied one by one into the array row, through successive
       ! calls to iluk_copyin.
       !
 
-      if ((mod(i,nrb) == 1).or.(nrb == 1)) then 
+      if ((mod(i,nrb) == 1).or.(nrb == 1)) then
         irb = min(m-i+1,nrb)
         call aa%csget(i,i+irb-1,trw,info)
         if (info /= psb_success_) then
@@ -581,11 +581,11 @@ contains
         ktrw=1
       end if
       nz = trw%get_nzeros()
-      do 
+      do
         if (ktrw > nz) exit
         if (trw%ia(ktrw) > i) exit
         k          = trw%ja(ktrw)
-        if ((jmin<=k).and.(k<=jmax)) then 
+        if ((jmin<=k).and.(k<=jmax)) then
           row(k)     = trw%val(ktrw)
           rowlevs(k) = 0
           call heap%insert(k,info)
@@ -674,10 +674,10 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
   ! Arguments
-    type(psb_i_heap), intent(inout)               :: heap 
+    type(psb_i_heap), intent(inout)               :: heap
     integer(psb_ipk_), intent(in)                 :: i, fill_in
     integer(psb_ipk_), intent(inout)              :: nidx,info
     integer(psb_ipk_), intent(inout)              :: rowlevs(:)
@@ -690,7 +690,7 @@ contains
     real(psb_spk_)   :: rwk
 
     info = psb_success_
-    if (.not.allocated(idxs)) then 
+    if (.not.allocated(idxs)) then
       allocate(idxs(200),stat=info)
       if (info /= psb_success_) return
     endif
@@ -702,34 +702,34 @@ contains
     !
     do
       ! Beware: (iret < 0) means that the heap is empty, not an error.
-      call heap%get_first(k,iret) 
+      call heap%get_first(k,iret)
       if (iret < 0) return
 
-      ! 
+      !
       ! Just in case an index has been put on the heap more than once.
       !
       if (k == lastk) cycle
 
-      lastk = k 
+      lastk = k
       nidx = nidx + 1
-      if (nidx>size(idxs)) then 
+      if (nidx>size(idxs)) then
         call psb_realloc(nidx+psb_heap_resize,idxs,info)
         if (info /= psb_success_) return
       end if
       idxs(nidx) = k
-      
-      if ((row(k) /= szero).and.(rowlevs(k) <= fill_in).and.(k<i)) then 
+
+      if ((row(k) /= szero).and.(rowlevs(k) <= fill_in).and.(k<i)) then
         !
         ! Note: since U is scaled while copying it out (see iluk_copyout),
         ! we can use rwk in the update below
-        ! 
+        !
         rwk    = row(k)
-        row(k) = row(k) * d(k)    ! d(k) == 1/a(k,k)          
+        row(k) = row(k) * d(k)    ! d(k) == 1/a(k,k)
         lrwk   = rowlevs(k)
-          
+
         do jj=uirp(k),uirp(k+1)-1
           j = uja(jj)
-          if (j<=k) then 
+          if (j<=k) then
             info = -i
             return
           endif
@@ -738,9 +738,9 @@ contains
           ! The fill levels are initialized to a negative value. If we find
           ! one, it means that it is an as yet untouched index, so we need
           ! to insert it; otherwise it is already on the heap, there is no
-          ! need to insert it more than once. 
+          ! need to insert it more than once.
           !
-          if (rowlevs(j)<0) then 
+          if (rowlevs(j)<0) then
             call heap%insert(j,info)
             if (info /= psb_success_) return
             rowlevs(j) = abs(rowlevs(j))
@@ -781,7 +781,7 @@ contains
   !
   !  This routine is used by psb_siluk_factint in the computation of the
   !  ILU(k)/MILU(k) factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    fill_in -  integer, input.
@@ -822,14 +822,14 @@ contains
   !               to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor, copied in lval row by row (see 
+  !               of the L factor, copied in lval row by row (see
   !               psb_siluk_factint), according to the CSR storage format.
   !    lval   -  real(psb_spk_), dimension(:), input/output.
   !               The array where the entries of the row corresponding to the
   !               L factor are copied.
   !    d       -  real(psb_spk_), dimension(:), input/output.
   !               The array where the inverse of the diagonal entry of the
-  !               row is copied (only d(i) is used by the routine). 
+  !               row is copied (only d(i) is used by the routine).
   !    uja    -  integer, dimension(:), input/output.
   !               The column indices of the nonzero entries of the U factor
   !               copied in uval row by row (see psb_siluk_factint), according
@@ -850,7 +850,7 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
     ! Arguments
     integer(psb_ipk_), intent(in)                  :: fill_in, ialg, i, m, nidx
@@ -880,17 +880,17 @@ contains
       if (j<i) then
         !
         ! Copy the lower part of the row
-        !  
-        if (rowlevs(j) <= fill_in) then 
-          l1     = l1 + 1 
+        !
+        if (rowlevs(j) <= fill_in) then
+          l1     = l1 + 1
           if (size(lval) < l1) then
-            ! 
+            !
             ! Figure out a good reallocation size!
-            ! 
+            !
             isz  = (max((l1/i)*m,int(1.2*l1),l1+100))
-            call psb_realloc(isz,lval,info) 
-            if (info == psb_success_) call psb_realloc(isz,lja,info) 
-            if (info /= psb_success_) then 
+            call psb_realloc(isz,lval,info)
+            if (info == psb_success_) call psb_realloc(isz,lja,info)
+            if (info /= psb_success_) then
               info=psb_err_from_subroutine_
               call psb_errpush(info,name,a_err='Allocate')
               goto 9999
@@ -914,26 +914,26 @@ contains
         !
         ! Copy the diagonal entry of the row and re-initialize
         ! row(j) and rowlevs(j)
-        !  
-        d(i)       = d(i) + row(i) 
+        !
+        d(i)       = d(i) + row(i)
         row(i)     = szero
         rowlevs(i) = -(m+1)
 
-      else if (j>i) then 
+      else if (j>i) then
         !
         ! Copy the upper part of the row
-        ! 
-        if (rowlevs(j) <= fill_in) then 
-          l2     = l2 + 1 
-          if (size(uval) < l2) then 
-            ! 
+        !
+        if (rowlevs(j) <= fill_in) then
+          l2     = l2 + 1
+          if (size(uval) < l2) then
+            !
             ! Figure out a good reallocation size!
             !
             isz  = max((l2/i)*m,int(1.2*l2),l2+100)
-            call psb_realloc(isz,uval,info) 
-            if (info == psb_success_) call psb_realloc(isz,uja,info) 
+            call psb_realloc(isz,uval,info)
+            if (info == psb_success_) call psb_realloc(isz,uja,info)
             if (info == psb_success_) call psb_realloc(isz,uplevs,info,pad=(m+1))
-            if (info /= psb_success_) then 
+            if (info /= psb_success_) then
               info=psb_err_from_subroutine_
               call psb_errpush(info,name,a_err='Allocate')
               goto 9999
@@ -941,11 +941,11 @@ contains
           end if
           uja(l2)   = j
           uval(l2)  = row(j)
-          uplevs(l2) = rowlevs(j) 
+          uplevs(l2) = rowlevs(j)
         else if (ialg == psb_milu_n_) then
           !
           ! MILU(k): add discarded entries to the diagonal one
-          ! 
+          !
           d(i) = d(i) + row(j)
         end if
         !
@@ -963,13 +963,13 @@ contains
     lirp(i+1) = l1 + 1
     uirp(i+1) = l2 + 1
 
-    !     
+    !
     ! Check the pivot size
     !
     if (abs(d(i)) < s_epstol) then
       !
       ! Too small pivot: unstable factorization
-      !     
+      !
       info = psb_err_pivot_too_small_
       int_err(1) = i
       write(ch_err,'(g20.10)') d(i)
diff --git a/prec/impl/psb_s_ilut_fact.f90 b/prec/impl/psb_s_ilut_fact.f90
index 33b4374c..43cacf41 100644
--- a/prec/impl/psb_s_ilut_fact.f90
+++ b/prec/impl/psb_s_ilut_fact.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,21 +27,21 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
+!
 !    Moved here from MLD2P4, original copyright below.
-!  
-!  
-!   
+!
+!
+!
 !                             MLD2P4  version 2.2
 !    MultiLevel Domain Decomposition Parallel Preconditioners Package
 !               based on PSBLAS (Parallel Sparse BLAS version 3.5)
-!    
-!    (C) Copyright 2008-2018 
-!  
-!        Salvatore Filippone  
-!        Pasqua D'Ambra   
-!        Daniela di Serafino   
-!   
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -53,7 +53,7 @@
 !      3. The name of the MLD2P4 group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -65,8 +65,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!  
+!
+!
 ! File: psb_silut_fact.f90
 !
 ! Subroutine: psb_silut_fact
@@ -87,7 +87,7 @@
 !  CSR. The diagonal of the U factor is stored separately (actually, the
 !  inverse of the diagonal entries is stored; this is then managed in the
 !  solve stage associated to the ILU(k,t) factorization).
-!  
+!
 !
 ! Arguments:
 !    fill_in -  integer, input.
@@ -114,7 +114,7 @@
 !               factorization.
 !               Note: its allocation is managed by the calling routine psb_ilu_bld,
 !               hence it cannot be only intent(out).
-!    info    -  integer, output.                    
+!    info    -  integer, output.
 !               Error code.
 !    blck    -  type(psb_sspmat_type), input, optional, target.
 !               The sparse matrix structure containing the remote rows of the
@@ -122,9 +122,9 @@
 !               to build an Additive Schwarz base preconditioner with overlap
 !               greater than 0. If the overlap is 0 or the matrix has been reordered
 !               (see psb_fact_bld), then blck does not contain any row.
-!  
+!
 subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
-  
+
   use psb_base_mod
   use psb_s_ilu_fact_mod, psb_protect_name => psb_silut_fact
 
@@ -141,7 +141,7 @@ subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
   integer(psb_ipk_), intent(in), optional       :: iscale
   !     Local Variables
   integer(psb_ipk_)   ::  l1, l2, m, err_act, iscale_
-  
+
   type(psb_sspmat_type), pointer  :: blck_
   type(psb_s_csr_sparse_mat)       :: ll, uu
   real(psb_spk_)      :: scale
@@ -151,34 +151,34 @@ subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
   info = psb_success_
   call psb_erractionsave(err_act)
 
-  if (fill_in < 0) then 
+  if (fill_in < 0) then
     info=psb_err_input_asize_invalid_i_
     call psb_errpush(info,name, &
          & i_err=(/ione,fill_in,izero,izero,izero/))
     goto 9999
   end if
-  ! 
+  !
   ! Point to / allocate memory for the incomplete factorization
   !
-  if (present(blck)) then 
+  if (present(blck)) then
     blck_ => blck
   else
-    allocate(blck_,stat=info) 
-    if (info == psb_success_) call blck_%csall(izero,izero,info,ione) 
+    allocate(blck_,stat=info)
+    if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR')
     if (info /= psb_success_) then
       info=psb_err_from_subroutine_
-      ch_err='csall'
+      ch_err='allocate'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
   endif
-  if (present(iscale)) then 
+  if (present(iscale)) then
     iscale_ = iscale
   else
     iscale_ = psb_ilu_scale_none_
   end if
-  
-  select case(iscale_) 
+
+  select case(iscale_)
   case(psb_ilu_scale_none_)
     scale = sone
   case(psb_ilu_scale_maxval_)
@@ -189,10 +189,10 @@ subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
     call psb_errpush(info,name,i_err=(/ione*9,iscale_,izero,izero,izero/))
     goto 9999
   end select
-  
+
   m = a%get_nrows() + blck_%get_nrows()
   if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.&
-       & (m > size(d))    ) then 
+       & (m > size(d))    ) then
     write(0,*) 'Wrong allocation status for L,D,U? ',&
          & l%get_nrows(),size(d),u%get_nrows()
     info = -1
@@ -225,15 +225,15 @@ subroutine psb_silut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
   call u%set_triangle()
   call u%set_unit()
   call u%set_upper()
-  
+
   !
   ! Nullify pointer / deallocate memory
   !
-  if (present(blck)) then 
-    blck_ => null() 
+  if (present(blck)) then
+    blck_ => null()
   else
     call blck_%free()
-    deallocate(blck_,stat=info) 
+    deallocate(blck_,stat=info)
     if(info.ne.0) then
       info=psb_err_from_subroutine_
       ch_err='psb_sp_free'
@@ -298,7 +298,7 @@ contains
   !               according to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor in lval, according to the CSR storage format. 
+  !               of the L factor in lval, according to the CSR storage format.
   !    uval   -  real(psb_spk_), dimension(:), input/output.
   !               The U factor in the incomplete factorization.
   !               The entries of U are stored according to the CSR format.
@@ -307,12 +307,12 @@ contains
   !               according to the CSR storage format.
   !    uirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the U factor in uval, according to the CSR storage format. 
+  !               of the U factor in uval, according to the CSR storage format.
   !    l1      -  integer, output
   !               The number of nonzero entries in lval.
   !    l2      -  integer, output
   !               The number of nonzero entries in uval.
-  !    info    -  integer, output.           
+  !    info    -  integer, output.
   !               Error code.
   !
   subroutine psb_silut_factint(fill_in,thres,a,b,&
@@ -320,7 +320,7 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
   ! Arguments
     integer(psb_ipk_), intent(in)                 :: fill_in
@@ -355,7 +355,7 @@ contains
     m  = ma+mb
 
     !
-    ! Allocate a temporary buffer for the ilut_copyin function 
+    ! Allocate a temporary buffer for the ilut_copyin function
     !
     call trw%allocate(izero,izero,ione)
     if (info == psb_success_) call psb_ensure_size(m+1,lirp,info)
@@ -366,7 +366,7 @@ contains
       call psb_errpush(info,name,a_err='psb_sp_all')
       goto 9999
     end if
-    
+
     l1=0
     l2=0
     lirp(1) = 1
@@ -396,10 +396,10 @@ contains
       ! in the ilut_copyin function, and updated during the elimination, in
       ! the ilut_fact routine. The heap is ideal because at each step we need
       ! the lowest index, but we also need to insert new items, and the heap
-      ! allows to do both in log time. 
+      ! allows to do both in log time.
       !
       d(i) = czero
-      if (i<=ma) then 
+      if (i<=ma) then
         call ilut_copyin(i,ma,a,i,ione,m,nlw,nup,jmaxup,nrmi,weight,&
              & row,heap,ktrw,trw,info)
       else
@@ -414,7 +414,7 @@ contains
            & d,uja,uirp,uval,nidx,idxs,info)
       !
       ! Copy the row into lval/d(i)/uval
-      ! 
+      !
       if (info == psb_success_) call ilut_copyout(fill_in,thres,i,m,&
            & nlw,nup,jmaxup,nrmi,row,nidx,idxs,&
            & l1,l2,lja,lirp,lval,d,uja,uirp,uval,info)
@@ -429,7 +429,7 @@ contains
     !
     ! Adjust diagonal accounting for scale factor
     !
-    if (weight /= sone) then 
+    if (weight /= sone) then
       d(1:m) = d(1:m)*weight
     end if
 
@@ -469,7 +469,7 @@ contains
   !  - storing into a heap the column indices of the nonzero entries of the copied
   !    row;
   !  - computing the column index of the first entry with maximum absolute value
-  !    in the part of the row belonging to the upper triangle;            
+  !    in the part of the row belonging to the upper triangle;
   !  - computing the 2-norm of the row.
   !  The output array row is such that it contains a full row of A, i.e. it contains
   !  also the zero entries of the row. This is useful for the elimination step
@@ -482,11 +482,11 @@ contains
   !
   !  This routine is used by psb_silut_factint in the computation of the ILU(k,t)
   !  factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    i       -  integer, input.
-  !               The local index of the row to be extracted from the 
+  !               The local index of the row to be extracted from the
   !               sparse matrix structure a.
   !    m       -  integer, input.
   !               The number of rows of the local matrix stored into a.
@@ -533,7 +533,7 @@ contains
   !               staging buffer trw. See below.
   !    trw     -  type(psb_sspmat_type), input/output.
   !               A staging buffer. If the matrix A is not in CSR format, we use
-  !               the psb_sp_getblk routine and store its output in trw; when we 
+  !               the psb_sp_getblk routine and store its output in trw; when we
   !               need to call psb_sp_getblk we do it for a block of rows, and then
   !               we consume them from trw in successive calls to this routine,
   !               until we empty the buffer. Thus we will make a call to psb_sp_getblk
@@ -542,7 +542,7 @@ contains
   subroutine ilut_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,&
        & nrmi,weight,row,heap,ktrw,trw,info)
     use psb_base_mod
-    implicit none 
+    implicit none
     type(psb_sspmat_type), intent(in)         :: a
     type(psb_s_coo_sparse_mat), intent(inout) :: trw
     integer(psb_ipk_), intent(in)               :: i, m,jmin,jmax,jd
@@ -584,29 +584,29 @@ contains
     dmaxup = szero
     nrmi   = szero
 
-    select type (aa=> a%a) 
-    type is (psb_s_csr_sparse_mat) 
+    select type (aa=> a%a)
+    type is (psb_s_csr_sparse_mat)
       !
       ! Take a fast shortcut if the matrix is stored in CSR format
       !
 
       do j = aa%irp(i), aa%irp(i+1) - 1
         k          = aa%ja(j)
-        if ((jmin<=k).and.(k<=jmax)) then 
-          row(k)     = aa%val(j)*weight 
+        if ((jmin<=k).and.(k<=jmax)) then
+          row(k)     = aa%val(j)*weight
           call heap%insert(k,info)
           if (info /= psb_success_) exit
-          if (k<jd) nlw = nlw + 1 
-          if (k>jd) then 
+          if (k<jd) nlw = nlw + 1
+          if (k>jd) then
             nup = nup + 1
-            if (abs(row(k))>dmaxup) then 
+            if (abs(row(k))>dmaxup) then
               jmaxup = k
               dmaxup = abs(row(k))
             end if
           end if
         end if
       end do
-      if (info /= psb_success_) then 
+      if (info /= psb_success_) then
         info=psb_err_from_subroutine_
         call psb_errpush(info,name,a_err='psb_insert_heap')
         goto 9999
@@ -617,16 +617,16 @@ contains
 
 
     class default
-      
+
       !
-      ! Otherwise use psb_sp_getblk, slower but able (in principle) of 
+      ! Otherwise use psb_sp_getblk, slower but able (in principle) of
       ! handling any format. In this case, a block of rows is extracted
       ! instead of a single row, for performance reasons, and these
       ! rows are copied one by one into the array row, through successive
       ! calls to ilut_copyin.
       !
 
-      if ((mod(i,nrb) == 1).or.(nrb == 1)) then 
+      if ((mod(i,nrb) == 1).or.(nrb == 1)) then
         irb = min(m-i+1,nrb)
         call aa%csget(i,i+irb-1,trw,info)
         if (info /= psb_success_) then
@@ -639,18 +639,18 @@ contains
 
       kin = ktrw
       nz = trw%get_nzeros()
-      do 
+      do
         if (ktrw > nz) exit
         if (trw%ia(ktrw) > i) exit
         k          = trw%ja(ktrw)
-        if ((jmin<=k).and.(k<=jmax)) then 
+        if ((jmin<=k).and.(k<=jmax)) then
           row(k)     = trw%val(ktrw)*weight
           call heap%insert(k,info)
           if (info /= psb_success_) exit
-          if (k<jd) nlw = nlw + 1 
-          if (k>jd) then 
+          if (k<jd) nlw = nlw + 1
+          if (k>jd) then
             nup = nup + 1
-            if (abs(row(k))>dmaxup) then 
+            if (abs(row(k))>dmaxup) then
               jmaxup = k
               dmaxup = abs(row(k))
             end if
@@ -734,10 +734,10 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
   ! Arguments
-    type(psb_i_heap), intent(inout)               :: heap 
+    type(psb_i_heap), intent(inout)               :: heap
     integer(psb_ipk_), intent(in)                 :: i
     integer(psb_ipk_), intent(inout)              :: nidx,info
     real(psb_spk_), intent(in)                     :: thres,nrmi
@@ -753,23 +753,23 @@ contains
     call psb_ensure_size(200*ione,idxs,info)
     if (info /= psb_success_) return
     nidx  = 0
-    lastk = -1 
+    lastk = -1
     !
     ! Do while there are indices to be processed
     !
     do
 
-      call heap%get_first(k,iret) 
+      call heap%get_first(k,iret)
       if (iret < 0) exit
 
-      ! 
+      !
       ! An index may have been put on the heap more than once.
       !
       if (k == lastk) cycle
 
-      lastk = k 
+      lastk = k
       lowert: if (k<i)  then
- 
+
         !
         ! Dropping rule based on the threshold: compare the absolute
         ! value of each updated entry of row with thres * 2-norm of row.
@@ -777,7 +777,7 @@ contains
         rwk    = row(k)
         row(k) = row(k) * d(k)
         if (abs(row(k)) < thres*nrmi) then
-          ! 
+          !
           ! Drop the entry.
           !
           row(k) = czero
@@ -786,11 +786,11 @@ contains
           !
           ! Note: since U is scaled while copying it out (see ilut_copyout),
           ! we can use rwk in the update below.
-          !           
+          !
           do jj=uirp(k),uirp(k+1)-1
             j = uja(jj)
-            if (j<=k) then 
-              info = -i 
+            if (j<=k) then
+              info = -i
               return
             endif
             !
@@ -799,7 +799,7 @@ contains
             !
             row(j)     = row(j) - rwk * uval(jj)
             if (abs(row(j)) < thres*nrmi) then
-              ! 
+              !
               ! Drop the entry.
               !
               row(j) = czero
@@ -818,7 +818,7 @@ contains
       ! If we get here it is an index we need to keep on copyout.
       !
       nidx       = nidx + 1
-      call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize)      
+      call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize)
       if (info /= psb_success_) return
       idxs(nidx) = k
 
@@ -852,7 +852,7 @@ contains
   !
   !  This routine is used by psb_silut_factint in the computation of the ILU(k,t)
   !  factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    fill_in -  integer, input.
@@ -895,14 +895,14 @@ contains
   !               to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor, copied in lval row by row (see 
+  !               of the L factor, copied in lval row by row (see
   !               psb_silut_factint), according to the CSR storage format.
   !    lval   -  real(psb_spk_), dimension(:), input/output.
   !               The array where the entries of the row corresponding to the
   !               L factor are copied.
   !    d       -  real(psb_spk_), dimension(:), input/output.
   !               The array where the inverse of the diagonal entry of the
-  !               row is copied (only d(i) is used by the routine). 
+  !               row is copied (only d(i) is used by the routine).
   !    uja    -  integer, dimension(:), input/output.
   !               The column indices of the nonzero entries of the U factor
   !               copied in uval row by row (see psb_silut_factint), according
@@ -921,7 +921,7 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
     ! Arguments
     integer(psb_ipk_), intent(in)                       :: fill_in,i,m,nidx,nlw,nup,jmaxup
@@ -950,22 +950,22 @@ contains
     end if
 
     !
-    ! Here we need to apply also the dropping rule base on the fill-in. 
+    ! Here we need to apply also the dropping rule base on the fill-in.
     ! We do it by putting into a heap the elements that are not dropped
-    ! by using the 2-norm rule, and then copying them out. 
+    ! by using the 2-norm rule, and then copying them out.
     !
     ! The heap goes down on the entry absolute value, so the first item
-    ! is the largest absolute value. 
+    ! is the largest absolute value.
     !
 
     call heap%init(info,dir=psb_asort_down_)
 
     if (info == psb_success_) allocate(xwid(nidx),xw(nidx),indx(nidx),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info=psb_err_alloc_request_
       call psb_errpush(info,name,i_err=(/3*nidx,izero,izero,izero,izero/),&
            & a_err='real(psb_spk_)')
-      goto 9999      
+      goto 9999
     end if
 
     !
@@ -976,7 +976,7 @@ contains
     idxp = 0
 
     do
- 
+
       idxp = idxp + 1
       if (idxp > nidx) exit
       if (idxs(idxp) >= i) exit
@@ -987,8 +987,8 @@ contains
       !
       if (abs(witem) < thres*nrmi) cycle
 
-      nz       = nz + 1 
-      xw(nz)   = witem 
+      nz       = nz + 1
+      xw(nz)   = witem
       xwid(nz) = widx
       call heap%insert(witem,widx,info)
       if (info /= psb_success_) then
@@ -1000,9 +1000,9 @@ contains
 
     !
     ! Now we have to take out the first nlw+fill_in entries
-    ! 
+    !
     if (nz <= nlw+fill_in) then
-      ! 
+      !
       ! Just copy everything from xw, and it is already ordered
       !
     else
@@ -1014,7 +1014,7 @@ contains
           call psb_errpush(info,name,a_err='psb_heap_get_first')
           goto 9999
         end if
-      
+
         xw(k)   = witem
         xwid(k) = widx
       end do
@@ -1029,15 +1029,15 @@ contains
     ! Copy out the lower part of the row
     !
     do k=1,nz
-      l1     = l1 + 1 
+      l1     = l1 + 1
       if (size(lval) < l1) then
-        ! 
+        !
         ! Figure out a good reallocation size!
-        ! 
+        !
         isz  = (max((l1/i)*m,int(1.2*l1),l1+100))
-        call psb_realloc(isz,lval,info) 
-        if (info == psb_success_) call psb_realloc(isz,lja,info) 
-        if (info /= psb_success_) then 
+        call psb_realloc(isz,lval,info)
+        if (info == psb_success_) call psb_realloc(isz,lja,info)
+        if (info /= psb_success_) then
           info=psb_err_from_subroutine_
           call psb_errpush(info,name,a_err='Allocate')
           goto 9999
@@ -1046,25 +1046,25 @@ contains
       lja(l1)   = xwid(k)
       lval(l1)  = xw(indx(k))
     end do
-    
+
     !
     ! Make sure idxp points to the diagonal entry
     !
-    if (idxp <= size(idxs)) then 
-      if (idxs(idxp) < i) then 
-        do 
+    if (idxp <= size(idxs)) then
+      if (idxs(idxp) < i) then
+        do
           idxp = idxp + 1
           if (idxp > nidx) exit
           if (idxs(idxp) >= i) exit
         end do
       end if
     end if
-    if (idxp > size(idxs)) then 
+    if (idxp > size(idxs)) then
 !!$      write(0,*) 'Warning: missing diagonal element in the row '
     else
-      if (idxs(idxp) > i) then 
+      if (idxs(idxp) > i) then
 !!$        write(0,*) 'Warning: missing diagonal element in the row '
-      else if (idxs(idxp) /= i) then 
+      else if (idxs(idxp) /= i) then
 !!$        write(0,*) 'Warning: impossible error: diagonal has vanished'
       else
         !
@@ -1076,7 +1076,7 @@ contains
         if (abs(d(i)) < s_epstol) then
           !
           ! Too small pivot: unstable factorization
-          !     
+          !
           info = psb_err_pivot_too_small_
           int_err(1) = i
           write(ch_err,'(g20.10)') d(i)
@@ -1092,7 +1092,7 @@ contains
     end if
 
     !
-    ! Now the upper part 
+    ! Now the upper part
     !
 
     call heap%init(info,dir=psb_asort_down_)
@@ -1104,15 +1104,15 @@ contains
 
     nz       = 0
     do
- 
+
       idxp = idxp + 1
       if (idxp > nidx) exit
       widx      = idxs(idxp)
-      if (widx <= i) then 
+      if (widx <= i) then
 !!$        write(0,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp)
         cycle
       end if
-      if (widx > m) then 
+      if (widx > m) then
 !!$        write(0,*) 'Warning: impossible value',widx,i,idxp,idxs(idxp)
         cycle
       end if
@@ -1120,12 +1120,12 @@ contains
       !
       ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway.
       !
-      if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then 
-        cycle 
+      if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then
+        cycle
       end if
 
       nz       = nz + 1
-      xw(nz)   = witem 
+      xw(nz)   = witem
       xwid(nz) = widx
       call heap%insert(witem,widx,info)
       if (info /= psb_success_) then
@@ -1133,7 +1133,7 @@ contains
         call psb_errpush(info,name,a_err='psb_insert_heap')
         goto 9999
       end if
-      
+
     end do
 
     !
@@ -1141,7 +1141,7 @@ contains
     ! we include entry jmaxup.
     !
     if (nz <= nup+fill_in) then
-      ! 
+      !
       ! Just copy everything from xw
       !
       fndmaxup=.true.
@@ -1155,13 +1155,13 @@ contains
         if (widx == jmaxup) fndmaxup=.true.
       end do
     end if
-    if ((i<jmaxup).and.(jmaxup<=m)) then 
-      if (.not.fndmaxup) then 
-        ! 
+    if ((i<jmaxup).and.(jmaxup<=m)) then
+      if (.not.fndmaxup) then
+        !
         ! Include entry jmaxup, if it is not already there.
-        ! Put it in the place of the smallest coefficient. 
+        ! Put it in the place of the smallest coefficient.
         !
-        xw(nz)   = row(jmaxup) 
+        xw(nz)   = row(jmaxup)
         xwid(nz) = jmaxup
       endif
     end if
@@ -1175,15 +1175,15 @@ contains
     ! Copy out the upper part of the row
     !
     do k=1,nz
-      l2     = l2 + 1 
+      l2     = l2 + 1
       if (size(uval) < l2) then
-        ! 
+        !
         ! Figure out a good reallocation size!
-        ! 
+        !
         isz  = max((l2/i)*m,int(1.2*l2),l2+100)
-        call psb_realloc(isz,uval,info) 
-        if (info == psb_success_) call psb_realloc(isz,uja,info) 
-        if (info /= psb_success_) then 
+        call psb_realloc(isz,uval,info)
+        if (info == psb_success_) call psb_realloc(isz,uja,info)
+        if (info /= psb_success_) then
           info=psb_err_from_subroutine_
           call psb_errpush(info,name,a_err='Allocate')
           goto 9999
diff --git a/prec/impl/psb_s_invk_fact.f90 b/prec/impl/psb_s_invk_fact.f90
new file mode 100644
index 00000000..4e7bb755
--- /dev/null
+++ b/prec/impl/psb_s_invk_fact.f90
@@ -0,0 +1,495 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+
+subroutine psb_s_invk_bld(a,fill1, fill2,lmat,d,umat,desc,info,blck)
+
+  use psb_base_mod
+  use psb_s_invk_fact_mod, psb_protect_name =>  psb_s_invk_bld
+  use psb_s_ilu_fact_mod
+  implicit none
+
+  ! Arguments
+  type(psb_sspmat_type), intent(in), target   :: a
+  integer(psb_ipk_), intent(in)               :: fill1, fill2
+  type(psb_sspmat_type), intent(inout)        :: lmat, umat
+  real(psb_spk_), allocatable                 :: d(:)
+  Type(psb_desc_type), Intent(inout)          :: desc
+  integer(psb_ipk_), intent(out)              :: info
+  type(psb_sspmat_type), intent(in), optional :: blck
+  !
+  integer(psb_ipk_)      :: i, nztota, err_act, n_row, nrow_a, n_col
+  type(psb_sspmat_type)  :: atmp
+  real(psb_spk_), allocatable :: pq(:), pd(:)
+  integer(psb_ipk_), allocatable :: uplevs(:)
+  integer(psb_ipk_)   :: debug_level, debug_unit
+  type(psb_ctxt_type) :: ctxt
+  integer(psb_ipk_)   :: np,me
+  integer(psb_ipk_)   :: nzrmax
+  character(len=20)   :: name, ch_err
+
+
+  info = psb_success_
+  name='psb_sinvk_bld'
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  ctxt        = psb_cd_get_context(desc)
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' start'
+
+  !
+  ! Check the memory available to hold the incomplete L and U factors
+  ! and allocate it if needed
+  !
+  nrow_a = a%get_nrows()
+  nztota = a%get_nzeros()
+
+  if (present(blck)) then
+    nztota = nztota + blck%get_nzeros()
+  end if
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),&
+       & ': out get_nnzeros',nrow_a,nztota,&
+       & a%get_nrows(),a%get_ncols(),a%get_nzeros()
+
+
+  n_row  = psb_cd_get_local_rows(desc)
+  n_col  = psb_cd_get_local_cols(desc)
+  allocate(pd(n_row),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+
+  call lmat%allocate(n_row,n_row,info,nz=nztota)
+  if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota)
+
+
+  call psb_iluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck)
+       !,uplevs=uplevs)
+  !call psb_siluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck)
+
+  if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota)
+  if(info/=0) then
+    info=psb_err_from_subroutine_
+    ch_err='psb_sp_all'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+
+  !
+  ! Compute the aprox U^-1  and L^-1
+  !
+  call psb_sparse_invk(n_row,umat,atmp,fill2,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,umat,info)
+  if (info == psb_success_) call lmat%transp()
+  if (info == psb_success_) call psb_sparse_invk(n_row,lmat,atmp,fill2,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,lmat,info)
+  if (info == psb_success_) call lmat%transp()
+  ! Done. Hopefully....
+
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='invt')
+    goto 9999
+  end if
+
+  call psb_move_alloc(pd,d,info)
+  call lmat%set_asb()
+  call lmat%trim()
+  call umat%set_asb()
+  call umat%trim()
+
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' end'
+
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_s_invk_bld
+
+subroutine psb_ssparse_invk(n,a,z,fill_in,info,inlevs)
+
+  use psb_base_mod
+  use psb_s_invk_fact_mod, psb_protect_name => psb_ssparse_invk
+
+  integer(psb_ipk_), intent(in)           :: n
+  type(psb_sspmat_type), intent(in)       :: a
+  type(psb_sspmat_type), intent(inout)    :: z
+  integer(psb_ipk_), intent(in)           :: fill_in
+  integer(psb_ipk_), intent(out)          :: info
+  integer(psb_ipk_), intent(in), optional :: inlevs(:)
+  !
+  integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2
+  integer(psb_ipk_), allocatable        :: ia(:), ja(:), iz(:), jz(:)
+  real(psb_spk_), allocatable :: zw(:), val(:), valz(:)
+  integer(psb_ipk_), allocatable        :: uplevs(:), rowlevs(:), idxs(:)
+  real(psb_spk_), allocatable :: row(:)
+  type(psb_s_coo_sparse_mat)  :: trw
+  type(psb_s_csr_sparse_mat)  :: acsr, zcsr
+  integer(psb_ipk_)           :: ktrw, nidx
+  type(psb_i_heap)            :: heap
+
+  real(psb_dpk_)     :: alpha
+  character(len=20)  :: name='psb_sp_invk'
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then
+    write(psb_err_unit,*) 'Wrong A '
+    info = psb_err_internal_error_
+    call psb_errpush(psb_err_internal_error_,name,a_err='wrong A')
+    goto 9999
+  end if
+  call a%cp_to(acsr)
+  call trw%allocate(izero,izero,ione)
+  if (info == psb_success_) allocate(zw(n),iz(n),valz(n),&
+       & row(n),rowlevs(n),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  allocate(uplevs(acsr%get_nzeros()),stat=info)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='Allocate')
+    goto 9999
+  end if
+  uplevs(:)  = 0
+  row(:)     = szero
+  rowlevs(:) = -(n+1)
+
+  call zcsr%allocate(n,n,n*fill_in)
+  call zcsr%set_triangle()
+  call zcsr%set_unit(.false.)
+  call zcsr%set_upper()
+  call psb_ensure_size(n+1, idxs,  info)
+
+
+  !
+  !
+  zcsr%irp(1)  = 1
+  nzz          = 0
+
+  l2 = 0
+  outer: do i = 1, n-1
+    ! ZW = e_i
+    call psb_invk_copyin(i,n,acsr,ione,n,row,rowlevs,heap,ktrw,trw,info,&
+         & sign=-sone,inlevs=inlevs)
+    row(i)     = sone
+    rowlevs(i) = 0
+
+    ! Update loop
+    call psb_invk_inv(fill_in,i,row,rowlevs,heap,&
+         & acsr%ja,acsr%irp,acsr%val,uplevs,nidx,idxs,info)
+
+    call psb_invk_copyout(fill_in,i,n,row,rowlevs,nidx,idxs,&
+         & l2,zcsr%ja,zcsr%irp,zcsr%val,info)
+
+    nzz = l2
+  end do outer
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='mainloop')
+    goto 9999
+  end if
+  ipz1 = nzz+1
+  call psb_ensure_size(ipz1,zcsr%val,info)
+  call psb_ensure_size(ipz1,zcsr%ja,info)
+  zcsr%val(ipz1) = sone
+  zcsr%ja(ipz1)  = n
+  zcsr%irp(n+1)  = ipz1+1
+  call zcsr%set_sorted()
+  call z%mv_from(zcsr)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_ssparse_invk
+
+subroutine psb_s_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info,sign,inlevs)
+
+  use psb_base_mod
+  use psb_s_invk_fact_mod, psb_protect_name => psb_s_invk_copyin
+
+  implicit none
+
+  ! Arguments
+  type(psb_s_csr_sparse_mat), intent(in)    :: a
+  type(psb_s_coo_sparse_mat), intent(inout) :: trw
+  integer(psb_ipk_), intent(in)             :: i,m,jmin,jmax
+  integer(psb_ipk_), intent(inout)          :: ktrw,info
+  integer(psb_ipk_), intent(inout)          :: rowlevs(:)
+  real(psb_spk_), intent(inout)             :: row(:)
+  type(psb_i_heap), intent(inout)           :: heap
+  real(psb_spk_), optional, intent(in)      :: sign
+  integer(psb_ipk_), intent(in), optional   :: inlevs(:)
+
+  ! Local variables
+  integer(psb_ipk_)             :: k,j,irb,err_act, nz
+  integer(psb_ipk_), parameter  :: nrb=16
+  real(psb_spk_)                :: sign_
+  character(len=20), parameter  :: name='invk_copyin'
+  character(len=20)             :: ch_err
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  call heap%init(info)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='psb_init_heap')
+    goto 9999
+  end if
+
+  if (present(sign)) then
+    sign_ = sign
+  else
+    sign_ = sone
+  end if
+
+
+  !
+  ! Take a fast shortcut if the matrix is stored in CSR format
+  !
+  if (present(inlevs)) then
+    do j = a%irp(i), a%irp(i+1) - 1
+      k          = a%ja(j)
+      if ((jmin<=k).and.(k<=jmax)) then
+        row(k)     = sign_ * a%val(j)
+        rowlevs(k) = inlevs(j)
+        call heap%insert(k,info)
+      end if
+    end do
+  else
+    do j = a%irp(i), a%irp(i+1) - 1
+      k          = a%ja(j)
+      if ((jmin<=k).and.(k<=jmax)) then
+        row(k)     = sign_ * a%val(j)
+        rowlevs(k) = 0
+        call heap%insert(k,info)
+      end if
+    end do
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_s_invk_copyin
+
+
+subroutine psb_s_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,&
+     &  l2,uia1,uia2,uaspk,info)
+
+  use psb_base_mod
+  use psb_s_invk_fact_mod, psb_protect_name => psb_s_invk_copyout
+
+  implicit none
+
+  ! Arguments
+  integer(psb_ipk_), intent(in)                 :: fill_in, i, m, nidx
+  integer(psb_ipk_), intent(inout)              :: l2, info
+  integer(psb_ipk_), intent(inout)              :: rowlevs(:), idxs(:)
+  integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:)
+  real(psb_spk_), allocatable, intent(inout)    :: uaspk(:)
+  real(psb_spk_), intent(inout)                 :: row(:)
+
+  ! Local variables
+  integer(psb_ipk_)              :: j,isz,err_act,int_err(5),idxp
+  character(len=20), parameter  :: name='psb_siluk_factint'
+  character(len=20)             :: ch_err
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  do idxp=1,nidx
+
+    j = idxs(idxp)
+
+    if (j>=i) then
+      !
+      ! Copy the upper part of the row
+      !
+      if (rowlevs(j) <= fill_in) then
+        l2     = l2 + 1
+        if (size(uaspk) < l2) then
+          !
+          ! Figure out a good reallocation size!
+          !
+          isz  = max(int(1.2*l2),l2+100)
+          call psb_realloc(isz,uaspk,info)
+          if (info == psb_success_) call psb_realloc(isz,uia1,info)
+          if (info /= psb_success_) then
+            info=psb_err_from_subroutine_
+            call psb_errpush(info,name,a_err='Allocate')
+            goto 9999
+          end if
+        end if
+        uia1(l2)   = j
+        uaspk(l2)  = row(j)
+      end if
+      !
+      ! Re-initialize row(j) and rowlevs(j)
+      !
+      row(j)     = szero
+      rowlevs(j) = -(m+1)
+    end if
+  end do
+
+  uia2(i+1) = l2 + 1
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_s_invk_copyout
+
+subroutine psb_sinvk_inv(fill_in,i,row,rowlevs,heap,ja,irp,val,uplevs,&
+     & nidx,idxs,info)
+
+  use psb_base_mod
+  use psb_s_invk_fact_mod, psb_protect_name => psb_sinvk_inv
+
+  implicit none
+
+  ! Arguments
+  type(psb_i_heap), intent(inout)               :: heap
+  integer(psb_ipk_), intent(in)                 :: i, fill_in
+  integer(psb_ipk_), intent(inout)              :: nidx,info
+  integer(psb_ipk_), intent(inout)              :: rowlevs(:)
+  integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+  integer(psb_ipk_), intent(in)                 :: ja(:),irp(:),uplevs(:)
+  real(psb_spk_), intent(in)                    :: val(:)
+  real(psb_spk_), intent(inout)                 :: row(:)
+
+  ! Local variables
+  integer(psb_ipk_) :: k,j,lrwk,jj,lastk, iret
+  real(psb_dpk_)    :: rwk
+
+
+  info = psb_success_
+
+  call psb_ensure_size(200, idxs,  info)
+  if (info /= psb_success_) return
+  nidx    = 1
+  idxs(1) = i
+  lastk   = i
+
+  !
+  ! Do while there are indices to be processed
+  !
+  do
+    ! Beware: (iret < 0) means that the heap is empty, not an error.
+    call heap%get_first(k,iret)
+    if (iret < 0) then
+!!$        write(psb_err_unit,*) 'IINVK: ',i,' returning at ',lastk
+      return
+    end if
+
+    !
+    ! Just in case an index has been put on the heap more than once.
+    !
+    if (k == lastk) cycle
+
+    lastk = k
+    nidx = nidx + 1
+    if (nidx>size(idxs)) then
+      call psb_realloc(nidx+psb_heap_resize,idxs,info)
+      if (info /= psb_success_) return
+    end if
+    idxs(nidx) = k
+
+    if ((row(k) /= szero).and.(rowlevs(k) <= fill_in)) then
+      !
+      ! Note: since U is scaled while copying it out (see iluk_copyout),
+      ! we can use rwk in the update below
+      !
+      rwk    = row(k)
+      lrwk   = rowlevs(k)
+
+      do jj=irp(k),irp(k+1)-1
+        j = ja(jj)
+        if (j<=k) then
+          info = -i
+          return
+        endif
+        !
+        ! Insert the index into the heap for further processing.
+        ! The fill levels are initialized to a negative value. If we find
+        ! one, it means that it is an as yet untouched index, so we need
+        ! to insert it; otherwise it is already on the heap, there is no
+        ! need to insert it more than once.
+        !
+        if (rowlevs(j)<0) then
+          call heap%insert(j,info)
+          if (info /= psb_success_) return
+          rowlevs(j) = abs(rowlevs(j))
+        end if
+        !
+        ! Update row(j) and the corresponding fill level
+        !
+        row(j)     = row(j) - rwk * val(jj)
+        rowlevs(j) = min(rowlevs(j),lrwk+uplevs(jj)+1)
+      end do
+
+    end if
+  end do
+
+end subroutine psb_sinvk_inv
diff --git a/prec/impl/psb_s_invt_fact.f90 b/prec/impl/psb_s_invt_fact.f90
new file mode 100644
index 00000000..f311860d
--- /dev/null
+++ b/prec/impl/psb_s_invt_fact.f90
@@ -0,0 +1,739 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG4PSBLAS, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+
+subroutine psb_s_invt_bld(a,fillin,invfill,thresh,invthresh,&
+     & lmat,d,umat,desc,info,blck)
+
+  use psb_base_mod
+  use psb_s_invt_fact_mod, psb_protect_name =>  psb_s_invt_bld
+  use psb_s_ilu_fact_mod
+  implicit none
+
+  ! Arguments
+  type(psb_sspmat_type), intent(in), target   :: a
+  integer(psb_ipk_), intent(in)               :: fillin,invfill
+  real(psb_spk_), intent(in)                :: thresh
+  real(psb_spk_), intent(in)                  :: invthresh
+  type(psb_sspmat_type), intent(inout)        :: lmat, umat
+  real(psb_spk_), allocatable                 :: d(:)
+  Type(psb_desc_type), Intent(inout)          :: desc
+  integer(psb_ipk_), intent(out)              :: info
+  type(psb_sspmat_type), intent(in), optional :: blck
+  !
+  integer(psb_ipk_) :: i, nztota, err_act, n_row, nrow_a, n_col
+  type(psb_sspmat_type)       :: atmp
+  real(psb_spk_), allocatable :: pq(:), pd(:), w(:)
+  integer(psb_ipk_) :: debug_level, debug_unit
+  type(psb_ctxt_type) :: ctxt
+  integer(psb_ipk_)   :: np,me
+  integer(psb_ipk_)   :: nzrmax
+  real(psb_spk_)    :: sp_thresh
+  character(len=20) :: name, ch_err, fname
+
+
+  info = psb_success_
+  name='psb_sinvt_fact'
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  ctxt        = psb_cd_get_context(desc)
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' start'
+
+  !
+  ! Check the memory available to hold the incomplete L and U factors
+  ! and allocate it if needed
+  !
+  nrow_a = a%get_nrows()
+  nztota = a%get_nzeros()
+
+  if (present(blck)) then
+    nztota = nztota + blck%get_nzeros()
+  end if
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),&
+       & ': out get_nnzeros',nrow_a,nztota,&
+       & a%get_nrows(),a%get_ncols(),a%get_nzeros()
+
+
+  n_row  = psb_cd_get_local_rows(desc)
+  n_col  = psb_cd_get_local_cols(desc)
+  allocate(pd(n_row),w(n_row),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  nzrmax    = fillin
+  sp_thresh = thresh
+
+  call lmat%allocate(n_row,n_row,info,nz=nztota)
+  if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota)
+
+  if (info == 0) call psb_ilut_fact(nzrmax,sp_thresh,&
+       & a,lmat,umat,pd,info,blck=blck,iscale=psb_ilu_scale_maxval_)
+
+  if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota)
+  if(info/=0) then
+    info=psb_err_from_subroutine_
+    ch_err='psb_sp_all'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+  if (.false.) then
+!!$    if (debug_level >= psb_debug_inner_) then
+    write(fname,'(a,i0,a)') 'invt-lo-',me,'.mtx'
+    call lmat%print(fname,head="INVTLOW")
+    write(fname,'(a,i0,a)') 'invt-up-',me,'.mtx'
+    call umat%print(fname,head="INVTUPP")
+  end if
+
+  !
+  ! Compute the approx U^-1  and L^-1
+  !
+  nzrmax    = invfill
+  call psb_ssparse_invt(n_row,umat,atmp,nzrmax,invthresh,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,umat,info)
+  if (info == psb_success_) call lmat%transp()
+  if (info == psb_success_) call psb_ssparse_invt(n_row,lmat,atmp,nzrmax,invthresh,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,lmat,info)
+  if (info == psb_success_) call lmat%transp()
+  ! Done. Hopefully....
+
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='invt')
+    goto 9999
+  end if
+
+  call psb_move_alloc(pd,d,info)
+  call lmat%set_asb()
+  call lmat%trim()
+  call umat%set_asb()
+  call umat%trim()
+
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' end'
+
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_s_invt_bld
+
+subroutine psb_ssparse_invt(n,a,z,nzrmax,sp_thresh,info)
+
+  use psb_base_mod
+  use psb_s_invt_fact_mod, psb_protect_name => psb_ssparse_invt
+
+  implicit none
+  integer(psb_ipk_), intent(in)        :: n
+  type(psb_sspmat_type), intent(in)    :: a
+  type(psb_sspmat_type), intent(inout) :: z
+  integer(psb_ipk_), intent(in)        :: nzrmax
+  real(psb_spk_), intent(in)           :: sp_thresh
+  integer(psb_ipk_), intent(out)       :: info
+  !
+  integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), iz(:),jz(:)
+  real(psb_spk_), allocatable    :: zw(:), val(:), valz(:)
+  integer(psb_ipk_), allocatable :: uplevs(:), rowlevs(:),idxs(:)
+  real(psb_spk_), allocatable :: row(:)
+  type(psb_s_coo_sparse_mat)  :: trw
+  type(psb_s_csr_sparse_mat)  :: acsr, zcsr
+  integer(psb_ipk_)           :: ktrw, nidx, nlw,nup,jmaxup
+  type(psb_i_heap)            :: heap
+  real(psb_spk_)     :: alpha, nrmi
+  character(len=20)  :: name='psb_sp_invt'
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then
+    write(psb_err_unit,*) 'Wrong A '
+    info = psb_err_internal_error_
+    call psb_errpush(psb_err_internal_error_,name,a_err='wrong A')
+    goto 9999
+  end if
+  call a%cp_to(acsr)
+  call trw%allocate(izero,izero,ione)
+  if (info == psb_success_) allocate(zw(n),iz(n),valz(n),&
+       & row(n),rowlevs(n),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  call zcsr%allocate(n,n,n*nzrmax)
+  call zcsr%set_triangle()
+  call zcsr%set_unit(.false.)
+  call zcsr%set_upper()
+  !
+  !
+  nzz        = 0
+  row(:)     = szero
+  rowlevs(:) = 0
+  l2         = 0
+  zcsr%irp(1) = 1
+
+  outer: do i = 1, n-1
+    ! ZW = e_i
+    call psb_s_invt_copyin(i,n,acsr,i,ione,n,nlw,nup,jmaxup,nrmi,row,&
+         & heap,rowlevs,ktrw,trw,info,sign=-sone)
+    if (info /= 0) exit
+    row(i) = sone
+    ! Adjust norm
+    if (nrmi < sone) then
+      nrmi = sqrt(sone + nrmi**2)
+    else
+      nrmi = nrmi*sqrt(sone+sone/(nrmi**2))
+    end if
+
+    call psb_invt_inv(sp_thresh,i,nrmi,row,heap,rowlevs,&
+         & acsr%ja,acsr%irp,acsr%val,nidx,idxs,info)
+    if (info /= 0) exit
+!!$    write(0,*) 'Calling copyout ',nzrmax,nlw,nup,nidx,l2
+    call psb_s_invt_copyout(nzrmax,sp_thresh,i,n,nlw,nup,jmaxup,nrmi,row,&
+         & nidx,idxs,l2,zcsr%ja,zcsr%irp,zcsr%val,info)
+    if (info /= 0) exit
+    nzz = l2
+  end do outer
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='mainloop')
+    goto 9999
+  end if
+
+  ipz1 = nzz+1
+  call psb_ensure_size(ipz1,zcsr%val,info)
+  call psb_ensure_size(ipz1,zcsr%ja,info)
+  zcsr%val(ipz1) = sone
+  zcsr%ja(ipz1)  = n
+  zcsr%irp(n+1)  = ipz1+1
+
+  call z%mv_from(zcsr)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_ssparse_invt
+
+subroutine psb_s_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,&
+     & irwt,ktrw,trw,info,sign)
+  use psb_base_mod
+  use psb_d_invt_fact_mod, psb_protect_name => psb_d_invt_copyin
+  implicit none
+  type(psb_s_csr_sparse_mat), intent(in)    :: a
+  type(psb_s_coo_sparse_mat), intent(inout) :: trw
+  integer(psb_ipk_), intent(in)             :: i, m,jmin,jmax,jd
+  integer(psb_ipk_), intent(inout)          :: ktrw,nlw,nup,jmaxup,info
+  integer(psb_ipk_), intent(inout)          :: irwt(:)
+  real(psb_spk_), intent(inout)          :: nrmi
+  real(psb_spk_), intent(inout)            :: row(:)
+  type(psb_i_heap), intent(inout)           :: heap
+  real(psb_spk_), intent(in), optional    :: sign
+  !
+  integer(psb_ipk_)            :: k,j,irb,kin,nz, err_act
+  integer(psb_ipk_), parameter :: nrb=16
+  real(psb_dpk_)               :: dmaxup, sign_
+  real(psb_dpk_), external     :: dnrm2
+  character(len=20), parameter :: name='invt_copyin'
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  call heap%init(info)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='psb_init_heap')
+    goto 9999
+  end if
+  sign_ = sone
+  if (present(sign)) sign_ = sign
+  !
+  ! nrmi is the norm of the current sparse row (for the time being,
+  ! we use the 2-norm).
+  ! NOTE: the 2-norm below includes also elements that are outside
+  ! [jmin:jmax] strictly. Is this really important? TO BE CHECKED.
+  !
+
+  nlw    = 0
+  nup    = 0
+  jmaxup = 0
+  dmaxup = szero
+  nrmi   = szero
+
+  do j = a%irp(i), a%irp(i+1) - 1
+    k = a%ja(j)
+    if ((jmin<=k).and.(k<=jmax)) then
+      row(k)     = sign_ * a%val(j)
+      call heap%insert(k,info)
+      irwt(k) = 1
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='psb_insert_heap')
+        goto 9999
+      end if
+    end if
+    if (k<jd) nlw = nlw + 1
+    if (k>jd) then
+      nup = nup + 1
+      if (abs(row(k))>dmaxup) then
+        jmaxup = k
+        dmaxup = abs(row(k))
+      end if
+    end if
+  end do
+  nz   = a%irp(i+1) - a%irp(i)
+  nrmi = dnrm2(nz,a%val(a%irp(i):),ione)
+
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_s_invt_copyin
+
+subroutine psb_s_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, &
+     & nidx,idxs,l2,ja,irp,val,info)
+
+  use psb_base_mod
+  use psb_s_invt_fact_mod, psb_protect_name => psb_s_invt_copyout
+
+  implicit none
+
+  ! Arguments
+  integer(psb_ipk_), intent(in)                 :: fill_in,i,m,nidx,nlw,nup,jmaxup
+  integer(psb_ipk_), intent(in)                 :: idxs(:)
+  integer(psb_ipk_), intent(inout)              :: l2, info
+  integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:)
+  real(psb_spk_), intent(in)                    :: thres,nrmi
+  real(psb_spk_),allocatable, intent(inout)     :: val(:)
+  real(psb_spk_), intent(inout)                 :: row(:)
+
+  ! Local variables
+  real(psb_dpk_),allocatable     :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+  real(psb_dpk_)                 :: witem, wmin
+  integer(psb_ipk_)              :: widx
+  integer(psb_ipk_)              :: k,isz,err_act,int_err(5),idxp, nz
+  type(psb_d_idx_heap)           :: heap
+  character(len=20), parameter   :: name='invt_copyout'
+  character(len=20)              :: ch_err
+  logical                        :: fndmaxup
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  !
+  ! Here we need to apply also the dropping rule base on the fill-in.
+  ! We do it by putting into a heap the elements that are not dropped
+  ! by using the 2-norm rule, and then copying them out.
+  !
+  ! The heap goes down on the entry absolute value, so the first item
+  ! is the largest absolute value.
+  !
+!!$  write(0,*) 'invt_copyout ',nidx,nup+fill_in
+  call heap%init(info,dir=psb_asort_down_)
+
+  if (info == psb_success_) allocate(xwid(nidx),xw(nidx),indx(nidx),stat=info)
+  if (info /= psb_success_) then
+    info=psb_err_alloc_request_
+    call psb_errpush(info,name,i_err=(/3*nidx/),&
+         & a_err='real(psb_dpk_)')
+    goto 9999
+  end if
+
+  !
+  ! First the lower part
+  !
+
+  nz   = 0
+  idxp = 0
+
+  do
+
+    idxp = idxp + 1
+    if (idxp > nidx) exit
+    if (idxs(idxp) >= i) exit
+    widx      = idxs(idxp)
+    witem     = row(widx)
+    !
+    ! Dropping rule based on the 2-norm
+    !
+    if (abs(witem) < thres*nrmi) cycle
+
+    nz       = nz + 1
+    xw(nz)   = witem
+    xwid(nz) = widx
+    call heap%insert(witem,widx,info)
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_insert_heap')
+      goto 9999
+    end if
+  end do
+
+  if (nz > 1) then
+    write(psb_err_unit,*) 'Warning: lower triangle from invt???? '
+  end if
+
+
+  if (idxp <= size(idxs)) then
+    if (idxs(idxp) < i) then
+      do
+        idxp = idxp + 1
+        if (idxp > nidx) exit
+        if (idxs(idxp) >= i) exit
+      end do
+    end if
+  end if
+  idxp = idxp - 1
+  nz   = 0
+  wmin=HUGE(wmin)
+  if (.false.) then
+    do
+
+      idxp = idxp + 1
+      if (idxp > nidx) exit
+      widx      = idxs(idxp)
+      if (widx < i) then
+        write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp)
+        cycle
+      end if
+      if (widx > m) then
+        cycle
+      end if
+      witem     = row(widx)
+      !
+      ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway.
+      !
+      if ((widx /= jmaxup) .and. (widx /= i) .and. (abs(witem) < thres*nrmi)) then
+        cycle
+      end if
+      if ((widx/=jmaxup).and.(nz > nup+fill_in)) then
+        if (abs(witem) < wmin) cycle
+      endif
+      wmin = min(abs(witem),wmin)
+      nz       = nz + 1
+      xw(nz)   = witem
+      xwid(nz) = widx
+      call heap%insert(witem,widx,info)
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='psb_insert_heap')
+        goto 9999
+      end if
+
+    end do
+
+    !
+    ! Now we have to take out the first nup-fill_in entries. But make sure
+    ! we include entry jmaxup.
+    !
+    if (nz <= nup+fill_in) then
+      !
+      ! Just copy everything from xw
+      !
+      fndmaxup=.true.
+    else
+      fndmaxup = .false.
+      nz = nup+fill_in
+      do k=1,nz
+        call heap%get_first(witem,widx,info)
+        xw(k)   = witem
+        xwid(k) = widx
+        if (widx == jmaxup) fndmaxup=.true.
+      end do
+    end if
+    if ((i<jmaxup).and.(jmaxup<=m)) then
+      if (.not.fndmaxup) then
+        !
+        ! Include entry jmaxup, if it is not already there.
+        ! Put it in the place of the smallest coefficient.
+        !
+        xw(nz)   = row(jmaxup)
+        xwid(nz) = jmaxup
+      endif
+    end if
+
+  else if (.true.) then
+
+    do
+
+      idxp = idxp + 1
+      if (idxp > nidx) exit
+      widx      = idxs(idxp)
+      if (widx < i) then
+        write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp)
+        cycle
+      end if
+      if (widx > m) then
+        cycle
+      end if
+      witem     = row(widx)
+      !
+      ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway.
+      !
+      if ((widx /= i) .and. (abs(witem) < thres*nrmi)) then
+        cycle
+      end if
+      if (nz > nup+fill_in) then
+        if (abs(witem) < wmin) cycle
+      endif
+      wmin = min(abs(witem),wmin)
+      nz       = nz + 1
+      xw(nz)   = witem
+      xwid(nz) = widx
+      call heap%insert(witem,widx,info)
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='psb_insert_heap')
+        goto 9999
+      end if
+
+    end do
+
+    !
+    ! Now we have to take out the first nup-fill_in entries. But make sure
+    ! we include entry jmaxup.
+    !
+    if (nz >  nup+fill_in) then
+      nz = nup+fill_in
+      do k=1,nz
+        call heap%get_first(witem,widx,info)
+        xw(k)   = witem
+        xwid(k) = widx
+      end do
+    end if
+  end if
+
+  !
+  ! Now we put things back into ascending column order
+  !
+  call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_)
+
+  !
+  ! Copy out the upper part of the row
+  !
+  do k=1,nz
+    l2     = l2 + 1
+    if (size(val) < l2) then
+      !
+      ! Figure out a good reallocation size!
+      !
+      isz  = max(int(1.2*l2),l2+100)
+      call psb_realloc(isz,val,info)
+      if (info == psb_success_) call psb_realloc(isz,ja,info)
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='Allocate')
+        goto 9999
+      end if
+    end if
+    ja(l2)   = xwid(k)
+    val(l2)  = xw(indx(k))
+  end do
+
+  !
+  ! Set row to zero
+  !
+  do idxp=1,nidx
+    row(idxs(idxp)) = szero
+  end do
+
+  irp(i+1) = l2 + 1
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_s_invt_copyout
+
+subroutine psb_s_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val,nidx,idxs,info)
+
+  use psb_base_mod
+  use psb_s_invt_fact_mod, psb_protect_name => psb_s_invt_inv
+
+  implicit none
+
+  ! Arguments
+  type(psb_i_heap), intent(inout)     :: heap
+  integer(psb_ipk_), intent(in)       :: i
+  integer(psb_ipk_), intent(inout)    :: nidx,info
+  integer(psb_ipk_), intent(inout)    :: irwt(:)
+  real(psb_spk_), intent(in)          :: thres,nrmi
+  integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+  integer(psb_ipk_), intent(in)       :: ja(:),irp(:)
+  real(psb_spk_), intent(in)          :: val(:)
+  real(psb_spk_), intent(inout)       :: row(:)
+
+  ! Local Variables
+  integer(psb_ipk_) :: k,j,jj,lastk,iret
+  real(psb_dpk_)    :: rwk, alpha
+
+  info  = psb_success_
+
+  call psb_ensure_size(200, idxs,  info)
+  if (info /= psb_success_) return
+  nidx    = 1
+  idxs(1) = i
+  lastk   = i
+  irwt(i) = 1
+!!$  write(0,*) 'Drop Threshold ',thres*nrmi
+  !
+  ! Do while there are indices to be processed
+  !
+  do
+
+    call heap%get_first(k,iret)
+    if (iret < 0)  exit
+
+    !
+    ! An index may have been put on the heap more than once.
+    ! Should not happen, but just in case.
+    !
+    if (k == lastk)  cycle
+    lastk = k
+
+    !
+    ! Dropping rule based on the threshold: compare the absolute
+    ! value of each updated entry of row with thres * 2-norm of row.
+    !
+    rwk    = row(k)
+
+    if (abs(rwk) < thres*nrmi) then
+      !
+      ! Drop the entry.
+      !
+      row(k)  = dzero
+      irwt(k) = 0
+      cycle
+    else
+      !
+      ! Note: since U is scaled while copying it out (see ilut_copyout),
+      ! we can use rwk in the update below.
+      !
+      do jj=irp(k),irp(k+1)-1
+        j = ja(jj)
+        if (j<=k) then
+          info = -i
+          return
+        endif
+        !
+        ! Update row(j) and, if it is not to be discarded, insert
+        ! its index into the heap for further processing.
+        !
+        row(j)     = row(j) - rwk * val(jj)
+        if (irwt(j) == 0) then
+          if (abs(row(j)) < thres*nrmi) then
+            !
+            ! Drop the entry.
+            !
+            row(j)  = dzero
+          else
+            !
+            ! Do the insertion.
+            !
+            call heap%insert(j,info)
+            if (info /= psb_success_) return
+            irwt(j) = 1
+          end if
+        end if
+      end do
+    end if
+
+    !
+    ! If we get here it is an index we need to keep on copyout.
+    !
+
+    nidx       = nidx + 1
+    call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize)
+    if (info /= psb_success_) return
+    idxs(nidx) = k
+    irwt(k)    = 0
+  end do
+
+  irwt(i) = 0
+end subroutine psb_s_invt_inv
diff --git a/prec/impl/psb_s_prec_type_impl.f90 b/prec/impl/psb_s_prec_type_impl.f90
index 4379322b..4272ba75 100644
--- a/prec/impl/psb_s_prec_type_impl.f90
+++ b/prec/impl/psb_s_prec_type_impl.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,14 +27,14 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
-!   
+!
+!
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -46,7 +46,7 @@
 !!$    3. The name of the PSBLAS group or the names of its contributors may
 !!$       not be used to endorse or promote products derived from this
 !!$       software without specific written permission.
-!!$ 
+!!$
 !!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -58,14 +58,14 @@
 !!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !!$  POSSIBILITY OF SUCH DAMAGE.
-!!$ 
-!!$  
+!!$
+!!$
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
 subroutine psb_s_apply2_vect(prec,x,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_s_prec_type, psb_protect_name => psb_s_apply2_vect
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)       :: desc_data
   class(psb_sprec_type), intent(inout) :: prec
   type(psb_s_vect_type),intent(inout)  :: x
@@ -74,7 +74,7 @@ subroutine psb_s_apply2_vect(prec,x,y,desc_data,info,trans,work)
   character(len=1), optional           :: trans
   real(psb_spk_),intent(inout), optional, target :: work(:)
 
-  character     :: trans_ 
+  character     :: trans_
   real(psb_spk_), pointer :: work_(:)
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -88,25 +88,25 @@ subroutine psb_s_apply2_vect(prec,x,y,desc_data,info,trans,work)
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
 
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=psb_toupper(trans)
   else
     trans_='N'
   end if
 
-  if (present(work)) then 
+  if (present(work)) then
     work_ => work
   else
     allocate(work_(4*desc_data%get_local_cols()),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
 
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
@@ -115,13 +115,13 @@ subroutine psb_s_apply2_vect(prec,x,y,desc_data,info,trans,work)
   call prec%prec%apply(sone,x,szero,y,desc_data,info,&
        & trans=trans_,work=work_)
 
-  if (present(work)) then 
+  if (present(work)) then
   else
     deallocate(work_,stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='DeAllocate')
-      goto 9999      
+      goto 9999
     end if
   end if
 
@@ -136,7 +136,7 @@ end subroutine psb_s_apply2_vect
 subroutine psb_s_apply1_vect(prec,x,desc_data,info,trans,work)
   use psb_base_mod
   use psb_s_prec_type, psb_protect_name => psb_s_apply1_vect
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)       :: desc_data
   class(psb_sprec_type), intent(inout) :: prec
   type(psb_s_vect_type),intent(inout)  :: x
@@ -145,7 +145,7 @@ subroutine psb_s_apply1_vect(prec,x,desc_data,info,trans,work)
   real(psb_spk_),intent(inout), optional, target :: work(:)
 
   type(psb_s_vect_type)       :: ww
-  character     :: trans_ 
+  character     :: trans_
   real(psb_spk_), pointer :: work_(:)
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -159,25 +159,25 @@ subroutine psb_s_apply1_vect(prec,x,desc_data,info,trans,work)
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
 
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=psb_toupper(trans)
   else
     trans_='N'
   end if
 
-  if (present(work)) then 
+  if (present(work)) then
     work_ => work
   else
     allocate(work_(4*desc_data%get_local_cols()),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
 
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
@@ -188,13 +188,13 @@ subroutine psb_s_apply1_vect(prec,x,desc_data,info,trans,work)
        & trans=trans_,work=work_)
   if (info == 0) call psb_geaxpby(sone,ww,szero,x,desc_data,info)
   call psb_gefree(ww,desc_data,info)
-  if (present(work)) then 
+  if (present(work)) then
   else
     deallocate(work_,stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='DeAllocate')
-      goto 9999      
+      goto 9999
     end if
   end if
 
@@ -209,7 +209,7 @@ end subroutine psb_s_apply1_vect
 subroutine psb_s_apply2v(prec,x,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_s_prec_type, psb_protect_name => psb_s_apply2v
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_sprec_type), intent(inout) :: prec
   real(psb_spk_),intent(inout)   :: x(:)
@@ -218,7 +218,7 @@ subroutine psb_s_apply2v(prec,x,y,desc_data,info,trans,work)
   character(len=1), optional        :: trans
   real(psb_spk_),intent(inout), optional, target :: work(:)
 
-  character     :: trans_ 
+  character     :: trans_
   real(psb_spk_), pointer :: work_(:)
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -232,37 +232,37 @@ subroutine psb_s_apply2v(prec,x,y,desc_data,info,trans,work)
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
 
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=trans
   else
     trans_='N'
   end if
 
-  if (present(work)) then 
+  if (present(work)) then
     work_ => work
   else
     allocate(work_(4*desc_data%get_local_cols()),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
 
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
   end if
   call prec%prec%apply(sone,x,szero,y,desc_data,info,trans_,work=work_)
-  if (present(work)) then 
+  if (present(work)) then
   else
     deallocate(work_,stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='DeAllocate')
-      goto 9999      
+      goto 9999
     end if
   end if
 
@@ -277,7 +277,7 @@ end subroutine psb_s_apply2v
 subroutine psb_s_apply1v(prec,x,desc_data,info,trans)
   use psb_base_mod
   use psb_s_prec_type, psb_protect_name => psb_s_apply1v
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_sprec_type), intent(inout) :: prec
   real(psb_spk_),intent(inout)   :: x(:)
@@ -293,36 +293,34 @@ subroutine psb_s_apply1v(prec,x,desc_data,info,trans)
   name='psb_s_apply1v'
   info = psb_success_
   call psb_erractionsave(err_act)
-
-
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=psb_toupper(trans)
   else
     trans_='N'
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
   end if
   allocate(ww(size(x)),w1(size(x)),stat=info)
-  if (info /= psb_success_) then 
+  if (info /= psb_success_) then
     info = psb_err_from_subroutine_
     call psb_errpush(info,name,a_err='Allocate')
-    goto 9999      
+    goto 9999
   end if
   call prec%prec%apply(sone,x,szero,ww,desc_data,info,&
        & trans_,work=w1)
   if(info /= psb_success_) goto 9999
   x(:) = ww(:)
   deallocate(ww,W1,stat=info)
-  if (info /= psb_success_) then 
+  if (info /= psb_success_) then
     info = psb_err_from_subroutine_
     call psb_errpush(info,name,a_err='DeAllocate')
-    goto 9999      
+    goto 9999
   end if
 
 
@@ -336,3 +334,159 @@ subroutine psb_s_apply1v(prec,x,desc_data,info,trans)
 
 end subroutine psb_s_apply1v
 
+subroutine psb_scprecseti(prec,what,val,info,ilev,ilmax,pos,idx)
+  use psb_base_mod
+  use psb_s_prec_type, psb_protect_name => psb_scprecseti
+  implicit none
+
+  class(psb_sprec_type), intent(inout)   :: prec
+  character(len=*), intent(in)             :: what
+  integer(psb_ipk_), intent(in)            :: val
+  integer(psb_ipk_), intent(out)           :: info
+  ! This optional inputs are backport from the inputs available in AMG4PSBLAS,
+  ! they are of no actual use here a part from compatibility reasons.
+  integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+  character(len=*), optional, intent(in)   :: pos
+
+  ! Local variables
+  character(len=*), parameter            :: name='psb_precseti'
+
+  info = psb_success_
+
+  ! We need to convert from the 'what' string to the corresponding integer
+  ! value befor passing the call to the set of the inner method.
+  select case (psb_toupper(trim(what)))
+    case ('SUB_FILLIN')
+      call prec%prec%precset(psb_ilu_fill_in_,val,info)
+    case ('INV_FILLIN')
+      call prec%prec%precset(psb_inv_fillin_,val,info)
+    case default
+      info = psb_err_invalid_args_combination_
+      write(psb_err_unit,*) name,&
+           & ': Error: uninitialized preconditioner,',&
+           &' should call prec%init'
+      return
+  end select
+
+end subroutine psb_scprecseti
+
+subroutine psb_scprecsetr(prec,what,val,info,ilev,ilmax,pos,idx)
+  use psb_base_mod
+  use psb_s_prec_type, psb_protect_name => psb_scprecsetr
+  implicit none
+
+  class(psb_sprec_type), intent(inout)   :: prec
+  character(len=*), intent(in)             :: what
+  real(psb_spk_), intent(in)             :: val
+  integer(psb_ipk_), intent(out)           :: info
+  ! This optional inputs are backport from the inputs available in AMG4PSBLAS,
+  ! they are of no actual use here a part from compatibility reasons.
+  integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+  character(len=*), optional, intent(in)   :: pos
+
+  ! Local variables
+  character(len=*), parameter            :: name='psb_precsetr'
+
+  info = psb_success_
+
+  ! We need to convert from the 'what' string to the corresponding integer
+  ! value befor passing the call to the set of the inner method.
+  select case (psb_toupper(trim(what)))
+  case('SUB_ILUTHRS')
+    call prec%prec%precset(psb_fact_eps_,val,info)
+  case('INV_THRESH')
+    call prec%prec%precset(psb_inv_thresh_,val,info)
+  case default
+    info = psb_err_invalid_args_combination_
+    write(psb_err_unit,*) name,&
+         & ': Error: uninitialized preconditioner,',&
+         &' should call prec%init'
+    return
+  end select
+
+end subroutine psb_scprecsetr
+
+subroutine psb_scprecsetc(prec,what,string,info,ilev,ilmax,pos,idx)
+  use psb_base_mod
+  use psb_s_prec_type, psb_protect_name => psb_scprecsetc
+  implicit none
+
+  class(psb_sprec_type), intent(inout)   :: prec
+  character(len=*), intent(in)             :: what
+  character(len=*), intent(in)             :: string
+  integer(psb_ipk_), intent(out)           :: info
+  ! This optional inputs are backport from the inputs available in AMG4PSBLAS,
+  ! they are of no actual use here a part from compatibility reasons.
+  integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+  character(len=*), optional, intent(in)   :: pos
+
+  ! Local variables
+  character(len=*), parameter            :: name='psb_precsetc'
+
+  info = psb_success_
+
+  ! We need to convert from the 'what' string to the corresponding integer
+  ! value befor passing the call to the set of the inner method.
+  select case (psb_toupper(trim(what)))
+    case ('SUB_SOLVE')
+      ! We select here the type of solver on the block
+      select case (psb_toupper(trim(string)))
+        case("ILU")
+            call prec%prec%precset(psb_f_type_,psb_f_ilu_k_,info)
+            call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info)
+        case("ILUT")
+            call prec%prec%precset(psb_f_type_,psb_f_ilu_t_,info)
+            call prec%prec%precset(psb_ilu_ialg_,psb_ilu_t_,info)
+        case("AINV")
+            call prec%prec%precset(psb_f_type_,psb_f_ainv_,info)
+        case("INVK")
+            call prec%prec%precset(psb_f_type_,psb_f_invk_,info)
+        case("INVT")
+            call prec%prec%precset(psb_f_type_,psb_f_invt_,info)
+        case default
+          ! Default to ILU(0) factorization
+          call prec%prec%precset(psb_f_type_,psb_f_ilu_n_,info)
+          call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info)
+      end select
+    case ("ILU_ALG")
+      select case (psb_toupper(trim(string)))
+        case ("MILU")
+          call prec%prec%precset(psb_ilu_ialg_,psb_milu_n_,info)
+        case default
+          ! Do nothing
+      end select
+    case ("ILUT_SCALE")
+      select case (psb_toupper(trim(string)))
+      case ("MAXVAL")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_maxval_,info)
+      case ("DIAG")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_diag_,info)
+      case ("ARWSUM")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arwsum_,info)
+      case ("ARCSUM")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arcsum_,info)
+      case ("ACLSUM")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_aclsum_,info)
+      case ("NONE")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info)
+      case default
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info)
+      end select
+    case ("AINV_ALG")
+      select case (psb_toupper(trim(string)))
+      case("LLK")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info)
+      case("SYM-LLK")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_llk_,info)
+      case("STAB-LLK")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_ft_llk_,info)
+      case("MLK","LMX")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_mlk_,info)
+      case default
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info)
+      end select
+    case default
+
+  end select
+
+end subroutine psb_scprecsetc
diff --git a/prec/impl/psb_s_sp_drop.f90 b/prec/impl/psb_s_sp_drop.f90
new file mode 100644
index 00000000..bc297d08
--- /dev/null
+++ b/prec/impl/psb_s_sp_drop.f90
@@ -0,0 +1,168 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_s_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info)
+  use psb_base_mod
+  implicit none
+  real(psb_spk_), intent(in)       :: sp_thresh
+  integer(psb_ipk_), intent(in)    :: idiag, nzrmax
+  integer(psb_ipk_), intent(inout) :: nz
+  integer(psb_ipk_), intent(inout) :: iz(:)
+  real(psb_spk_), intent(inout)    :: valz(:)
+  integer(psb_ipk_), intent(out)   :: info
+  !
+  integer(psb_ipk_)              :: i, j, idf, nw
+  real(psb_spk_)                 :: witem
+  integer(psb_ipk_)              :: widx
+  real(psb_spk_), allocatable    :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+
+
+  info = psb_success_
+
+  if (nz > min(size(iz),size(valz))) then
+    write(0,*) 'Serious size problem ',nz,size(iz),size(valz)
+    info = -2
+    return
+  end if
+  allocate(xw(nz),xwid(nz),indx(nz),stat=info)
+  if (info /= psb_success_) then
+    write(psb_err_unit,*) ' Memory allocation failure in sp_drop',nz,info
+    return
+  endif
+
+  ! Always keep the diagonal element
+  idf = -1
+  do i=1, nz
+    if (iz(i) == idiag) then
+      idf     = i
+      witem   = valz(i)
+      widx    = iz(i)
+      valz(i) = valz(1)
+      iz(i)   = iz(1)
+      valz(1) = witem
+      iz(1)   = widx
+      exit
+    end if
+  end do
+
+  if (idf == -1) then
+
+    xw(1:nz) = valz(1:nz)
+    call psb_qsort(xw(1:nz),indx(1:nz),dir=psb_asort_down_)
+    do i=1, nz
+      xwid(i) = iz(indx(i))
+    end do
+    nw = min(nz,nzrmax)
+    do
+      if (nw <= 1) exit
+      if (abs(xw(nw)) < sp_thresh) then
+        nw = nw - 1
+      else
+        exit
+      end if
+    end do
+    nw = max(nw, 1)
+
+  else
+
+    nw = nz-1
+
+    xw(1:nw) = valz(2:nz)
+
+    call psb_qsort(xw(1:nw),indx(1:nw),dir=psb_asort_down_)
+    nw = min(nw,nzrmax-1)
+    do
+      if (nw <= 1) exit
+      if (abs(xw(nw)) < sp_thresh) then
+        nw = nw - 1
+      else
+        exit
+      end if
+    end do
+
+    do i=1, nw
+      xwid(i) = iz(1+indx(i))
+    end do
+    nw       = nw + 1
+    xw(nw)   = valz(1)
+    xwid(nw) = iz(1)
+  end if
+
+  call psb_msort(xwid(1:nw),indx(1:nw),dir=psb_sort_up_)
+
+  do i=1, nw
+    valz(i) = xw(indx(i))
+    iz(i)   = xwid(i)
+  end do
+  nz = nw
+  if (nz>nzrmax) write(0,*) 'in sp_drop: ',nw,nzrmax,nz
+  deallocate(xw,xwid,indx,stat=info)
+  if (info /= psb_success_) then
+    write(psb_err_unit,*) ' Memory deallocation failure in sp_drop',info
+    return
+  endif
+  return
+end subroutine psb_s_sp_drop
diff --git a/prec/impl/psb_s_sparsify.f90 b/prec/impl/psb_s_sparsify.f90
new file mode 100644
index 00000000..f829fbf2
--- /dev/null
+++ b/prec/impl/psb_s_sparsify.f90
@@ -0,0 +1,260 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_s_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info,istart,iheap,ikr)
+  use psb_base_mod
+  implicit none
+
+  real(psb_spk_), intent(in)              :: sp_thresh
+  integer(psb_ipk_), intent(in)           :: idiag, n, nzrmax
+  real(psb_spk_), intent(inout)           :: zw(:)
+  integer(psb_ipk_), intent(out)          :: nz
+  integer(psb_ipk_), intent(out)          :: iz(:)
+  real(psb_spk_), intent(out)             :: valz(:)
+  integer(psb_ipk_), intent(out)          :: info
+  integer(psb_ipk_), intent(in), optional :: istart
+  type(psb_i_heap), optional              :: iheap
+  integer(psb_ipk_), optional             :: ikr(:)
+  !
+  integer(psb_ipk_)              :: i, istart_, last_i, iret,k
+  real(psb_spk_)                :: witem
+  integer(psb_ipk_)              :: widx
+  real(psb_spk_), allocatable   :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+  type(psb_s_idx_heap)         :: heap
+
+
+  info = psb_success_
+  istart_ = 1
+  if (present(istart)) istart_ = max(1,istart)
+  if (.false.) then
+    nz = 0
+    do i=istart_, n
+      if ((i == idiag).or.(abs(zw(i)) >= sp_thresh)) then
+        nz       = nz + 1
+        iz(nz)   = i
+        valz(nz) = zw(i)
+      end if
+    end do
+
+  else
+    allocate(xw(nzrmax),xwid(nzrmax),indx(nzrmax),stat=info)
+    if (info /= psb_success_) then
+      return
+    end if
+
+    call heap%init(info,dir=psb_asort_down_)
+
+    ! Keep at least the diagonal
+    nz = 0
+
+    if (present(iheap)) then
+      if (.not.(present(ikr))) then
+        write(psb_err_unit,*) 'Error: if IHEAP then also IKR'
+        info = -1
+        return
+      end if
+      last_i = -1
+      do
+        call iheap%get_first(i,iret)
+        if (iret < 0) exit
+        ! An index may have been put on the heap more than once.
+        if (i == last_i) cycle
+        last_i = i
+        if (i == idiag) then
+          xw(1)   = zw(i)
+          xwid(1) = i
+        else if (abs(zw(i)) >= sp_thresh) then
+          call heap%insert(zw(i),i,info)
+        end if
+        zw(i)  = dzero
+        ikr(i) = 0
+      end do
+
+    else
+
+      do i=istart_, n
+        if (i == idiag) then
+          xw(1)   = zw(i)
+          xwid(1) = i
+        else if (abs(zw(i)) >= sp_thresh) then
+          call heap%insert(zw(i),i,info)
+        end if
+        zw(i) = dzero
+      end do
+    end if
+
+    k = 1
+    do
+      if (k == nzrmax) exit
+      call heap%get_first(witem,widx,info)
+      if (info == -1) then
+        info = psb_success_
+        exit
+      endif
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        return
+      end if
+      k = k + 1
+      xw(k)   = witem
+      xwid(k) = widx
+    end do
+    call heap%free(info)
+    nz = k
+    call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_)
+!!$    write(0,*) 'sparsify output for idiag ',idiag,' :',nz,sp_thresh
+    do i=1, nz
+      valz(i) = xw(indx(i))
+      iz(i)   = xwid(i)
+!!$      write(0,*) '         ',iz(i),valz(i)
+    end do
+
+  end if
+
+  return
+
+end subroutine psb_s_sparsify
+
+
+subroutine psb_s_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info)
+  use psb_base_mod
+  implicit none
+
+  real(psb_spk_), intent(in)      :: sp_thresh
+  integer(psb_ipk_), intent(in)     :: idiag, n, nzrmax
+  real(psb_spk_), intent(inout)    :: zw(:)
+  integer(psb_ipk_), intent(out)    :: nz
+  integer(psb_ipk_), intent(out)    :: iz(:)
+  real(psb_spk_), intent(out)       :: valz(:)
+  integer(psb_ipk_), intent(out)    :: info
+  integer(psb_ipk_), intent(inout)  :: lhead, listv(:)
+  integer(psb_ipk_)                 :: ikr(:)
+  !
+  integer(psb_ipk_)              :: i, istart_, last_i, iret,k,current, next
+  real(psb_spk_)                :: witem
+  integer(psb_ipk_)              :: widx
+  real(psb_spk_), allocatable    :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+
+
+  info = psb_success_
+  istart_ = 1
+  allocate(xw(n),xwid(n),indx(n),stat=info)
+
+  current = lhead
+  lhead = -1
+  i = 0
+  do while (current >0)
+    i = i + 1
+    xw(i)   = zw(current)
+    xwid(i) = current
+
+    if (current == idiag) then
+      ! Bring the diagona into first position
+      witem = xw(1)
+      widx  = xwid(1)
+      xw(1)   = xw(i)
+      xwid(1) = xwid(i)
+      xw(i)   = witem
+      xwid(i) = widx
+    end if
+
+    zw(current)  = dzero
+    ikr(current) = 0
+
+    next = listv(current)
+    listv(current) = -1
+    current = next
+  end do
+  nz = i
+  if (nz > 2) call psb_hsort(xw(2:nz),ix=xwid(2:nz),&
+       & dir=psb_asort_down_,flag=psb_sort_keep_idx_)
+!!$  write(0,*) 'Done first msort '
+!!$  write(0,*) '   after first msort for idiag ',idiag,' :',nz,sp_thresh
+!!$  do i=1, nz
+!!$    write(0,*) '         ',xwid(i),xw(i)
+!!$  end do
+
+  i = 2
+  do while (i<=nz)
+    if (abs(xw(i)) < sp_thresh) exit
+    i = i + 1
+  end do
+!!$  write(0,*) 'NZ ',nz, i, nzrmax
+  nz = max(1,min(i-1,nzrmax))
+  call psb_msort(xwid(1:nz),ix=indx(1:nz),dir=psb_sort_up_)
+!!$  write(0,*) 'Done second msort '
+
+!!$  write(0,*) 'sparsify output for idiag ',idiag,' :',nz,i,sp_thresh
+  do i=1, nz
+    valz(i) = xw(indx(i))
+    iz(i)   = xwid(i)
+!!$      write(0,*) '         ',iz(i),valz(i)
+  end do
+
+  return
+
+end subroutine psb_s_sparsify_list
diff --git a/prec/impl/psb_srwclip.f90 b/prec/impl/psb_srwclip.f90
new file mode 100644
index 00000000..f57207d7
--- /dev/null
+++ b/prec/impl/psb_srwclip.f90
@@ -0,0 +1,90 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_s_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax)
+  use psb_base_mod
+
+  implicit none
+  integer(psb_ipk_), intent(inout)  :: nz
+  integer(psb_ipk_), intent(inout)  :: ia(*), ja(*)
+  real(psb_spk_), intent(inout)    :: val(*)
+  integer(psb_ipk_), intent(in)     :: imin,imax,jmin,jmax
+
+  integer(psb_ipk_) :: i,j
+
+  j = 0
+  do i=1, nz
+    if ((imin <= ia(i)).and.&
+         & (ia(i) <= imax).and.&
+         & (jmin <= ja(i)).and.&
+         & (ja(i) <= jmax) ) then
+      j = j + 1
+      ia(j) = ia(i)
+      ja(j) = ja(i)
+      val(j) = val(i)
+    end if
+  end do
+  nz = j
+end subroutine psb_s_rwclip
diff --git a/prec/impl/psb_ssparse_biconjg_llk.F90 b/prec/impl/psb_ssparse_biconjg_llk.F90
new file mode 100644
index 00000000..6269cdc8
--- /dev/null
+++ b/prec/impl/psb_ssparse_biconjg_llk.F90
@@ -0,0 +1,366 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_ssparse_biconjg_llk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_s_ainv_tools_mod
+  use psb_s_biconjg_mod, psb_protect_name => psb_ssparse_biconjg_llk
+  !
+  ! Left-looking variant
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_s_csr_sparse_mat), intent(in)    :: a
+  type(psb_s_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_spk_), intent(in)                :: sp_thresh
+  real(psb_spk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable        :: ia(:), ja(:), izkr(:), izcr(:)
+  real(psb_spk_), allocatable :: zval(:),val(:), q(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       & ipj, lastj, nextj, nzw
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_s_csc_sparse_mat) :: ac
+  real(psb_spk_)     :: alpha
+  character(len=20)  :: name='psb_orth_llk'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = szero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = szero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < s_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = sone
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = sone
+  nzw       = 1
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = szero
+    ! !$        end do
+    zval(i)  = sone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j) < i) then
+!!$        write(0,*) i, ' Outer inserting ',ac%ia(j)
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+      if (debug) write(0,*) 'update loop, using row: ',j,i
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! ! write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+!!$      write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+!!$        write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, &
+!!$             & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1)
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+          if (izkr(kr) == 0) then
+!!$            write(0,*) ' main inner Inserting ',kr
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+!!$                write(0,*) j,i,' Inner inserting ',nextj
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+    if (abs(p(i)) < s_epstol) &
+         & p(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = szero
+    ! !$        end do
+    zval(i)  = sone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = a%irp(i), a%irp(i+1)-1
+      if (a%ja(j) < i) then
+        if (info == psb_success_) call rheap%insert(a%ja(j),info)
+        izcr(a%ja(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      innerw: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outerw ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit innerw
+        end if
+      end do innerw
+      izcr(j) = 0
+      if (j>=i) cycle outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = ac%icp(j)
+      ip2 = ac%icp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (ac%ia(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-q(i)/q(j))
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+
+        do k=w%icp(j), w%icp(j+1)-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              nextj=a%ja(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outerw
+    ip1 = ac%icp(i)
+    ip2 = ac%icp(i+1) - 1
+    do
+      if (ip2 < ip1 ) exit
+      if (ac%ia(ip2) <= n) exit
+      ip2 = ip2 -1
+    end do
+    nzra = max(0,ip2 - ip1 + 1)
+    q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+    if (abs(q(i)) < s_epstol) &
+         & q(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrz, w%ia,  info)
+    call psb_ensure_size(nzw+nzrz, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrz
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrz
+    nzw        = nzw + nzrz
+
+  end do
+
+end subroutine psb_ssparse_biconjg_llk
diff --git a/prec/impl/psb_ssparse_biconjg_llk_noth.F90 b/prec/impl/psb_ssparse_biconjg_llk_noth.F90
new file mode 100644
index 00000000..0683750a
--- /dev/null
+++ b/prec/impl/psb_ssparse_biconjg_llk_noth.F90
@@ -0,0 +1,362 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_ssparse_biconjg_llk_noth(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_s_ainv_tools_mod
+  use psb_s_biconjg_mod, psb_protect_name => psb_ssparse_biconjg_llk_noth
+
+  !
+  ! Left-looking variant, with NO drop rule on p(i)/p(j)
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)               :: n
+  type(psb_s_csr_sparse_mat), intent(in)    :: a
+  type(psb_s_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)               :: nzrmax
+  real(psb_spk_), intent(in)                :: sp_thresh
+  real(psb_spk_), intent(out)                :: p(:)
+  integer(psb_ipk_), intent(out)              :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable        :: ia(:), ja(:), izkr(:), izcr(:)
+  real(psb_spk_), allocatable :: zval(:),val(:), q(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_s_csc_sparse_mat) :: ac
+  real(psb_dpk_)     :: alpha
+  character(len=20)  :: name='psb_orth_llk_noth'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = szero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = szero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < s_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = done
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = done
+  nzw       = 1
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = szero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j) < i) then
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+      if (debug) write(0,*) 'update loop, using row: ',j,i
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+
+      if (.true.) then
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+          if (izkr(kr) == 0) then
+
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+    if (abs(p(i)) < s_epstol) &
+         & p(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = szero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = a%irp(i), a%irp(i+1)-1
+      if (a%ja(j) < i) then
+        if (info == psb_success_) call rheap%insert(a%ja(j),info)
+        izcr(a%ja(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      innerw: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outerw ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit innerw
+        end if
+      end do innerw
+      izcr(j) = 0
+      if (j>=i) cycle outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = ac%icp(j)
+      ip2 = ac%icp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (ac%ia(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-q(i)/q(j))
+      if (.true.) then
+
+        do k=w%icp(j), w%icp(j+1)-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              nextj=a%ja(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outerw
+    ip1 = ac%icp(i)
+    ip2 = ac%icp(i+1) - 1
+    do
+      if (ip2 < ip1 ) exit
+      if (ac%ia(ip2) <= n) exit
+      ip2 = ip2 -1
+    end do
+    nzra = max(0,ip2 - ip1 + 1)
+    q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+    if (abs(q(i)) < s_epstol) &
+         & q(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrz, w%ia,  info)
+    call psb_ensure_size(nzw+nzrz, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrz
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrz
+    nzw        = nzw + nzrz
+
+  end do
+
+end subroutine psb_ssparse_biconjg_llk_noth
diff --git a/prec/impl/psb_ssparse_biconjg_mlk.F90 b/prec/impl/psb_ssparse_biconjg_mlk.F90
new file mode 100644
index 00000000..7fc2db48
--- /dev/null
+++ b/prec/impl/psb_ssparse_biconjg_mlk.F90
@@ -0,0 +1,501 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_ssparse_biconjg_mlk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_s_ainv_tools_mod
+  use psb_s_biconjg_mod, psb_protect_name => psb_ssparse_biconjg_mlk
+  !
+  ! Left-looking variant
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_s_csr_sparse_mat), intent(in)    :: a
+  type(psb_s_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_spk_), intent(in)                :: sp_thresh
+  real(psb_spk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:), hlist(:), bfr(:), rwlist(:)
+  real(psb_spk_), allocatable    :: zval(:),val(:), q(:)
+  integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw, hlhead, li, mj, kkc, ifrst, ilst, rwhead
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_s_csc_sparse_mat) :: ac
+  real(psb_spk_)     :: alpha
+  character(len=20)  :: name='psb_biconjg_mlk'
+  logical, parameter :: debug=.false., test_merge=.true.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n), &
+       & hlist(n),rwlist(n),bfr(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i)  = 0
+    izcr(i)  = 0
+    zval(i)  = szero
+    hlist(i) = -1
+    rwlist(i) = -1
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = szero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < s_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = sone
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = sone
+  nzw       = 1
+
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = szero
+    ! !$        end do
+    zval(i)  = sone
+    izkr(i) = 1
+    rwhead  = i
+
+    hlhead = -1
+
+    kkc = 0
+    ilst  = ac%icp(i)-1
+    ifrst = ac%icp(i)
+    do j = ac%icp(i+1)-1, ac%icp(i), -1
+      if (ac%ia(j) < i) then
+        ilst = j
+        exit
+      end if
+    end do
+    kkc = ilst-ifrst+1
+
+    if (.true..or.debug) then
+!!$      write(0,*) 'Outer Before insertion  : ',hlhead
+      call printlist(hlhead,hlist)
+    end if
+    if (kkc > 0) then
+!!$      write(0,*) i,' Outer Inserting : ',kkc,':',ac%ia(ifrst:ilst)
+
+      !call hlmerge(hlhead,hlist,bfr(1:kkc))
+      call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst))
+    end if
+    if (debug) then
+      write(0,*) 'Outer After insertion: ',hlhead
+      call printlist(hlhead,hlist)
+    end if
+
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='init_lists')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      mj      = hlhead
+      if (mj > 0)  then
+        hlhead = hlist(mj)
+        hlist(mj) = -1
+      end if
+      j = mj
+      if (j < 0)  exit outer
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+
+      if (debug) write(0,*) 'update loop, using row: ',j,i,mj
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+!!$      write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha
+!!$      write(0,*) '   Current list is : ',hlhead
+!!$      call printlist(hlhead,hlist)
+!!$
+
+
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+        ifrst=z%icp(j)
+        ilst=z%icp(j+1)-1
+        call hlmerge(rwhead,rwlist,z%ia(ifrst:ilst))
+!!$        write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, &
+!!$             & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1)
+
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+
+          if (izkr(kr) == 0) then
+!!$            write(0,*) ' main inner Inserting ',kr
+!!$            call hlmerge(rwhead,rwlist,(/kr/))
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            ilst  = ac%icp(kr)-1
+            ifrst = ac%icp(kr+1)
+            kkc = 0
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              if ((ac%ia(kc) < i).and.(ac%ia(kc) >j)) then
+                ifrst = min(ifrst,kc )
+                ilst  = max(ilst,kc)
+              end if
+            end do
+            kkc = ilst-ifrst+1
+            if (debug) then
+              write(0,*) 'Inner Before insertion: '
+              call printlist(hlhead,hlist)
+              write(0,*) 'Inner Inserting : ',kkc,':',ac%ia(ifrst:ilst)
+            end if
+            if (ilst >= ifrst) then
+!!$              write(0,*) j,i,' Inner inserting ',ac%ia(ifrst:ilst)
+              call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst))
+            end if
+
+            if (debug) then
+              write(0,*) 'Inner After insertion: ',hlhead
+              call printlist(hlhead,hlist)
+            end if
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+    if (abs(p(i)) < s_epstol) &
+         & p(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info)
+
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = szero
+    ! !$        end do
+    zval(i)  = sone
+    izkr(i) = 1
+    rwhead  = i
+    hlhead = -1
+
+    kkc = 0
+    ilst  = a%irp(i)-1
+    ifrst = a%irp(i)
+    do j = a%irp(i+1)-1, a%irp(i), -1
+      if (a%ja(j) < i) then
+        ilst = j
+        exit
+      end if
+    end do
+    kkc = ilst-ifrst+1
+
+    if (debug) then
+      write(0,*) 'Outer Before insertion: '
+      call printlist(hlhead,hlist)
+      write(0,*) 'Outer Inserting : ',kkc,':',a%ja(ifrst:ilst)
+    end if
+    if (kkc > 0 ) then
+      !call hlmerge(hlhead,hlist,bfr(1:kkc))
+      call hlmerge(hlhead,hlist,a%ja(ifrst:ilst))
+    end if
+    if (debug) then
+      write(0,*) 'Outer After insertion: ',hlhead
+      call printlist(hlhead,hlist)
+    end if
+
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='init_lists')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      mj      = hlhead
+      if (hlhead > 0)  then
+        hlhead = hlist(mj)
+        hlist(mj) = -1
+      end if
+      j = mj
+      if (j < 0)  exit outerw
+
+      izcr(j) = 0
+      if (j>=i) cycle outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = ac%icp(j)
+      ip2 = ac%icp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (ac%ia(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-q(i)/q(j))
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+        ifrst=w%icp(j)
+        ilst=w%icp(j+1)-1
+        call hlmerge(rwhead,rwlist,w%ia(ifrst:ilst))
+
+        do k=w%icp(j), w%icp(j+1)-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            ilst  = a%irp(kr)-1
+            ifrst = a%irp(kr+1)
+            kkc = 0
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              if ((a%ja(kc) < i).and.(a%ja(kc) >j)) then
+                ifrst = min(ifrst,kc )
+                ilst  = max(ilst,kc)
+              end if
+            end do
+            kkc = ilst-ifrst+1
+            if (debug) then
+              write(0,*) 'Inner Before insertion: '
+              call printlist(hlhead,hlist)
+              write(0,*) 'Inner Inserting : ',kkc,':',a%ja(ifrst:ilst)
+            end if
+
+            call hlmerge(hlhead,hlist,a%ja(ifrst:ilst))
+
+            if (debug) then
+              write(0,*) 'Inner After insertion: ',hlhead
+              call printlist(hlhead,hlist)
+            end if
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outerw
+    ip1 = ac%icp(i)
+    ip2 = ac%icp(i+1) - 1
+    do
+      if (ip2 < ip1 ) exit
+      if (ac%ia(ip2) <= n) exit
+      ip2 = ip2 -1
+    end do
+    nzra = max(0,ip2 - ip1 + 1)
+    q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+    if (abs(q(i)) < s_epstol) &
+         & q(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrz, w%ia,  info)
+    call psb_ensure_size(nzw+nzrz, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrz
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrz
+    nzw        = nzw + nzrz
+
+  end do
+
+contains
+
+  subroutine hlmerge(head,listv,vals)
+    integer(psb_ipk_), intent(inout) :: head, listv(:)
+    integer(psb_ipk_), intent(in)    :: vals(:)
+    integer(psb_ipk_) :: i,j,k, lh, lv, nv, vv, flh, ph
+
+    nv  = size(vals)
+    lh  = head
+    flh = -1
+    lv  = 1
+    if ((head < 0).and.(nv > 0)) then
+      ! Adjust head if empty
+      head = vals(lv)
+      lv = lv + 1
+    else if ((head > 0) .and. (nv >0)) then
+      ! Adjust head if first item less than it
+      if (head > vals(lv)) then
+        listv(vals(lv)) = head
+        head = vals(lv)
+        lv   = lv + 1
+      end if
+    end if
+
+    lh = head
+    ph = lh
+    do while ((lh > 0) .and. (lv <= nv))
+      if (lh == vals(lv)) then
+        lv = lv + 1
+      else if (lh > vals(lv)) then
+        listv(vals(lv)) = lh
+        listv(ph) = vals(lv)
+        lh = vals(lv)
+        lv = lv + 1
+      else
+        ph = lh
+        lh = listv(lh)
+      end if
+    end do
+    lh = ph
+    do while (lv <= nv)
+      listv(lh) = vals(lv)
+      lh = listv(lh)
+      lv = lv + 1
+    end do
+  end subroutine hlmerge
+
+
+  subroutine printlist(head,listv)
+    integer(psb_ipk_), intent(in) :: head, listv(:)
+    integer(psb_ipk_) :: li
+
+    li = head
+    do while (li > 0)
+      write(0,*) 'Item: ', li
+      li = listv(li)
+    end do
+  end subroutine printlist
+
+end subroutine psb_ssparse_biconjg_mlk
diff --git a/prec/impl/psb_ssparse_biconjg_s_ft_llk.F90 b/prec/impl/psb_ssparse_biconjg_s_ft_llk.F90
new file mode 100644
index 00000000..e8287e84
--- /dev/null
+++ b/prec/impl/psb_ssparse_biconjg_s_ft_llk.F90
@@ -0,0 +1,414 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_ssparse_biconjg_s_ft_llk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_s_ainv_tools_mod
+  use psb_s_biconjg_mod, psb_protect_name => psb_ssparse_biconjg_s_ft_llk
+
+  !
+  ! Left-looking variant, stabilized i.e. product by A is applied
+  ! to compute the diagonal elements.
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_s_csr_sparse_mat), intent(in)    :: a
+  type(psb_s_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_spk_), intent(in)                :: sp_thresh
+  real(psb_spk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:),iww(:)
+  real(psb_spk_), allocatable    :: zval(:),val(:), q(:),  ww(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj, nzww,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw, nzrw
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_s_csc_sparse_mat) :: ac
+  real(psb_spk_)     :: alpha, tmpq,tmpq2
+  character(len=20)  :: name='psb_orth_llk'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),iww(n),ww(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! Init pointers to:
+  !  ljr(i): last occupied column index within row  I
+  !  izcr(i): first occupied row index within column I
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = szero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = szero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < s_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = sone
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = sone
+  nzw       = 1
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = szero
+    ! !$        end do
+    zval(i)  = sone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+    if (info == psb_success_) call rheap%init(info)
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j)<i) then
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+      izcr(j) = 0
+      if (j>=i) exit outer
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = w%icp(j)
+      ip2 = w%icp(j+1) - 1
+      nzra = max(0,ip2 - ip1 + 1)
+      nzww = 0
+      call psb_s_spvspm(sone,a,nzra,w%ia(ip1:ip2),w%val(ip1:ip2),&
+           & szero,nzww,iww,ww,info)
+
+      p(i) =  psb_spge_dot(nzww,iww,ww,zval)
+
+      ipz1 = z%icp(j)
+      ipz2 = z%icp(j+1)
+      nzrz = ipz2-ipz1
+      alpha = (-p(i)/p(j))
+!!$        write(0,*) ' p(i)/p(j) ',i,j,alpha,p(i),p(j)
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+
+        do k=ipz1, ipz2-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+!!$        izcr(j) = 0
+    end do outer
+
+    if (.false.) then
+      ! We can't do the proper thing until we have bot Z_i and W_i.
+      call a%csget(i,i,nzra,ia,ja,val,info)
+      call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+      p(i) = psb_spge_dot(nzra,ja,val,zval)
+      if (abs(p(i)) < s_epstol) &
+           & p(i) = 1.d-3
+    end if
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = szero
+    ! !$        end do
+    zval(i)  = sone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+!!$      write(0,*) 'Inserting into heap ',i
+    if (info == psb_success_) call rheap%init(info)
+    do j = a%irp(i), a%irp(i+1)-1
+      if (a%ja(j)<i) then
+        if (info == psb_success_) call rheap%insert(a%ja(j),info)
+        izcr(a%ja(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      innerw: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outerw ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit innerw
+        end if
+      end do innerw
+      izcr(j) = 0
+      if (j>=i) exit outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      if (.false.) then
+        ip1 = ac%icp(j)
+        ip2 = ac%icp(j+1) - 1
+        do
+          if (ip2 < ip1 ) exit
+          if (ac%ia(ip2) <= n) exit
+          ip2 = ip2 -1
+        end do
+        nzra = max(0,ip2 - ip1 + 1)
+        q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+        ! !$          write(psb_err_unit,*) j,i,p(i)
+      else
+        ip1 = z%icp(j)
+        ip2 = z%icp(j+1) - 1
+        nzra = max(0,ip2 - ip1 + 1)
+        nzww = 0
+        call psb_s_spmspv(sone,ac,nzra,z%ia(ip1:ip2),z%val(ip1:ip2),&
+             & szero,nzww,iww,ww,info)
+
+        q(i) =  psb_spge_dot(nzww,iww,ww,zval)
+      end if
+
+      ipz1 = w%icp(j)
+      ipz2 = w%icp(j+1)
+      nzrz = ipz2-ipz1
+      alpha = (-q(i)/q(j))
+!!$        write(0,*) ' q(i)/q(j) ',i,j,alpha,q(i),q(j)
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+
+        do k=ipz1, ipz2-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              nextj=a%ja(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+!!$        izcr(j) = 0
+    end do outerw
+
+!!$      ip1 = ac%icp(i)
+!!$      ip2 = ac%icp(i+1) - 1
+!!$      do
+!!$        if (ip2 < ip1 ) exit
+!!$        if (ac%ia(ip2) <= n) exit
+!!$        ip2 = ip2 -1
+!!$      end do
+!!$      nzra = max(0,ip2 - ip1 + 1)
+!!$
+!!$      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+!!$      if (abs(q(i)) < s_epstol) &
+!!$           & q(i) = 1.d-3
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrw,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrw, w%ia,  info)
+    call psb_ensure_size(nzw+nzrw, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrw
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrw
+    nzw        = nzw + nzrw
+
+!!$      !
+!!$      ! Ok, now compute w_i^T A z_i
+!!$      !
+    nzww = 0
+    nzrz = z%icp(i+1)-z%icp(i)
+    ipz1 = z%icp(i)
+    call psb_s_spmspv(sone,ac,&
+         & nzrz,z%ia(ipz1:ipz1+nzrz-1),z%val(ipz1:ipz1+nzrz-1),&
+         & szero,nzww,iww,ww,info)
+    tmpq  = psb_spdot_srtd(nzww,iww,ww,nzrw,ia,val)
+    q(i) = tmpq
+!    if (tmpq <0) then
+!!$        write(0,*) 'On negative dot prod at ',i
+!!$        write(0,*) 'On negative dot prod a ',ia(1:nzrw),val(1:nzrw)
+!!$        write(0,*) 'On negative dot prod w ',iww(1:nzww),ww(1:nzww)
+!!$        ip1 = ac%icp(i)
+!!$        ip2 = ac%icp(i+1) - 1
+!!$        do
+!!$          if (ip2 < ip1 ) exit
+!!$          if (ac%ia(ip2) <= n) exit
+!!$          ip2 = ip2 -1
+!!$        end do
+!!$        nzra = max(0,ip2 - ip1 + 1)
+!!$        write(0,*) 'On negative dot prod a ',ac%ia(ip1:ip2),ac%val(ip1:ip2)
+!
+!    end if
+
+    if (abs(q(i)) < s_epstol) &
+         & q(i) = 1.d-3
+    p(i) = q(i)
+
+  end do
+
+end subroutine psb_ssparse_biconjg_s_ft_llk
diff --git a/prec/impl/psb_ssparse_biconjg_s_llk.F90 b/prec/impl/psb_ssparse_biconjg_s_llk.F90
new file mode 100644
index 00000000..42e71089
--- /dev/null
+++ b/prec/impl/psb_ssparse_biconjg_s_llk.F90
@@ -0,0 +1,248 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_ssparse_biconjg_s_llk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_s_ainv_tools_mod
+  use psb_s_biconjg_mod, psb_protect_name => psb_ssparse_biconjg_s_llk
+
+  !
+  ! Left-looking variant SYMMETRIC/HERMITIAN A. You have been warned!
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_s_csr_sparse_mat), intent(in)    :: a
+  type(psb_s_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_spk_), intent(in)                :: sp_thresh
+  real(psb_spk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:)
+  real(psb_spk_), allocatable    :: zval(:),val(:), q(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw,kk
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_s_csc_sparse_mat) :: ac
+  real(psb_spk_)     :: alpha, zvalmax
+  character(len=20)  :: name='psb_orth_llk'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = szero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = szero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < s_epstol) &
+       & p(1) = 1.d-3
+
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1) = 1
+  z%icp(2) = 2
+  z%ia(1)  = 1
+  z%val(1) = sone
+  nzz      = 1
+  zvalmax  = sone
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = szero
+    ! !$        end do
+    zval(i)  = sone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j) < i) then
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+      if (debug) write(0,*) 'update loop, using row: ',j,i
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+!!$          if (abs(zval(kr)) > 1e16) then
+!!$            write(0,*) i,j,p(i),p(j),alpha,z%val(k),alpha*z%val(k),kr,zval(kr)
+!!$          end if
+          if (izkr(kr) == 0) then
+
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+!!$    if ((1761<=i).and.(i<=1780)) then
+!!$      write(0,*) 'Dot product terms at ',i,nzra
+!!$      do kk=1,nzra
+!!$        write(0,*) kk,ja(kk),val(kk),zval(ja(kk))
+!!$      end do
+!!$    end if
+
+    if (abs(p(i)) < s_epstol) &
+         & p(i) = 1.d-3
+
+    ! !$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+!!$      zvalmax = max(zvalmax,abs(val(j)))
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+!!$    write(0,*) ' Dot: ',i,p(i),zvalmax
+
+  end do
+
+  call z%cp_to_fmt(w,info)
+
+end subroutine psb_ssparse_biconjg_s_llk
diff --git a/prec/impl/psb_z_ainv_bld.f90 b/prec/impl/psb_z_ainv_bld.f90
new file mode 100644
index 00000000..4762a5f7
--- /dev/null
+++ b/prec/impl/psb_z_ainv_bld.f90
@@ -0,0 +1,225 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+subroutine psb_z_ainv_bld(a,alg,fillin,thresh,wmat,d,zmat,desc,info,blck,iscale)
+
+  use psb_base_mod
+  use psb_prec_const_mod
+  use psb_z_biconjg_mod
+
+  implicit none
+
+  ! Arguments
+  type(psb_zspmat_type), intent(in), target   :: a
+  integer(psb_ipk_), intent(in)                 :: fillin,alg
+  real(psb_dpk_), intent(in)                  :: thresh
+  type(psb_zspmat_type), intent(inout)        :: wmat, zmat
+  complex(psb_dpk_), allocatable                  :: d(:)
+  Type(psb_desc_type), Intent(in)               :: desc
+  integer(psb_ipk_), intent(out)                :: info
+  type(psb_zspmat_type), intent(in), optional :: blck
+  integer(psb_ipk_), intent(in), optional       :: iscale
+  !
+  integer(psb_ipk_)             :: i, nztota, err_act, n_row, nrow_a
+  type(psb_z_coo_sparse_mat)  :: acoo
+  type(psb_z_csr_sparse_mat)  :: acsr
+  type(psb_zspmat_type)       :: atmp
+  real(psb_dpk_), allocatable :: arws(:), acls(:)
+  complex(psb_dpk_), allocatable  :: pq(:), ad(:)
+  integer(psb_ipk_)             :: debug_level, debug_unit
+  type(psb_ctxt_type)           :: ctxt
+  integer(psb_ipk_)             :: np,me
+  integer(psb_ipk_)             :: nzrmax, iscale_
+  real(psb_dpk_)              :: sp_thresh
+  complex(psb_dpk_)               :: weight
+  character(len=20)             :: name, ch_err
+
+
+  info = psb_success_
+  name = 'psb_zainv_bld'
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  ctxt        = psb_cd_get_context(desc)
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' start'
+
+  iscale_ = psb_ilu_scale_none_
+  if (present(iscale)) iscale_ = iscale
+  weight = zone
+  !
+  ! Check the memory available to hold the W and Z factors
+  ! and allocate it if needed
+  !
+  nrow_a = a%get_nrows()
+  nztota = a%get_nzeros()
+
+  if (present(blck)) then
+    nztota = nztota + blck%get_nzeros()
+  end if
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),&
+       & ': out get_nnzeros',nrow_a,nztota,&
+       & a%get_nrows(),a%get_ncols(),a%get_nzeros()
+
+
+  n_row  = desc%get_local_rows()
+  allocate(pq(n_row),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  nzrmax    = fillin
+  sp_thresh = thresh
+
+  !
+  ! Ok, let's start first with Z (i.e. Upper)
+  !
+  call a%csclip(acoo,info,imax=n_row,jmax=n_row)
+  call acsr%mv_from_coo(acoo,info)
+  select case(iscale_)
+  case(psb_ilu_scale_none_)
+    ! Ok, do nothing.
+
+  case(psb_ilu_scale_maxval_)
+    weight = acsr%maxval()
+    weight = zone/weight
+    call acsr%scal(weight,info)
+
+  case(psb_ilu_scale_arcsum_)
+    allocate(arws(n_row),acls(n_row),ad(n_row),stat=info)
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    end if
+    call acsr%arwsum(arws)
+    call acsr%aclsum(acls)
+    ad(1:n_row) = sqrt(sqrt(arws(1:n_row)*acls(1:n_row)))
+    ad(1:n_row) = zone/ad(1:n_row)
+    call acsr%scal(ad,info,side='L')
+    call acsr%scal(ad,info,side='R')
+  case default
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale')
+    goto 9999
+  end select
+
+  !
+  ! Here for the actual workhorses.
+  ! Only biconjg is surviving for now....
+  !
+    call psb_sparse_biconjg(alg,n_row,acsr,pq,&
+         &   zmat,wmat,nzrmax,sp_thresh,info)
+  ! Done. Hopefully....
+
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='sparse_biconjg')
+    goto 9999
+  end if
+  call atmp%mv_from(acsr)
+
+  !
+  ! Is this right???
+  !
+  do i=1, n_row
+    if (abs(pq(i)) < d_epstol) then
+      pq(i) = zone
+    else
+      pq(i) = zone/pq(i)
+    end if
+  end do
+
+  select case(iscale_)
+  case(psb_ilu_scale_none_)
+    ! Ok, do nothing.
+  case(psb_ilu_scale_maxval_)
+    pq(:) = pq(:)*weight
+
+  case(psb_ilu_scale_arcsum_)
+    call zmat%scal(ad,info,side='L')
+    call wmat%scal(ad,info,side='R')
+
+  case default
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='wrong iscale')
+    goto 9999
+  end select
+
+  call psb_move_alloc(pq,d,info)
+
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' end'
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_z_ainv_bld
diff --git a/prec/impl/psb_z_bjacprec_impl.f90 b/prec/impl/psb_z_bjacprec_impl.f90
index 70e062a5..3533f1e3 100644
--- a/prec/impl/psb_z_bjacprec_impl.f90
+++ b/prec/impl/psb_z_bjacprec_impl.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,13 +27,13 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 
 subroutine psb_z_bjac_dump(prec,info,prefix,head)
   use psb_base_mod
   use psb_z_bjacprec, psb_protect_name => psb_z_bjac_dump
-  implicit none 
+  implicit none
   class(psb_z_bjac_prec_type), intent(in) :: prec
   integer(psb_ipk_), intent(out)                    :: info
   character(len=*), intent(in), optional  :: prefix,head
@@ -43,13 +43,13 @@ subroutine psb_z_bjac_dump(prec,info,prefix,head)
   character(len=80)  :: prefix_
   character(len=120) :: fname ! len should be at least 20 more than
 
-  !  len of prefix_ 
+  !  len of prefix_
 
   info = 0
   ctxt = prec%get_ctxt()
   call psb_info(ctxt,iam,np)
 
-  if (present(prefix)) then 
+  if (present(prefix)) then
     prefix_ = trim(prefix(1:min(len(prefix),len(prefix_))))
   else
     prefix_ = "dump_fact_z"
@@ -74,7 +74,7 @@ end subroutine psb_z_bjac_dump
 subroutine psb_z_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_z_bjacprec, psb_protect_name => psb_z_bjac_apply_vect
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_z_bjac_prec_type), intent(inout)  :: prec
   complex(psb_dpk_),intent(in)         :: alpha,beta
@@ -118,12 +118,12 @@ subroutine psb_z_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
   n_row = desc_data%get_local_rows()
   n_col = desc_data%get_local_cols()
 
-  if (x%get_nrows() < n_row) then 
+  if (x%get_nrows() < n_row) then
     info = 36; ierr(1) = 2; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
   end if
-  if (y%get_nrows() < n_row) then 
+  if (y%get_nrows() < n_row) then
     info = 36; ierr(1) = 3; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
@@ -140,9 +140,9 @@ subroutine psb_z_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
   end if
 
 
-  if (n_col <= size(work)) then 
+  if (n_col <= size(work)) then
     ww => work(1:n_col)
-    if ((4*n_col+n_col) <= size(work)) then 
+    if ((4*n_col+n_col) <= size(work)) then
       aux => work(n_col+1:)
     else
       allocate(aux(4*n_col),stat=info)
@@ -152,19 +152,19 @@ subroutine psb_z_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
     allocate(ww(n_col),aux(4*n_col),stat=info)
   endif
 
-  if (info /= psb_success_) then 
+  if (info /= psb_success_) then
     call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-    goto 9999      
+    goto 9999
   end if
 
   do_alloc_wrk = .not.prec%is_allocated_wrk()
   if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v)
 
   associate (wv => prec%wrk(1), wv1 => prec%wrk(2))
-    
+
     select case(prec%iprcparm(psb_f_type_))
-    case(psb_f_ilu_n_) 
-      
+    case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_)
+
       select case(trans_)
       case('N')
         call psb_spsm(zone,prec%av(psb_l_pr_),x,zzero,wv,desc_data,info,&
@@ -172,31 +172,58 @@ subroutine psb_z_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
         if(info == psb_success_) call psb_spsm(alpha,prec%av(psb_u_pr_),wv,&
              & beta,y,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_, work=aux)
-        
+
       case('T')
         call psb_spsm(zone,prec%av(psb_u_pr_),x,zzero,wv,desc_data,info,&
              & trans=trans_,scale='L',diag=prec%dv,choice=psb_none_, work=aux)
         if(info == psb_success_)  call psb_spsm(alpha,prec%av(psb_l_pr_),wv,&
              & beta,y,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_,work=aux)
-        
+
       case('C')
-        
+
         call psb_spsm(zone,prec%av(psb_u_pr_),x,zzero,wv,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_, work=aux)
-        
+
         call wv1%mlt(zone,prec%dv,wv,zzero,info,conjgx=trans_)
-        
+
         if(info == psb_success_)  call psb_spsm(alpha,prec%av(psb_l_pr_),wv1,&
              & beta,y,desc_data,info,&
              & trans=trans_,scale='U',choice=psb_none_,work=aux)
-        
+
+      end select
+      if (info /= psb_success_) then
+        ch_err="psb_spsm"
+        goto 9999
+      end if
+
+    case(psb_f_ainv_,psb_f_invt_,psb_f_invk_)
+      ! Application of approximate inverse preconditioner, just some spmm
+
+      select case(trans_)
+      case('N')
+        call psb_spmm(zone,prec%av(psb_l_pr_),x,zzero,wv,desc_data,info,&
+             & trans=trans_,work=aux,doswap=.false.)
+
+        if (info == psb_success_) call wv1%mlt(zone,prec%dv,wv,zzero,info)
+        if(info == psb_success_) &
+             & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1,&
+             & beta,y,desc_data,info, trans=trans_, work=aux,doswap=.false.)
+
+       case('T','C')
+         call psb_spmm(zone,prec%av(psb_l_pr_),x,zzero,wv,desc_data,info,&
+              & trans=trans_,work=aux,doswap=.false.)
+         if (info == psb_success_) call wv1%mlt(zone,prec%dv,wv,zzero,info)
+         if (info == psb_success_) &
+              & call psb_spmm(alpha,prec%av(psb_u_pr_),wv1, &
+              & beta,y,desc_data,info,trans=trans_,work=aux,doswap=.false.)
+
       end select
-      if (info /= psb_success_) then 
+      if (info /= psb_success_) then
         ch_err="psb_spsm"
         goto 9999
       end if
-      
+
     case default
       info = psb_err_internal_error_
       call psb_errpush(info,name,a_err='Invalid factorization')
@@ -204,12 +231,12 @@ subroutine psb_z_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
     end select
 
   end associate
-  
+
   call psb_halo(y,desc_data,info,data=psb_comm_mov_)
 
   if (do_alloc_wrk) call prec%free_wrk(info)
-  if (n_col <= size(work)) then 
-    if ((4*n_col+n_col) <= size(work)) then 
+  if (n_col <= size(work)) then
+    if ((4*n_col+n_col) <= size(work)) then
     else
       deallocate(aux)
     endif
@@ -231,7 +258,7 @@ end subroutine psb_z_bjac_apply_vect
 subroutine psb_z_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_z_bjacprec, psb_protect_name => psb_z_bjac_apply
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_z_bjac_prec_type), intent(inout)  :: prec
   complex(psb_dpk_),intent(in)         :: alpha,beta
@@ -244,6 +271,7 @@ subroutine psb_z_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   ! Local variables
   integer(psb_ipk_) :: n_row,n_col
   complex(psb_dpk_), pointer :: ww(:), aux(:)
+  type(psb_d_vect_type) :: tx,ty
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
   integer(psb_ipk_) :: err_act, ierr(5)
@@ -273,12 +301,12 @@ subroutine psb_z_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   n_row = desc_data%get_local_rows()
   n_col = desc_data%get_local_cols()
 
-  if (size(x) < n_row) then 
+  if (size(x) < n_row) then
     info = 36; ierr(1) = 2; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
   end if
-  if (size(y) < n_row) then 
+  if (size(y) < n_row) then
     info = 36; ierr(1) = 3; ierr(2) = n_row;
     call psb_errpush(info,name,i_err=ierr)
     goto 9999
@@ -295,29 +323,29 @@ subroutine psb_z_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
   end if
 
 
-  if (n_col <= size(work)) then 
+  if (n_col <= size(work)) then
     ww => work(1:n_col)
-    if ((4*n_col+n_col) <= size(work)) then 
+    if ((4*n_col+n_col) <= size(work)) then
       aux => work(n_col+1:)
     else
       allocate(aux(4*n_col),stat=info)
-      if (info /= psb_success_) then 
+      if (info /= psb_success_) then
         call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-        goto 9999      
+        goto 9999
       end if
 
     endif
   else
     allocate(ww(n_col),aux(4*n_col),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
   endif
 
 
   select case(prec%iprcparm(psb_f_type_))
-  case(psb_f_ilu_n_) 
+  case(psb_f_ilu_n_,psb_f_ilu_k_,psb_f_ilu_t_)
 
     select case(trans_)
     case('N')
@@ -344,11 +372,42 @@ subroutine psb_z_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
            & trans=trans_,scale='U',choice=psb_none_,work=aux)
 
     end select
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       ch_err="psb_spsm"
       goto 9999
     end if
 
+  case(psb_f_ainv_,psb_f_invt_,psb_f_invk_)
+    ! Application of approximate inverse preconditioner, just some spmm
+
+    select case(trans_)
+
+      case('N')
+        call psb_spmm(zone,prec%av(psb_l_pr_),x,zzero,ww,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+        ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row)
+        if (info == psb_success_) &
+            & call psb_spmm(alpha,prec%av(psb_u_pr_),ww,beta,y,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+
+      case('T')
+        call psb_spmm(zone,prec%av(psb_u_pr_),x,zzero,ww,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+        ww(1:n_row) = ww(1:n_row) * prec%dv%v%v(1:n_row)
+        if (info == psb_success_) &
+            & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,&
+            & trans=trans_,work=aux,doswap=.false.)
+
+      case('C')
+        call psb_spmm(zone,prec%av(psb_u_pr_),x,zzero,ww,desc_data,info,&
+             & trans=trans_,work=aux,doswap=.false.)
+        ww(1:n_row) = ww(1:n_row) * conjg(prec%dv%v%v(1:n_row))
+        if (info == psb_success_) &
+             & call psb_spmm(alpha,prec%av(psb_l_pr_),ww,beta,y,desc_data,info,&
+             & trans=trans_,work=aux,doswap=.false.)
+
+    end select
+
 
   case default
     info = psb_err_internal_error_
@@ -358,8 +417,8 @@ subroutine psb_z_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
 
   call psb_halo(y,desc_data,info,data=psb_comm_mov_)
 
-  if (n_col <= size(work)) then 
-    if ((4*n_col+n_col) <= size(work)) then 
+  if (n_col <= size(work)) then
+    if ((4*n_col+n_col) <= size(work)) then
     else
       deallocate(aux)
     endif
@@ -397,11 +456,26 @@ subroutine psb_z_bjac_precinit(prec,info)
     call psb_Errpush(info,name)
     goto 9999
   end if
+  call psb_realloc(psb_rfpsz,prec%rprcparm,info)
+  if (info /= psb_success_) then
+    info = psb_err_alloc_dealloc_
+    call psb_Errpush(info,name)
+    goto 9999
+  end if
 
   prec%iprcparm(:)                = 0
   prec%iprcparm(psb_p_type_)      = psb_bjac_
   prec%iprcparm(psb_f_type_)      = psb_f_ilu_n_
   prec%iprcparm(psb_ilu_fill_in_) = 0
+  prec%iprcparm(psb_ilu_ialg_)    = psb_ilu_n_
+  prec%iprcparm(psb_ilu_scale_)   = psb_ilu_scale_none_
+  prec%iprcparm(psb_inv_fillin_)  = 0
+  prec%iprcparm(psb_ainv_alg_)    = psb_ainv_llk_
+
+
+  prec%rprcparm(:)                = 0
+  prec%rprcparm(psb_fact_eps_)    = 1E-1_psb_dpk_
+  prec%rprcparm(psb_inv_thresh_)  = 1E-1_psb_dpk_
 
 
   call psb_erractionrestore(err_act)
@@ -416,7 +490,8 @@ end subroutine psb_z_bjac_precinit
 subroutine psb_z_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
 
   use psb_base_mod
-  use psb_prec_mod, only : psb_ilu_fct
+  use psb_z_ilu_fact_mod
+  use psb_z_ainv_fact_mod
   use psb_z_bjacprec, psb_protect_name => psb_z_bjac_precbld
   Implicit None
 
@@ -428,12 +503,13 @@ subroutine psb_z_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
   class(psb_z_base_vect_type), intent(in), optional  :: vmold
   class(psb_i_base_vect_type), intent(in), optional  :: imold
 
-  !     .. Local Scalars ..                                                       
-  integer(psb_ipk_) ::    i, m
+  !     .. Local Scalars ..
+  integer(psb_ipk_) ::    i, m, ialg, fill_in, iscale, inv_fill, iinvalg
   integer(psb_ipk_) ::    ierr(5)
   character ::        trans, unitd
-  type(psb_z_csr_sparse_mat), allocatable  :: lf, uf
+  type(psb_zspmat_type), allocatable :: lf, uf
   complex(psb_dpk_), allocatable :: dd(:)
+  real(psb_dpk_) :: fact_eps, inv_thresh
   integer(psb_ipk_) :: nztota,  err_act, n_row, nrow_a,n_col, nhalo
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -462,19 +538,114 @@ subroutine psb_z_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
   trans = 'N'
   unitd = 'U'
 
+  ! We check if all the information contained in the preconditioner structure
+  ! are meaningful, otherwise we give an error and get out of the build
+  ! procedure
+  ialg    = prec%iprcparm(psb_ilu_ialg_)  ! Integer for ILU type algorithm
+  iinvalg = prec%iprcparm(psb_ainv_alg_)  ! Integer for AINV type algorithm
+  iscale = prec%iprcparm(psb_ilu_scale_)  ! Integer for scaling of matrix
+  fact_eps = prec%rprcparm(psb_fact_eps_) ! Drop-tolerance for factorization
+  inv_thresh = prec%rprcparm(psb_inv_thresh_) ! Drop-tolerance for inverse
+  fill_in = prec%iprcparm(psb_ilu_fill_in_) ! Fill-In for factorization
+  inv_fill = prec%iprcparm(psb_inv_fillin_) ! Fill-In for inverse factorization
+
+  ! Check if the type of scaling is known, pay attention that not all the
+  ! scalings make sense for all the factorization, if something that does not
+  ! make sense is required the factorization routine will fail in an
+  ! unnrecoverable way.
+  if ((iscale == psb_ilu_scale_none_).or.&
+      (iscale == psb_ilu_scale_maxval_).or.&
+      (iscale == psb_ilu_scale_diag_).or.&
+      (iscale == psb_ilu_scale_arwsum_).or.&
+      (iscale == psb_ilu_scale_aclsum_).or.&
+      (iscale == psb_ilu_scale_arcsum_)) then
+  ! Do nothing: admissible request
+  else
+    info=psb_err_from_subroutine_
+    ch_err='psb_ilu_scale_'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+  select case (prec%iprcparm(psb_f_type_))
+    case (psb_f_ainv_)
+      ! Check if the variant for the AINV is known to the library
+      select case (iinvalg)
+      case(psb_ainv_llk_,psb_ainv_s_llk_,psb_ainv_s_ft_llk_,psb_ainv_llk_noth_,&
+        & psb_ainv_mlk_)
+        ! Do nothing these are okay
+      case default
+        info=psb_err_from_subroutine_
+        ch_err='psb_ainv_alg_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end select ! AINV Variant
+      ! Check if the drop-tolerance make sense
+      if( inv_thresh > 1) then
+        info=psb_err_from_subroutine_
+        ch_err='psb_inv_thresh_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+    case (psb_f_ilu_t_)
+      if (fact_eps > 1) then
+        ! Check if the drop-tolerance make sense
+        info=psb_err_from_subroutine_
+        ch_err='psb_fact_eps_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+    case (psb_f_invt_)
+      ! Check both tolerances
+      if (fact_eps > 1) then
+        info=psb_err_from_subroutine_
+        ch_err='psb_fact_eps_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+      if( inv_thresh > 1) then
+        info=psb_err_from_subroutine_
+        ch_err='psb_inv_thresh_'
+        call psb_errpush(info,name,a_err=ch_err)
+        goto 9999
+      end if
+    case default
+  end select
+
+
+  ! Checks relative to the fill-in parameters
+  if (prec%iprcparm(psb_f_type_) == psb_f_ilu_n_) then
+    if(fill_in < 0) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilu_fill_in_'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    else if (fill_in == 0) then
+      ! If the requested level of fill is equal to zero, we default to the
+      ! specialized ILU(0) routine
+      prec%iprcparm(psb_f_type_) = psb_f_ilu_n_
+    end if
+  end if
+  ! If no limit on the fill_in is required we allow every fill, this is needed
+  ! since this quantity is used to allocate the auxiliary vectors for the
+  ! factorization
+  if (inv_fill <= 0) inv_fill = m
+
+  ! Select on the type of factorization to be used
   select case(prec%iprcparm(psb_f_type_))
 
-  case(psb_f_ilu_n_) 
+  case(psb_f_ilu_n_)
+  ! ILU(0) Factorization: the total number of nonzeros of the factorized matrix
+  ! is equal to the one of the input matrix
 
-    if (allocated(prec%av)) then 
-      if (size(prec%av) < psb_bp_ilu_avsz) then 
-        do i=1,size(prec%av) 
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
           call prec%av(i)%free()
         enddo
         deallocate(prec%av,stat=info)
       endif
     end if
-    if (.not.allocated(prec%av)) then 
+    if (.not.allocated(prec%av)) then
       allocate(prec%av(psb_max_avsz),stat=info)
       if (info /= psb_success_) then
         call psb_errpush(psb_err_alloc_dealloc_,name)
@@ -501,27 +672,27 @@ subroutine psb_z_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
     end if
 
     allocate(dd(n_row),stat=info)
-    if (info == psb_success_) then 
+    if (info == psb_success_) then
       allocate(prec%dv, stat=info)
-      if (info == 0) then 
-        if (present(vmold)) then 
-          allocate(prec%dv%v,mold=vmold,stat=info) 
-        else       
-          allocate(psb_z_base_vect_type :: prec%dv%v,stat=info) 
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_z_base_vect_type :: prec%dv%v,stat=info)
         end if
       end if
     end if
 
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     endif
-    ! This is where we have no renumbering, thus no need 
-    call psb_ilu_fct(a,lf,uf,dd,info)
+    ! This is where we have no renumbering, thus no need
+    call psb_ilu0_fact(ialg,a,lf,uf,dd,info)
 
     if(info == psb_success_) then
-      call prec%av(psb_l_pr_)%mv_from(lf)
-      call prec%av(psb_u_pr_)%mv_from(uf)
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
       call prec%av(psb_l_pr_)%set_asb()
       call prec%av(psb_u_pr_)%set_asb()
       call prec%av(psb_l_pr_)%trim()
@@ -530,12 +701,375 @@ subroutine psb_z_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
       ! call move_alloc(dd,prec%d)
     else
       info=psb_err_from_subroutine_
-      ch_err='psb_ilu_fct'
+      ch_err='psb_ilu0_fact'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
 
-  case(psb_f_none_) 
+  case(psb_f_ilu_k_)
+  ! ILU(N) Incomplete LU-factorization with N levels of fill-in. Depending on
+  ! the type of the variant of the algorithm the may be forgotten or added to
+  ! the diagonal (MILU)
+
+  if (allocated(prec%av)) then
+    if (size(prec%av) < psb_bp_ilu_avsz) then
+      do i=1,size(prec%av)
+        call prec%av(i)%free()
+      enddo
+      deallocate(prec%av,stat=info)
+    endif
+  end if
+  if (.not.allocated(prec%av)) then
+    allocate(prec%av(psb_max_avsz),stat=info)
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_alloc_dealloc_,name)
+      goto 9999
+    end if
+  endif
+
+  nrow_a = desc_a%get_local_rows()
+  nztota = a%get_nzeros()
+
+  n_col  = desc_a%get_local_cols()
+  nhalo  = n_col-nrow_a
+  n_row  = nrow_a
+
+  allocate(lf,uf,stat=info)
+  if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+  if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+  if(info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    ch_err='psb_sp_all'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+  allocate(dd(n_row),stat=info)
+  if (info == psb_success_) then
+    allocate(prec%dv, stat=info)
+    if (info == 0) then
+      if (present(vmold)) then
+        allocate(prec%dv%v,mold=vmold,stat=info)
+      else
+        allocate(psb_z_base_vect_type :: prec%dv%v,stat=info)
+      end if
+    end if
+  end if
+
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  endif
+  ! This is where we have no renumbering, thus no need
+  call psb_iluk_fact(fill_in,ialg,a,lf,uf,dd,info)
+
+  if(info == psb_success_) then
+    call prec%av(psb_l_pr_)%mv_from(lf%a)
+    call prec%av(psb_u_pr_)%mv_from(uf%a)
+    call prec%av(psb_l_pr_)%set_asb()
+    call prec%av(psb_u_pr_)%set_asb()
+    call prec%av(psb_l_pr_)%trim()
+    call prec%av(psb_u_pr_)%trim()
+    call prec%dv%bld(dd)
+    ! call move_alloc(dd,prec%d)
+  else
+    info=psb_err_from_subroutine_
+    ch_err='psb_iluk_fact'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+  case(psb_f_ilu_t_)
+  ! ILU(N,E) Incomplete LU factorization with thresholding and level of fill
+
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_z_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! This is where we have no renumbering, thus no need
+    call psb_ilut_fact(fill_in,fact_eps,a,lf,uf,dd,info,iscale=iscale)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_ainv_)
+    ! Approximate Inverse Factorizations based on variants of the incomplete
+    ! biconjugation algorithms
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_z_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! Computing the factorization
+    call psb_ainv_fact(a,iinvalg,inv_fill,inv_thresh,lf,dd,uf,desc_a,info,iscale=iscale)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_invk_)
+    ! Approximate Inverse Factorizations based on the sparse inversion of
+    ! triangular factors of an ILU(n) factorization
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_z_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! Computing the factorization
+    call psb_invk_fact(a,fill_in, inv_fill,lf,dd,uf,desc_a,info)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_invt_)
+    ! Approximate Inverse Factorizations based on the sparse inversion of
+    ! triangular factors of an ILU(eps) factorization
+    if (allocated(prec%av)) then
+      if (size(prec%av) < psb_bp_ilu_avsz) then
+        do i=1,size(prec%av)
+          call prec%av(i)%free()
+        enddo
+        deallocate(prec%av,stat=info)
+      endif
+    end if
+    if (.not.allocated(prec%av)) then
+      allocate(prec%av(psb_max_avsz),stat=info)
+      if (info /= psb_success_) then
+        call psb_errpush(psb_err_alloc_dealloc_,name)
+        goto 9999
+      end if
+    endif
+
+    nrow_a = desc_a%get_local_rows()
+    nztota = a%get_nzeros()
+
+    n_col  = desc_a%get_local_cols()
+    nhalo  = n_col-nrow_a
+    n_row  = nrow_a
+
+    allocate(lf,uf,stat=info)
+    if (info == psb_success_) call lf%allocate(n_row,n_row,nztota)
+    if (info == psb_success_) call uf%allocate(n_row,n_row,nztota)
+
+    if(info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      ch_err='psb_sp_all'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+    allocate(dd(n_row),stat=info)
+    if (info == psb_success_) then
+      allocate(prec%dv, stat=info)
+      if (info == 0) then
+        if (present(vmold)) then
+          allocate(prec%dv%v,mold=vmold,stat=info)
+        else
+          allocate(psb_z_base_vect_type :: prec%dv%v,stat=info)
+        end if
+      end if
+    end if
+
+    if (info /= psb_success_) then
+      call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+      goto 9999
+    endif
+    ! Computing the factorization
+    call psb_invt_fact(a,fill_in,inv_fill,fact_eps,inv_thresh,lf,dd,uf,&
+      & desc_a,info)
+
+    if(info == psb_success_) then
+      call prec%av(psb_l_pr_)%mv_from(lf%a)
+      call prec%av(psb_u_pr_)%mv_from(uf%a)
+      call prec%av(psb_l_pr_)%set_asb()
+      call prec%av(psb_u_pr_)%set_asb()
+      call prec%av(psb_l_pr_)%trim()
+      call prec%av(psb_u_pr_)%trim()
+      call prec%dv%bld(dd)
+      ! call move_alloc(dd,prec%d)
+    else
+      info=psb_err_from_subroutine_
+      ch_err='psb_ilut_fact'
+      call psb_errpush(info,name,a_err=ch_err)
+      goto 9999
+    end if
+
+  case(psb_f_none_)
     info=psb_err_from_subroutine_
     ch_err='Inconsistent prec  psb_f_none_'
     call psb_errpush(info,name,a_err=ch_err)
@@ -548,7 +1082,7 @@ subroutine psb_z_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
     goto 9999
   end select
 
-  if (present(amold)) then 
+  if (present(amold)) then
     call prec%av(psb_l_pr_)%cscnv(info,mold=amold)
     call prec%av(psb_u_pr_)%cscnv(info,mold=amold)
   end if
@@ -567,8 +1101,8 @@ subroutine psb_z_bjac_precseti(prec,what,val,info)
   Implicit None
 
   class(psb_z_bjac_prec_type),intent(inout) :: prec
-  integer(psb_ipk_), intent(in)                      :: what 
-  integer(psb_ipk_), intent(in)                      :: val 
+  integer(psb_ipk_), intent(in)                      :: what
+  integer(psb_ipk_), intent(in)                      :: val
   integer(psb_ipk_), intent(out)                     :: info
   integer(psb_ipk_) :: err_act, nrow
   character(len=20)  :: name='z_bjac_precset'
@@ -576,34 +1110,33 @@ subroutine psb_z_bjac_precseti(prec,what,val,info)
   call psb_erractionsave(err_act)
 
   info = psb_success_
-  if (.not.allocated(prec%iprcparm)) then 
+  if (.not.allocated(prec%iprcparm)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
   end if
 
   select case(what)
-  case (psb_f_type_) 
-    if (prec%iprcparm(psb_p_type_) /= psb_bjac_) then 
-      write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',&
-           & prec%iprcparm(psb_p_type_),&
-           & 'ignoring user specification'
-      return
-    endif
+  case (psb_f_type_)
     prec%iprcparm(psb_f_type_)     = val
 
-  case (psb_ilu_fill_in_) 
-    if ((prec%iprcparm(psb_p_type_) /= psb_bjac_).or.&
-         & (prec%iprcparm(psb_f_type_) /= psb_f_ilu_n_)) then 
-      write(psb_err_unit,*) 'WHAT is invalid for current preconditioner ',&
-           & prec%iprcparm(psb_p_type_),&
-           & 'ignoring user specification'
-      return
-    endif
+  case (psb_ilu_fill_in_)
     prec%iprcparm(psb_ilu_fill_in_) = val
 
+  case (psb_ilu_ialg_)
+    prec%iprcparm(psb_ilu_ialg_) = val
+
+  case (psb_ilu_scale_)
+    prec%iprcparm(psb_ilu_scale_) = val
+
+  case (psb_ainv_alg_)
+    prec%iprcparm(psb_ainv_alg_) = val
+
+  case(psb_inv_fillin_)
+    prec%iprcparm(psb_inv_fillin_) = val
+
   case default
-    write(psb_err_unit,*) 'WHAT is invalid, ignoring user specification'
+    write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what
 
   end select
 
@@ -613,3 +1146,45 @@ subroutine psb_z_bjac_precseti(prec,what,val,info)
 9999 call psb_error_handler(err_act)
   return
 end subroutine psb_z_bjac_precseti
+
+subroutine psb_z_bjac_precsetr(prec,what,val,info)
+
+  use psb_base_mod
+  use psb_z_bjacprec, psb_protect_name => psb_z_bjac_precsetr
+  Implicit None
+
+  class(psb_z_bjac_prec_type),intent(inout) :: prec
+  integer(psb_ipk_), intent(in)                      :: what
+  real(psb_dpk_), intent(in)                       :: val
+  integer(psb_ipk_), intent(out)                     :: info
+  integer(psb_ipk_) :: err_act, nrow
+  character(len=20)  :: name='z_bjac_precset'
+
+  call psb_erractionsave(err_act)
+
+  info = psb_success_
+  if (.not.allocated(prec%rprcparm)) then
+    info = 1124
+    call psb_errpush(info,name,a_err="preconditioner")
+    goto 9999
+  end if
+
+  select case(what)
+
+  case (psb_fact_eps_)
+    prec%rprcparm(psb_fact_eps_) = val
+
+  case (psb_inv_thresh_)
+    prec%rprcparm(psb_inv_thresh_) = val
+
+  case default
+    write(psb_err_unit,'(i0," is invalid, ignoring user specification")') what
+
+  end select
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_z_bjac_precsetr
diff --git a/prec/impl/psb_z_ilu0_fact.f90 b/prec/impl/psb_z_ilu0_fact.f90
index 867da777..26322e95 100644
--- a/prec/impl/psb_z_ilu0_fact.f90
+++ b/prec/impl/psb_z_ilu0_fact.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,21 +27,21 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
+!
 !    Moved here from MLD2P4, original copyright below.
-!  
-!  
-!   
+!
+!
+!
 !                             MLD2P4  version 2.2
 !    MultiLevel Domain Decomposition Parallel Preconditioners Package
 !               based on PSBLAS (Parallel Sparse BLAS version 3.5)
-!    
-!    (C) Copyright 2008-2018 
-!  
-!        Salvatore Filippone  
-!        Pasqua D'Ambra   
-!        Daniela di Serafino   
-!   
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -53,7 +53,7 @@
 !      3. The name of the MLD2P4 group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -65,8 +65,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!  
+!
+!
 ! File: psb_zilu0_fact.f90
 !
 ! Subroutine: psb_zilu0_fact
@@ -75,7 +75,7 @@
 !
 !  This routine computes either the ILU(0) or the MILU(0) factorization of
 !  the diagonal blocks of a distributed matrix. These factorizations are used
-!  to build the 'base preconditioner' (block-Jacobi preconditioner/solver, 
+!  to build the 'base preconditioner' (block-Jacobi preconditioner/solver,
 !  Additive Schwarz preconditioner) corresponding to a given level of a
 !  multilevel preconditioner.
 !
@@ -83,10 +83,10 @@
 !    Y. Saad, Iterative Methods for Sparse Linear Systems, Second Edition,
 !    SIAM, 2003, Chapter 10.
 !
-!  The local matrix is stored into a and blck, as specified in the description 
-!  of the arguments below. The storage format for both the L and U factors is CSR. 
-!  The diagonal of the U factor is stored separately (actually, the inverse of the 
-!  diagonal entries is stored; this is then managed in the solve stage associated 
+!  The local matrix is stored into a and blck, as specified in the description
+!  of the arguments below. The storage format for both the L and U factors is CSR.
+!  The diagonal of the U factor is stored separately (actually, the inverse of the
+!  diagonal entries is stored; this is then managed in the solve stage associated
 !  to the ILU(0)/MILU(0) factorization).
 !
 !  The routine copies and factors "on the fly" from a and blck into l (L factor),
@@ -94,7 +94,7 @@
 !
 !  This implementation of ILU(0)/MILU(0) is faster than the implementation in
 !  psb_ziluk_fct (the latter routine performs the more general ILU(k)/MILU(k)).
-!  
+!
 !
 ! Arguments:
 !    ialg    -  integer, input.
@@ -121,7 +121,7 @@
 !               factorization.
 !               Note: its allocation is managed by the calling routine psb_ilu_bld,
 !               hence it cannot be only intent(out).
-!    info    -  integer, output.                         
+!    info    -  integer, output.
 !               Error code.
 !    blck    -  type(psb_zspmat_type), input, optional, target.
 !               The sparse matrix structure containing the remote rows of the
@@ -129,7 +129,7 @@
 !               to build an Additive Schwarz base preconditioner with overlap
 !               greater than 0. If the overlap is 0 or the matrix has been reordered
 !               (see psb_fact_bld), then blck is empty.
-!  
+!
 subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck, upd)
 
   use psb_base_mod
@@ -157,30 +157,30 @@ subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck, upd)
   info = psb_success_
   call psb_erractionsave(err_act)
 
-  ! 
+  !
   ! Point to / allocate memory for the incomplete factorization
   !
-  if (present(blck)) then 
+  if (present(blck)) then
     blck_ => blck
   else
-    allocate(blck_,stat=info) 
-    if (info == psb_success_) call blck_%csall(izero,izero,info,ione) 
+    allocate(blck_,stat=info)
+    if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR')
     if (info /= psb_success_) then
       info=psb_err_from_subroutine_
-      ch_err='csall'
+      ch_err='allocate'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
   endif
-  if (present(upd)) then 
-    upd_ = psb_toupper(upd) 
+  if (present(upd)) then
+    upd_ = psb_toupper(upd)
   else
     upd_ = 'F'
   end if
-  
+
   m = a%get_nrows() + blck_%get_nrows()
   if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.&
-       & (m > size(d))    ) then 
+       & (m > size(d))    ) then
     write(0,*) 'Wrong allocation status for L,D,U? ',&
          & l%get_nrows(),size(d),u%get_nrows()
     info = -1
@@ -212,12 +212,12 @@ subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck, upd)
   call u%set_triangle()
   call u%set_unit()
   call u%set_upper()
-  
+
   !
   ! Nullify pointer / deallocate memory
   !
-  if (present(blck)) then 
-    blck_ => null() 
+  if (present(blck)) then
+    blck_ => null()
   else
     call blck_%free()
     if(info.ne.0) then
@@ -226,7 +226,7 @@ subroutine psb_zilu0_fact(ialg,a,l,u,d,info,blck, upd)
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
-    deallocate(blck_) 
+    deallocate(blck_)
   endif
 
   call psb_erractionrestore(err_act)
@@ -243,9 +243,9 @@ contains
   ! Version:    complex
   ! Note: internal subroutine of psb_zilu0_fact.
   !
-  !  This routine computes either the ILU(0) or the MILU(0) factorization of the 
-  !  diagonal blocks of a distributed matrix. 
-  !  These factorizations are used to build the 'base preconditioner' 
+  !  This routine computes either the ILU(0) or the MILU(0) factorization of the
+  !  diagonal blocks of a distributed matrix.
+  !  These factorizations are used to build the 'base preconditioner'
   !  (block-Jacobi preconditioner/solver, Additive Schwarz
   !  preconditioner) corresponding to a given level of a multilevel preconditioner.
   !
@@ -257,7 +257,7 @@ contains
   !
   !  The routine copies and factors "on the fly" from the sparse matrix structures a
   !  and b into the arrays lval, uval, d (L, U without its diagonal, diagonal of U).
-  !  
+  !
   !
   ! Arguments:
   !    ialg    -  integer, input.
@@ -296,7 +296,7 @@ contains
   !               according to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor in lval, according to the CSR storage format. 
+  !               of the L factor in lval, according to the CSR storage format.
   !    uval   -   complex(psb_dpk_), dimension(:), input/output.
   !               The U factor in the incomplete factorization.
   !               The entries of U are stored according to the CSR format.
@@ -305,18 +305,18 @@ contains
   !               according to the CSR storage format.
   !    uirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the U factor in uval, according to the CSR storage format. 
+  !               of the U factor in uval, according to the CSR storage format.
   !    l1      -  integer, output.
   !               The number of nonzero entries in lval.
   !    l2      -  integer, output.
   !               The number of nonzero entries in uval.
-  !    info    -  integer, output.           
+  !    info    -  integer, output.
   !               Error code.
   !
   subroutine psb_zilu0_factint(ialg,a,b,&
        & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,info)
 
-    implicit none 
+    implicit none
 
     ! Arguments
     integer(psb_ipk_), intent(in)     :: ialg
@@ -332,7 +332,7 @@ contains
     complex(psb_dpk_) :: dia,temp
     integer(psb_ipk_), parameter :: nrb=16
     type(psb_z_coo_sparse_mat) :: trw
-    integer(psb_ipk_)   :: int_err(5) 
+    integer(psb_ipk_)   :: int_err(5)
     character(len=20)   :: name, ch_err
 
     name='psb_zilu0_factint'
@@ -346,10 +346,10 @@ contains
 
     select case(ialg)
     case(psb_ilu_n_,psb_milu_n_)
-      ! Ok 
+      ! Ok
     case default
       info=psb_err_input_asize_invalid_i_
-      call psb_errpush(info,name,& 
+      call psb_errpush(info,name,&
            & i_err=(/ione,ialg,izero,izero,izero/))
       goto 9999
     end select
@@ -363,7 +363,7 @@ contains
     end if
     m = ma+mb
 
-    if (psb_toupper(upd) == 'F' ) then 
+    if (psb_toupper(upd) == 'F' ) then
       lirp(1) = 1
       uirp(1) = 1
       l1      = 0
@@ -379,14 +379,14 @@ contains
         if (i <= ma) then
           !
           ! Copy the i-th local row of the matrix, stored in a,
-          ! into lval/d(i)/uval 
+          ! into lval/d(i)/uval
           !
           call ilu_copyin(i,ma,a,i,ione,m,l1,lja,lval,&
                & d(i),l2,uja,uval,ktrw,trw,upd)
         else
           !
           ! Copy the i-th local row of the matrix, stored in b
-          ! (as (i-ma)-th row), into lval/d(i)/uval 
+          ! (as (i-ma)-th row), into lval/d(i)/uval
           !
           call ilu_copyin(i-ma,mb,b,i,ione,m,l1,lja,lval,&
                & d(i),l2,uja,uval,ktrw,trw,upd)
@@ -437,7 +437,7 @@ contains
               !
               dia = dia - temp*uval(jj)
               cycle updateloop
-              !     
+              !
             else if (j > i) then
               !
               ! search u(i,*) (i-th row of U) for a matching index j
@@ -454,23 +454,23 @@ contains
                 end if
               enddo
             end if
-            !     
-            ! If we get here we missed the cycle updateloop, which means 
+            !
+            ! If we get here we missed the cycle updateloop, which means
             ! that this entry does not match; thus we accumulate on the
             ! diagonal for MILU(0).
             !
-            if (ialg == psb_milu_n_) then 
+            if (ialg == psb_milu_n_) then
               dia = dia - temp*uval(jj)
             end if
           enddo updateloop
         enddo
-        !     
+        !
         ! Check the pivot size
-        !     
+        !
         if (abs(dia) < d_epstol) then
           !
           ! Too small pivot: unstable factorization
-          !     
+          !
           info = psb_err_pivot_too_small_
           int_err(1) = i
           write(ch_err,'(g20.10)') abs(dia)
@@ -493,7 +493,7 @@ contains
     else
       write(0,*) 'Update not implemented '
       info = 31
-      call psb_errpush(info,name,& 
+      call psb_errpush(info,name,&
            & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd)
       goto 9999
 
@@ -515,7 +515,7 @@ contains
   ! Version:    complex
   ! Note: internal subroutine of psb_zilu0_fact
   !
-  !  This routine copies a row of a sparse matrix A, stored in the psb_zspmat_type 
+  !  This routine copies a row of a sparse matrix A, stored in the psb_zspmat_type
   !  data structure a, into the arrays lval and uval and into the scalar variable
   !  dia, corresponding to the lower and upper triangles of A and to the diagonal
   !  entry of the row, respectively. The entries in lval and uval are stored
@@ -529,14 +529,14 @@ contains
   !
   !  The routine is used by psb_zilu0_factint in the computation of the ILU(0)/MILU(0)
   !  factorization of a local sparse matrix.
-  !  
+  !
   !  TODO: modify the routine to allow copying into output L and U that are
   !  already filled with indices; this would allow computing an ILU(k) pattern,
-  !  then use the ILU(0) internal for subsequent calls with the same pattern. 
+  !  then use the ILU(0) internal for subsequent calls with the same pattern.
   !
   ! Arguments:
   !    i       -  integer, input.
-  !               The local index of the row to be extracted from  the 
+  !               The local index of the row to be extracted from  the
   !               sparse matrix structure a.
   !    m       -  integer, input.
   !               The number of rows of the local matrix stored into a.
@@ -570,13 +570,13 @@ contains
   !               to the CSR storage format.
   !    uval   -  complex(psb_dpk_), dimension(:), input/output.
   !               The array where the entries of the row corresponding to the
-  !               upper triangle are copied. 
+  !               upper triangle are copied.
   !    ktrw    -  integer, input/output.
   !               The index identifying the last entry taken from the
   !               staging buffer trw. See below.
   !    trw     -  type(psb_zspmat_type), input/output.
   !               A staging buffer. If the matrix A is not in CSR format, we use
-  !               the psb_sp_getblk routine and store its output in trw; when we 
+  !               the psb_sp_getblk routine and store its output in trw; when we
   !               need to call psb_sp_getblk we do it for a block of rows, and then
   !               we consume them from trw in successive calls to this routine,
   !               until we empty the buffer. Thus we will make a call to psb_sp_getblk
@@ -608,10 +608,10 @@ contains
     if (psb_errstatus_fatal()) then
       info = psb_err_internal_error_; goto 9999
     end if
-    if (psb_toupper(upd) == 'F') then 
+    if (psb_toupper(upd) == 'F') then
 
-      select type(aa => a%a) 
-      type is (psb_z_csr_sparse_mat) 
+      select type(aa => a%a)
+      type is (psb_z_csr_sparse_mat)
 
         !
         ! Take a fast shortcut if the matrix is stored in CSR format
@@ -633,19 +633,19 @@ contains
           end if
         enddo
 
-      class default 
+      class default
 
         !
-        ! Otherwise use psb_sp_getblk, slower but able (in principle) of 
+        ! Otherwise use psb_sp_getblk, slower but able (in principle) of
         ! handling any format. In this case, a block of rows is extracted
         ! instead of a single row, for performance reasons, and these
         ! rows are copied one by one into lval, dia, uval, through
         ! successive calls to ilu_copyin.
         !
 
-        if ((mod(i,nrb) == 1).or.(nrb == 1)) then 
+        if ((mod(i,nrb) == 1).or.(nrb == 1)) then
           irb = min(m-i+1,nrb)
-          call aa%csget(i,i+irb-1,trw,info) 
+          call aa%csget(i,i+irb-1,trw,info)
           if(info /= psb_success_) then
             info=psb_err_from_subroutine_
             ch_err='csget'
@@ -656,7 +656,7 @@ contains
         end if
 
         nz = trw%get_nzeros()
-        do 
+        do
           if (ktrw > nz) exit
           if (trw%ia(ktrw) > i) exit
           k = trw%ja(ktrw)
@@ -680,7 +680,7 @@ contains
 
       write(0,*) 'Update not implemented '
       info = 31
-      call psb_errpush(info,name,& 
+      call psb_errpush(info,name,&
            & i_err=(/ione*13,izero,izero,izero,izero/),a_err=upd)
       goto 9999
 
diff --git a/prec/impl/psb_z_iluk_fact.f90 b/prec/impl/psb_z_iluk_fact.f90
index fe9e92d9..1a398cda 100644
--- a/prec/impl/psb_z_iluk_fact.f90
+++ b/prec/impl/psb_z_iluk_fact.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,21 +27,21 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
+!
 !    Moved here from MLD2P4, original copyright below.
-!  
-!  
-!   
+!
+!
+!
 !                             MLD2P4  version 2.2
 !    MultiLevel Domain Decomposition Parallel Preconditioners Package
 !               based on PSBLAS (Parallel Sparse BLAS version 3.5)
-!    
-!    (C) Copyright 2008-2018 
-!  
-!        Salvatore Filippone  
-!        Pasqua D'Ambra   
-!        Daniela di Serafino   
-!   
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -53,7 +53,7 @@
 !      3. The name of the MLD2P4 group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -65,8 +65,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!  
+!
+!
 ! File: psb_ziluk_fact.f90
 !
 ! Subroutine: psb_ziluk_fact
@@ -74,7 +74,7 @@
 ! Contains:   psb_ziluk_factint, iluk_copyin, iluk_fact, iluk_copyout.
 !
 !  This routine computes either the ILU(k) or the MILU(k) factorization of the
-!  diagonal blocks of a distributed matrix. These factorizations are used to 
+!  diagonal blocks of a distributed matrix. These factorizations are used to
 !  build the 'base preconditioner' (block-Jacobi preconditioner/solver,
 !  Additive Schwarz preconditioner) corresponding to a certain level of a
 !  multilevel preconditioner.
@@ -88,7 +88,7 @@
 !  U factors is CSR. The diagonal of the U factor is stored separately (actually,
 !  the inverse of the diagonal entries is stored; this is then managed in the solve
 !  stage associated to the ILU(k)/MILU(k) factorization).
-!  
+!
 !
 ! Arguments:
 !    fill_in -  integer, input.
@@ -118,7 +118,7 @@
 !               factorization.
 !               Note: its allocation is managed by the calling routine psb_ilu_bld,
 !               hence it cannot be only intent(out).
-!    info    -  integer, output.                    
+!    info    -  integer, output.
 !               Error code.
 !    blck    -  type(psb_zspmat_type), input, optional, target.
 !               The sparse matrix structure containing the remote rows of the
@@ -126,7 +126,7 @@
 !               to build an Additive Schwarz base preconditioner with overlap
 !               greater than 0. If the overlap is 0 or the matrix has been reordered
 !               (see psb_fact_bld), then blck does not contain any row.
-!  
+!
 subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck)
 
   use psb_base_mod
@@ -143,7 +143,7 @@ subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck)
   complex(psb_dpk_), intent(inout)    ::  d(:)
   !     Local Variables
   integer(psb_ipk_)   :: l1, l2, m, err_act
-  
+
   type(psb_zspmat_type), pointer  :: blck_
   type(psb_z_csr_sparse_mat)      :: ll, uu
   character(len=20)   :: name, ch_err
@@ -152,17 +152,17 @@ subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck)
   info = psb_success_
   call psb_erractionsave(err_act)
 
-  ! 
+  !
   ! Point to / allocate memory for the incomplete factorization
   !
-  if (present(blck)) then 
+  if (present(blck)) then
     blck_ => blck
   else
-    allocate(blck_,stat=info) 
-    if (info == psb_success_) call blck_%csall(izero,izero,info,ione) 
+    allocate(blck_,stat=info)
+    if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR') 
     if (info /= psb_success_) then
       info=psb_err_from_subroutine_
-      ch_err='csall'
+      ch_err='allocate'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
@@ -170,7 +170,7 @@ subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck)
 
   m = a%get_nrows() + blck_%get_nrows()
   if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.&
-       & (m > size(d))    ) then 
+       & (m > size(d))    ) then
     write(0,*) 'Wrong allocation status for L,D,U? ',&
          & l%get_nrows(),size(d),u%get_nrows()
     info = -1
@@ -203,15 +203,15 @@ subroutine psb_ziluk_fact(fill_in,ialg,a,l,u,d,info,blck)
   call u%set_triangle()
   call u%set_unit()
   call u%set_upper()
-  
+
   !
   ! Nullify pointer / deallocate memory
   !
-  if (present(blck)) then 
-    blck_ => null() 
+  if (present(blck)) then
+    blck_ => null()
   else
     call blck_%free()
-    deallocate(blck_,stat=info) 
+    deallocate(blck_,stat=info)
     if(info.ne.0) then
       info=psb_err_from_subroutine_
       ch_err='psb_sp_free'
@@ -280,7 +280,7 @@ contains
   !               according to the CSR storage format.
   !    lia2    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor in laspk, according to the CSR storage format. 
+  !               of the L factor in laspk, according to the CSR storage format.
   !    uval   -   complex(psb_dpk_), dimension(:), input/output.
   !               The U factor in the incomplete factorization.
   !               The entries of U are stored according to the CSR format.
@@ -289,12 +289,12 @@ contains
   !               according to the CSR storage format.
   !    uirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the U factor in uval, according to the CSR storage format. 
+  !               of the U factor in uval, according to the CSR storage format.
   !    l1      -  integer, output
   !               The number of nonzero entries in laspk.
   !    l2      -  integer, output
   !               The number of nonzero entries in uval.
-  !    info    -  integer, output.           
+  !    info    -  integer, output.
   !               Error code.
   !
   subroutine psb_ziluk_factint(fill_in,ialg,a,b,&
@@ -304,7 +304,7 @@ contains
 
     implicit none
 
-  ! Arguments 
+  ! Arguments
     integer(psb_ipk_), intent(in)                 :: fill_in, ialg
     type(psb_zspmat_type),intent(in)              :: a,b
     integer(psb_ipk_),intent(inout)               :: l1,l2,info
@@ -330,14 +330,14 @@ contains
 
     select case(ialg)
     case(psb_ilu_n_,psb_milu_n_)
-      ! Ok 
+      ! Ok
     case default
       info=psb_err_input_asize_invalid_i_
       call psb_errpush(info,name,&
            & i_err=(/itwo,ialg,izero,izero,izero/))
       goto 9999
     end select
-    if (fill_in < 0) then 
+    if (fill_in < 0) then
       info=psb_err_input_asize_invalid_i_
       call psb_errpush(info,name, &
            & i_err=(/ione,fill_in,izero,izero,izero/))
@@ -349,7 +349,7 @@ contains
     m  = ma+mb
 
     !
-    ! Allocate a temporary buffer for the iluk_copyin function 
+    ! Allocate a temporary buffer for the iluk_copyin function
     !
 
     call trw%allocate(izero,izero,ione)
@@ -361,7 +361,7 @@ contains
       call psb_errpush(info,name,a_err='psb_sp_all')
       goto 9999
     end if
-    
+
     l1=0
     l2=0
     lirp(1) = 1
@@ -381,34 +381,34 @@ contains
     uplevs(:)  = m+1
     row(:)     = zzero
     rowlevs(:) = -(m+1)
-    
+
     !
     ! Cycle over the matrix rows
     !
     do i = 1, m
-      
+
       !
       ! At each iteration of the loop we keep in a heap the column indices
       ! affected by the factorization. The heap is initialized and filled
       ! in the iluk_copyin routine, and updated during the elimination, in
       ! the iluk_fact routine. The heap is ideal because at each step we need
       ! the lowest index, but we also need to insert new items, and the heap
-      ! allows to do both in log time. 
+      ! allows to do both in log time.
       !
       d(i) = zzero
       if (i<=ma) then
         !
-        ! Copy into trw the i-th local row of the matrix, stored in a 
-        ! 
+        ! Copy into trw the i-th local row of the matrix, stored in a
+        !
         call iluk_copyin(i,ma,a,ione,m,row,rowlevs,heap,ktrw,trw,info)
       else
         !
         ! Copy into trw the i-th local row of the matrix, stored in b
-        ! (as (i-ma)-th row) 
-        ! 
+        ! (as (i-ma)-th row)
+        !
         call iluk_copyin(i-ma,mb,b,ione,m,row,rowlevs,heap,ktrw,trw,info)
       endif
-      
+
       ! Do an elimination step on the current row. It turns out we only
       ! need to keep track of fill levels for the upper triangle, hence we
       ! do not have a lowlevs variable.
@@ -417,7 +417,7 @@ contains
            & d,uja,uirp,uval,uplevs,nidx,idxs,info)
       !
       ! Copy the row into lval/d(i)/uval
-      ! 
+      !
       if (info == psb_success_) call iluk_copyout(fill_in,ialg,i,m,row,rowlevs,nidx,idxs,&
            & l1,l2,lja,lirp,lval,d,uja,uirp,uval,uplevs,info)
       if (info /= psb_success_) then
@@ -474,11 +474,11 @@ contains
   !
   !  This routine is used by psb_ziluk_factint in the computation of the
   !  ILU(k)/MILU(k) factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    i       -  integer, input.
-  !               The local index of the row to be extracted from the 
+  !               The local index of the row to be extracted from the
   !               sparse matrix structure a.
   !    m       -  integer, input.
   !               The number of rows of the local matrix stored into a.
@@ -510,7 +510,7 @@ contains
   !               staging buffer trw. See below.
   !    trw     -  type(psb_zspmat_type), input/output.
   !               A staging buffer. If the matrix A is not in CSR format, we use
-  !               the psb_sp_getblk routine and store its output in trw; when we 
+  !               the psb_sp_getblk routine and store its output in trw; when we
   !               need to call psb_sp_getblk we do it for a block of rows, and then
   !               we consume them from trw in successive calls to this routine,
   !               until we empty the buffer. Thus we will make a call to psb_sp_getblk
@@ -521,8 +521,8 @@ contains
     use psb_base_mod
 
     implicit none
-  
-  ! Arguments 
+
+  ! Arguments
     type(psb_zspmat_type), intent(in)         :: a
     type(psb_z_coo_sparse_mat), intent(inout) :: trw
     integer(psb_ipk_), intent(in)           :: i,m,jmin,jmax
@@ -542,17 +542,17 @@ contains
     if (psb_errstatus_fatal()) then
       info = psb_err_internal_error_; goto 9999
     end if
-    call heap%init(info) 
+    call heap%init(info)
 
-    select type (aa=> a%a) 
-    type is (psb_z_csr_sparse_mat) 
+    select type (aa=> a%a)
+    type is (psb_z_csr_sparse_mat)
       !
       ! Take a fast shortcut if the matrix is stored in CSR format
       !
-      
+
       do j = aa%irp(i), aa%irp(i+1) - 1
         k          = aa%ja(j)
-        if ((jmin<=k).and.(k<=jmax)) then 
+        if ((jmin<=k).and.(k<=jmax)) then
           row(k)     = aa%val(j)
           rowlevs(k) = 0
           call heap%insert(k,info)
@@ -562,14 +562,14 @@ contains
     class default
 
       !
-      ! Otherwise use psb_sp_getblk, slower but able (in principle) of 
+      ! Otherwise use psb_sp_getblk, slower but able (in principle) of
       ! handling any format. In this case, a block of rows is extracted
       ! instead of a single row, for performance reasons, and these
       ! rows are copied one by one into the array row, through successive
       ! calls to iluk_copyin.
       !
 
-      if ((mod(i,nrb) == 1).or.(nrb == 1)) then 
+      if ((mod(i,nrb) == 1).or.(nrb == 1)) then
         irb = min(m-i+1,nrb)
         call aa%csget(i,i+irb-1,trw,info)
         if (info /= psb_success_) then
@@ -581,11 +581,11 @@ contains
         ktrw=1
       end if
       nz = trw%get_nzeros()
-      do 
+      do
         if (ktrw > nz) exit
         if (trw%ia(ktrw) > i) exit
         k          = trw%ja(ktrw)
-        if ((jmin<=k).and.(k<=jmax)) then 
+        if ((jmin<=k).and.(k<=jmax)) then
           row(k)     = trw%val(ktrw)
           rowlevs(k) = 0
           call heap%insert(k,info)
@@ -674,10 +674,10 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
   ! Arguments
-    type(psb_i_heap), intent(inout)               :: heap 
+    type(psb_i_heap), intent(inout)               :: heap
     integer(psb_ipk_), intent(in)                 :: i, fill_in
     integer(psb_ipk_), intent(inout)              :: nidx,info
     integer(psb_ipk_), intent(inout)              :: rowlevs(:)
@@ -690,7 +690,7 @@ contains
     complex(psb_dpk_)   :: rwk
 
     info = psb_success_
-    if (.not.allocated(idxs)) then 
+    if (.not.allocated(idxs)) then
       allocate(idxs(200),stat=info)
       if (info /= psb_success_) return
     endif
@@ -702,34 +702,34 @@ contains
     !
     do
       ! Beware: (iret < 0) means that the heap is empty, not an error.
-      call heap%get_first(k,iret) 
+      call heap%get_first(k,iret)
       if (iret < 0) return
 
-      ! 
+      !
       ! Just in case an index has been put on the heap more than once.
       !
       if (k == lastk) cycle
 
-      lastk = k 
+      lastk = k
       nidx = nidx + 1
-      if (nidx>size(idxs)) then 
+      if (nidx>size(idxs)) then
         call psb_realloc(nidx+psb_heap_resize,idxs,info)
         if (info /= psb_success_) return
       end if
       idxs(nidx) = k
-      
-      if ((row(k) /= zzero).and.(rowlevs(k) <= fill_in).and.(k<i)) then 
+
+      if ((row(k) /= zzero).and.(rowlevs(k) <= fill_in).and.(k<i)) then
         !
         ! Note: since U is scaled while copying it out (see iluk_copyout),
         ! we can use rwk in the update below
-        ! 
+        !
         rwk    = row(k)
-        row(k) = row(k) * d(k)    ! d(k) == 1/a(k,k)          
+        row(k) = row(k) * d(k)    ! d(k) == 1/a(k,k)
         lrwk   = rowlevs(k)
-          
+
         do jj=uirp(k),uirp(k+1)-1
           j = uja(jj)
-          if (j<=k) then 
+          if (j<=k) then
             info = -i
             return
           endif
@@ -738,9 +738,9 @@ contains
           ! The fill levels are initialized to a negative value. If we find
           ! one, it means that it is an as yet untouched index, so we need
           ! to insert it; otherwise it is already on the heap, there is no
-          ! need to insert it more than once. 
+          ! need to insert it more than once.
           !
-          if (rowlevs(j)<0) then 
+          if (rowlevs(j)<0) then
             call heap%insert(j,info)
             if (info /= psb_success_) return
             rowlevs(j) = abs(rowlevs(j))
@@ -781,7 +781,7 @@ contains
   !
   !  This routine is used by psb_ziluk_factint in the computation of the
   !  ILU(k)/MILU(k) factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    fill_in -  integer, input.
@@ -822,14 +822,14 @@ contains
   !               to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor, copied in lval row by row (see 
+  !               of the L factor, copied in lval row by row (see
   !               psb_ziluk_factint), according to the CSR storage format.
   !    lval   -  complex(psb_dpk_), dimension(:), input/output.
   !               The array where the entries of the row corresponding to the
   !               L factor are copied.
   !    d       -  complex(psb_dpk_), dimension(:), input/output.
   !               The array where the inverse of the diagonal entry of the
-  !               row is copied (only d(i) is used by the routine). 
+  !               row is copied (only d(i) is used by the routine).
   !    uja    -  integer, dimension(:), input/output.
   !               The column indices of the nonzero entries of the U factor
   !               copied in uval row by row (see psb_ziluk_factint), according
@@ -850,7 +850,7 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
     ! Arguments
     integer(psb_ipk_), intent(in)                  :: fill_in, ialg, i, m, nidx
@@ -880,17 +880,17 @@ contains
       if (j<i) then
         !
         ! Copy the lower part of the row
-        !  
-        if (rowlevs(j) <= fill_in) then 
-          l1     = l1 + 1 
+        !
+        if (rowlevs(j) <= fill_in) then
+          l1     = l1 + 1
           if (size(lval) < l1) then
-            ! 
+            !
             ! Figure out a good reallocation size!
-            ! 
+            !
             isz  = (max((l1/i)*m,int(1.2*l1),l1+100))
-            call psb_realloc(isz,lval,info) 
-            if (info == psb_success_) call psb_realloc(isz,lja,info) 
-            if (info /= psb_success_) then 
+            call psb_realloc(isz,lval,info)
+            if (info == psb_success_) call psb_realloc(isz,lja,info)
+            if (info /= psb_success_) then
               info=psb_err_from_subroutine_
               call psb_errpush(info,name,a_err='Allocate')
               goto 9999
@@ -914,26 +914,26 @@ contains
         !
         ! Copy the diagonal entry of the row and re-initialize
         ! row(j) and rowlevs(j)
-        !  
-        d(i)       = d(i) + row(i) 
+        !
+        d(i)       = d(i) + row(i)
         row(i)     = zzero
         rowlevs(i) = -(m+1)
 
-      else if (j>i) then 
+      else if (j>i) then
         !
         ! Copy the upper part of the row
-        ! 
-        if (rowlevs(j) <= fill_in) then 
-          l2     = l2 + 1 
-          if (size(uval) < l2) then 
-            ! 
+        !
+        if (rowlevs(j) <= fill_in) then
+          l2     = l2 + 1
+          if (size(uval) < l2) then
+            !
             ! Figure out a good reallocation size!
             !
             isz  = max((l2/i)*m,int(1.2*l2),l2+100)
-            call psb_realloc(isz,uval,info) 
-            if (info == psb_success_) call psb_realloc(isz,uja,info) 
+            call psb_realloc(isz,uval,info)
+            if (info == psb_success_) call psb_realloc(isz,uja,info)
             if (info == psb_success_) call psb_realloc(isz,uplevs,info,pad=(m+1))
-            if (info /= psb_success_) then 
+            if (info /= psb_success_) then
               info=psb_err_from_subroutine_
               call psb_errpush(info,name,a_err='Allocate')
               goto 9999
@@ -941,11 +941,11 @@ contains
           end if
           uja(l2)   = j
           uval(l2)  = row(j)
-          uplevs(l2) = rowlevs(j) 
+          uplevs(l2) = rowlevs(j)
         else if (ialg == psb_milu_n_) then
           !
           ! MILU(k): add discarded entries to the diagonal one
-          ! 
+          !
           d(i) = d(i) + row(j)
         end if
         !
@@ -963,13 +963,13 @@ contains
     lirp(i+1) = l1 + 1
     uirp(i+1) = l2 + 1
 
-    !     
+    !
     ! Check the pivot size
     !
     if (abs(d(i)) < d_epstol) then
       !
       ! Too small pivot: unstable factorization
-      !     
+      !
       info = psb_err_pivot_too_small_
       int_err(1) = i
       write(ch_err,'(g20.10)') d(i)
diff --git a/prec/impl/psb_z_ilut_fact.f90 b/prec/impl/psb_z_ilut_fact.f90
index b7e8da05..291dc778 100644
--- a/prec/impl/psb_z_ilut_fact.f90
+++ b/prec/impl/psb_z_ilut_fact.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,21 +27,21 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
+!
 !    Moved here from MLD2P4, original copyright below.
-!  
-!  
-!   
+!
+!
+!
 !                             MLD2P4  version 2.2
 !    MultiLevel Domain Decomposition Parallel Preconditioners Package
 !               based on PSBLAS (Parallel Sparse BLAS version 3.5)
-!    
-!    (C) Copyright 2008-2018 
-!  
-!        Salvatore Filippone  
-!        Pasqua D'Ambra   
-!        Daniela di Serafino   
-!   
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -53,7 +53,7 @@
 !      3. The name of the MLD2P4 group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -65,8 +65,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!  
+!
+!
 ! File: psb_zilut_fact.f90
 !
 ! Subroutine: psb_zilut_fact
@@ -87,7 +87,7 @@
 !  CSR. The diagonal of the U factor is stored separately (actually, the
 !  inverse of the diagonal entries is stored; this is then managed in the
 !  solve stage associated to the ILU(k,t) factorization).
-!  
+!
 !
 ! Arguments:
 !    fill_in -  integer, input.
@@ -114,7 +114,7 @@
 !               factorization.
 !               Note: its allocation is managed by the calling routine psb_ilu_bld,
 !               hence it cannot be only intent(out).
-!    info    -  integer, output.                    
+!    info    -  integer, output.
 !               Error code.
 !    blck    -  type(psb_zspmat_type), input, optional, target.
 !               The sparse matrix structure containing the remote rows of the
@@ -122,9 +122,9 @@
 !               to build an Additive Schwarz base preconditioner with overlap
 !               greater than 0. If the overlap is 0 or the matrix has been reordered
 !               (see psb_fact_bld), then blck does not contain any row.
-!  
+!
 subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
-  
+
   use psb_base_mod
   use psb_z_ilu_fact_mod, psb_protect_name => psb_zilut_fact
 
@@ -141,7 +141,7 @@ subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
   integer(psb_ipk_), intent(in), optional       :: iscale
   !     Local Variables
   integer(psb_ipk_)   ::  l1, l2, m, err_act, iscale_
-  
+
   type(psb_zspmat_type), pointer  :: blck_
   type(psb_z_csr_sparse_mat)       :: ll, uu
   real(psb_dpk_)      :: scale
@@ -151,34 +151,34 @@ subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
   info = psb_success_
   call psb_erractionsave(err_act)
 
-  if (fill_in < 0) then 
+  if (fill_in < 0) then
     info=psb_err_input_asize_invalid_i_
     call psb_errpush(info,name, &
          & i_err=(/ione,fill_in,izero,izero,izero/))
     goto 9999
   end if
-  ! 
+  !
   ! Point to / allocate memory for the incomplete factorization
   !
-  if (present(blck)) then 
+  if (present(blck)) then
     blck_ => blck
   else
-    allocate(blck_,stat=info) 
-    if (info == psb_success_) call blck_%csall(izero,izero,info,ione) 
+    allocate(blck_,stat=info)
+    if (info == psb_success_) call blck_%allocate(izero,izero,info,ione,type='CSR')
     if (info /= psb_success_) then
       info=psb_err_from_subroutine_
-      ch_err='csall'
+      ch_err='allocate'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
     end if
   endif
-  if (present(iscale)) then 
+  if (present(iscale)) then
     iscale_ = iscale
   else
     iscale_ = psb_ilu_scale_none_
   end if
-  
-  select case(iscale_) 
+
+  select case(iscale_)
   case(psb_ilu_scale_none_)
     scale = sone
   case(psb_ilu_scale_maxval_)
@@ -189,10 +189,10 @@ subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
     call psb_errpush(info,name,i_err=(/ione*9,iscale_,izero,izero,izero/))
     goto 9999
   end select
-  
+
   m = a%get_nrows() + blck_%get_nrows()
   if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.&
-       & (m > size(d))    ) then 
+       & (m > size(d))    ) then
     write(0,*) 'Wrong allocation status for L,D,U? ',&
          & l%get_nrows(),size(d),u%get_nrows()
     info = -1
@@ -225,15 +225,15 @@ subroutine psb_zilut_fact(fill_in,thres,a,l,u,d,info,blck,iscale)
   call u%set_triangle()
   call u%set_unit()
   call u%set_upper()
-  
+
   !
   ! Nullify pointer / deallocate memory
   !
-  if (present(blck)) then 
-    blck_ => null() 
+  if (present(blck)) then
+    blck_ => null()
   else
     call blck_%free()
-    deallocate(blck_,stat=info) 
+    deallocate(blck_,stat=info)
     if(info.ne.0) then
       info=psb_err_from_subroutine_
       ch_err='psb_sp_free'
@@ -298,7 +298,7 @@ contains
   !               according to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor in lval, according to the CSR storage format. 
+  !               of the L factor in lval, according to the CSR storage format.
   !    uval   -  complex(psb_dpk_), dimension(:), input/output.
   !               The U factor in the incomplete factorization.
   !               The entries of U are stored according to the CSR format.
@@ -307,12 +307,12 @@ contains
   !               according to the CSR storage format.
   !    uirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the U factor in uval, according to the CSR storage format. 
+  !               of the U factor in uval, according to the CSR storage format.
   !    l1      -  integer, output
   !               The number of nonzero entries in lval.
   !    l2      -  integer, output
   !               The number of nonzero entries in uval.
-  !    info    -  integer, output.           
+  !    info    -  integer, output.
   !               Error code.
   !
   subroutine psb_zilut_factint(fill_in,thres,a,b,&
@@ -320,7 +320,7 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
   ! Arguments
     integer(psb_ipk_), intent(in)                 :: fill_in
@@ -355,7 +355,7 @@ contains
     m  = ma+mb
 
     !
-    ! Allocate a temporary buffer for the ilut_copyin function 
+    ! Allocate a temporary buffer for the ilut_copyin function
     !
     call trw%allocate(izero,izero,ione)
     if (info == psb_success_) call psb_ensure_size(m+1,lirp,info)
@@ -366,7 +366,7 @@ contains
       call psb_errpush(info,name,a_err='psb_sp_all')
       goto 9999
     end if
-    
+
     l1=0
     l2=0
     lirp(1) = 1
@@ -396,10 +396,10 @@ contains
       ! in the ilut_copyin function, and updated during the elimination, in
       ! the ilut_fact routine. The heap is ideal because at each step we need
       ! the lowest index, but we also need to insert new items, and the heap
-      ! allows to do both in log time. 
+      ! allows to do both in log time.
       !
       d(i) = czero
-      if (i<=ma) then 
+      if (i<=ma) then
         call ilut_copyin(i,ma,a,i,ione,m,nlw,nup,jmaxup,nrmi,weight,&
              & row,heap,ktrw,trw,info)
       else
@@ -414,7 +414,7 @@ contains
            & d,uja,uirp,uval,nidx,idxs,info)
       !
       ! Copy the row into lval/d(i)/uval
-      ! 
+      !
       if (info == psb_success_) call ilut_copyout(fill_in,thres,i,m,&
            & nlw,nup,jmaxup,nrmi,row,nidx,idxs,&
            & l1,l2,lja,lirp,lval,d,uja,uirp,uval,info)
@@ -429,7 +429,7 @@ contains
     !
     ! Adjust diagonal accounting for scale factor
     !
-    if (weight /= sone) then 
+    if (weight /= sone) then
       d(1:m) = d(1:m)*weight
     end if
 
@@ -469,7 +469,7 @@ contains
   !  - storing into a heap the column indices of the nonzero entries of the copied
   !    row;
   !  - computing the column index of the first entry with maximum absolute value
-  !    in the part of the row belonging to the upper triangle;            
+  !    in the part of the row belonging to the upper triangle;
   !  - computing the 2-norm of the row.
   !  The output array row is such that it contains a full row of A, i.e. it contains
   !  also the zero entries of the row. This is useful for the elimination step
@@ -482,11 +482,11 @@ contains
   !
   !  This routine is used by psb_zilut_factint in the computation of the ILU(k,t)
   !  factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    i       -  integer, input.
-  !               The local index of the row to be extracted from the 
+  !               The local index of the row to be extracted from the
   !               sparse matrix structure a.
   !    m       -  integer, input.
   !               The number of rows of the local matrix stored into a.
@@ -533,7 +533,7 @@ contains
   !               staging buffer trw. See below.
   !    trw     -  type(psb_zspmat_type), input/output.
   !               A staging buffer. If the matrix A is not in CSR format, we use
-  !               the psb_sp_getblk routine and store its output in trw; when we 
+  !               the psb_sp_getblk routine and store its output in trw; when we
   !               need to call psb_sp_getblk we do it for a block of rows, and then
   !               we consume them from trw in successive calls to this routine,
   !               until we empty the buffer. Thus we will make a call to psb_sp_getblk
@@ -542,7 +542,7 @@ contains
   subroutine ilut_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,&
        & nrmi,weight,row,heap,ktrw,trw,info)
     use psb_base_mod
-    implicit none 
+    implicit none
     type(psb_zspmat_type), intent(in)         :: a
     type(psb_z_coo_sparse_mat), intent(inout) :: trw
     integer(psb_ipk_), intent(in)               :: i, m,jmin,jmax,jd
@@ -584,29 +584,29 @@ contains
     dmaxup = szero
     nrmi   = szero
 
-    select type (aa=> a%a) 
-    type is (psb_z_csr_sparse_mat) 
+    select type (aa=> a%a)
+    type is (psb_z_csr_sparse_mat)
       !
       ! Take a fast shortcut if the matrix is stored in CSR format
       !
 
       do j = aa%irp(i), aa%irp(i+1) - 1
         k          = aa%ja(j)
-        if ((jmin<=k).and.(k<=jmax)) then 
-          row(k)     = aa%val(j)*weight 
+        if ((jmin<=k).and.(k<=jmax)) then
+          row(k)     = aa%val(j)*weight
           call heap%insert(k,info)
           if (info /= psb_success_) exit
-          if (k<jd) nlw = nlw + 1 
-          if (k>jd) then 
+          if (k<jd) nlw = nlw + 1
+          if (k>jd) then
             nup = nup + 1
-            if (abs(row(k))>dmaxup) then 
+            if (abs(row(k))>dmaxup) then
               jmaxup = k
               dmaxup = abs(row(k))
             end if
           end if
         end if
       end do
-      if (info /= psb_success_) then 
+      if (info /= psb_success_) then
         info=psb_err_from_subroutine_
         call psb_errpush(info,name,a_err='psb_insert_heap')
         goto 9999
@@ -617,16 +617,16 @@ contains
 
 
     class default
-      
+
       !
-      ! Otherwise use psb_sp_getblk, slower but able (in principle) of 
+      ! Otherwise use psb_sp_getblk, slower but able (in principle) of
       ! handling any format. In this case, a block of rows is extracted
       ! instead of a single row, for performance reasons, and these
       ! rows are copied one by one into the array row, through successive
       ! calls to ilut_copyin.
       !
 
-      if ((mod(i,nrb) == 1).or.(nrb == 1)) then 
+      if ((mod(i,nrb) == 1).or.(nrb == 1)) then
         irb = min(m-i+1,nrb)
         call aa%csget(i,i+irb-1,trw,info)
         if (info /= psb_success_) then
@@ -639,18 +639,18 @@ contains
 
       kin = ktrw
       nz = trw%get_nzeros()
-      do 
+      do
         if (ktrw > nz) exit
         if (trw%ia(ktrw) > i) exit
         k          = trw%ja(ktrw)
-        if ((jmin<=k).and.(k<=jmax)) then 
+        if ((jmin<=k).and.(k<=jmax)) then
           row(k)     = trw%val(ktrw)*weight
           call heap%insert(k,info)
           if (info /= psb_success_) exit
-          if (k<jd) nlw = nlw + 1 
-          if (k>jd) then 
+          if (k<jd) nlw = nlw + 1
+          if (k>jd) then
             nup = nup + 1
-            if (abs(row(k))>dmaxup) then 
+            if (abs(row(k))>dmaxup) then
               jmaxup = k
               dmaxup = abs(row(k))
             end if
@@ -734,10 +734,10 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
   ! Arguments
-    type(psb_i_heap), intent(inout)               :: heap 
+    type(psb_i_heap), intent(inout)               :: heap
     integer(psb_ipk_), intent(in)                 :: i
     integer(psb_ipk_), intent(inout)              :: nidx,info
     real(psb_dpk_), intent(in)                     :: thres,nrmi
@@ -753,23 +753,23 @@ contains
     call psb_ensure_size(200*ione,idxs,info)
     if (info /= psb_success_) return
     nidx  = 0
-    lastk = -1 
+    lastk = -1
     !
     ! Do while there are indices to be processed
     !
     do
 
-      call heap%get_first(k,iret) 
+      call heap%get_first(k,iret)
       if (iret < 0) exit
 
-      ! 
+      !
       ! An index may have been put on the heap more than once.
       !
       if (k == lastk) cycle
 
-      lastk = k 
+      lastk = k
       lowert: if (k<i)  then
- 
+
         !
         ! Dropping rule based on the threshold: compare the absolute
         ! value of each updated entry of row with thres * 2-norm of row.
@@ -777,7 +777,7 @@ contains
         rwk    = row(k)
         row(k) = row(k) * d(k)
         if (abs(row(k)) < thres*nrmi) then
-          ! 
+          !
           ! Drop the entry.
           !
           row(k) = czero
@@ -786,11 +786,11 @@ contains
           !
           ! Note: since U is scaled while copying it out (see ilut_copyout),
           ! we can use rwk in the update below.
-          !           
+          !
           do jj=uirp(k),uirp(k+1)-1
             j = uja(jj)
-            if (j<=k) then 
-              info = -i 
+            if (j<=k) then
+              info = -i
               return
             endif
             !
@@ -799,7 +799,7 @@ contains
             !
             row(j)     = row(j) - rwk * uval(jj)
             if (abs(row(j)) < thres*nrmi) then
-              ! 
+              !
               ! Drop the entry.
               !
               row(j) = czero
@@ -818,7 +818,7 @@ contains
       ! If we get here it is an index we need to keep on copyout.
       !
       nidx       = nidx + 1
-      call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize)      
+      call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize)
       if (info /= psb_success_) return
       idxs(nidx) = k
 
@@ -852,7 +852,7 @@ contains
   !
   !  This routine is used by psb_zilut_factint in the computation of the ILU(k,t)
   !  factorization of a local sparse matrix.
-  !  
+  !
   !
   ! Arguments:
   !    fill_in -  integer, input.
@@ -895,14 +895,14 @@ contains
   !               to the CSR storage format.
   !    lirp    -  integer, dimension(:), input/output.
   !               The indices identifying the first nonzero entry of each row
-  !               of the L factor, copied in lval row by row (see 
+  !               of the L factor, copied in lval row by row (see
   !               psb_zilut_factint), according to the CSR storage format.
   !    lval   -  complex(psb_dpk_), dimension(:), input/output.
   !               The array where the entries of the row corresponding to the
   !               L factor are copied.
   !    d       -  complex(psb_dpk_), dimension(:), input/output.
   !               The array where the inverse of the diagonal entry of the
-  !               row is copied (only d(i) is used by the routine). 
+  !               row is copied (only d(i) is used by the routine).
   !    uja    -  integer, dimension(:), input/output.
   !               The column indices of the nonzero entries of the U factor
   !               copied in uval row by row (see psb_zilut_factint), according
@@ -921,7 +921,7 @@ contains
 
     use psb_base_mod
 
-    implicit none 
+    implicit none
 
     ! Arguments
     integer(psb_ipk_), intent(in)                       :: fill_in,i,m,nidx,nlw,nup,jmaxup
@@ -950,22 +950,22 @@ contains
     end if
 
     !
-    ! Here we need to apply also the dropping rule base on the fill-in. 
+    ! Here we need to apply also the dropping rule base on the fill-in.
     ! We do it by putting into a heap the elements that are not dropped
-    ! by using the 2-norm rule, and then copying them out. 
+    ! by using the 2-norm rule, and then copying them out.
     !
     ! The heap goes down on the entry absolute value, so the first item
-    ! is the largest absolute value. 
+    ! is the largest absolute value.
     !
 
     call heap%init(info,dir=psb_asort_down_)
 
     if (info == psb_success_) allocate(xwid(nidx),xw(nidx),indx(nidx),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info=psb_err_alloc_request_
       call psb_errpush(info,name,i_err=(/3*nidx,izero,izero,izero,izero/),&
            & a_err='complex(psb_dpk_)')
-      goto 9999      
+      goto 9999
     end if
 
     !
@@ -976,7 +976,7 @@ contains
     idxp = 0
 
     do
- 
+
       idxp = idxp + 1
       if (idxp > nidx) exit
       if (idxs(idxp) >= i) exit
@@ -987,8 +987,8 @@ contains
       !
       if (abs(witem) < thres*nrmi) cycle
 
-      nz       = nz + 1 
-      xw(nz)   = witem 
+      nz       = nz + 1
+      xw(nz)   = witem
       xwid(nz) = widx
       call heap%insert(witem,widx,info)
       if (info /= psb_success_) then
@@ -1000,9 +1000,9 @@ contains
 
     !
     ! Now we have to take out the first nlw+fill_in entries
-    ! 
+    !
     if (nz <= nlw+fill_in) then
-      ! 
+      !
       ! Just copy everything from xw, and it is already ordered
       !
     else
@@ -1014,7 +1014,7 @@ contains
           call psb_errpush(info,name,a_err='psb_heap_get_first')
           goto 9999
         end if
-      
+
         xw(k)   = witem
         xwid(k) = widx
       end do
@@ -1029,15 +1029,15 @@ contains
     ! Copy out the lower part of the row
     !
     do k=1,nz
-      l1     = l1 + 1 
+      l1     = l1 + 1
       if (size(lval) < l1) then
-        ! 
+        !
         ! Figure out a good reallocation size!
-        ! 
+        !
         isz  = (max((l1/i)*m,int(1.2*l1),l1+100))
-        call psb_realloc(isz,lval,info) 
-        if (info == psb_success_) call psb_realloc(isz,lja,info) 
-        if (info /= psb_success_) then 
+        call psb_realloc(isz,lval,info)
+        if (info == psb_success_) call psb_realloc(isz,lja,info)
+        if (info /= psb_success_) then
           info=psb_err_from_subroutine_
           call psb_errpush(info,name,a_err='Allocate')
           goto 9999
@@ -1046,25 +1046,25 @@ contains
       lja(l1)   = xwid(k)
       lval(l1)  = xw(indx(k))
     end do
-    
+
     !
     ! Make sure idxp points to the diagonal entry
     !
-    if (idxp <= size(idxs)) then 
-      if (idxs(idxp) < i) then 
-        do 
+    if (idxp <= size(idxs)) then
+      if (idxs(idxp) < i) then
+        do
           idxp = idxp + 1
           if (idxp > nidx) exit
           if (idxs(idxp) >= i) exit
         end do
       end if
     end if
-    if (idxp > size(idxs)) then 
+    if (idxp > size(idxs)) then
 !!$      write(0,*) 'Warning: missing diagonal element in the row '
     else
-      if (idxs(idxp) > i) then 
+      if (idxs(idxp) > i) then
 !!$        write(0,*) 'Warning: missing diagonal element in the row '
-      else if (idxs(idxp) /= i) then 
+      else if (idxs(idxp) /= i) then
 !!$        write(0,*) 'Warning: impossible error: diagonal has vanished'
       else
         !
@@ -1076,7 +1076,7 @@ contains
         if (abs(d(i)) < d_epstol) then
           !
           ! Too small pivot: unstable factorization
-          !     
+          !
           info = psb_err_pivot_too_small_
           int_err(1) = i
           write(ch_err,'(g20.10)') d(i)
@@ -1092,7 +1092,7 @@ contains
     end if
 
     !
-    ! Now the upper part 
+    ! Now the upper part
     !
 
     call heap%init(info,dir=psb_asort_down_)
@@ -1104,15 +1104,15 @@ contains
 
     nz       = 0
     do
- 
+
       idxp = idxp + 1
       if (idxp > nidx) exit
       widx      = idxs(idxp)
-      if (widx <= i) then 
+      if (widx <= i) then
 !!$        write(0,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp)
         cycle
       end if
-      if (widx > m) then 
+      if (widx > m) then
 !!$        write(0,*) 'Warning: impossible value',widx,i,idxp,idxs(idxp)
         cycle
       end if
@@ -1120,12 +1120,12 @@ contains
       !
       ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway.
       !
-      if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then 
-        cycle 
+      if ((widx /= jmaxup) .and. (abs(witem) < thres*nrmi)) then
+        cycle
       end if
 
       nz       = nz + 1
-      xw(nz)   = witem 
+      xw(nz)   = witem
       xwid(nz) = widx
       call heap%insert(witem,widx,info)
       if (info /= psb_success_) then
@@ -1133,7 +1133,7 @@ contains
         call psb_errpush(info,name,a_err='psb_insert_heap')
         goto 9999
       end if
-      
+
     end do
 
     !
@@ -1141,7 +1141,7 @@ contains
     ! we include entry jmaxup.
     !
     if (nz <= nup+fill_in) then
-      ! 
+      !
       ! Just copy everything from xw
       !
       fndmaxup=.true.
@@ -1155,13 +1155,13 @@ contains
         if (widx == jmaxup) fndmaxup=.true.
       end do
     end if
-    if ((i<jmaxup).and.(jmaxup<=m)) then 
-      if (.not.fndmaxup) then 
-        ! 
+    if ((i<jmaxup).and.(jmaxup<=m)) then
+      if (.not.fndmaxup) then
+        !
         ! Include entry jmaxup, if it is not already there.
-        ! Put it in the place of the smallest coefficient. 
+        ! Put it in the place of the smallest coefficient.
         !
-        xw(nz)   = row(jmaxup) 
+        xw(nz)   = row(jmaxup)
         xwid(nz) = jmaxup
       endif
     end if
@@ -1175,15 +1175,15 @@ contains
     ! Copy out the upper part of the row
     !
     do k=1,nz
-      l2     = l2 + 1 
+      l2     = l2 + 1
       if (size(uval) < l2) then
-        ! 
+        !
         ! Figure out a good reallocation size!
-        ! 
+        !
         isz  = max((l2/i)*m,int(1.2*l2),l2+100)
-        call psb_realloc(isz,uval,info) 
-        if (info == psb_success_) call psb_realloc(isz,uja,info) 
-        if (info /= psb_success_) then 
+        call psb_realloc(isz,uval,info)
+        if (info == psb_success_) call psb_realloc(isz,uja,info)
+        if (info /= psb_success_) then
           info=psb_err_from_subroutine_
           call psb_errpush(info,name,a_err='Allocate')
           goto 9999
diff --git a/prec/impl/psb_z_invk_fact.f90 b/prec/impl/psb_z_invk_fact.f90
new file mode 100644
index 00000000..99489acd
--- /dev/null
+++ b/prec/impl/psb_z_invk_fact.f90
@@ -0,0 +1,495 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+
+subroutine psb_z_invk_bld(a,fill1, fill2,lmat,d,umat,desc,info,blck)
+
+  use psb_base_mod
+  use psb_z_invk_fact_mod, psb_protect_name =>  psb_z_invk_bld
+  use psb_z_ilu_fact_mod
+  implicit none
+
+  ! Arguments
+  type(psb_zspmat_type), intent(in), target   :: a
+  integer(psb_ipk_), intent(in)               :: fill1, fill2
+  type(psb_zspmat_type), intent(inout)        :: lmat, umat
+  complex(psb_dpk_), allocatable                 :: d(:)
+  Type(psb_desc_type), Intent(inout)          :: desc
+  integer(psb_ipk_), intent(out)              :: info
+  type(psb_zspmat_type), intent(in), optional :: blck
+  !
+  integer(psb_ipk_)      :: i, nztota, err_act, n_row, nrow_a, n_col
+  type(psb_zspmat_type)  :: atmp
+  complex(psb_dpk_), allocatable :: pq(:), pd(:)
+  integer(psb_ipk_), allocatable :: uplevs(:)
+  integer(psb_ipk_)   :: debug_level, debug_unit
+  type(psb_ctxt_type) :: ctxt
+  integer(psb_ipk_)   :: np,me
+  integer(psb_ipk_)   :: nzrmax
+  character(len=20)   :: name, ch_err
+
+
+  info = psb_success_
+  name='psb_zinvk_bld'
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  ctxt        = psb_cd_get_context(desc)
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' start'
+
+  !
+  ! Check the memory available to hold the incomplete L and U factors
+  ! and allocate it if needed
+  !
+  nrow_a = a%get_nrows()
+  nztota = a%get_nzeros()
+
+  if (present(blck)) then
+    nztota = nztota + blck%get_nzeros()
+  end if
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),&
+       & ': out get_nnzeros',nrow_a,nztota,&
+       & a%get_nrows(),a%get_ncols(),a%get_nzeros()
+
+
+  n_row  = psb_cd_get_local_rows(desc)
+  n_col  = psb_cd_get_local_cols(desc)
+  allocate(pd(n_row),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+
+  call lmat%allocate(n_row,n_row,info,nz=nztota)
+  if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota)
+
+
+  call psb_iluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck)
+       !,uplevs=uplevs)
+  !call psb_ziluk_fact(fill1,psb_ilu_n_,a,lmat,umat,pd,info,blck=blck)
+
+  if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota)
+  if(info/=0) then
+    info=psb_err_from_subroutine_
+    ch_err='psb_sp_all'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+
+  !
+  ! Compute the aprox U^-1  and L^-1
+  !
+  call psb_sparse_invk(n_row,umat,atmp,fill2,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,umat,info)
+  if (info == psb_success_) call lmat%transp()
+  if (info == psb_success_) call psb_sparse_invk(n_row,lmat,atmp,fill2,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,lmat,info)
+  if (info == psb_success_) call lmat%transp()
+  ! Done. Hopefully....
+
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='invt')
+    goto 9999
+  end if
+
+  call psb_move_alloc(pd,d,info)
+  call lmat%set_asb()
+  call lmat%trim()
+  call umat%set_asb()
+  call umat%trim()
+
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' end'
+
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_z_invk_bld
+
+subroutine psb_zsparse_invk(n,a,z,fill_in,info,inlevs)
+
+  use psb_base_mod
+  use psb_z_invk_fact_mod, psb_protect_name => psb_zsparse_invk
+
+  integer(psb_ipk_), intent(in)           :: n
+  type(psb_zspmat_type), intent(in)       :: a
+  type(psb_zspmat_type), intent(inout)    :: z
+  integer(psb_ipk_), intent(in)           :: fill_in
+  integer(psb_ipk_), intent(out)          :: info
+  integer(psb_ipk_), intent(in), optional :: inlevs(:)
+  !
+  integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2
+  integer(psb_ipk_), allocatable        :: ia(:), ja(:), iz(:), jz(:)
+  complex(psb_dpk_), allocatable :: zw(:), val(:), valz(:)
+  integer(psb_ipk_), allocatable        :: uplevs(:), rowlevs(:), idxs(:)
+  complex(psb_dpk_), allocatable :: row(:)
+  type(psb_z_coo_sparse_mat)  :: trw
+  type(psb_z_csr_sparse_mat)  :: acsr, zcsr
+  integer(psb_ipk_)           :: ktrw, nidx
+  type(psb_i_heap)            :: heap
+
+  real(psb_dpk_)     :: alpha
+  character(len=20)  :: name='psb_sp_invk'
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then
+    write(psb_err_unit,*) 'Wrong A '
+    info = psb_err_internal_error_
+    call psb_errpush(psb_err_internal_error_,name,a_err='wrong A')
+    goto 9999
+  end if
+  call a%cp_to(acsr)
+  call trw%allocate(izero,izero,ione)
+  if (info == psb_success_) allocate(zw(n),iz(n),valz(n),&
+       & row(n),rowlevs(n),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  allocate(uplevs(acsr%get_nzeros()),stat=info)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='Allocate')
+    goto 9999
+  end if
+  uplevs(:)  = 0
+  row(:)     = zzero
+  rowlevs(:) = -(n+1)
+
+  call zcsr%allocate(n,n,n*fill_in)
+  call zcsr%set_triangle()
+  call zcsr%set_unit(.false.)
+  call zcsr%set_upper()
+  call psb_ensure_size(n+1, idxs,  info)
+
+
+  !
+  !
+  zcsr%irp(1)  = 1
+  nzz          = 0
+
+  l2 = 0
+  outer: do i = 1, n-1
+    ! ZW = e_i
+    call psb_invk_copyin(i,n,acsr,ione,n,row,rowlevs,heap,ktrw,trw,info,&
+         & sign=-done,inlevs=inlevs)
+    row(i)     = zone
+    rowlevs(i) = 0
+
+    ! Update loop
+    call psb_invk_inv(fill_in,i,row,rowlevs,heap,&
+         & acsr%ja,acsr%irp,acsr%val,uplevs,nidx,idxs,info)
+
+    call psb_invk_copyout(fill_in,i,n,row,rowlevs,nidx,idxs,&
+         & l2,zcsr%ja,zcsr%irp,zcsr%val,info)
+
+    nzz = l2
+  end do outer
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='mainloop')
+    goto 9999
+  end if
+  ipz1 = nzz+1
+  call psb_ensure_size(ipz1,zcsr%val,info)
+  call psb_ensure_size(ipz1,zcsr%ja,info)
+  zcsr%val(ipz1) = zone
+  zcsr%ja(ipz1)  = n
+  zcsr%irp(n+1)  = ipz1+1
+  call zcsr%set_sorted()
+  call z%mv_from(zcsr)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_zsparse_invk
+
+subroutine psb_z_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,ktrw,trw,info,sign,inlevs)
+
+  use psb_base_mod
+  use psb_z_invk_fact_mod, psb_protect_name => psb_z_invk_copyin
+
+  implicit none
+
+  ! Arguments
+  type(psb_z_csr_sparse_mat), intent(in)    :: a
+  type(psb_z_coo_sparse_mat), intent(inout) :: trw
+  integer(psb_ipk_), intent(in)             :: i,m,jmin,jmax
+  integer(psb_ipk_), intent(inout)          :: ktrw,info
+  integer(psb_ipk_), intent(inout)          :: rowlevs(:)
+  complex(psb_dpk_), intent(inout)             :: row(:)
+  type(psb_i_heap), intent(inout)           :: heap
+  real(psb_dpk_), optional, intent(in)      :: sign
+  integer(psb_ipk_), intent(in), optional   :: inlevs(:)
+
+  ! Local variables
+  integer(psb_ipk_)             :: k,j,irb,err_act, nz
+  integer(psb_ipk_), parameter  :: nrb=16
+  real(psb_dpk_)                :: sign_
+  character(len=20), parameter  :: name='invk_copyin'
+  character(len=20)             :: ch_err
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  call heap%init(info)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='psb_init_heap')
+    goto 9999
+  end if
+
+  if (present(sign)) then
+    sign_ = sign
+  else
+    sign_ = done
+  end if
+
+
+  !
+  ! Take a fast shortcut if the matrix is stored in CSR format
+  !
+  if (present(inlevs)) then
+    do j = a%irp(i), a%irp(i+1) - 1
+      k          = a%ja(j)
+      if ((jmin<=k).and.(k<=jmax)) then
+        row(k)     = sign_ * a%val(j)
+        rowlevs(k) = inlevs(j)
+        call heap%insert(k,info)
+      end if
+    end do
+  else
+    do j = a%irp(i), a%irp(i+1) - 1
+      k          = a%ja(j)
+      if ((jmin<=k).and.(k<=jmax)) then
+        row(k)     = sign_ * a%val(j)
+        rowlevs(k) = 0
+        call heap%insert(k,info)
+      end if
+    end do
+  end if
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_z_invk_copyin
+
+
+subroutine psb_z_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,&
+     &  l2,uia1,uia2,uaspk,info)
+
+  use psb_base_mod
+  use psb_z_invk_fact_mod, psb_protect_name => psb_z_invk_copyout
+
+  implicit none
+
+  ! Arguments
+  integer(psb_ipk_), intent(in)                 :: fill_in, i, m, nidx
+  integer(psb_ipk_), intent(inout)              :: l2, info
+  integer(psb_ipk_), intent(inout)              :: rowlevs(:), idxs(:)
+  integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:)
+  complex(psb_dpk_), allocatable, intent(inout)    :: uaspk(:)
+  complex(psb_dpk_), intent(inout)                 :: row(:)
+
+  ! Local variables
+  integer(psb_ipk_)              :: j,isz,err_act,int_err(5),idxp
+  character(len=20), parameter  :: name='psb_ziluk_factint'
+  character(len=20)             :: ch_err
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  do idxp=1,nidx
+
+    j = idxs(idxp)
+
+    if (j>=i) then
+      !
+      ! Copy the upper part of the row
+      !
+      if (rowlevs(j) <= fill_in) then
+        l2     = l2 + 1
+        if (size(uaspk) < l2) then
+          !
+          ! Figure out a good reallocation size!
+          !
+          isz  = max(int(1.2*l2),l2+100)
+          call psb_realloc(isz,uaspk,info)
+          if (info == psb_success_) call psb_realloc(isz,uia1,info)
+          if (info /= psb_success_) then
+            info=psb_err_from_subroutine_
+            call psb_errpush(info,name,a_err='Allocate')
+            goto 9999
+          end if
+        end if
+        uia1(l2)   = j
+        uaspk(l2)  = row(j)
+      end if
+      !
+      ! Re-initialize row(j) and rowlevs(j)
+      !
+      row(j)     = zzero
+      rowlevs(j) = -(m+1)
+    end if
+  end do
+
+  uia2(i+1) = l2 + 1
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_z_invk_copyout
+
+subroutine psb_zinvk_inv(fill_in,i,row,rowlevs,heap,ja,irp,val,uplevs,&
+     & nidx,idxs,info)
+
+  use psb_base_mod
+  use psb_z_invk_fact_mod, psb_protect_name => psb_zinvk_inv
+
+  implicit none
+
+  ! Arguments
+  type(psb_i_heap), intent(inout)               :: heap
+  integer(psb_ipk_), intent(in)                 :: i, fill_in
+  integer(psb_ipk_), intent(inout)              :: nidx,info
+  integer(psb_ipk_), intent(inout)              :: rowlevs(:)
+  integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+  integer(psb_ipk_), intent(in)                 :: ja(:),irp(:),uplevs(:)
+  complex(psb_dpk_), intent(in)                    :: val(:)
+  complex(psb_dpk_), intent(inout)                 :: row(:)
+
+  ! Local variables
+  integer(psb_ipk_) :: k,j,lrwk,jj,lastk, iret
+  real(psb_dpk_)    :: rwk
+
+
+  info = psb_success_
+
+  call psb_ensure_size(200, idxs,  info)
+  if (info /= psb_success_) return
+  nidx    = 1
+  idxs(1) = i
+  lastk   = i
+
+  !
+  ! Do while there are indices to be processed
+  !
+  do
+    ! Beware: (iret < 0) means that the heap is empty, not an error.
+    call heap%get_first(k,iret)
+    if (iret < 0) then
+!!$        write(psb_err_unit,*) 'IINVK: ',i,' returning at ',lastk
+      return
+    end if
+
+    !
+    ! Just in case an index has been put on the heap more than once.
+    !
+    if (k == lastk) cycle
+
+    lastk = k
+    nidx = nidx + 1
+    if (nidx>size(idxs)) then
+      call psb_realloc(nidx+psb_heap_resize,idxs,info)
+      if (info /= psb_success_) return
+    end if
+    idxs(nidx) = k
+
+    if ((row(k) /= zzero).and.(rowlevs(k) <= fill_in)) then
+      !
+      ! Note: since U is scaled while copying it out (see iluk_copyout),
+      ! we can use rwk in the update below
+      !
+      rwk    = row(k)
+      lrwk   = rowlevs(k)
+
+      do jj=irp(k),irp(k+1)-1
+        j = ja(jj)
+        if (j<=k) then
+          info = -i
+          return
+        endif
+        !
+        ! Insert the index into the heap for further processing.
+        ! The fill levels are initialized to a negative value. If we find
+        ! one, it means that it is an as yet untouched index, so we need
+        ! to insert it; otherwise it is already on the heap, there is no
+        ! need to insert it more than once.
+        !
+        if (rowlevs(j)<0) then
+          call heap%insert(j,info)
+          if (info /= psb_success_) return
+          rowlevs(j) = abs(rowlevs(j))
+        end if
+        !
+        ! Update row(j) and the corresponding fill level
+        !
+        row(j)     = row(j) - rwk * val(jj)
+        rowlevs(j) = min(rowlevs(j),lrwk+uplevs(jj)+1)
+      end do
+
+    end if
+  end do
+
+end subroutine psb_zinvk_inv
diff --git a/prec/impl/psb_z_invt_fact.f90 b/prec/impl/psb_z_invt_fact.f90
new file mode 100644
index 00000000..bed713ae
--- /dev/null
+++ b/prec/impl/psb_z_invt_fact.f90
@@ -0,0 +1,739 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG4PSBLAS, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+
+subroutine psb_z_invt_bld(a,fillin,invfill,thresh,invthresh,&
+     & lmat,d,umat,desc,info,blck)
+
+  use psb_base_mod
+  use psb_z_invt_fact_mod, psb_protect_name =>  psb_z_invt_bld
+  use psb_z_ilu_fact_mod
+  implicit none
+
+  ! Arguments
+  type(psb_zspmat_type), intent(in), target   :: a
+  integer(psb_ipk_), intent(in)               :: fillin,invfill
+  real(psb_dpk_), intent(in)                :: thresh
+  real(psb_dpk_), intent(in)                  :: invthresh
+  type(psb_zspmat_type), intent(inout)        :: lmat, umat
+  complex(psb_dpk_), allocatable                 :: d(:)
+  Type(psb_desc_type), Intent(inout)          :: desc
+  integer(psb_ipk_), intent(out)              :: info
+  type(psb_zspmat_type), intent(in), optional :: blck
+  !
+  integer(psb_ipk_) :: i, nztota, err_act, n_row, nrow_a, n_col
+  type(psb_zspmat_type)       :: atmp
+  complex(psb_dpk_), allocatable :: pq(:), pd(:), w(:)
+  integer(psb_ipk_) :: debug_level, debug_unit
+  type(psb_ctxt_type) :: ctxt
+  integer(psb_ipk_)   :: np,me
+  integer(psb_ipk_)   :: nzrmax
+  real(psb_dpk_)    :: sp_thresh
+  character(len=20) :: name, ch_err, fname
+
+
+  info = psb_success_
+  name='psb_zinvt_fact'
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+  debug_unit  = psb_get_debug_unit()
+  debug_level = psb_get_debug_level()
+  ctxt        = psb_cd_get_context(desc)
+  call psb_info(ctxt, me, np)
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' start'
+
+  !
+  ! Check the memory available to hold the incomplete L and U factors
+  ! and allocate it if needed
+  !
+  nrow_a = a%get_nrows()
+  nztota = a%get_nzeros()
+
+  if (present(blck)) then
+    nztota = nztota + blck%get_nzeros()
+  end if
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),&
+       & ': out get_nnzeros',nrow_a,nztota,&
+       & a%get_nrows(),a%get_ncols(),a%get_nzeros()
+
+
+  n_row  = psb_cd_get_local_rows(desc)
+  n_col  = psb_cd_get_local_cols(desc)
+  allocate(pd(n_row),w(n_row),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  nzrmax    = fillin
+  sp_thresh = thresh
+
+  call lmat%allocate(n_row,n_row,info,nz=nztota)
+  if (info == psb_success_) call umat%allocate(n_row,n_row,info,nz=nztota)
+
+  if (info == 0) call psb_ilut_fact(nzrmax,sp_thresh,&
+       & a,lmat,umat,pd,info,blck=blck,iscale=psb_ilu_scale_maxval_)
+
+  if (info == psb_success_) call atmp%allocate(n_row,n_row,info,nz=nztota)
+  if(info/=0) then
+    info=psb_err_from_subroutine_
+    ch_err='psb_sp_all'
+    call psb_errpush(info,name,a_err=ch_err)
+    goto 9999
+  end if
+
+  if (.false.) then
+!!$    if (debug_level >= psb_debug_inner_) then
+    write(fname,'(a,i0,a)') 'invt-lo-',me,'.mtx'
+    call lmat%print(fname,head="INVTLOW")
+    write(fname,'(a,i0,a)') 'invt-up-',me,'.mtx'
+    call umat%print(fname,head="INVTUPP")
+  end if
+
+  !
+  ! Compute the approx U^-1  and L^-1
+  !
+  nzrmax    = invfill
+  call psb_zsparse_invt(n_row,umat,atmp,nzrmax,invthresh,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,umat,info)
+  if (info == psb_success_) call lmat%transp()
+  if (info == psb_success_) call psb_zsparse_invt(n_row,lmat,atmp,nzrmax,invthresh,info)
+  if (info == psb_success_) call psb_move_alloc(atmp,lmat,info)
+  if (info == psb_success_) call lmat%transp()
+  ! Done. Hopefully....
+
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='invt')
+    goto 9999
+  end if
+
+  call psb_move_alloc(pd,d,info)
+  call lmat%set_asb()
+  call lmat%trim()
+  call umat%set_asb()
+  call umat%trim()
+
+  if (debug_level >= psb_debug_outer_) &
+       & write(debug_unit,*) me,' ',trim(name),' end'
+
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_z_invt_bld
+
+subroutine psb_zsparse_invt(n,a,z,nzrmax,sp_thresh,info)
+
+  use psb_base_mod
+  use psb_z_invt_fact_mod, psb_protect_name => psb_zsparse_invt
+
+  implicit none
+  integer(psb_ipk_), intent(in)        :: n
+  type(psb_zspmat_type), intent(in)    :: a
+  type(psb_zspmat_type), intent(inout) :: z
+  integer(psb_ipk_), intent(in)        :: nzrmax
+  real(psb_dpk_), intent(in)           :: sp_thresh
+  integer(psb_ipk_), intent(out)       :: info
+  !
+  integer(psb_ipk_) :: i,j,k, err_act, nz, nzra, nzrz, ipz1,ipz2, nzz, ip1, ip2, l2
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), iz(:),jz(:)
+  complex(psb_dpk_), allocatable    :: zw(:), val(:), valz(:)
+  integer(psb_ipk_), allocatable :: uplevs(:), rowlevs(:),idxs(:)
+  complex(psb_dpk_), allocatable :: row(:)
+  type(psb_z_coo_sparse_mat)  :: trw
+  type(psb_z_csr_sparse_mat)  :: acsr, zcsr
+  integer(psb_ipk_)           :: ktrw, nidx, nlw,nup,jmaxup
+  type(psb_i_heap)            :: heap
+  real(psb_dpk_)     :: alpha, nrmi
+  character(len=20)  :: name='psb_sp_invt'
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  if (.not.(a%is_triangle().and.a%is_unit().and.a%is_upper())) then
+    write(psb_err_unit,*) 'Wrong A '
+    info = psb_err_internal_error_
+    call psb_errpush(psb_err_internal_error_,name,a_err='wrong A')
+    goto 9999
+  end if
+  call a%cp_to(acsr)
+  call trw%allocate(izero,izero,ione)
+  if (info == psb_success_) allocate(zw(n),iz(n),valz(n),&
+       & row(n),rowlevs(n),stat=info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    goto 9999
+  end if
+
+  call zcsr%allocate(n,n,n*nzrmax)
+  call zcsr%set_triangle()
+  call zcsr%set_unit(.false.)
+  call zcsr%set_upper()
+  !
+  !
+  nzz        = 0
+  row(:)     = zzero
+  rowlevs(:) = 0
+  l2         = 0
+  zcsr%irp(1) = 1
+
+  outer: do i = 1, n-1
+    ! ZW = e_i
+    call psb_z_invt_copyin(i,n,acsr,i,ione,n,nlw,nup,jmaxup,nrmi,row,&
+         & heap,rowlevs,ktrw,trw,info,sign=-done)
+    if (info /= 0) exit
+    row(i) = zone
+    ! Adjust norm
+    if (nrmi < done) then
+      nrmi = sqrt(done + nrmi**2)
+    else
+      nrmi = nrmi*sqrt(zone+zone/(nrmi**2))
+    end if
+
+    call psb_invt_inv(sp_thresh,i,nrmi,row,heap,rowlevs,&
+         & acsr%ja,acsr%irp,acsr%val,nidx,idxs,info)
+    if (info /= 0) exit
+!!$    write(0,*) 'Calling copyout ',nzrmax,nlw,nup,nidx,l2
+    call psb_z_invt_copyout(nzrmax,sp_thresh,i,n,nlw,nup,jmaxup,nrmi,row,&
+         & nidx,idxs,l2,zcsr%ja,zcsr%irp,zcsr%val,info)
+    if (info /= 0) exit
+    nzz = l2
+  end do outer
+  if (info /= psb_success_) then
+    info = psb_err_internal_error_
+    call psb_errpush(info,name,a_err='mainloop')
+    goto 9999
+  end if
+
+  ipz1 = nzz+1
+  call psb_ensure_size(ipz1,zcsr%val,info)
+  call psb_ensure_size(ipz1,zcsr%ja,info)
+  zcsr%val(ipz1) = zone
+  zcsr%ja(ipz1)  = n
+  zcsr%irp(n+1)  = ipz1+1
+
+  call z%mv_from(zcsr)
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_zsparse_invt
+
+subroutine psb_z_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,&
+     & irwt,ktrw,trw,info,sign)
+  use psb_base_mod
+  use psb_d_invt_fact_mod, psb_protect_name => psb_d_invt_copyin
+  implicit none
+  type(psb_z_csr_sparse_mat), intent(in)    :: a
+  type(psb_z_coo_sparse_mat), intent(inout) :: trw
+  integer(psb_ipk_), intent(in)             :: i, m,jmin,jmax,jd
+  integer(psb_ipk_), intent(inout)          :: ktrw,nlw,nup,jmaxup,info
+  integer(psb_ipk_), intent(inout)          :: irwt(:)
+  real(psb_dpk_), intent(inout)          :: nrmi
+  complex(psb_dpk_), intent(inout)            :: row(:)
+  type(psb_i_heap), intent(inout)           :: heap
+  real(psb_dpk_), intent(in), optional    :: sign
+  !
+  integer(psb_ipk_)            :: k,j,irb,kin,nz, err_act
+  integer(psb_ipk_), parameter :: nrb=16
+  real(psb_dpk_)               :: dmaxup, sign_
+  real(psb_dpk_), external     :: dnrm2
+  character(len=20), parameter :: name='invt_copyin'
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  call heap%init(info)
+  if (info /= psb_success_) then
+    info=psb_err_from_subroutine_
+    call psb_errpush(info,name,a_err='psb_init_heap')
+    goto 9999
+  end if
+  sign_ = done
+  if (present(sign)) sign_ = sign
+  !
+  ! nrmi is the norm of the current sparse row (for the time being,
+  ! we use the 2-norm).
+  ! NOTE: the 2-norm below includes also elements that are outside
+  ! [jmin:jmax] strictly. Is this really important? TO BE CHECKED.
+  !
+
+  nlw    = 0
+  nup    = 0
+  jmaxup = 0
+  dmaxup = zzero
+  nrmi   = zzero
+
+  do j = a%irp(i), a%irp(i+1) - 1
+    k = a%ja(j)
+    if ((jmin<=k).and.(k<=jmax)) then
+      row(k)     = sign_ * a%val(j)
+      call heap%insert(k,info)
+      irwt(k) = 1
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='psb_insert_heap')
+        goto 9999
+      end if
+    end if
+    if (k<jd) nlw = nlw + 1
+    if (k>jd) then
+      nup = nup + 1
+      if (abs(row(k))>dmaxup) then
+        jmaxup = k
+        dmaxup = abs(row(k))
+      end if
+    end if
+  end do
+  nz   = a%irp(i+1) - a%irp(i)
+  nrmi = dnrm2(nz,a%val(a%irp(i):),ione)
+
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_z_invt_copyin
+
+subroutine psb_z_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, &
+     & nidx,idxs,l2,ja,irp,val,info)
+
+  use psb_base_mod
+  use psb_z_invt_fact_mod, psb_protect_name => psb_z_invt_copyout
+
+  implicit none
+
+  ! Arguments
+  integer(psb_ipk_), intent(in)                 :: fill_in,i,m,nidx,nlw,nup,jmaxup
+  integer(psb_ipk_), intent(in)                 :: idxs(:)
+  integer(psb_ipk_), intent(inout)              :: l2, info
+  integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:)
+  real(psb_dpk_), intent(in)                    :: thres,nrmi
+  complex(psb_dpk_),allocatable, intent(inout)     :: val(:)
+  complex(psb_dpk_), intent(inout)                 :: row(:)
+
+  ! Local variables
+  real(psb_dpk_),allocatable     :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+  real(psb_dpk_)                 :: witem, wmin
+  integer(psb_ipk_)              :: widx
+  integer(psb_ipk_)              :: k,isz,err_act,int_err(5),idxp, nz
+  type(psb_d_idx_heap)           :: heap
+  character(len=20), parameter   :: name='invt_copyout'
+  character(len=20)              :: ch_err
+  logical                        :: fndmaxup
+
+  info = psb_success_
+  call psb_erractionsave(err_act)
+  if (psb_errstatus_fatal()) then
+    info = psb_err_internal_error_; goto 9999
+  end if
+
+  !
+  ! Here we need to apply also the dropping rule base on the fill-in.
+  ! We do it by putting into a heap the elements that are not dropped
+  ! by using the 2-norm rule, and then copying them out.
+  !
+  ! The heap goes down on the entry absolute value, so the first item
+  ! is the largest absolute value.
+  !
+!!$  write(0,*) 'invt_copyout ',nidx,nup+fill_in
+  call heap%init(info,dir=psb_asort_down_)
+
+  if (info == psb_success_) allocate(xwid(nidx),xw(nidx),indx(nidx),stat=info)
+  if (info /= psb_success_) then
+    info=psb_err_alloc_request_
+    call psb_errpush(info,name,i_err=(/3*nidx/),&
+         & a_err='real(psb_dpk_)')
+    goto 9999
+  end if
+
+  !
+  ! First the lower part
+  !
+
+  nz   = 0
+  idxp = 0
+
+  do
+
+    idxp = idxp + 1
+    if (idxp > nidx) exit
+    if (idxs(idxp) >= i) exit
+    widx      = idxs(idxp)
+    witem     = row(widx)
+    !
+    ! Dropping rule based on the 2-norm
+    !
+    if (abs(witem) < thres*nrmi) cycle
+
+    nz       = nz + 1
+    xw(nz)   = witem
+    xwid(nz) = widx
+    call heap%insert(witem,widx,info)
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_insert_heap')
+      goto 9999
+    end if
+  end do
+
+  if (nz > 1) then
+    write(psb_err_unit,*) 'Warning: lower triangle from invt???? '
+  end if
+
+
+  if (idxp <= size(idxs)) then
+    if (idxs(idxp) < i) then
+      do
+        idxp = idxp + 1
+        if (idxp > nidx) exit
+        if (idxs(idxp) >= i) exit
+      end do
+    end if
+  end if
+  idxp = idxp - 1
+  nz   = 0
+  wmin=HUGE(wmin)
+  if (.false.) then
+    do
+
+      idxp = idxp + 1
+      if (idxp > nidx) exit
+      widx      = idxs(idxp)
+      if (widx < i) then
+        write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp)
+        cycle
+      end if
+      if (widx > m) then
+        cycle
+      end if
+      witem     = row(widx)
+      !
+      ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway.
+      !
+      if ((widx /= jmaxup) .and. (widx /= i) .and. (abs(witem) < thres*nrmi)) then
+        cycle
+      end if
+      if ((widx/=jmaxup).and.(nz > nup+fill_in)) then
+        if (abs(witem) < wmin) cycle
+      endif
+      wmin = min(abs(witem),wmin)
+      nz       = nz + 1
+      xw(nz)   = witem
+      xwid(nz) = widx
+      call heap%insert(witem,widx,info)
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='psb_insert_heap')
+        goto 9999
+      end if
+
+    end do
+
+    !
+    ! Now we have to take out the first nup-fill_in entries. But make sure
+    ! we include entry jmaxup.
+    !
+    if (nz <= nup+fill_in) then
+      !
+      ! Just copy everything from xw
+      !
+      fndmaxup=.true.
+    else
+      fndmaxup = .false.
+      nz = nup+fill_in
+      do k=1,nz
+        call heap%get_first(witem,widx,info)
+        xw(k)   = witem
+        xwid(k) = widx
+        if (widx == jmaxup) fndmaxup=.true.
+      end do
+    end if
+    if ((i<jmaxup).and.(jmaxup<=m)) then
+      if (.not.fndmaxup) then
+        !
+        ! Include entry jmaxup, if it is not already there.
+        ! Put it in the place of the smallest coefficient.
+        !
+        xw(nz)   = row(jmaxup)
+        xwid(nz) = jmaxup
+      endif
+    end if
+
+  else if (.true.) then
+
+    do
+
+      idxp = idxp + 1
+      if (idxp > nidx) exit
+      widx      = idxs(idxp)
+      if (widx < i) then
+        write(psb_err_unit,*) 'Warning: lower triangle in upper copy',widx,i,idxp,idxs(idxp)
+        cycle
+      end if
+      if (widx > m) then
+        cycle
+      end if
+      witem     = row(widx)
+      !
+      ! Dropping rule based on the 2-norm. But keep the jmaxup-th entry anyway.
+      !
+      if ((widx /= i) .and. (abs(witem) < thres*nrmi)) then
+        cycle
+      end if
+      if (nz > nup+fill_in) then
+        if (abs(witem) < wmin) cycle
+      endif
+      wmin = min(abs(witem),wmin)
+      nz       = nz + 1
+      xw(nz)   = witem
+      xwid(nz) = widx
+      call heap%insert(witem,widx,info)
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='psb_insert_heap')
+        goto 9999
+      end if
+
+    end do
+
+    !
+    ! Now we have to take out the first nup-fill_in entries. But make sure
+    ! we include entry jmaxup.
+    !
+    if (nz >  nup+fill_in) then
+      nz = nup+fill_in
+      do k=1,nz
+        call heap%get_first(witem,widx,info)
+        xw(k)   = witem
+        xwid(k) = widx
+      end do
+    end if
+  end if
+
+  !
+  ! Now we put things back into ascending column order
+  !
+  call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_)
+
+  !
+  ! Copy out the upper part of the row
+  !
+  do k=1,nz
+    l2     = l2 + 1
+    if (size(val) < l2) then
+      !
+      ! Figure out a good reallocation size!
+      !
+      isz  = max(int(1.2*l2),l2+100)
+      call psb_realloc(isz,val,info)
+      if (info == psb_success_) call psb_realloc(isz,ja,info)
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        call psb_errpush(info,name,a_err='Allocate')
+        goto 9999
+      end if
+    end if
+    ja(l2)   = xwid(k)
+    val(l2)  = xw(indx(k))
+  end do
+
+  !
+  ! Set row to zero
+  !
+  do idxp=1,nidx
+    row(idxs(idxp)) = zzero
+  end do
+
+  irp(i+1) = l2 + 1
+
+  call psb_erractionrestore(err_act)
+  return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_z_invt_copyout
+
+subroutine psb_z_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val,nidx,idxs,info)
+
+  use psb_base_mod
+  use psb_z_invt_fact_mod, psb_protect_name => psb_z_invt_inv
+
+  implicit none
+
+  ! Arguments
+  type(psb_i_heap), intent(inout)     :: heap
+  integer(psb_ipk_), intent(in)       :: i
+  integer(psb_ipk_), intent(inout)    :: nidx,info
+  integer(psb_ipk_), intent(inout)    :: irwt(:)
+  real(psb_dpk_), intent(in)          :: thres,nrmi
+  integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+  integer(psb_ipk_), intent(in)       :: ja(:),irp(:)
+  complex(psb_dpk_), intent(in)          :: val(:)
+  complex(psb_dpk_), intent(inout)       :: row(:)
+
+  ! Local Variables
+  integer(psb_ipk_) :: k,j,jj,lastk,iret
+  real(psb_dpk_)    :: rwk, alpha
+
+  info  = psb_success_
+
+  call psb_ensure_size(200, idxs,  info)
+  if (info /= psb_success_) return
+  nidx    = 1
+  idxs(1) = i
+  lastk   = i
+  irwt(i) = 1
+!!$  write(0,*) 'Drop Threshold ',thres*nrmi
+  !
+  ! Do while there are indices to be processed
+  !
+  do
+
+    call heap%get_first(k,iret)
+    if (iret < 0)  exit
+
+    !
+    ! An index may have been put on the heap more than once.
+    ! Should not happen, but just in case.
+    !
+    if (k == lastk)  cycle
+    lastk = k
+
+    !
+    ! Dropping rule based on the threshold: compare the absolute
+    ! value of each updated entry of row with thres * 2-norm of row.
+    !
+    rwk    = row(k)
+
+    if (abs(rwk) < thres*nrmi) then
+      !
+      ! Drop the entry.
+      !
+      row(k)  = dzero
+      irwt(k) = 0
+      cycle
+    else
+      !
+      ! Note: since U is scaled while copying it out (see ilut_copyout),
+      ! we can use rwk in the update below.
+      !
+      do jj=irp(k),irp(k+1)-1
+        j = ja(jj)
+        if (j<=k) then
+          info = -i
+          return
+        endif
+        !
+        ! Update row(j) and, if it is not to be discarded, insert
+        ! its index into the heap for further processing.
+        !
+        row(j)     = row(j) - rwk * val(jj)
+        if (irwt(j) == 0) then
+          if (abs(row(j)) < thres*nrmi) then
+            !
+            ! Drop the entry.
+            !
+            row(j)  = dzero
+          else
+            !
+            ! Do the insertion.
+            !
+            call heap%insert(j,info)
+            if (info /= psb_success_) return
+            irwt(j) = 1
+          end if
+        end if
+      end do
+    end if
+
+    !
+    ! If we get here it is an index we need to keep on copyout.
+    !
+
+    nidx       = nidx + 1
+    call psb_ensure_size(nidx,idxs,info,addsz=psb_heap_resize)
+    if (info /= psb_success_) return
+    idxs(nidx) = k
+    irwt(k)    = 0
+  end do
+
+  irwt(i) = 0
+end subroutine psb_z_invt_inv
diff --git a/prec/impl/psb_z_prec_type_impl.f90 b/prec/impl/psb_z_prec_type_impl.f90
index 9cbd32ca..00f0b05e 100644
--- a/prec/impl/psb_z_prec_type_impl.f90
+++ b/prec/impl/psb_z_prec_type_impl.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,14 +27,14 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
-!   
+!
+!
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -46,7 +46,7 @@
 !!$    3. The name of the PSBLAS group or the names of its contributors may
 !!$       not be used to endorse or promote products derived from this
 !!$       software without specific written permission.
-!!$ 
+!!$
 !!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -58,14 +58,14 @@
 !!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !!$  POSSIBILITY OF SUCH DAMAGE.
-!!$ 
-!!$  
+!!$
+!!$
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
 subroutine psb_z_apply2_vect(prec,x,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_z_prec_type, psb_protect_name => psb_z_apply2_vect
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)       :: desc_data
   class(psb_zprec_type), intent(inout) :: prec
   type(psb_z_vect_type),intent(inout)  :: x
@@ -74,7 +74,7 @@ subroutine psb_z_apply2_vect(prec,x,y,desc_data,info,trans,work)
   character(len=1), optional           :: trans
   complex(psb_dpk_),intent(inout), optional, target :: work(:)
 
-  character     :: trans_ 
+  character     :: trans_
   complex(psb_dpk_), pointer :: work_(:)
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -88,25 +88,25 @@ subroutine psb_z_apply2_vect(prec,x,y,desc_data,info,trans,work)
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
 
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=psb_toupper(trans)
   else
     trans_='N'
   end if
 
-  if (present(work)) then 
+  if (present(work)) then
     work_ => work
   else
     allocate(work_(4*desc_data%get_local_cols()),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
 
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
@@ -115,13 +115,13 @@ subroutine psb_z_apply2_vect(prec,x,y,desc_data,info,trans,work)
   call prec%prec%apply(zone,x,zzero,y,desc_data,info,&
        & trans=trans_,work=work_)
 
-  if (present(work)) then 
+  if (present(work)) then
   else
     deallocate(work_,stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='DeAllocate')
-      goto 9999      
+      goto 9999
     end if
   end if
 
@@ -136,7 +136,7 @@ end subroutine psb_z_apply2_vect
 subroutine psb_z_apply1_vect(prec,x,desc_data,info,trans,work)
   use psb_base_mod
   use psb_z_prec_type, psb_protect_name => psb_z_apply1_vect
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)       :: desc_data
   class(psb_zprec_type), intent(inout) :: prec
   type(psb_z_vect_type),intent(inout)  :: x
@@ -145,7 +145,7 @@ subroutine psb_z_apply1_vect(prec,x,desc_data,info,trans,work)
   complex(psb_dpk_),intent(inout), optional, target :: work(:)
 
   type(psb_z_vect_type)       :: ww
-  character     :: trans_ 
+  character     :: trans_
   complex(psb_dpk_), pointer :: work_(:)
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -159,25 +159,25 @@ subroutine psb_z_apply1_vect(prec,x,desc_data,info,trans,work)
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
 
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=psb_toupper(trans)
   else
     trans_='N'
   end if
 
-  if (present(work)) then 
+  if (present(work)) then
     work_ => work
   else
     allocate(work_(4*desc_data%get_local_cols()),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
 
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
@@ -188,13 +188,13 @@ subroutine psb_z_apply1_vect(prec,x,desc_data,info,trans,work)
        & trans=trans_,work=work_)
   if (info == 0) call psb_geaxpby(zone,ww,zzero,x,desc_data,info)
   call psb_gefree(ww,desc_data,info)
-  if (present(work)) then 
+  if (present(work)) then
   else
     deallocate(work_,stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='DeAllocate')
-      goto 9999      
+      goto 9999
     end if
   end if
 
@@ -209,7 +209,7 @@ end subroutine psb_z_apply1_vect
 subroutine psb_z_apply2v(prec,x,y,desc_data,info,trans,work)
   use psb_base_mod
   use psb_z_prec_type, psb_protect_name => psb_z_apply2v
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_zprec_type), intent(inout) :: prec
   complex(psb_dpk_),intent(inout)   :: x(:)
@@ -218,7 +218,7 @@ subroutine psb_z_apply2v(prec,x,y,desc_data,info,trans,work)
   character(len=1), optional        :: trans
   complex(psb_dpk_),intent(inout), optional, target :: work(:)
 
-  character     :: trans_ 
+  character     :: trans_
   complex(psb_dpk_), pointer :: work_(:)
   type(psb_ctxt_type) :: ctxt
   integer(psb_ipk_) :: np,me
@@ -232,37 +232,37 @@ subroutine psb_z_apply2v(prec,x,y,desc_data,info,trans,work)
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
 
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=trans
   else
     trans_='N'
   end if
 
-  if (present(work)) then 
+  if (present(work)) then
     work_ => work
   else
     allocate(work_(4*desc_data%get_local_cols()),stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='Allocate')
-      goto 9999      
+      goto 9999
     end if
 
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
   end if
   call prec%prec%apply(zone,x,zzero,y,desc_data,info,trans_,work=work_)
-  if (present(work)) then 
+  if (present(work)) then
   else
     deallocate(work_,stat=info)
-    if (info /= psb_success_) then 
+    if (info /= psb_success_) then
       info = psb_err_from_subroutine_
       call psb_errpush(info,name,a_err='DeAllocate')
-      goto 9999      
+      goto 9999
     end if
   end if
 
@@ -277,7 +277,7 @@ end subroutine psb_z_apply2v
 subroutine psb_z_apply1v(prec,x,desc_data,info,trans)
   use psb_base_mod
   use psb_z_prec_type, psb_protect_name => psb_z_apply1v
-  implicit none 
+  implicit none
   type(psb_desc_type),intent(in)    :: desc_data
   class(psb_zprec_type), intent(inout) :: prec
   complex(psb_dpk_),intent(inout)   :: x(:)
@@ -293,36 +293,34 @@ subroutine psb_z_apply1v(prec,x,desc_data,info,trans)
   name='psb_z_apply1v'
   info = psb_success_
   call psb_erractionsave(err_act)
-
-
   ctxt = desc_data%get_context()
   call psb_info(ctxt, me, np)
-  if (present(trans)) then 
+  if (present(trans)) then
     trans_=psb_toupper(trans)
   else
     trans_='N'
   end if
 
-  if (.not.allocated(prec%prec)) then 
+  if (.not.allocated(prec%prec)) then
     info = 1124
     call psb_errpush(info,name,a_err="preconditioner")
     goto 9999
   end if
   allocate(ww(size(x)),w1(size(x)),stat=info)
-  if (info /= psb_success_) then 
+  if (info /= psb_success_) then
     info = psb_err_from_subroutine_
     call psb_errpush(info,name,a_err='Allocate')
-    goto 9999      
+    goto 9999
   end if
   call prec%prec%apply(zone,x,zzero,ww,desc_data,info,&
        & trans_,work=w1)
   if(info /= psb_success_) goto 9999
   x(:) = ww(:)
   deallocate(ww,W1,stat=info)
-  if (info /= psb_success_) then 
+  if (info /= psb_success_) then
     info = psb_err_from_subroutine_
     call psb_errpush(info,name,a_err='DeAllocate')
-    goto 9999      
+    goto 9999
   end if
 
 
@@ -336,3 +334,159 @@ subroutine psb_z_apply1v(prec,x,desc_data,info,trans)
 
 end subroutine psb_z_apply1v
 
+subroutine psb_zcprecseti(prec,what,val,info,ilev,ilmax,pos,idx)
+  use psb_base_mod
+  use psb_z_prec_type, psb_protect_name => psb_zcprecseti
+  implicit none
+
+  class(psb_zprec_type), intent(inout)   :: prec
+  character(len=*), intent(in)             :: what
+  integer(psb_ipk_), intent(in)            :: val
+  integer(psb_ipk_), intent(out)           :: info
+  ! This optional inputs are backport from the inputs available in AMG4PSBLAS,
+  ! they are of no actual use here a part from compatibility reasons.
+  integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+  character(len=*), optional, intent(in)   :: pos
+
+  ! Local variables
+  character(len=*), parameter            :: name='psb_precseti'
+
+  info = psb_success_
+
+  ! We need to convert from the 'what' string to the corresponding integer
+  ! value befor passing the call to the set of the inner method.
+  select case (psb_toupper(trim(what)))
+    case ('SUB_FILLIN')
+      call prec%prec%precset(psb_ilu_fill_in_,val,info)
+    case ('INV_FILLIN')
+      call prec%prec%precset(psb_inv_fillin_,val,info)
+    case default
+      info = psb_err_invalid_args_combination_
+      write(psb_err_unit,*) name,&
+           & ': Error: uninitialized preconditioner,',&
+           &' should call prec%init'
+      return
+  end select
+
+end subroutine psb_zcprecseti
+
+subroutine psb_zcprecsetr(prec,what,val,info,ilev,ilmax,pos,idx)
+  use psb_base_mod
+  use psb_z_prec_type, psb_protect_name => psb_zcprecsetr
+  implicit none
+
+  class(psb_zprec_type), intent(inout)   :: prec
+  character(len=*), intent(in)             :: what
+  real(psb_dpk_), intent(in)             :: val
+  integer(psb_ipk_), intent(out)           :: info
+  ! This optional inputs are backport from the inputs available in AMG4PSBLAS,
+  ! they are of no actual use here a part from compatibility reasons.
+  integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+  character(len=*), optional, intent(in)   :: pos
+
+  ! Local variables
+  character(len=*), parameter            :: name='psb_precsetr'
+
+  info = psb_success_
+
+  ! We need to convert from the 'what' string to the corresponding integer
+  ! value befor passing the call to the set of the inner method.
+  select case (psb_toupper(trim(what)))
+  case('SUB_ILUTHRS')
+    call prec%prec%precset(psb_fact_eps_,val,info)
+  case('INV_THRESH')
+    call prec%prec%precset(psb_inv_thresh_,val,info)
+  case default
+    info = psb_err_invalid_args_combination_
+    write(psb_err_unit,*) name,&
+         & ': Error: uninitialized preconditioner,',&
+         &' should call prec%init'
+    return
+  end select
+
+end subroutine psb_zcprecsetr
+
+subroutine psb_zcprecsetc(prec,what,string,info,ilev,ilmax,pos,idx)
+  use psb_base_mod
+  use psb_z_prec_type, psb_protect_name => psb_zcprecsetc
+  implicit none
+
+  class(psb_zprec_type), intent(inout)   :: prec
+  character(len=*), intent(in)             :: what
+  character(len=*), intent(in)             :: string
+  integer(psb_ipk_), intent(out)           :: info
+  ! This optional inputs are backport from the inputs available in AMG4PSBLAS,
+  ! they are of no actual use here a part from compatibility reasons.
+  integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+  character(len=*), optional, intent(in)   :: pos
+
+  ! Local variables
+  character(len=*), parameter            :: name='psb_precsetc'
+
+  info = psb_success_
+
+  ! We need to convert from the 'what' string to the corresponding integer
+  ! value befor passing the call to the set of the inner method.
+  select case (psb_toupper(trim(what)))
+    case ('SUB_SOLVE')
+      ! We select here the type of solver on the block
+      select case (psb_toupper(trim(string)))
+        case("ILU")
+            call prec%prec%precset(psb_f_type_,psb_f_ilu_k_,info)
+            call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info)
+        case("ILUT")
+            call prec%prec%precset(psb_f_type_,psb_f_ilu_t_,info)
+            call prec%prec%precset(psb_ilu_ialg_,psb_ilu_t_,info)
+        case("AINV")
+            call prec%prec%precset(psb_f_type_,psb_f_ainv_,info)
+        case("INVK")
+            call prec%prec%precset(psb_f_type_,psb_f_invk_,info)
+        case("INVT")
+            call prec%prec%precset(psb_f_type_,psb_f_invt_,info)
+        case default
+          ! Default to ILU(0) factorization
+          call prec%prec%precset(psb_f_type_,psb_f_ilu_n_,info)
+          call prec%prec%precset(psb_ilu_ialg_,psb_ilu_n_,info)
+      end select
+    case ("ILU_ALG")
+      select case (psb_toupper(trim(string)))
+        case ("MILU")
+          call prec%prec%precset(psb_ilu_ialg_,psb_milu_n_,info)
+        case default
+          ! Do nothing
+      end select
+    case ("ILUT_SCALE")
+      select case (psb_toupper(trim(string)))
+      case ("MAXVAL")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_maxval_,info)
+      case ("DIAG")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_diag_,info)
+      case ("ARWSUM")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arwsum_,info)
+      case ("ARCSUM")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_arcsum_,info)
+      case ("ACLSUM")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_aclsum_,info)
+      case ("NONE")
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info)
+      case default
+        call prec%prec%precset(psb_ilu_scale_,psb_ilu_scale_none_,info)
+      end select
+    case ("AINV_ALG")
+      select case (psb_toupper(trim(string)))
+      case("LLK")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info)
+      case("SYM-LLK")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_llk_,info)
+      case("STAB-LLK")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_s_ft_llk_,info)
+      case("MLK","LMX")
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_mlk_,info)
+      case default
+        call prec%prec%precset(psb_ainv_alg_,psb_ainv_llk_,info)
+      end select
+    case default
+
+  end select
+
+end subroutine psb_zcprecsetc
diff --git a/prec/impl/psb_z_sp_drop.f90 b/prec/impl/psb_z_sp_drop.f90
new file mode 100644
index 00000000..754c76cc
--- /dev/null
+++ b/prec/impl/psb_z_sp_drop.f90
@@ -0,0 +1,168 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_z_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info)
+  use psb_base_mod
+  implicit none
+  real(psb_dpk_), intent(in)       :: sp_thresh
+  integer(psb_ipk_), intent(in)    :: idiag, nzrmax
+  integer(psb_ipk_), intent(inout) :: nz
+  integer(psb_ipk_), intent(inout) :: iz(:)
+  complex(psb_dpk_), intent(inout)    :: valz(:)
+  integer(psb_ipk_), intent(out)   :: info
+  !
+  integer(psb_ipk_)              :: i, j, idf, nw
+  complex(psb_dpk_)                 :: witem
+  integer(psb_ipk_)              :: widx
+  complex(psb_dpk_), allocatable    :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+
+
+  info = psb_success_
+
+  if (nz > min(size(iz),size(valz))) then
+    write(0,*) 'Serious size problem ',nz,size(iz),size(valz)
+    info = -2
+    return
+  end if
+  allocate(xw(nz),xwid(nz),indx(nz),stat=info)
+  if (info /= psb_success_) then
+    write(psb_err_unit,*) ' Memory allocation failure in sp_drop',nz,info
+    return
+  endif
+
+  ! Always keep the diagonal element
+  idf = -1
+  do i=1, nz
+    if (iz(i) == idiag) then
+      idf     = i
+      witem   = valz(i)
+      widx    = iz(i)
+      valz(i) = valz(1)
+      iz(i)   = iz(1)
+      valz(1) = witem
+      iz(1)   = widx
+      exit
+    end if
+  end do
+
+  if (idf == -1) then
+
+    xw(1:nz) = valz(1:nz)
+    call psb_qsort(xw(1:nz),indx(1:nz),dir=psb_asort_down_)
+    do i=1, nz
+      xwid(i) = iz(indx(i))
+    end do
+    nw = min(nz,nzrmax)
+    do
+      if (nw <= 1) exit
+      if (abs(xw(nw)) < sp_thresh) then
+        nw = nw - 1
+      else
+        exit
+      end if
+    end do
+    nw = max(nw, 1)
+
+  else
+
+    nw = nz-1
+
+    xw(1:nw) = valz(2:nz)
+
+    call psb_qsort(xw(1:nw),indx(1:nw),dir=psb_asort_down_)
+    nw = min(nw,nzrmax-1)
+    do
+      if (nw <= 1) exit
+      if (abs(xw(nw)) < sp_thresh) then
+        nw = nw - 1
+      else
+        exit
+      end if
+    end do
+
+    do i=1, nw
+      xwid(i) = iz(1+indx(i))
+    end do
+    nw       = nw + 1
+    xw(nw)   = valz(1)
+    xwid(nw) = iz(1)
+  end if
+
+  call psb_msort(xwid(1:nw),indx(1:nw),dir=psb_sort_up_)
+
+  do i=1, nw
+    valz(i) = xw(indx(i))
+    iz(i)   = xwid(i)
+  end do
+  nz = nw
+  if (nz>nzrmax) write(0,*) 'in sp_drop: ',nw,nzrmax,nz
+  deallocate(xw,xwid,indx,stat=info)
+  if (info /= psb_success_) then
+    write(psb_err_unit,*) ' Memory deallocation failure in sp_drop',info
+    return
+  endif
+  return
+end subroutine psb_z_sp_drop
diff --git a/prec/impl/psb_z_sparsify.f90 b/prec/impl/psb_z_sparsify.f90
new file mode 100644
index 00000000..fe29230f
--- /dev/null
+++ b/prec/impl/psb_z_sparsify.f90
@@ -0,0 +1,260 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_z_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info,istart,iheap,ikr)
+  use psb_base_mod
+  implicit none
+
+  real(psb_dpk_), intent(in)              :: sp_thresh
+  integer(psb_ipk_), intent(in)           :: idiag, n, nzrmax
+  complex(psb_dpk_), intent(inout)           :: zw(:)
+  integer(psb_ipk_), intent(out)          :: nz
+  integer(psb_ipk_), intent(out)          :: iz(:)
+  complex(psb_dpk_), intent(out)             :: valz(:)
+  integer(psb_ipk_), intent(out)          :: info
+  integer(psb_ipk_), intent(in), optional :: istart
+  type(psb_i_heap), optional              :: iheap
+  integer(psb_ipk_), optional             :: ikr(:)
+  !
+  integer(psb_ipk_)              :: i, istart_, last_i, iret,k
+  complex(psb_dpk_)                :: witem
+  integer(psb_ipk_)              :: widx
+  complex(psb_dpk_), allocatable   :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+  type(psb_z_idx_heap)         :: heap
+
+
+  info = psb_success_
+  istart_ = 1
+  if (present(istart)) istart_ = max(1,istart)
+  if (.false.) then
+    nz = 0
+    do i=istart_, n
+      if ((i == idiag).or.(abs(zw(i)) >= sp_thresh)) then
+        nz       = nz + 1
+        iz(nz)   = i
+        valz(nz) = zw(i)
+      end if
+    end do
+
+  else
+    allocate(xw(nzrmax),xwid(nzrmax),indx(nzrmax),stat=info)
+    if (info /= psb_success_) then
+      return
+    end if
+
+    call heap%init(info,dir=psb_asort_down_)
+
+    ! Keep at least the diagonal
+    nz = 0
+
+    if (present(iheap)) then
+      if (.not.(present(ikr))) then
+        write(psb_err_unit,*) 'Error: if IHEAP then also IKR'
+        info = -1
+        return
+      end if
+      last_i = -1
+      do
+        call iheap%get_first(i,iret)
+        if (iret < 0) exit
+        ! An index may have been put on the heap more than once.
+        if (i == last_i) cycle
+        last_i = i
+        if (i == idiag) then
+          xw(1)   = zw(i)
+          xwid(1) = i
+        else if (abs(zw(i)) >= sp_thresh) then
+          call heap%insert(zw(i),i,info)
+        end if
+        zw(i)  = dzero
+        ikr(i) = 0
+      end do
+
+    else
+
+      do i=istart_, n
+        if (i == idiag) then
+          xw(1)   = zw(i)
+          xwid(1) = i
+        else if (abs(zw(i)) >= sp_thresh) then
+          call heap%insert(zw(i),i,info)
+        end if
+        zw(i) = dzero
+      end do
+    end if
+
+    k = 1
+    do
+      if (k == nzrmax) exit
+      call heap%get_first(witem,widx,info)
+      if (info == -1) then
+        info = psb_success_
+        exit
+      endif
+      if (info /= psb_success_) then
+        info=psb_err_from_subroutine_
+        return
+      end if
+      k = k + 1
+      xw(k)   = witem
+      xwid(k) = widx
+    end do
+    call heap%free(info)
+    nz = k
+    call psb_msort(xwid(1:nz),indx(1:nz),dir=psb_sort_up_)
+!!$    write(0,*) 'sparsify output for idiag ',idiag,' :',nz,sp_thresh
+    do i=1, nz
+      valz(i) = xw(indx(i))
+      iz(i)   = xwid(i)
+!!$      write(0,*) '         ',iz(i),valz(i)
+    end do
+
+  end if
+
+  return
+
+end subroutine psb_z_sparsify
+
+
+subroutine psb_z_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info)
+  use psb_base_mod
+  implicit none
+
+  real(psb_dpk_), intent(in)      :: sp_thresh
+  integer(psb_ipk_), intent(in)     :: idiag, n, nzrmax
+  complex(psb_dpk_), intent(inout)    :: zw(:)
+  integer(psb_ipk_), intent(out)    :: nz
+  integer(psb_ipk_), intent(out)    :: iz(:)
+  complex(psb_dpk_), intent(out)       :: valz(:)
+  integer(psb_ipk_), intent(out)    :: info
+  integer(psb_ipk_), intent(inout)  :: lhead, listv(:)
+  integer(psb_ipk_)                 :: ikr(:)
+  !
+  integer(psb_ipk_)              :: i, istart_, last_i, iret,k,current, next
+  complex(psb_dpk_)                :: witem
+  integer(psb_ipk_)              :: widx
+  complex(psb_dpk_), allocatable    :: xw(:)
+  integer(psb_ipk_), allocatable :: xwid(:), indx(:)
+
+
+  info = psb_success_
+  istart_ = 1
+  allocate(xw(n),xwid(n),indx(n),stat=info)
+
+  current = lhead
+  lhead = -1
+  i = 0
+  do while (current >0)
+    i = i + 1
+    xw(i)   = zw(current)
+    xwid(i) = current
+
+    if (current == idiag) then
+      ! Bring the diagona into first position
+      witem = xw(1)
+      widx  = xwid(1)
+      xw(1)   = xw(i)
+      xwid(1) = xwid(i)
+      xw(i)   = witem
+      xwid(i) = widx
+    end if
+
+    zw(current)  = dzero
+    ikr(current) = 0
+
+    next = listv(current)
+    listv(current) = -1
+    current = next
+  end do
+  nz = i
+  if (nz > 2) call psb_hsort(xw(2:nz),ix=xwid(2:nz),&
+       & dir=psb_asort_down_,flag=psb_sort_keep_idx_)
+!!$  write(0,*) 'Done first msort '
+!!$  write(0,*) '   after first msort for idiag ',idiag,' :',nz,sp_thresh
+!!$  do i=1, nz
+!!$    write(0,*) '         ',xwid(i),xw(i)
+!!$  end do
+
+  i = 2
+  do while (i<=nz)
+    if (abs(xw(i)) < sp_thresh) exit
+    i = i + 1
+  end do
+!!$  write(0,*) 'NZ ',nz, i, nzrmax
+  nz = max(1,min(i-1,nzrmax))
+  call psb_msort(xwid(1:nz),ix=indx(1:nz),dir=psb_sort_up_)
+!!$  write(0,*) 'Done second msort '
+
+!!$  write(0,*) 'sparsify output for idiag ',idiag,' :',nz,i,sp_thresh
+  do i=1, nz
+    valz(i) = xw(indx(i))
+    iz(i)   = xwid(i)
+!!$      write(0,*) '         ',iz(i),valz(i)
+  end do
+
+  return
+
+end subroutine psb_z_sparsify_list
diff --git a/prec/impl/psb_zrwclip.f90 b/prec/impl/psb_zrwclip.f90
new file mode 100644
index 00000000..574ebcf8
--- /dev/null
+++ b/prec/impl/psb_zrwclip.f90
@@ -0,0 +1,90 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_z_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax)
+  use psb_base_mod
+
+  implicit none
+  integer(psb_ipk_), intent(inout)  :: nz
+  integer(psb_ipk_), intent(inout)  :: ia(*), ja(*)
+  complex(psb_dpk_), intent(inout)    :: val(*)
+  integer(psb_ipk_), intent(in)     :: imin,imax,jmin,jmax
+
+  integer(psb_ipk_) :: i,j
+
+  j = 0
+  do i=1, nz
+    if ((imin <= ia(i)).and.&
+         & (ia(i) <= imax).and.&
+         & (jmin <= ja(i)).and.&
+         & (ja(i) <= jmax) ) then
+      j = j + 1
+      ia(j) = ia(i)
+      ja(j) = ja(i)
+      val(j) = val(i)
+    end if
+  end do
+  nz = j
+end subroutine psb_z_rwclip
diff --git a/prec/impl/psb_zsparse_biconjg_llk.F90 b/prec/impl/psb_zsparse_biconjg_llk.F90
new file mode 100644
index 00000000..2d3a90cb
--- /dev/null
+++ b/prec/impl/psb_zsparse_biconjg_llk.F90
@@ -0,0 +1,366 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_zsparse_biconjg_llk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_z_ainv_tools_mod
+  use psb_z_biconjg_mod, psb_protect_name => psb_zsparse_biconjg_llk
+  !
+  ! Left-looking variant
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_z_csr_sparse_mat), intent(in)    :: a
+  type(psb_z_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_dpk_), intent(in)                :: sp_thresh
+  complex(psb_dpk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable        :: ia(:), ja(:), izkr(:), izcr(:)
+  complex(psb_dpk_), allocatable :: zval(:),val(:), q(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       & ipj, lastj, nextj, nzw
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_z_csc_sparse_mat) :: ac
+  complex(psb_dpk_)     :: alpha
+  character(len=20)  :: name='psb_orth_llk'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = zzero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = zzero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < d_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = zone
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = zone
+  nzw       = 1
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = zzero
+    ! !$        end do
+    zval(i)  = zone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j) < i) then
+!!$        write(0,*) i, ' Outer inserting ',ac%ia(j)
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+      if (debug) write(0,*) 'update loop, using row: ',j,i
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! ! write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+!!$      write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+!!$        write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, &
+!!$             & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1)
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+          if (izkr(kr) == 0) then
+!!$            write(0,*) ' main inner Inserting ',kr
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+!!$                write(0,*) j,i,' Inner inserting ',nextj
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+    if (abs(p(i)) < d_epstol) &
+         & p(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = zzero
+    ! !$        end do
+    zval(i)  = zone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = a%irp(i), a%irp(i+1)-1
+      if (a%ja(j) < i) then
+        if (info == psb_success_) call rheap%insert(a%ja(j),info)
+        izcr(a%ja(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      innerw: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outerw ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit innerw
+        end if
+      end do innerw
+      izcr(j) = 0
+      if (j>=i) cycle outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = ac%icp(j)
+      ip2 = ac%icp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (ac%ia(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-q(i)/q(j))
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+
+        do k=w%icp(j), w%icp(j+1)-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              nextj=a%ja(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outerw
+    ip1 = ac%icp(i)
+    ip2 = ac%icp(i+1) - 1
+    do
+      if (ip2 < ip1 ) exit
+      if (ac%ia(ip2) <= n) exit
+      ip2 = ip2 -1
+    end do
+    nzra = max(0,ip2 - ip1 + 1)
+    q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+    if (abs(q(i)) < d_epstol) &
+         & q(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrz, w%ia,  info)
+    call psb_ensure_size(nzw+nzrz, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrz
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrz
+    nzw        = nzw + nzrz
+
+  end do
+
+end subroutine psb_zsparse_biconjg_llk
diff --git a/prec/impl/psb_zsparse_biconjg_llk_noth.F90 b/prec/impl/psb_zsparse_biconjg_llk_noth.F90
new file mode 100644
index 00000000..65975a24
--- /dev/null
+++ b/prec/impl/psb_zsparse_biconjg_llk_noth.F90
@@ -0,0 +1,362 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_zsparse_biconjg_llk_noth(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_z_ainv_tools_mod
+  use psb_z_biconjg_mod, psb_protect_name => psb_zsparse_biconjg_llk_noth
+
+  !
+  ! Left-looking variant, with NO drop rule on p(i)/p(j)
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)               :: n
+  type(psb_z_csr_sparse_mat), intent(in)    :: a
+  type(psb_z_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)               :: nzrmax
+  real(psb_dpk_), intent(in)                :: sp_thresh
+  complex(psb_dpk_), intent(out)                :: p(:)
+  integer(psb_ipk_), intent(out)              :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable        :: ia(:), ja(:), izkr(:), izcr(:)
+  complex(psb_dpk_), allocatable :: zval(:),val(:), q(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_z_csc_sparse_mat) :: ac
+  real(psb_dpk_)     :: alpha
+  character(len=20)  :: name='psb_orth_llk_noth'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = zzero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = zzero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < d_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = done
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = done
+  nzw       = 1
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = zzero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j) < i) then
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+      if (debug) write(0,*) 'update loop, using row: ',j,i
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+
+      if (.true.) then
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+          if (izkr(kr) == 0) then
+
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+    if (abs(p(i)) < d_epstol) &
+         & p(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = zzero
+    ! !$        end do
+    zval(i)  = done
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = a%irp(i), a%irp(i+1)-1
+      if (a%ja(j) < i) then
+        if (info == psb_success_) call rheap%insert(a%ja(j),info)
+        izcr(a%ja(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      innerw: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outerw ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit innerw
+        end if
+      end do innerw
+      izcr(j) = 0
+      if (j>=i) cycle outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = ac%icp(j)
+      ip2 = ac%icp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (ac%ia(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-q(i)/q(j))
+      if (.true.) then
+
+        do k=w%icp(j), w%icp(j+1)-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              nextj=a%ja(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outerw
+    ip1 = ac%icp(i)
+    ip2 = ac%icp(i+1) - 1
+    do
+      if (ip2 < ip1 ) exit
+      if (ac%ia(ip2) <= n) exit
+      ip2 = ip2 -1
+    end do
+    nzra = max(0,ip2 - ip1 + 1)
+    q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+    if (abs(q(i)) < d_epstol) &
+         & q(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrz, w%ia,  info)
+    call psb_ensure_size(nzw+nzrz, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrz
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrz
+    nzw        = nzw + nzrz
+
+  end do
+
+end subroutine psb_zsparse_biconjg_llk_noth
diff --git a/prec/impl/psb_zsparse_biconjg_mlk.F90 b/prec/impl/psb_zsparse_biconjg_mlk.F90
new file mode 100644
index 00000000..52c30c2a
--- /dev/null
+++ b/prec/impl/psb_zsparse_biconjg_mlk.F90
@@ -0,0 +1,501 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_zsparse_biconjg_mlk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_z_ainv_tools_mod
+  use psb_z_biconjg_mod, psb_protect_name => psb_zsparse_biconjg_mlk
+  !
+  ! Left-looking variant
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_z_csr_sparse_mat), intent(in)    :: a
+  type(psb_z_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_dpk_), intent(in)                :: sp_thresh
+  complex(psb_dpk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:), hlist(:), bfr(:), rwlist(:)
+  complex(psb_dpk_), allocatable    :: zval(:),val(:), q(:)
+  integer(psb_ipk_) :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw, hlhead, li, mj, kkc, ifrst, ilst, rwhead
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_z_csc_sparse_mat) :: ac
+  complex(psb_dpk_)     :: alpha
+  character(len=20)  :: name='psb_biconjg_mlk'
+  logical, parameter :: debug=.false., test_merge=.true.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n), &
+       & hlist(n),rwlist(n),bfr(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i)  = 0
+    izcr(i)  = 0
+    zval(i)  = zzero
+    hlist(i) = -1
+    rwlist(i) = -1
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = zzero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < d_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = zone
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = zone
+  nzw       = 1
+
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = zzero
+    ! !$        end do
+    zval(i)  = zone
+    izkr(i) = 1
+    rwhead  = i
+
+    hlhead = -1
+
+    kkc = 0
+    ilst  = ac%icp(i)-1
+    ifrst = ac%icp(i)
+    do j = ac%icp(i+1)-1, ac%icp(i), -1
+      if (ac%ia(j) < i) then
+        ilst = j
+        exit
+      end if
+    end do
+    kkc = ilst-ifrst+1
+
+    if (.true..or.debug) then
+!!$      write(0,*) 'Outer Before insertion  : ',hlhead
+      call printlist(hlhead,hlist)
+    end if
+    if (kkc > 0) then
+!!$      write(0,*) i,' Outer Inserting : ',kkc,':',ac%ia(ifrst:ilst)
+
+      !call hlmerge(hlhead,hlist,bfr(1:kkc))
+      call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst))
+    end if
+    if (debug) then
+      write(0,*) 'Outer After insertion: ',hlhead
+      call printlist(hlhead,hlist)
+    end if
+
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='init_lists')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      mj      = hlhead
+      if (mj > 0)  then
+        hlhead = hlist(mj)
+        hlist(mj) = -1
+      end if
+      j = mj
+      if (j < 0)  exit outer
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+
+      if (debug) write(0,*) 'update loop, using row: ',j,i,mj
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+!!$      write(0,*) 'At step ',i,j,' p(i) ',p(i),alpha
+!!$      write(0,*) '   Current list is : ',hlhead
+!!$      call printlist(hlhead,hlist)
+!!$
+
+
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+        ifrst=z%icp(j)
+        ilst=z%icp(j+1)-1
+        call hlmerge(rwhead,rwlist,z%ia(ifrst:ilst))
+!!$        write(0,*) 'At step ',i,j,' range ',z%icp(j), z%icp(j+1)-1, &
+!!$             & ' vals ',z%ia(z%icp(j):z%icp(j+1)-1)
+
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+
+          if (izkr(kr) == 0) then
+!!$            write(0,*) ' main inner Inserting ',kr
+!!$            call hlmerge(rwhead,rwlist,(/kr/))
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            ilst  = ac%icp(kr)-1
+            ifrst = ac%icp(kr+1)
+            kkc = 0
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              if ((ac%ia(kc) < i).and.(ac%ia(kc) >j)) then
+                ifrst = min(ifrst,kc )
+                ilst  = max(ilst,kc)
+              end if
+            end do
+            kkc = ilst-ifrst+1
+            if (debug) then
+              write(0,*) 'Inner Before insertion: '
+              call printlist(hlhead,hlist)
+              write(0,*) 'Inner Inserting : ',kkc,':',ac%ia(ifrst:ilst)
+            end if
+            if (ilst >= ifrst) then
+!!$              write(0,*) j,i,' Inner inserting ',ac%ia(ifrst:ilst)
+              call hlmerge(hlhead,hlist,ac%ia(ifrst:ilst))
+            end if
+
+            if (debug) then
+              write(0,*) 'Inner After insertion: ',hlhead
+              call printlist(hlhead,hlist)
+            end if
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+    if (abs(p(i)) < d_epstol) &
+         & p(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info)
+
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = zzero
+    ! !$        end do
+    zval(i)  = zone
+    izkr(i) = 1
+    rwhead  = i
+    hlhead = -1
+
+    kkc = 0
+    ilst  = a%irp(i)-1
+    ifrst = a%irp(i)
+    do j = a%irp(i+1)-1, a%irp(i), -1
+      if (a%ja(j) < i) then
+        ilst = j
+        exit
+      end if
+    end do
+    kkc = ilst-ifrst+1
+
+    if (debug) then
+      write(0,*) 'Outer Before insertion: '
+      call printlist(hlhead,hlist)
+      write(0,*) 'Outer Inserting : ',kkc,':',a%ja(ifrst:ilst)
+    end if
+    if (kkc > 0 ) then
+      !call hlmerge(hlhead,hlist,bfr(1:kkc))
+      call hlmerge(hlhead,hlist,a%ja(ifrst:ilst))
+    end if
+    if (debug) then
+      write(0,*) 'Outer After insertion: ',hlhead
+      call printlist(hlhead,hlist)
+    end if
+
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='init_lists')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      mj      = hlhead
+      if (hlhead > 0)  then
+        hlhead = hlist(mj)
+        hlist(mj) = -1
+      end if
+      j = mj
+      if (j < 0)  exit outerw
+
+      izcr(j) = 0
+      if (j>=i) cycle outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = ac%icp(j)
+      ip2 = ac%icp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (ac%ia(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-q(i)/q(j))
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+        ifrst=w%icp(j)
+        ilst=w%icp(j+1)-1
+        call hlmerge(rwhead,rwlist,w%ia(ifrst:ilst))
+
+        do k=w%icp(j), w%icp(j+1)-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            ilst  = a%irp(kr)-1
+            ifrst = a%irp(kr+1)
+            kkc = 0
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              if ((a%ja(kc) < i).and.(a%ja(kc) >j)) then
+                ifrst = min(ifrst,kc )
+                ilst  = max(ilst,kc)
+              end if
+            end do
+            kkc = ilst-ifrst+1
+            if (debug) then
+              write(0,*) 'Inner Before insertion: '
+              call printlist(hlhead,hlist)
+              write(0,*) 'Inner Inserting : ',kkc,':',a%ja(ifrst:ilst)
+            end if
+
+            call hlmerge(hlhead,hlist,a%ja(ifrst:ilst))
+
+            if (debug) then
+              write(0,*) 'Inner After insertion: ',hlhead
+              call printlist(hlhead,hlist)
+            end if
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outerw
+    ip1 = ac%icp(i)
+    ip2 = ac%icp(i+1) - 1
+    do
+      if (ip2 < ip1 ) exit
+      if (ac%ia(ip2) <= n) exit
+      ip2 = ip2 -1
+    end do
+    nzra = max(0,ip2 - ip1 + 1)
+    q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+    if (abs(q(i)) < d_epstol) &
+         & q(i) = 1.d-3
+
+!!$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,rwhead,rwlist,izkr,info)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrz, w%ia,  info)
+    call psb_ensure_size(nzw+nzrz, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrz
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrz
+    nzw        = nzw + nzrz
+
+  end do
+
+contains
+
+  subroutine hlmerge(head,listv,vals)
+    integer(psb_ipk_), intent(inout) :: head, listv(:)
+    integer(psb_ipk_), intent(in)    :: vals(:)
+    integer(psb_ipk_) :: i,j,k, lh, lv, nv, vv, flh, ph
+
+    nv  = size(vals)
+    lh  = head
+    flh = -1
+    lv  = 1
+    if ((head < 0).and.(nv > 0)) then
+      ! Adjust head if empty
+      head = vals(lv)
+      lv = lv + 1
+    else if ((head > 0) .and. (nv >0)) then
+      ! Adjust head if first item less than it
+      if (head > vals(lv)) then
+        listv(vals(lv)) = head
+        head = vals(lv)
+        lv   = lv + 1
+      end if
+    end if
+
+    lh = head
+    ph = lh
+    do while ((lh > 0) .and. (lv <= nv))
+      if (lh == vals(lv)) then
+        lv = lv + 1
+      else if (lh > vals(lv)) then
+        listv(vals(lv)) = lh
+        listv(ph) = vals(lv)
+        lh = vals(lv)
+        lv = lv + 1
+      else
+        ph = lh
+        lh = listv(lh)
+      end if
+    end do
+    lh = ph
+    do while (lv <= nv)
+      listv(lh) = vals(lv)
+      lh = listv(lh)
+      lv = lv + 1
+    end do
+  end subroutine hlmerge
+
+
+  subroutine printlist(head,listv)
+    integer(psb_ipk_), intent(in) :: head, listv(:)
+    integer(psb_ipk_) :: li
+
+    li = head
+    do while (li > 0)
+      write(0,*) 'Item: ', li
+      li = listv(li)
+    end do
+  end subroutine printlist
+
+end subroutine psb_zsparse_biconjg_mlk
diff --git a/prec/impl/psb_zsparse_biconjg_s_ft_llk.F90 b/prec/impl/psb_zsparse_biconjg_s_ft_llk.F90
new file mode 100644
index 00000000..541a755c
--- /dev/null
+++ b/prec/impl/psb_zsparse_biconjg_s_ft_llk.F90
@@ -0,0 +1,414 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_zsparse_biconjg_s_ft_llk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_z_ainv_tools_mod
+  use psb_z_biconjg_mod, psb_protect_name => psb_zsparse_biconjg_s_ft_llk
+
+  !
+  ! Left-looking variant, stabilized i.e. product by A is applied
+  ! to compute the diagonal elements.
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_z_csr_sparse_mat), intent(in)    :: a
+  type(psb_z_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_dpk_), intent(in)                :: sp_thresh
+  complex(psb_dpk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:),iww(:)
+  complex(psb_dpk_), allocatable    :: zval(:),val(:), q(:),  ww(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj, nzww,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw, nzrw
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_z_csc_sparse_mat) :: ac
+  complex(psb_dpk_)     :: alpha, tmpq,tmpq2
+  character(len=20)  :: name='psb_orth_llk'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),q(n),iww(n),ww(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! Init pointers to:
+  !  ljr(i): last occupied column index within row  I
+  !  izcr(i): first occupied row index within column I
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = zzero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = zzero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < d_epstol) &
+       & p(1) = 1.d-3
+
+  q(1) = p(1)
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1)  = 1
+  z%icp(2)  = 2
+  z%ia(1)  = 1
+  z%val(1) = zone
+  nzz       = 1
+
+  call w%allocate(n,n,n*nzrmax)
+  w%icp(1)  = 1
+  w%icp(2)  = 2
+  w%ia(1)  = 1
+  w%val(1) = zone
+  nzw       = 1
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = zzero
+    ! !$        end do
+    zval(i)  = zone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+    if (info == psb_success_) call rheap%init(info)
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j)<i) then
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+      izcr(j) = 0
+      if (j>=i) exit outer
+      if (debug) write(0,*) 'update loop, using row: ',j
+      ip1 = w%icp(j)
+      ip2 = w%icp(j+1) - 1
+      nzra = max(0,ip2 - ip1 + 1)
+      nzww = 0
+      call psb_z_spvspm(zone,a,nzra,w%ia(ip1:ip2),w%val(ip1:ip2),&
+           & zzero,nzww,iww,ww,info)
+
+      p(i) =  psb_spge_dot(nzww,iww,ww,zval)
+
+      ipz1 = z%icp(j)
+      ipz2 = z%icp(j+1)
+      nzrz = ipz2-ipz1
+      alpha = (-p(i)/p(j))
+!!$        write(0,*) ' p(i)/p(j) ',i,j,alpha,p(i),p(j)
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+
+        do k=ipz1, ipz2-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+!!$        izcr(j) = 0
+    end do outer
+
+    if (.false.) then
+      ! We can't do the proper thing until we have bot Z_i and W_i.
+      call a%csget(i,i,nzra,ia,ja,val,info)
+      call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+      p(i) = psb_spge_dot(nzra,ja,val,zval)
+      if (abs(p(i)) < d_epstol) &
+           & p(i) = 1.d-3
+    end if
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+
+
+
+
+    ! WVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = zzero
+    ! !$        end do
+    zval(i)  = zone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+!!$      write(0,*) 'Inserting into heap ',i
+    if (info == psb_success_) call rheap%init(info)
+    do j = a%irp(i), a%irp(i+1)-1
+      if (a%ja(j)<i) then
+        if (info == psb_success_) call rheap%insert(a%ja(j),info)
+        izcr(a%ja(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outerw: do
+      innerw: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outerw ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit innerw
+        end if
+      end do innerw
+      izcr(j) = 0
+      if (j>=i) exit outerw
+      if (debug) write(0,*) 'update loop, using row: ',j
+      if (.false.) then
+        ip1 = ac%icp(j)
+        ip2 = ac%icp(j+1) - 1
+        do
+          if (ip2 < ip1 ) exit
+          if (ac%ia(ip2) <= n) exit
+          ip2 = ip2 -1
+        end do
+        nzra = max(0,ip2 - ip1 + 1)
+        q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+        ! !$          write(psb_err_unit,*) j,i,p(i)
+      else
+        ip1 = z%icp(j)
+        ip2 = z%icp(j+1) - 1
+        nzra = max(0,ip2 - ip1 + 1)
+        nzww = 0
+        call psb_z_spmspv(zone,ac,nzra,z%ia(ip1:ip2),z%val(ip1:ip2),&
+             & zzero,nzww,iww,ww,info)
+
+        q(i) =  psb_spge_dot(nzww,iww,ww,zval)
+      end if
+
+      ipz1 = w%icp(j)
+      ipz2 = w%icp(j+1)
+      nzrz = ipz2-ipz1
+      alpha = (-q(i)/q(j))
+!!$        write(0,*) ' q(i)/q(j) ',i,j,alpha,q(i),q(j)
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+
+        do k=ipz1, ipz2-1
+          kr     = w%ia(k)
+          zval(kr) = zval(kr) + alpha*w%val(k)
+          if (izkr(kr) == 0) then
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = a%irp(kr), a%irp(kr+1)-1
+              nextj=a%ja(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  a%ja(a%irp(kr):a%irp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+!!$        izcr(j) = 0
+    end do outerw
+
+!!$      ip1 = ac%icp(i)
+!!$      ip2 = ac%icp(i+1) - 1
+!!$      do
+!!$        if (ip2 < ip1 ) exit
+!!$        if (ac%ia(ip2) <= n) exit
+!!$        ip2 = ip2 -1
+!!$      end do
+!!$      nzra = max(0,ip2 - ip1 + 1)
+!!$
+!!$      q(i) = psb_spge_dot(nzra,ac%ia(ip1:ip2),ac%val(ip1:ip2),zval)
+!!$      if (abs(q(i)) < d_epstol) &
+!!$           & q(i) = 1.d-3
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrw,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzw+nzrw, w%ia,  info)
+    call psb_ensure_size(nzw+nzrw, w%val, info)
+    ipz1 = w%icp(i)
+    do j=1, nzrw
+      w%ia(ipz1  + j -1) = ia(j)
+      w%val(ipz1 + j -1) = val(j)
+    end do
+    w%icp(i+1) = ipz1 + nzrw
+    nzw        = nzw + nzrw
+
+!!$      !
+!!$      ! Ok, now compute w_i^T A z_i
+!!$      !
+    nzww = 0
+    nzrz = z%icp(i+1)-z%icp(i)
+    ipz1 = z%icp(i)
+    call psb_z_spmspv(zone,ac,&
+         & nzrz,z%ia(ipz1:ipz1+nzrz-1),z%val(ipz1:ipz1+nzrz-1),&
+         & zzero,nzww,iww,ww,info)
+    tmpq  = psb_spdot_srtd(nzww,iww,ww,nzrw,ia,val)
+    q(i) = tmpq
+!    if (tmpq <0) then
+!!$        write(0,*) 'On negative dot prod at ',i
+!!$        write(0,*) 'On negative dot prod a ',ia(1:nzrw),val(1:nzrw)
+!!$        write(0,*) 'On negative dot prod w ',iww(1:nzww),ww(1:nzww)
+!!$        ip1 = ac%icp(i)
+!!$        ip2 = ac%icp(i+1) - 1
+!!$        do
+!!$          if (ip2 < ip1 ) exit
+!!$          if (ac%ia(ip2) <= n) exit
+!!$          ip2 = ip2 -1
+!!$        end do
+!!$        nzra = max(0,ip2 - ip1 + 1)
+!!$        write(0,*) 'On negative dot prod a ',ac%ia(ip1:ip2),ac%val(ip1:ip2)
+!
+!    end if
+
+    if (abs(q(i)) < d_epstol) &
+         & q(i) = 1.d-3
+    p(i) = q(i)
+
+  end do
+
+end subroutine psb_zsparse_biconjg_s_ft_llk
diff --git a/prec/impl/psb_zsparse_biconjg_s_llk.F90 b/prec/impl/psb_zsparse_biconjg_s_llk.F90
new file mode 100644
index 00000000..e4d6624a
--- /dev/null
+++ b/prec/impl/psb_zsparse_biconjg_s_llk.F90
@@ -0,0 +1,248 @@
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+subroutine psb_zsparse_biconjg_s_llk(n,a,p,z,w,nzrmax,sp_thresh,info)
+  use psb_base_mod
+  use psb_z_ainv_tools_mod
+  use psb_z_biconjg_mod, psb_protect_name => psb_zsparse_biconjg_s_llk
+
+  !
+  ! Left-looking variant SYMMETRIC/HERMITIAN A. You have been warned!
+  !
+  !
+  implicit none
+  integer(psb_ipk_), intent(in)             :: n
+  type(psb_z_csr_sparse_mat), intent(in)    :: a
+  type(psb_z_csc_sparse_mat), intent(inout) :: z,w
+  integer(psb_ipk_), intent(in)             :: nzrmax
+  real(psb_dpk_), intent(in)                :: sp_thresh
+  complex(psb_dpk_), intent(out)               :: p(:)
+  integer(psb_ipk_), intent(out)            :: info
+
+  ! Locals
+  integer(psb_ipk_), allocatable :: ia(:), ja(:), izkr(:), izcr(:)
+  complex(psb_dpk_), allocatable    :: zval(:),val(:), q(:)
+  integer(psb_ipk_)  :: i,j,k, kc, kr, err_act, nz, nzra, nzrz, ipzi,ipzj,&
+       & nzzi,nzzj, nzz, ip1, ip2, ipza,ipzz, ipzn, nzzn, ipz1, ipz2,&
+       &  ipj, lastj, nextj, nzw,kk
+  type(psb_i_heap)   :: heap, rheap
+  type(psb_z_csc_sparse_mat) :: ac
+  complex(psb_dpk_)     :: alpha, zvalmax
+  character(len=20)  :: name='psb_orth_llk'
+  logical, parameter :: debug=.false.
+
+  allocate(zval(n),ia(n),val(n),izkr(n),izcr(n),stat=info)
+  if (info == psb_success_) call ac%cp_from_fmt(a,info)
+  if (info /= psb_success_) then
+    call psb_errpush(psb_err_from_subroutine_,name,a_err='Allocate')
+    return
+  end if
+  !
+  ! izkr(i): flag nonzeros in ZVAL. To minimize traffic into heap.
+  ! izcr(i): flag rows to be used for the dot products. Used to minimize
+  !               traffic in rheap.
+  !
+  do i=1,n
+    izkr(i) = 0
+    izcr(i) = 0
+    zval(i)  = zzero
+  end do
+
+  ! Init z_1=e_1 and p_1=a_11
+  p(1) = zzero
+  i   = 1
+  nz  = a%irp(i+1) - a%irp(i)
+  do j=1,nz
+    if (a%ja(j) == 1) then
+      p(1) = a%val(j)
+      exit
+    end if
+  end do
+  if (abs(p(1)) < d_epstol) &
+       & p(1) = 1.d-3
+
+  !
+  !
+  call z%allocate(n,n,n*nzrmax)
+
+  z%icp(1) = 1
+  z%icp(2) = 2
+  z%ia(1)  = 1
+  z%val(1) = zone
+  nzz      = 1
+  zvalmax  = zone
+
+  do i = 2, n
+    if (debug) write(0,*) 'Main loop iteration ',i,n
+
+    !
+    ! Update loop on Z.
+    ! Must be separated from update loop of W because of
+    ! the conflict on J that would result.
+    !
+
+    ! ZVAL = e_i
+    ! !$        do j=1, i-1
+    ! !$          zval(j) = zzero
+    ! !$        end do
+    zval(i)  = zone
+    izkr(i) = 1
+    call heap%init(info)
+    if (info == psb_success_) call heap%insert(i,info)
+
+    if (info == psb_success_) call rheap%init(info)
+    do j = ac%icp(i), ac%icp(i+1)-1
+      if (ac%ia(j) < i) then
+        if (info == psb_success_) call rheap%insert(ac%ia(j),info)
+        izcr(ac%ia(j)) = 1
+      end if
+    end do
+    if (info /= psb_success_) then
+      info=psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_init_heap')
+      return
+    end if
+
+    ! Update loop
+    ! The idea is to keep track of the indices of the nonzeros in zval,
+    ! so as to only do the dot products on the rows which have nonzeros
+    ! in their positions; to do this we keep an extra
+    ! copy of A in CSC, and the row indices to be considered are in rheap.
+    lastj = -1
+    outer: do
+      inner: do
+        call rheap%get_first(j,info)
+        if (debug) write(0,*) 'from get_first: ',j,info
+        if (info == -1) exit outer ! Empty heap
+        if (j > lastj) then
+          lastj = j
+          exit inner
+        end if
+      end do inner
+
+      izcr(j) = 0
+      if (j>=i) cycle outer
+      if (debug) write(0,*) 'update loop, using row: ',j,i
+      ip1 = a%irp(j)
+      ip2 = a%irp(j+1) - 1
+      do
+        if (ip2 < ip1 ) exit
+        if (a%ja(ip2) <= n) exit
+        ip2 = ip2 -1
+      end do
+      nzra = max(0,ip2 - ip1 + 1)
+      p(i) = psb_spge_dot(nzra,a%ja(ip1:ip2),a%val(ip1:ip2),zval)
+      ! !$          write(psb_err_unit,*) j,i,p(i)
+
+      alpha = (-p(i)/p(j))
+
+      if (.false..or.(abs(alpha) > sp_thresh)) then
+        do k=z%icp(j), z%icp(j+1)-1
+          kr     = z%ia(k)
+          zval(kr) = zval(kr) + alpha*z%val(k)
+!!$          if (abs(zval(kr)) > 1e16) then
+!!$            write(0,*) i,j,p(i),p(j),alpha,z%val(k),alpha*z%val(k),kr,zval(kr)
+!!$          end if
+          if (izkr(kr) == 0) then
+
+            call heap%insert(kr,info)
+            if (info /= psb_success_) exit
+            izkr(kr) = 1
+            ! We have just added a new nonzero in KR. Thus, we will
+            ! need to explicitly compute the dot products on all
+            ! rows j<k<i with nonzeros in column kr; we keep  them in
+            ! a heap.
+            !
+            do kc = ac%icp(kr), ac%icp(kr+1)-1
+              nextj=ac%ia(kc)
+              if ((info == psb_success_).and.(izcr(nextj)==0)&
+                   & .and.(nextj>j).and.(nextj<i)) then
+                call rheap%insert(nextj,info)
+                izcr(nextj) = 1
+              end if
+            end do
+            if (debug) write(0,*) 'update loop, adding indices: ',&
+                 &  ac%ia(ac%icp(kr):ac%icp(kr+1)-1)
+
+          end if
+          if (info /= psb_success_) exit
+        end do
+        if (info /= psb_success_) then
+          info=psb_err_from_subroutine_
+          call psb_errpush(info,name,a_err='psb_insert_heap')
+          return
+        end if
+      end if
+    end do outer
+    call a%csget(i,i,nzra,ia,ja,val,info)
+    call rwclip(nzra,ia,ja,val,ione,n,ione,n)
+    p(i) = psb_spge_dot(nzra,ja,val,zval)
+!!$    if ((1761<=i).and.(i<=1780)) then
+!!$      write(0,*) 'Dot product terms at ',i,nzra
+!!$      do kk=1,nzra
+!!$        write(0,*) kk,ja(kk),val(kk),zval(ja(kk))
+!!$      end do
+!!$    end if
+
+    if (abs(p(i)) < d_epstol) &
+         & p(i) = 1.d-3
+
+    ! !$      write(0,*) 'Dropping from a column with: ',i,psb_howmany_heap(heap),sp_thresh
+
+    !
+    ! Sparsify current ZVAL and put into ZMAT
+    !
+    call sparsify(i,nzrmax,sp_thresh,n,zval,nzrz,ia,val,info,iheap=heap,ikr=izkr)
+    if (info /= psb_success_) then
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='sparsify')
+      return
+    end if
+    call psb_ensure_size(nzz+nzrz, z%ia,  info)
+    call psb_ensure_size(nzz+nzrz, z%val, info)
+    ipz1 = z%icp(i)
+    do j=1, nzrz
+      z%ia(ipz1  + j -1) = ia(j)
+      z%val(ipz1 + j -1) = val(j)
+!!$      zvalmax = max(zvalmax,abs(val(j)))
+    end do
+    z%icp(i+1) = ipz1 + nzrz
+    nzz        = nzz + nzrz
+!!$    write(0,*) ' Dot: ',i,p(i),zvalmax
+
+  end do
+
+  call z%cp_to_fmt(w,info)
+
+end subroutine psb_zsparse_biconjg_s_llk
diff --git a/prec/psb_ainv_tools_mod.f90 b/prec/psb_ainv_tools_mod.f90
new file mode 100644
index 00000000..561f0f79
--- /dev/null
+++ b/prec/psb_ainv_tools_mod.f90
@@ -0,0 +1,6 @@
+module psb_ainv_tools_mod
+  use psb_c_ainv_tools_mod
+  use psb_d_ainv_tools_mod
+  use psb_s_ainv_tools_mod
+  use psb_z_ainv_tools_mod
+end module psb_ainv_tools_mod
diff --git a/prec/psb_biconjg_mod.F90 b/prec/psb_biconjg_mod.F90
new file mode 100644
index 00000000..8d867c76
--- /dev/null
+++ b/prec/psb_biconjg_mod.F90
@@ -0,0 +1,6 @@
+module psb_biconjg_mod
+  use psb_c_biconjg_mod
+  use psb_d_biconjg_mod
+  use psb_s_biconjg_mod
+  use psb_z_biconjg_mod
+end module psb_biconjg_mod
diff --git a/prec/psb_c_ainv_fact_mod.f90 b/prec/psb_c_ainv_fact_mod.f90
new file mode 100644
index 00000000..200ff561
--- /dev/null
+++ b/prec/psb_c_ainv_fact_mod.f90
@@ -0,0 +1,98 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from MLD2P4, original copyright below.
+!
+!
+!                             MLD2P4  version 2.2
+!    MultiLevel Domain Decomposition Parallel Preconditioners Package
+!               based on PSBLAS (Parallel Sparse BLAS version 3.5)
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the MLD2P4 group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+!
+! File: psb_c_ainv_fact_mod.f90
+!
+! Module: psb_c_ainv_fact_mod
+!
+!  This module defines some interfaces used internally by the implementation of
+!  psb_c_ainv_solver, but not visible to the end user.
+!
+!
+module psb_c_ainv_fact_mod
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_ainv_fact
+    subroutine psb_c_ainv_bld(a,alg,fillin,thresh,wmat,d,zmat,desc,info,blck,iscale)
+      import psb_cspmat_type, psb_spk_, psb_ipk_, psb_desc_type
+      type(psb_cspmat_type), intent(in), target   :: a
+      integer(psb_ipk_), intent(in)                 :: fillin,alg
+      real(psb_spk_), intent(in)                  :: thresh
+      type(psb_cspmat_type), intent(inout)        :: wmat, zmat
+      complex(psb_spk_), allocatable                  :: d(:)
+      Type(psb_desc_type), Intent(in)               :: desc
+      integer(psb_ipk_), intent(out)                :: info
+      type(psb_cspmat_type), intent(in), optional :: blck
+      integer(psb_ipk_), intent(in), optional       :: iscale
+    end subroutine psb_c_ainv_bld
+  end interface
+
+end module psb_c_ainv_fact_mod
diff --git a/prec/psb_c_ainv_tools_mod.f90 b/prec/psb_c_ainv_tools_mod.f90
new file mode 100644
index 00000000..a68d6d69
--- /dev/null
+++ b/prec/psb_c_ainv_tools_mod.f90
@@ -0,0 +1,132 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+!
+!
+!
+!
+module psb_c_ainv_tools_mod
+
+  interface sp_drop
+    subroutine psb_c_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info)
+      use psb_base_mod, only : psb_spk_, psb_ipk_
+      implicit none
+      real(psb_spk_), intent(in)      :: sp_thresh
+      integer(psb_ipk_), intent(in)     :: idiag, nzrmax
+      integer(psb_ipk_), intent(inout)  :: nz
+      integer(psb_ipk_), intent(inout)  :: iz(:)
+      complex(psb_spk_), intent(inout)    :: valz(:)
+      integer(psb_ipk_), intent(out)    :: info
+    end subroutine psb_c_sp_drop
+  end interface
+
+  interface rwclip
+    subroutine psb_c_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax)
+      use psb_base_mod, only : psb_spk_, psb_ipk_
+
+      implicit none
+      integer(psb_ipk_), intent(inout)  :: nz
+      integer(psb_ipk_), intent(inout)  :: ia(*), ja(*)
+      complex(psb_spk_), intent(inout)    :: val(*)
+      integer(psb_ipk_), intent(in)     :: imin,imax,jmin,jmax
+    end subroutine psb_c_rwclip
+  end interface
+
+  interface sparsify
+    subroutine psb_c_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info, &
+         & istart,iheap,ikr)
+      use psb_base_mod, only : psb_spk_, psb_ipk_, psb_i_heap
+      implicit none
+
+      real(psb_spk_), intent(in)            :: sp_thresh
+      integer(psb_ipk_), intent(in)           :: idiag, n, nzrmax
+      complex(psb_spk_), intent(inout)          :: zw(:)
+      integer(psb_ipk_), intent(out)          :: nz
+      integer(psb_ipk_), intent(out)          :: iz(:)
+      complex(psb_spk_), intent(out)            :: valz(:)
+      integer(psb_ipk_), intent(out)          :: info
+      integer(psb_ipk_), intent(in), optional :: istart
+      type(psb_i_heap), optional              :: iheap
+      integer(psb_ipk_), optional             :: ikr(:)
+    end subroutine psb_c_sparsify
+    subroutine psb_c_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info)
+      use psb_base_mod, only : psb_spk_, psb_ipk_
+      implicit none
+
+      real(psb_spk_), intent(in)     :: sp_thresh
+      integer(psb_ipk_), intent(in)    :: idiag, n, nzrmax
+      complex(psb_spk_), intent(inout)   :: zw(:)
+      integer(psb_ipk_), intent(out)   :: nz
+      integer(psb_ipk_), intent(out)   :: iz(:)
+      complex(psb_spk_), intent(out)      :: valz(:)
+      integer(psb_ipk_), intent(out)   :: info
+      integer(psb_ipk_), intent(inout) :: lhead, listv(:)
+      integer(psb_ipk_)                :: ikr(:)
+    end subroutine psb_c_sparsify_list
+
+  end interface
+
+end module psb_c_ainv_tools_mod
diff --git a/prec/psb_c_biconjg_mod.F90 b/prec/psb_c_biconjg_mod.F90
new file mode 100644
index 00000000..6af30b4f
--- /dev/null
+++ b/prec/psb_c_biconjg_mod.F90
@@ -0,0 +1,367 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+module psb_c_biconjg_mod
+
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_sparse_biconjg
+    module procedure psb_csparse_biconjg
+  end interface
+
+  abstract interface
+    subroutine psb_csparse_biconjg_variant(n,a,p,z,w,nzrmax,sp_thresh,info)
+      use psb_base_mod, only : psb_c_csr_sparse_mat, psb_c_csc_sparse_mat, &
+           &  psb_spk_, psb_ipk_
+      !
+      implicit none
+      integer(psb_ipk_), intent(in)               :: n
+      type(psb_c_csr_sparse_mat), intent(in)    :: a
+      type(psb_c_csc_sparse_mat), intent(inout) :: z,w
+      integer(psb_ipk_), intent(in)               :: nzrmax
+      real(psb_spk_), intent(in)                :: sp_thresh
+      complex(psb_spk_), intent(out)                :: p(:)
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_csparse_biconjg_variant
+  end interface
+
+
+  procedure(psb_csparse_biconjg_variant) :: psb_csparse_biconjg_llk,&
+       & psb_csparse_biconjg_s_llk,  psb_csparse_biconjg_s_ft_llk,&
+       &  psb_csparse_biconjg_llk_noth, psb_csparse_biconjg_mlk
+
+#if defined(HAVE_TUMA_SAINV)
+  procedure(psb_csparse_biconjg_variant)  ::  psb_csparse_tuma_sainv,&
+       & psb_csparse_tuma_lainv
+#endif
+
+contains
+
+  subroutine psb_csparse_biconjg(alg,n,acsr,p,z,w,nzrmax,sp_thresh,info)
+    use psb_base_mod
+    use psb_prec_const_mod
+    integer(psb_ipk_), intent(in)            :: alg,n
+    type(psb_c_csr_sparse_mat), intent(in) :: acsr
+    type(psb_cspmat_type), intent(out)     :: z, w
+    integer(psb_ipk_), intent(in)            :: nzrmax
+    real(psb_spk_), intent(in)             :: sp_thresh
+    complex(psb_spk_), intent(out)             :: p(:)
+    integer(psb_ipk_), intent(out)           :: info
+
+    type(psb_c_csc_sparse_mat)             :: zcsc,wcsc
+    integer(psb_ipk_) :: i,j,k,nrm
+    integer(psb_ipk_) :: err_act
+    character(len=20)  :: name='psb_sparse_biconjg'
+    integer(psb_ipk_), parameter :: variant=1
+
+
+    info = psb_success_
+    call psb_erractionsave(err_act)
+    if (psb_errstatus_fatal()) then
+      info = psb_err_internal_error_; goto 9999
+    end if
+
+    if (size(p)<n) then
+      write(psb_err_unit,*) 'Size of P wrong'
+      info = psb_err_internal_error_
+      call psb_errpush(psb_err_internal_error_,name,a_err='Allocate')
+      goto 9999
+    end if
+
+    if (nzrmax <= 0) write(psb_out_unit,'("Out nzrmax = ",i0)') nzrmax
+
+    select case(alg)
+    case (psb_ainv_llk_)
+      call psb_csparse_biconjg_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_s_llk_)
+      call psb_csparse_biconjg_s_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_mlk_)
+      call psb_csparse_biconjg_mlk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_s_ft_llk_)
+      call psb_csparse_biconjg_s_ft_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_llk_noth_)
+      call psb_csparse_biconjg_llk_noth(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+!#if defined(HAVE_TUMA_SAINV)
+!    case (psb_ainv_s_tuma_)
+!      call psb_csparse_tuma_sainv(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+!    case (psb_ainv_l_tuma_)
+!      call psb_csparse_tuma_lainv(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+!#endif
+    case default
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='Invalid alg')
+      goto 9999
+    end select
+
+    if (info /= 0) then
+      info = psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='sparse_orth')
+      goto 9999
+    end if
+
+    call z%mv_from(zcsc)
+    call z%cscnv(info,type='CSR')
+    call w%mv_from(wcsc)
+    call w%transp()
+    call w%cscnv(info,type='CSR')
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_csparse_biconjg
+
+
+  subroutine psb_c_spmspv(alpha,a,nx,ix,vx,beta,ny,iy,vy, info)
+    !
+    !  y = A x  sparse-sparse mode, A in CSC
+    !
+    use psb_base_mod
+    implicit none
+    integer(psb_ipk_), intent(in)            :: nx, ix(:)
+    complex(psb_spk_), intent(in)              :: alpha, beta, vx(:)
+    integer(psb_ipk_), intent(inout)         :: ny, iy(:)
+    complex(psb_spk_), intent(inout)           :: vy(:)
+    type(psb_c_csc_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(out)           :: info
+
+    integer(psb_ipk_) :: i,j,k,m,n, nv, na, iszy
+    integer(psb_ipk_), allocatable        :: iv(:)
+    complex(psb_spk_), allocatable           :: vv(:)
+
+    info = 0
+! !$    write(0,*) 'd_spmspv ',alpha,beta
+    if (beta == -cone) then
+      do i=1, ny
+        vy(i) = -vy(i)
+      end do
+    else if (beta == czero) then
+      do i=1, ny
+        vy(i) = czero
+      end do
+    else if (beta /= cone) then
+      do i=1, ny
+        vy(i) = vy(i) * beta
+      end do
+    end if
+    if (alpha == czero)  return
+    iszy = min(size(iy),size(vy))
+    m = a%get_nrows()
+    n = a%get_ncols()
+
+    if ((ny > m) .or. (nx > n)) then
+      write(0,*) 'Wrong input spmspv rows: ',m,ny,&
+           & ' cols: ',n,nx
+      info = -4
+      return
+    end if
+
+    allocate(iv(m), vv(m), stat=info)
+    if (info /= 0) then
+      write(0,*) 'Allocation error in spmspv'
+      info = -999
+      return
+    endif
+
+    do i = 1, nx
+      j  = ix(i)
+      ! Access column J of A
+      k  = a%icp(j)
+      na = a%icp(j+1) - a%icp(j)
+      call psb_nspaxpby(nv,iv,vv,&
+           & (alpha*vx(i)), na, a%ia(k:k+na-1), a%val(k:k+na-1),&
+           & cone, ny, iy, vy, info)
+
+      if (info /= 0) then
+        write(0,*) 'Internal error in spmspv from nspaxpby'
+        info = -998
+        return
+      endif
+      if (nv > iszy) then
+        write(0,*) 'Error in spmspv: out of memory for output'
+        info = -997
+        return
+      endif
+      ny = nv
+      iy(1:ny) = iv(1:ny)
+      vy(1:ny) = vv(1:ny)
+    end do
+  end subroutine psb_c_spmspv
+
+
+  subroutine psb_c_spvspm(alpha,a,nx,ix,vx,beta,ny,iy,vy, info)
+    !
+    !  y = x A  sparse-sparse mode, A in CSR
+    !
+    use psb_base_mod
+    implicit none
+    integer(psb_ipk_), intent(in)            :: nx, ix(:)
+    complex(psb_spk_), intent(in)              :: alpha, beta, vx(:)
+    integer(psb_ipk_), intent(inout)         :: ny, iy(:)
+    complex(psb_spk_), intent(inout)           :: vy(:)
+    type(psb_c_csr_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(out)           :: info
+
+    integer(psb_ipk_)              :: i,j,k,m,n, nv, na, iszy
+    integer(psb_ipk_), allocatable :: iv(:)
+    complex(psb_spk_), allocatable   :: vv(:)
+
+    info = 0
+! !$    write(0,*) 'd_spvspm ',alpha,beta
+    if (beta == -cone) then
+      do i=1, ny
+        vy(i) = -vy(i)
+      end do
+    else if (beta == czero) then
+      do i=1, ny
+        vy(i) = czero
+      end do
+    else if (beta /= cone) then
+      do i=1, ny
+        vy(i) = vy(i) * beta
+      end do
+    end if
+    if (alpha == czero)  return
+    iszy = min(size(iy),size(vy))
+    m = a%get_nrows()
+    n = a%get_ncols()
+
+    if ((ny > m) .or. (nx > n)) then
+      write(0,*) 'Wrong input spmspv rows: ',m,ny,&
+           & ' cols: ',n,nx
+      info = -4
+      return
+    end if
+
+    allocate(iv(m), vv(m), stat=info)
+    if (info /= 0) then
+      write(0,*) 'Allocation error in spmspv'
+      info = -999
+      return
+    endif
+
+    do i = 1, nx
+      j  = ix(i)
+      ! Access column J of A
+      k  = a%irp(j)
+      na = a%irp(j+1) - a%irp(j)
+      call psb_nspaxpby(nv,iv,vv,&
+           & (alpha*vx(i)), na, a%ja(k:k+na-1), a%val(k:k+na-1),&
+           & cone, ny, iy, vy, info)
+
+      if (info /= 0) then
+        write(0,*) 'Internal error in spmspv from nspaxpby'
+        info = -998
+        return
+      endif
+      if (nv > iszy) then
+        write(0,*) 'Error in spmspv: out of memory for output'
+        info = -997
+        return
+      endif
+      ny = nv
+      iy(1:ny) = iv(1:ny)
+      vy(1:ny) = vv(1:ny)
+    end do
+  end subroutine psb_c_spvspm
+
+end module psb_c_biconjg_mod
diff --git a/prec/psb_c_bjacprec.f90 b/prec/psb_c_bjacprec.f90
index 6526ac31..1a36109d 100644
--- a/prec/psb_c_bjacprec.f90
+++ b/prec/psb_c_bjacprec.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,15 +27,19 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 module psb_c_bjacprec
 
   use psb_c_base_prec_mod
   use psb_c_ilu_fact_mod
-  
+  use psb_c_ainv_fact_mod
+  use psb_c_invk_fact_mod
+  use psb_c_invt_fact_mod
+
   type, extends(psb_c_base_prec_type)   :: psb_c_bjac_prec_type
     integer(psb_ipk_), allocatable      :: iprcparm(:)
+    real(psb_spk_), allocatable       :: rprcparm(:)
     type(psb_cspmat_type), allocatable  :: av(:)
     type(psb_c_vect_type), allocatable  :: dv, wrk(:)
   contains
@@ -44,6 +48,7 @@ module psb_c_bjacprec
     procedure, pass(prec) :: precbld   => psb_c_bjac_precbld
     procedure, pass(prec) :: precinit  => psb_c_bjac_precinit
     procedure, pass(prec) :: precseti  => psb_c_bjac_precseti
+    procedure, pass(prec) :: precsetr  => psb_c_bjac_precsetr
     procedure, pass(prec) :: precdescr => psb_c_bjac_precdescr
     procedure, pass(prec) :: dump      => psb_c_bjac_dump
     procedure, pass(prec) :: clone     => psb_c_bjac_clone
@@ -55,15 +60,14 @@ module psb_c_bjacprec
     procedure, pass(prec) :: is_allocated_wrk => psb_c_bjac_is_allocated_wrk
   end type psb_c_bjac_prec_type
 
-  private :: psb_c_bjac_sizeof, psb_c_bjac_precdescr, psb_c_bjac_get_nzeros
- 
-
   character(len=15), parameter, private :: &
-       &  fact_names(0:2)=(/'None          ','ILU(n)        ',&
-       &  'ILU(eps)      '/)
+       &  fact_names(0:6)=(/'None          ','ILU(0)        ',&
+       &  'ILU(n)        ','ILU(eps)      ','AINV(eps)     ',&
+       &  'INVT          ','INVK          '/)
 
-  
-  interface  
+  private :: psb_c_bjac_sizeof, psb_c_bjac_precdescr, psb_c_bjac_get_nzeros
+
+  interface
     subroutine psb_c_bjac_dump(prec,info,prefix,head)
       import :: psb_ipk_, psb_desc_type, psb_c_bjac_prec_type, psb_c_vect_type, psb_spk_
       class(psb_c_bjac_prec_type), intent(in) :: prec
@@ -72,7 +76,7 @@ module psb_c_bjacprec
     end subroutine psb_c_bjac_dump
   end interface
 
-  interface  
+  interface
     subroutine psb_c_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_c_bjac_prec_type, psb_c_vect_type, psb_spk_
       type(psb_desc_type),intent(in)    :: desc_data
@@ -89,7 +93,7 @@ module psb_c_bjacprec
   interface
     subroutine psb_c_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_c_bjac_prec_type, psb_c_vect_type, psb_spk_
-      
+
       type(psb_desc_type),intent(in)    :: desc_data
       class(psb_c_bjac_prec_type), intent(inout)  :: prec
       complex(psb_spk_),intent(in)         :: alpha,beta
@@ -100,7 +104,7 @@ module psb_c_bjacprec
       complex(psb_spk_),intent(inout), optional, target :: work(:)
     end subroutine psb_c_bjac_apply
   end interface
-  
+
   interface
     subroutine psb_c_bjac_precinit(prec,info)
       import :: psb_ipk_, psb_desc_type, psb_c_bjac_prec_type, psb_c_vect_type, psb_spk_
@@ -108,7 +112,7 @@ module psb_c_bjacprec
       integer(psb_ipk_), intent(out)                     :: info
     end subroutine psb_c_bjac_precinit
   end interface
-  
+
   interface
     subroutine psb_c_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
       import :: psb_ipk_, psb_desc_type, psb_c_bjac_prec_type, psb_c_vect_type, psb_spk_, &
@@ -123,24 +127,34 @@ module psb_c_bjacprec
       class(psb_i_base_vect_type), intent(in), optional  :: imold
     end subroutine psb_c_bjac_precbld
   end interface
-  
+
   interface
     subroutine psb_c_bjac_precseti(prec,what,val,info)
       import :: psb_ipk_, psb_desc_type, psb_c_bjac_prec_type, psb_c_vect_type, psb_spk_
       class(psb_c_bjac_prec_type),intent(inout) :: prec
-      integer(psb_ipk_), intent(in)                      :: what 
-      integer(psb_ipk_), intent(in)                      :: val 
+      integer(psb_ipk_), intent(in)                      :: what
+      integer(psb_ipk_), intent(in)                      :: val
       integer(psb_ipk_), intent(out)                     :: info
     end subroutine psb_c_bjac_precseti
   end interface
-  
+
+  interface
+    subroutine psb_c_bjac_precsetr(prec,what,val,info)
+      import :: psb_ipk_, psb_desc_type, psb_c_bjac_prec_type, psb_c_vect_type, psb_spk_
+      class(psb_c_bjac_prec_type),intent(inout) :: prec
+      integer(psb_ipk_), intent(in)                      :: what
+      real(psb_spk_), intent(in)                      :: val
+      integer(psb_ipk_), intent(out)                     :: info
+    end subroutine psb_c_bjac_precsetr
+  end interface
+
 
 contains
 
   subroutine psb_c_bjac_precdescr(prec,iout,root)
     use psb_penv_mod
     use psb_error_mod
-    implicit none 
+    implicit none
 
     class(psb_c_bjac_prec_type), intent(in) :: prec
     integer(psb_ipk_), intent(in), optional    :: iout
@@ -154,19 +168,19 @@ contains
     call psb_erractionsave(err_act)
 
     info = psb_success_
-   
-    if (present(iout)) then 
+
+    if (present(iout)) then
       iout_ = iout
     else
-      iout_ = 6 
+      iout_ = 6
     end if
-    if (present(root)) then 
+    if (present(root)) then
       root_ = root
     else
       root_ = psb_root_
     end if
 
-    if (.not.allocated(prec%iprcparm)) then 
+    if (.not.allocated(prec%iprcparm)) then
       info = 1124
       call psb_errpush(info,name,a_err="preconditioner")
       goto 9999
@@ -180,26 +194,26 @@ contains
          &  write(iout_,*) trim(prec%desc_prefix()),' ',&
          & 'Block Jacobi with: ',&
          &  fact_names(prec%iprcparm(psb_f_type_))
-    
+
     call psb_erractionrestore(err_act)
     return
 
 9999 call psb_error_handler(err_act)
 
     return
-    
+
   end subroutine psb_c_bjac_precdescr
 
 
   function psb_c_bjac_sizeof(prec) result(val)
     class(psb_c_bjac_prec_type), intent(in) :: prec
     integer(psb_epk_) :: val
-    
+
     val = 0
-    if (allocated(prec%dv)) then 
+    if (allocated(prec%dv)) then
       val = val + (2*psb_sizeof_sp) * prec%dv%get_nrows()
     endif
-    if (allocated(prec%av)) then 
+    if (allocated(prec%av)) then
       val = val + prec%av(psb_l_pr_)%sizeof()
       val = val + prec%av(psb_u_pr_)%sizeof()
     endif
@@ -210,12 +224,12 @@ contains
 
     class(psb_c_bjac_prec_type), intent(in) :: prec
     integer(psb_epk_) :: val
-    
+
     val = 0
-    if (allocated(prec%dv)) then 
+    if (allocated(prec%dv)) then
       val = val + prec%dv%get_nrows()
     endif
-    if (allocated(prec%av)) then 
+    if (allocated(prec%av)) then
       val = val + prec%av(psb_l_pr_)%get_nzeros()
       val = val + prec%av(psb_u_pr_)%get_nzeros()
     endif
@@ -236,18 +250,18 @@ contains
     call psb_erractionsave(err_act)
 
     info = psb_success_
-    if (allocated(prec%av)) then 
-      do i=1,size(prec%av) 
+    if (allocated(prec%av)) then
+      do i=1,size(prec%av)
         call prec%av(i)%free()
       enddo
       deallocate(prec%av,stat=info)
     end if
 
-    if (allocated(prec%dv)) then 
+    if (allocated(prec%dv)) then
       call prec%dv%free(info)
       if (info == 0) deallocate(prec%dv,stat=info)
     end if
-    if (allocated(prec%iprcparm)) then 
+    if (allocated(prec%iprcparm)) then
       deallocate(prec%iprcparm,stat=info)
     end if
     call psb_erractionrestore(err_act)
@@ -283,19 +297,19 @@ contains
          & allocate(psb_c_bjac_prec_type :: precout, stat=info)
     if (info /= 0) goto 9999
     select type(pout => precout)
-    type is (psb_c_bjac_prec_type) 
+    type is (psb_c_bjac_prec_type)
       call pout%set_ctxt(prec%get_ctxt())
 
-      if (allocated(prec%av)) then 
+      if (allocated(prec%av)) then
         allocate(pout%av(size(prec%av)),stat=info)
-        do i=1,size(prec%av) 
+        do i=1,size(prec%av)
           if (info /= psb_success_) exit
           call prec%av(i)%clone(pout%av(i),info)
         enddo
         if (info /= psb_success_) goto 9999
       end if
 
-      if (allocated(prec%dv)) then 
+      if (allocated(prec%dv)) then
         allocate(pout%dv,stat=info)
         if (info == 0) call prec%dv%clone(pout%dv,info)
       end if
@@ -316,7 +330,7 @@ contains
   subroutine psb_c_bjac_allocate_wrk(prec,info,vmold,desc)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_c_bjac_prec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -326,11 +340,11 @@ contains
     ! Local variables
     integer(psb_ipk_) :: err_act, i
     character(len=20)   :: name
-    
+
     info=psb_success_
     name = 'psb_c_allocate_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
     if (allocated(prec%wrk)) then
       if (size(prec%wrk)<2) then
@@ -343,11 +357,11 @@ contains
     if (info /= 0) then
       info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate");   goto 9999
     end if
-    
+
     if (.not.allocated(prec%wrk)) then
-      if (.not.present(desc)) then 
+      if (.not.present(desc)) then
         info = psb_err_internal_error_; call psb_errpush(info,name,a_err="no desc?");   goto 9999
-      end if      
+      end if
       allocate(prec%wrk(2),stat=info)
       do i=1, 2
         if (info == 0)   call psb_geall(prec%wrk(i),desc,info)
@@ -357,19 +371,19 @@ contains
     if (info /= 0) then
       info = psb_err_internal_error_; call psb_errpush(info,name,a_err="allocate");   goto 9999
     end if
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_c_bjac_allocate_wrk
 
   subroutine psb_c_bjac_free_wrk(prec,info)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_c_bjac_prec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -378,14 +392,14 @@ contains
     integer(psb_ipk_) :: err_act
     integer(psb_ipk_) :: i
     character(len=20) :: name
-    
+
     info=psb_success_
     name = 'psb_c_allocate_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
 
-    info = psb_success_ 
+    info = psb_success_
     if (allocated(prec%wrk)) then
       do i=1,size(prec%wrk)
         if (info == 0) call prec%wrk(i)%free(info)
@@ -395,29 +409,29 @@ contains
     if (info /= 0) then
       info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate");   goto 9999
     end if
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_c_bjac_free_wrk
 
   function psb_c_bjac_is_allocated_wrk(prec) result(res)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_c_bjac_prec_type), intent(in) :: prec
     logical :: res
 
-    ! In the base version we can say yes, because 
+    ! In the base version we can say yes, because
     ! there is nothing to allocate
 
     res = allocated(prec%wrk)
-    
+
   end function psb_c_bjac_is_allocated_wrk
 
-  
+
 end module psb_c_bjacprec
diff --git a/prec/psb_c_invk_fact_mod.f90 b/prec/psb_c_invk_fact_mod.f90
new file mode 100644
index 00000000..620a8adf
--- /dev/null
+++ b/prec/psb_c_invk_fact_mod.f90
@@ -0,0 +1,165 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG4PSBLAS, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! File: psb_c_invk_fact_mod.f90
+!
+! Module: psb_c_invk_fact_mod
+!
+!  This module defines some interfaces used internally by the implementation of
+!  psb_c_invk_solver, but not visible to the end user.
+!
+!
+module psb_c_invk_fact_mod
+
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_invk_fact
+    subroutine psb_c_invk_bld(a,fill1, fill2,lmat,d,umat,desc,info,blck)
+      ! import
+      import psb_cspmat_type, psb_ipk_, psb_spk_, psb_desc_type
+      ! Arguments
+      type(psb_cspmat_type), intent(in), target   :: a
+      integer(psb_ipk_), intent(in)               :: fill1, fill2
+      type(psb_cspmat_type), intent(inout)        :: lmat, umat
+      complex(psb_spk_), allocatable                 :: d(:)
+      Type(psb_desc_type), Intent(inout)          :: desc
+      integer(psb_ipk_), intent(out)              :: info
+      type(psb_cspmat_type), intent(in), optional :: blck
+    end subroutine
+  end interface
+
+  ! Inner workings
+  interface psb_sparse_invk
+    subroutine psb_csparse_invk(n,a,z,fill_in,info,inlevs)
+      ! Import
+      import psb_ipk_, psb_cspmat_type
+      ! Arguments
+      integer(psb_ipk_), intent(in)           :: n
+      type(psb_cspmat_type), intent(in)       :: a
+      type(psb_cspmat_type), intent(inout)    :: z
+      integer(psb_ipk_), intent(in)           :: fill_in
+      integer(psb_ipk_), intent(out)          :: info
+      integer(psb_ipk_), intent(in), optional :: inlevs(:)
+    end subroutine psb_csparse_invk
+  end interface
+
+  interface psb_invk_inv
+    subroutine psb_cinvk_inv(fill_in,i,row,rowlevs,heap,uia1,uia2,uaspk,uplevs,&
+         & nidx,idxs,info)
+
+      use psb_base_mod, only : psb_cspmat_type, psb_spk_, psb_i_heap, psb_ipk_
+      implicit none
+
+      ! Arguments
+      type(psb_i_heap), intent(inout)               :: heap
+      integer(psb_ipk_), intent(in)                 :: i, fill_in
+      integer(psb_ipk_), intent(inout)              :: nidx,info
+      integer(psb_ipk_), intent(inout)              :: rowlevs(:)
+      integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+      integer(psb_ipk_), intent(in)                 :: uia1(:),uia2(:),uplevs(:)
+      complex(psb_spk_), intent(in)                    :: uaspk(:)
+      complex(psb_spk_), intent(inout)                 :: row(:)
+
+
+    end subroutine psb_cinvk_inv
+  end interface
+
+  interface  psb_invk_copyin
+    subroutine psb_c_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,&
+         & ktrw,trw,info,sign,inlevs)
+      ! Import
+      use psb_base_mod, only : psb_c_csr_sparse_mat, psb_c_coo_sparse_mat,&
+           & psb_spk_, psb_i_heap, psb_ipk_
+      implicit none
+      ! Arguments
+      type(psb_c_csr_sparse_mat), intent(in)    :: a
+      type(psb_c_coo_sparse_mat), intent(inout) :: trw
+      integer(psb_ipk_), intent(in)             :: i,m,jmin,jmax
+      integer(psb_ipk_), intent(inout)          :: ktrw,info
+      integer(psb_ipk_), intent(inout)          :: rowlevs(:)
+      complex(psb_spk_), intent(inout)             :: row(:)
+      type(psb_i_heap), intent(inout)           :: heap
+      real(psb_spk_), optional, intent(in)      :: sign
+      integer(psb_ipk_), intent(in), optional   :: inlevs(:)
+    end subroutine psb_c_invk_copyin
+  end interface
+
+  interface psb_invk_copyout
+    subroutine psb_c_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,&
+         &  l2,uia1,uia2,uaspk,info)
+      use psb_base_mod, only : psb_cspmat_type, psb_spk_, psb_i_heap, psb_ipk_
+      implicit none
+      ! Arguments
+      integer(psb_ipk_), intent(in)                 :: fill_in, i, m, nidx
+      integer(psb_ipk_), intent(inout)              :: l2, info
+      integer(psb_ipk_), intent(inout)              :: rowlevs(:), idxs(:)
+      integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:)
+      complex(psb_spk_), allocatable, intent(inout)    :: uaspk(:)
+      complex(psb_spk_), intent(inout)                 :: row(:)
+    end subroutine psb_c_invk_copyout
+  end interface
+
+end module
diff --git a/prec/psb_c_invt_fact_mod.f90 b/prec/psb_c_invt_fact_mod.f90
new file mode 100644
index 00000000..841c39b1
--- /dev/null
+++ b/prec/psb_c_invt_fact_mod.f90
@@ -0,0 +1,168 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG4PSBLAS, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! File: psb_c_invt_fact_mod.f90
+!
+! Module: psb_c_invt_fact_mod
+!
+!  This module defines some interfaces used internally by the implementation of
+!  psb_c_invt_solver, but not visible to the end user.
+!
+!
+module psb_c_invt_fact_mod
+
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_invt_fact
+      subroutine psb_c_invt_bld(a,fillin,invfill,thresh,invthresh,&
+         & lmat,d,umat,desc,info,blck)
+         ! Import
+         import psb_cspmat_type, psb_spk_, psb_ipk_, psb_desc_type
+         ! Arguments
+         type(psb_cspmat_type), intent(in), target   :: a
+         integer(psb_ipk_), intent(in)               :: fillin,invfill
+         real(psb_spk_), intent(in)                :: thresh
+         real(psb_spk_), intent(in)                  :: invthresh
+         type(psb_cspmat_type), intent(inout)        :: lmat, umat
+         complex(psb_spk_), allocatable                 :: d(:)
+         Type(psb_desc_type), Intent(inout)          :: desc
+         integer(psb_ipk_), intent(out)              :: info
+         type(psb_cspmat_type), intent(in), optional :: blck
+      end subroutine psb_c_invt_bld
+  end interface
+
+  ! Interfaces for the inner workings
+
+  interface
+      subroutine psb_csparse_invt(n,a,z,nzrmax,sp_thresh,info)
+         ! Import
+         import psb_cspmat_type, psb_spk_, psb_ipk_
+         ! Arguments
+         integer(psb_ipk_), intent(in)        :: n
+         type(psb_cspmat_type), intent(in)    :: a
+         type(psb_cspmat_type), intent(inout) :: z
+         integer(psb_ipk_), intent(in)        :: nzrmax
+         real(psb_spk_), intent(in)           :: sp_thresh
+         integer(psb_ipk_), intent(out)       :: info
+      end subroutine psb_csparse_invt
+  end interface
+
+  interface
+     subroutine psb_c_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,&
+          & irwt,ktrw,trw,info,sign)
+       ! Import
+       import psb_c_csr_sparse_mat, psb_c_coo_sparse_mat, psb_spk_, &
+         & psb_ipk_, psb_i_heap
+       ! Arguments
+       type(psb_c_csr_sparse_mat), intent(in)    :: a
+       type(psb_c_coo_sparse_mat), intent(inout) :: trw
+       integer(psb_ipk_), intent(in)             :: i, m,jmin,jmax,jd
+       integer(psb_ipk_), intent(inout)          :: ktrw,nlw,nup,jmaxup,info
+       integer(psb_ipk_), intent(inout)          :: irwt(:)
+       real(psb_spk_), intent(inout)          :: nrmi
+       complex(psb_spk_), intent(inout)            :: row(:)
+       type(psb_i_heap), intent(inout)           :: heap
+       real(psb_spk_), intent(in), optional    :: sign
+    end subroutine psb_c_invt_copyin
+  end interface
+
+  interface
+     subroutine psb_c_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, &
+          & nidx,idxs,l2,ja,irp,val,info)
+      ! Import
+      import psb_spk_, psb_ipk_
+      ! Arguments
+      integer(psb_ipk_), intent(in)                 :: fill_in,i,m,nidx,nlw,nup,jmaxup
+      integer(psb_ipk_), intent(in)                 :: idxs(:)
+      integer(psb_ipk_), intent(inout)              :: l2, info
+      integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:)
+      real(psb_spk_), intent(in)                    :: thres,nrmi
+      complex(psb_spk_),allocatable, intent(inout)     :: val(:)
+      complex(psb_spk_), intent(inout)                 :: row(:)
+     end subroutine psb_c_invt_copyout
+  end interface
+
+  interface psb_invt_inv
+    subroutine psb_c_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val, &
+      & nidx,idxs,info)
+      ! import
+      import psb_spk_, psb_i_heap, psb_ipk_
+      ! Arguments
+      type(psb_i_heap), intent(inout)     :: heap
+      integer(psb_ipk_), intent(in)       :: i
+      integer(psb_ipk_), intent(inout)    :: nidx,info
+      integer(psb_ipk_), intent(inout)    :: irwt(:)
+      real(psb_spk_), intent(in)          :: thres,nrmi
+      integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+      integer(psb_ipk_), intent(in)       :: ja(:),irp(:)
+      complex(psb_spk_), intent(in)          :: val(:)
+      complex(psb_spk_), intent(inout)       :: row(:)
+    end subroutine
+  end interface
+
+contains
+
+end module psb_c_invt_fact_mod
diff --git a/prec/psb_c_prec_type.f90 b/prec/psb_c_prec_type.f90
index cd5e64e4..38080cee 100644
--- a/prec/psb_c_prec_type.f90
+++ b/prec/psb_c_prec_type.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,8 +27,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !!	Module to   define PREC_DATA,           !!
 !!      structure for preconditioning.          !!
@@ -54,6 +54,10 @@ module psb_c_prec_type
     procedure, pass(prec)               :: build  => psb_cprecbld
     procedure, pass(prec)               :: init   => psb_cprecinit
     procedure, pass(prec)               :: descr  => psb_cfile_prec_descr
+    procedure, pass(prec)               :: cseti  => psb_ccprecseti
+    procedure, pass(prec)               :: csetc  => psb_ccprecsetc
+    procedure, pass(prec)               :: csetr  => psb_ccprecsetr
+    generic, public                     :: set => cseti, csetc, csetr
     procedure, pass(prec)               :: allocate_wrk => psb_c_allocate_wrk
     procedure, pass(prec)               :: free_wrk => psb_c_free_wrk
     procedure, pass(prec)               :: is_allocated_wrk => psb_c_is_allocated_wrk
@@ -102,7 +106,7 @@ module psb_c_prec_type
     module procedure psb_cprec_sizeof
   end interface
 
-  interface 
+  interface
     subroutine psb_c_apply2_vect(prec,x,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_cprec_type, psb_c_vect_type, psb_spk_
       type(psb_desc_type),intent(in)       :: desc_data
@@ -114,8 +118,8 @@ module psb_c_prec_type
       complex(psb_spk_),intent(inout), optional, target :: work(:)
     end subroutine psb_c_apply2_vect
   end interface
-  
-  interface 
+
+  interface
     subroutine psb_c_apply1_vect(prec,x,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_cprec_type, psb_c_vect_type, psb_spk_
         type(psb_desc_type),intent(in)       :: desc_data
@@ -126,7 +130,7 @@ module psb_c_prec_type
       complex(psb_spk_),intent(inout), optional, target :: work(:)
     end subroutine psb_c_apply1_vect
   end interface
-  
+
   interface
     subroutine psb_c_apply2v(prec,x,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_cprec_type, psb_c_vect_type, psb_spk_
@@ -139,8 +143,8 @@ module psb_c_prec_type
       complex(psb_spk_),intent(inout), optional, target :: work(:)
     end subroutine psb_c_apply2v
   end interface
-  
-  interface 
+
+  interface
     subroutine psb_c_apply1v(prec,x,desc_data,info,trans)
       import :: psb_ipk_, psb_desc_type, psb_cprec_type, psb_c_vect_type, psb_spk_
       type(psb_desc_type),intent(in)    :: desc_data
@@ -150,56 +154,89 @@ module psb_c_prec_type
       character(len=1), optional        :: trans
     end subroutine psb_c_apply1v
   end interface
-  
+
+  interface
+  subroutine psb_ccprecseti(prec,what,val,info,ilev,ilmax,pos,idx)
+    import :: psb_cprec_type, psb_cspmat_type, psb_desc_type, psb_spk_, &
+      & psb_ipk_
+    class(psb_cprec_type), intent(inout)   :: prec
+    character(len=*), intent(in)             :: what
+    integer(psb_ipk_), intent(in)            :: val
+    integer(psb_ipk_), intent(out)           :: info
+    integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+    character(len=*), optional, intent(in)   :: pos
+  end subroutine psb_ccprecseti
+  subroutine psb_ccprecsetr(prec,what,val,info,ilev,ilmax,pos,idx)
+    import :: psb_cprec_type, psb_cspmat_type, psb_desc_type, psb_spk_, &
+      & psb_ipk_
+    class(psb_cprec_type), intent(inout)   :: prec
+    character(len=*), intent(in)             :: what
+    real(psb_spk_), intent(in)             :: val
+    integer(psb_ipk_), intent(out)           :: info
+    integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+    character(len=*), optional, intent(in)   :: pos
+  end subroutine psb_ccprecsetr
+  subroutine psb_ccprecsetc(prec,what,string,info,ilev,ilmax,pos,idx)
+    import :: psb_cprec_type, psb_cspmat_type, psb_desc_type, psb_spk_, &
+      & psb_ipk_
+    class(psb_cprec_type), intent(inout)   :: prec
+    character(len=*), intent(in)             :: what
+    character(len=*), intent(in)             :: string
+    integer(psb_ipk_), intent(out)           :: info
+    integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+    character(len=*), optional, intent(in)   :: pos
+  end subroutine psb_ccprecsetc
+end interface
+
 contains
 
   subroutine psb_cfile_prec_descr(prec,iout, root)
     use psb_base_mod
-    implicit none 
+    implicit none
     class(psb_cprec_type), intent(in) :: prec
     integer(psb_ipk_), intent(in), optional    :: iout
     integer(psb_ipk_), intent(in), optional    :: root
     integer(psb_ipk_) :: iout_,info
-    character(len=20) :: name='prec_descr' 
-    
-    if (present(iout)) then 
+    character(len=20) :: name='prec_descr'
+
+    if (present(iout)) then
       iout_ = iout
     else
-      iout_ = 6 
+      iout_ = 6
     end if
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = 1124
       call psb_errpush(info,name,a_err="preconditioner")
     end if
     call prec%prec%descr(iout=iout,root=root)
-    
+
   end subroutine psb_cfile_prec_descr
 
   subroutine psb_c_prec_dump(prec,info,prefix,head)
-    implicit none 
+    implicit none
     type(psb_cprec_type), intent(in) :: prec
     integer(psb_ipk_), intent(out)             :: info
     character(len=*), intent(in), optional :: prefix,head
-    !  len of prefix_ 
+    !  len of prefix_
 
     info = 0
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = -1
       write(psb_err_unit,*) 'Trying to dump a non-built preconditioner'
       return
     end if
-    
+
     call prec%prec%dump(info,prefix,head)
-    
-    
+
+
   end subroutine psb_c_prec_dump
 
   subroutine psb_c_allocate_wrk(prec,info,vmold,desc)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_cprec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -209,33 +246,33 @@ contains
     ! Local variables
     integer(psb_ipk_) :: err_act
     character(len=20)   :: name
-    
+
     info=psb_success_
     name = 'psb_c_allocate_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = -1
       write(psb_err_unit,*) 'Trying to allocate wrk to a non-built preconditioner'
       return
     end if
-    
+
     call prec%prec%allocate_wrk(info,vmold=vmold,desc=desc)
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_c_allocate_wrk
 
   subroutine psb_c_free_wrk(prec,info)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_cprec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -243,47 +280,47 @@ contains
     ! Local variables
     integer(psb_ipk_) :: err_act
     character(len=20)   :: name
-    
+
     info=psb_success_
     name = 'psb_c_free_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = -1
       write(psb_err_unit,*) 'Trying to free a non-built preconditioner'
       return
     end if
-    
+
     call prec%prec%free_wrk(info)
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_c_free_wrk
 
   function psb_c_is_allocated_wrk(prec) result(res)
     implicit none
-    
+
     ! Arguments
     class(psb_cprec_type), intent(in) :: prec
     logical :: res
 
     if (.not.allocated(prec%prec)) then
       res = .false.
-    else 
+    else
       res = prec%prec%is_allocated_wrk()
     end if
-    
+
   end function psb_c_is_allocated_wrk
 
   subroutine psb_c_precfree(p,info)
     use psb_base_mod
-    implicit none 
+    implicit none
     type(psb_cprec_type), intent(inout) :: p
     integer(psb_ipk_), intent(out)                :: info
     integer(psb_ipk_) :: me, err_act,i
@@ -303,14 +340,14 @@ contains
     return
 
 9999 call psb_error_handler(err_act)
-    
+
     return
 
   end subroutine psb_c_precfree
 
   subroutine psb_c_prec_free(prec,info)
     use psb_base_mod
-    implicit none 
+    implicit none
     class(psb_cprec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)         :: info
     integer(psb_ipk_) :: me, err_act,i
@@ -324,7 +361,7 @@ contains
 
     me=-1
 
-    if (allocated(prec%prec)) then 
+    if (allocated(prec%prec)) then
       call prec%prec%free(info)
       if (info /= psb_success_) goto 9999
       deallocate(prec%prec,stat=info)
@@ -343,26 +380,26 @@ contains
     class(psb_cprec_type), intent(in) :: prec
     integer(psb_epk_) :: val
     integer(psb_ipk_) :: i
-    
+
     val = 0
-    if (allocated(prec%prec)) then 
+    if (allocated(prec%prec)) then
       val = val + prec%prec%sizeof()
     end if
-    
+
   end function psb_cprec_sizeof
 
   subroutine psb_c_prec_clone(prec,precout,info)
-    implicit none 
+    implicit none
     class(psb_cprec_type), intent(inout) :: prec
     class(psb_cprec_type), intent(inout) :: precout
     integer(psb_ipk_), intent(out)             :: info
 
     info = psb_success_
     call prec%free(info)
-    if (allocated(prec%prec)) then 
+    if (allocated(prec%prec)) then
       call prec%prec%clone(precout%prec,info)
     end if
-    
+
   end subroutine psb_c_prec_clone
 
 end module psb_c_prec_type
diff --git a/prec/psb_d_ainv_fact_mod.f90 b/prec/psb_d_ainv_fact_mod.f90
new file mode 100644
index 00000000..8eb6fbc8
--- /dev/null
+++ b/prec/psb_d_ainv_fact_mod.f90
@@ -0,0 +1,98 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from MLD2P4, original copyright below.
+!
+!
+!                             MLD2P4  version 2.2
+!    MultiLevel Domain Decomposition Parallel Preconditioners Package
+!               based on PSBLAS (Parallel Sparse BLAS version 3.5)
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the MLD2P4 group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+!
+! File: psb_d_ainv_fact_mod.f90
+!
+! Module: psb_d_ainv_fact_mod
+!
+!  This module defines some interfaces used internally by the implementation of
+!  psb_d_ainv_solver, but not visible to the end user.
+!
+!
+module psb_d_ainv_fact_mod
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_ainv_fact
+    subroutine psb_d_ainv_bld(a,alg,fillin,thresh,wmat,d,zmat,desc,info,blck,iscale)
+      import psb_dspmat_type, psb_dpk_, psb_ipk_, psb_desc_type
+      type(psb_dspmat_type), intent(in), target   :: a
+      integer(psb_ipk_), intent(in)                 :: fillin,alg
+      real(psb_dpk_), intent(in)                  :: thresh
+      type(psb_dspmat_type), intent(inout)        :: wmat, zmat
+      real(psb_dpk_), allocatable                  :: d(:)
+      Type(psb_desc_type), Intent(in)               :: desc
+      integer(psb_ipk_), intent(out)                :: info
+      type(psb_dspmat_type), intent(in), optional :: blck
+      integer(psb_ipk_), intent(in), optional       :: iscale
+    end subroutine psb_d_ainv_bld
+  end interface
+
+end module psb_d_ainv_fact_mod
diff --git a/prec/psb_d_ainv_tools_mod.f90 b/prec/psb_d_ainv_tools_mod.f90
new file mode 100644
index 00000000..7329533b
--- /dev/null
+++ b/prec/psb_d_ainv_tools_mod.f90
@@ -0,0 +1,132 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+!
+!
+!
+!
+module psb_d_ainv_tools_mod
+
+  interface sp_drop
+    subroutine psb_d_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info)
+      use psb_base_mod, only : psb_dpk_, psb_ipk_
+      implicit none
+      real(psb_dpk_), intent(in)      :: sp_thresh
+      integer(psb_ipk_), intent(in)     :: idiag, nzrmax
+      integer(psb_ipk_), intent(inout)  :: nz
+      integer(psb_ipk_), intent(inout)  :: iz(:)
+      real(psb_dpk_), intent(inout)    :: valz(:)
+      integer(psb_ipk_), intent(out)    :: info
+    end subroutine psb_d_sp_drop
+  end interface
+
+  interface rwclip
+    subroutine psb_d_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax)
+      use psb_base_mod, only : psb_dpk_, psb_ipk_
+
+      implicit none
+      integer(psb_ipk_), intent(inout)  :: nz
+      integer(psb_ipk_), intent(inout)  :: ia(*), ja(*)
+      real(psb_dpk_), intent(inout)    :: val(*)
+      integer(psb_ipk_), intent(in)     :: imin,imax,jmin,jmax
+    end subroutine psb_d_rwclip
+  end interface
+
+  interface sparsify
+    subroutine psb_d_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info, &
+         & istart,iheap,ikr)
+      use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_i_heap
+      implicit none
+
+      real(psb_dpk_), intent(in)            :: sp_thresh
+      integer(psb_ipk_), intent(in)           :: idiag, n, nzrmax
+      real(psb_dpk_), intent(inout)          :: zw(:)
+      integer(psb_ipk_), intent(out)          :: nz
+      integer(psb_ipk_), intent(out)          :: iz(:)
+      real(psb_dpk_), intent(out)            :: valz(:)
+      integer(psb_ipk_), intent(out)          :: info
+      integer(psb_ipk_), intent(in), optional :: istart
+      type(psb_i_heap), optional              :: iheap
+      integer(psb_ipk_), optional             :: ikr(:)
+    end subroutine psb_d_sparsify
+    subroutine psb_d_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info)
+      use psb_base_mod, only : psb_dpk_, psb_ipk_
+      implicit none
+
+      real(psb_dpk_), intent(in)     :: sp_thresh
+      integer(psb_ipk_), intent(in)    :: idiag, n, nzrmax
+      real(psb_dpk_), intent(inout)   :: zw(:)
+      integer(psb_ipk_), intent(out)   :: nz
+      integer(psb_ipk_), intent(out)   :: iz(:)
+      real(psb_dpk_), intent(out)      :: valz(:)
+      integer(psb_ipk_), intent(out)   :: info
+      integer(psb_ipk_), intent(inout) :: lhead, listv(:)
+      integer(psb_ipk_)                :: ikr(:)
+    end subroutine psb_d_sparsify_list
+
+  end interface
+
+end module psb_d_ainv_tools_mod
diff --git a/prec/psb_d_biconjg_mod.F90 b/prec/psb_d_biconjg_mod.F90
new file mode 100644
index 00000000..09358744
--- /dev/null
+++ b/prec/psb_d_biconjg_mod.F90
@@ -0,0 +1,367 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+module psb_d_biconjg_mod
+
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_sparse_biconjg
+    module procedure psb_dsparse_biconjg
+  end interface
+
+  abstract interface
+    subroutine psb_dsparse_biconjg_variant(n,a,p,z,w,nzrmax,sp_thresh,info)
+      use psb_base_mod, only : psb_d_csr_sparse_mat, psb_d_csc_sparse_mat, &
+           &  psb_dpk_, psb_ipk_
+      !
+      implicit none
+      integer(psb_ipk_), intent(in)               :: n
+      type(psb_d_csr_sparse_mat), intent(in)    :: a
+      type(psb_d_csc_sparse_mat), intent(inout) :: z,w
+      integer(psb_ipk_), intent(in)               :: nzrmax
+      real(psb_dpk_), intent(in)                :: sp_thresh
+      real(psb_dpk_), intent(out)                :: p(:)
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_dsparse_biconjg_variant
+  end interface
+
+
+  procedure(psb_dsparse_biconjg_variant) :: psb_dsparse_biconjg_llk,&
+       & psb_dsparse_biconjg_s_llk,  psb_dsparse_biconjg_s_ft_llk,&
+       &  psb_dsparse_biconjg_llk_noth, psb_dsparse_biconjg_mlk
+
+#if defined(HAVE_TUMA_SAINV)
+  procedure(psb_dsparse_biconjg_variant)  ::  psb_dsparse_tuma_sainv,&
+       & psb_dsparse_tuma_lainv
+#endif
+
+contains
+
+  subroutine psb_dsparse_biconjg(alg,n,acsr,p,z,w,nzrmax,sp_thresh,info)
+    use psb_base_mod
+    use psb_prec_const_mod
+    integer(psb_ipk_), intent(in)            :: alg,n
+    type(psb_d_csr_sparse_mat), intent(in) :: acsr
+    type(psb_dspmat_type), intent(out)     :: z, w
+    integer(psb_ipk_), intent(in)            :: nzrmax
+    real(psb_dpk_), intent(in)             :: sp_thresh
+    real(psb_dpk_), intent(out)             :: p(:)
+    integer(psb_ipk_), intent(out)           :: info
+
+    type(psb_d_csc_sparse_mat)             :: zcsc,wcsc
+    integer(psb_ipk_) :: i,j,k,nrm
+    integer(psb_ipk_) :: err_act
+    character(len=20)  :: name='psb_sparse_biconjg'
+    integer(psb_ipk_), parameter :: variant=1
+
+
+    info = psb_success_
+    call psb_erractionsave(err_act)
+    if (psb_errstatus_fatal()) then
+      info = psb_err_internal_error_; goto 9999
+    end if
+
+    if (size(p)<n) then
+      write(psb_err_unit,*) 'Size of P wrong'
+      info = psb_err_internal_error_
+      call psb_errpush(psb_err_internal_error_,name,a_err='Allocate')
+      goto 9999
+    end if
+
+    if (nzrmax <= 0) write(psb_out_unit,'("Out nzrmax = ",i0)') nzrmax
+
+    select case(alg)
+    case (psb_ainv_llk_)
+      call psb_dsparse_biconjg_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_s_llk_)
+      call psb_dsparse_biconjg_s_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_mlk_)
+      call psb_dsparse_biconjg_mlk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_s_ft_llk_)
+      call psb_dsparse_biconjg_s_ft_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_llk_noth_)
+      call psb_dsparse_biconjg_llk_noth(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+!#if defined(HAVE_TUMA_SAINV)
+!    case (psb_ainv_s_tuma_)
+!      call psb_dsparse_tuma_sainv(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+!    case (psb_ainv_l_tuma_)
+!      call psb_dsparse_tuma_lainv(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+!#endif
+    case default
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='Invalid alg')
+      goto 9999
+    end select
+
+    if (info /= 0) then
+      info = psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='sparse_orth')
+      goto 9999
+    end if
+
+    call z%mv_from(zcsc)
+    call z%cscnv(info,type='CSR')
+    call w%mv_from(wcsc)
+    call w%transp()
+    call w%cscnv(info,type='CSR')
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_dsparse_biconjg
+
+
+  subroutine psb_d_spmspv(alpha,a,nx,ix,vx,beta,ny,iy,vy, info)
+    !
+    !  y = A x  sparse-sparse mode, A in CSC
+    !
+    use psb_base_mod
+    implicit none
+    integer(psb_ipk_), intent(in)            :: nx, ix(:)
+    real(psb_dpk_), intent(in)              :: alpha, beta, vx(:)
+    integer(psb_ipk_), intent(inout)         :: ny, iy(:)
+    real(psb_dpk_), intent(inout)           :: vy(:)
+    type(psb_d_csc_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(out)           :: info
+
+    integer(psb_ipk_) :: i,j,k,m,n, nv, na, iszy
+    integer(psb_ipk_), allocatable        :: iv(:)
+    real(psb_dpk_), allocatable           :: vv(:)
+
+    info = 0
+! !$    write(0,*) 'd_spmspv ',alpha,beta
+    if (beta == -done) then
+      do i=1, ny
+        vy(i) = -vy(i)
+      end do
+    else if (beta == dzero) then
+      do i=1, ny
+        vy(i) = dzero
+      end do
+    else if (beta /= done) then
+      do i=1, ny
+        vy(i) = vy(i) * beta
+      end do
+    end if
+    if (alpha == dzero)  return
+    iszy = min(size(iy),size(vy))
+    m = a%get_nrows()
+    n = a%get_ncols()
+
+    if ((ny > m) .or. (nx > n)) then
+      write(0,*) 'Wrong input spmspv rows: ',m,ny,&
+           & ' cols: ',n,nx
+      info = -4
+      return
+    end if
+
+    allocate(iv(m), vv(m), stat=info)
+    if (info /= 0) then
+      write(0,*) 'Allocation error in spmspv'
+      info = -999
+      return
+    endif
+
+    do i = 1, nx
+      j  = ix(i)
+      ! Access column J of A
+      k  = a%icp(j)
+      na = a%icp(j+1) - a%icp(j)
+      call psb_nspaxpby(nv,iv,vv,&
+           & (alpha*vx(i)), na, a%ia(k:k+na-1), a%val(k:k+na-1),&
+           & done, ny, iy, vy, info)
+
+      if (info /= 0) then
+        write(0,*) 'Internal error in spmspv from nspaxpby'
+        info = -998
+        return
+      endif
+      if (nv > iszy) then
+        write(0,*) 'Error in spmspv: out of memory for output'
+        info = -997
+        return
+      endif
+      ny = nv
+      iy(1:ny) = iv(1:ny)
+      vy(1:ny) = vv(1:ny)
+    end do
+  end subroutine psb_d_spmspv
+
+
+  subroutine psb_d_spvspm(alpha,a,nx,ix,vx,beta,ny,iy,vy, info)
+    !
+    !  y = x A  sparse-sparse mode, A in CSR
+    !
+    use psb_base_mod
+    implicit none
+    integer(psb_ipk_), intent(in)            :: nx, ix(:)
+    real(psb_dpk_), intent(in)              :: alpha, beta, vx(:)
+    integer(psb_ipk_), intent(inout)         :: ny, iy(:)
+    real(psb_dpk_), intent(inout)           :: vy(:)
+    type(psb_d_csr_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(out)           :: info
+
+    integer(psb_ipk_)              :: i,j,k,m,n, nv, na, iszy
+    integer(psb_ipk_), allocatable :: iv(:)
+    real(psb_dpk_), allocatable   :: vv(:)
+
+    info = 0
+! !$    write(0,*) 'd_spvspm ',alpha,beta
+    if (beta == -done) then
+      do i=1, ny
+        vy(i) = -vy(i)
+      end do
+    else if (beta == dzero) then
+      do i=1, ny
+        vy(i) = dzero
+      end do
+    else if (beta /= done) then
+      do i=1, ny
+        vy(i) = vy(i) * beta
+      end do
+    end if
+    if (alpha == dzero)  return
+    iszy = min(size(iy),size(vy))
+    m = a%get_nrows()
+    n = a%get_ncols()
+
+    if ((ny > m) .or. (nx > n)) then
+      write(0,*) 'Wrong input spmspv rows: ',m,ny,&
+           & ' cols: ',n,nx
+      info = -4
+      return
+    end if
+
+    allocate(iv(m), vv(m), stat=info)
+    if (info /= 0) then
+      write(0,*) 'Allocation error in spmspv'
+      info = -999
+      return
+    endif
+
+    do i = 1, nx
+      j  = ix(i)
+      ! Access column J of A
+      k  = a%irp(j)
+      na = a%irp(j+1) - a%irp(j)
+      call psb_nspaxpby(nv,iv,vv,&
+           & (alpha*vx(i)), na, a%ja(k:k+na-1), a%val(k:k+na-1),&
+           & done, ny, iy, vy, info)
+
+      if (info /= 0) then
+        write(0,*) 'Internal error in spmspv from nspaxpby'
+        info = -998
+        return
+      endif
+      if (nv > iszy) then
+        write(0,*) 'Error in spmspv: out of memory for output'
+        info = -997
+        return
+      endif
+      ny = nv
+      iy(1:ny) = iv(1:ny)
+      vy(1:ny) = vv(1:ny)
+    end do
+  end subroutine psb_d_spvspm
+
+end module psb_d_biconjg_mod
diff --git a/prec/psb_d_bjacprec.f90 b/prec/psb_d_bjacprec.f90
index bba4d686..28685bcf 100644
--- a/prec/psb_d_bjacprec.f90
+++ b/prec/psb_d_bjacprec.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,15 +27,19 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 module psb_d_bjacprec
 
   use psb_d_base_prec_mod
   use psb_d_ilu_fact_mod
-  
+  use psb_d_ainv_fact_mod
+  use psb_d_invk_fact_mod
+  use psb_d_invt_fact_mod
+
   type, extends(psb_d_base_prec_type)   :: psb_d_bjac_prec_type
     integer(psb_ipk_), allocatable      :: iprcparm(:)
+    real(psb_dpk_), allocatable       :: rprcparm(:)
     type(psb_dspmat_type), allocatable  :: av(:)
     type(psb_d_vect_type), allocatable  :: dv, wrk(:)
   contains
@@ -44,6 +48,7 @@ module psb_d_bjacprec
     procedure, pass(prec) :: precbld   => psb_d_bjac_precbld
     procedure, pass(prec) :: precinit  => psb_d_bjac_precinit
     procedure, pass(prec) :: precseti  => psb_d_bjac_precseti
+    procedure, pass(prec) :: precsetr  => psb_d_bjac_precsetr
     procedure, pass(prec) :: precdescr => psb_d_bjac_precdescr
     procedure, pass(prec) :: dump      => psb_d_bjac_dump
     procedure, pass(prec) :: clone     => psb_d_bjac_clone
@@ -55,15 +60,14 @@ module psb_d_bjacprec
     procedure, pass(prec) :: is_allocated_wrk => psb_d_bjac_is_allocated_wrk
   end type psb_d_bjac_prec_type
 
-  private :: psb_d_bjac_sizeof, psb_d_bjac_precdescr, psb_d_bjac_get_nzeros
- 
-
   character(len=15), parameter, private :: &
-       &  fact_names(0:2)=(/'None          ','ILU(n)        ',&
-       &  'ILU(eps)      '/)
+       &  fact_names(0:6)=(/'None          ','ILU(0)        ',&
+       &  'ILU(n)        ','ILU(eps)      ','AINV(eps)     ',&
+       &  'INVT          ','INVK          '/)
 
-  
-  interface  
+  private :: psb_d_bjac_sizeof, psb_d_bjac_precdescr, psb_d_bjac_get_nzeros
+
+  interface
     subroutine psb_d_bjac_dump(prec,info,prefix,head)
       import :: psb_ipk_, psb_desc_type, psb_d_bjac_prec_type, psb_d_vect_type, psb_dpk_
       class(psb_d_bjac_prec_type), intent(in) :: prec
@@ -72,7 +76,7 @@ module psb_d_bjacprec
     end subroutine psb_d_bjac_dump
   end interface
 
-  interface  
+  interface
     subroutine psb_d_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_d_bjac_prec_type, psb_d_vect_type, psb_dpk_
       type(psb_desc_type),intent(in)    :: desc_data
@@ -89,7 +93,7 @@ module psb_d_bjacprec
   interface
     subroutine psb_d_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_d_bjac_prec_type, psb_d_vect_type, psb_dpk_
-      
+
       type(psb_desc_type),intent(in)    :: desc_data
       class(psb_d_bjac_prec_type), intent(inout)  :: prec
       real(psb_dpk_),intent(in)         :: alpha,beta
@@ -100,7 +104,7 @@ module psb_d_bjacprec
       real(psb_dpk_),intent(inout), optional, target :: work(:)
     end subroutine psb_d_bjac_apply
   end interface
-  
+
   interface
     subroutine psb_d_bjac_precinit(prec,info)
       import :: psb_ipk_, psb_desc_type, psb_d_bjac_prec_type, psb_d_vect_type, psb_dpk_
@@ -108,7 +112,7 @@ module psb_d_bjacprec
       integer(psb_ipk_), intent(out)                     :: info
     end subroutine psb_d_bjac_precinit
   end interface
-  
+
   interface
     subroutine psb_d_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
       import :: psb_ipk_, psb_desc_type, psb_d_bjac_prec_type, psb_d_vect_type, psb_dpk_, &
@@ -123,24 +127,34 @@ module psb_d_bjacprec
       class(psb_i_base_vect_type), intent(in), optional  :: imold
     end subroutine psb_d_bjac_precbld
   end interface
-  
+
   interface
     subroutine psb_d_bjac_precseti(prec,what,val,info)
       import :: psb_ipk_, psb_desc_type, psb_d_bjac_prec_type, psb_d_vect_type, psb_dpk_
       class(psb_d_bjac_prec_type),intent(inout) :: prec
-      integer(psb_ipk_), intent(in)                      :: what 
-      integer(psb_ipk_), intent(in)                      :: val 
+      integer(psb_ipk_), intent(in)                      :: what
+      integer(psb_ipk_), intent(in)                      :: val
       integer(psb_ipk_), intent(out)                     :: info
     end subroutine psb_d_bjac_precseti
   end interface
-  
+
+  interface
+    subroutine psb_d_bjac_precsetr(prec,what,val,info)
+      import :: psb_ipk_, psb_desc_type, psb_d_bjac_prec_type, psb_d_vect_type, psb_dpk_
+      class(psb_d_bjac_prec_type),intent(inout) :: prec
+      integer(psb_ipk_), intent(in)                      :: what
+      real(psb_dpk_), intent(in)                      :: val
+      integer(psb_ipk_), intent(out)                     :: info
+    end subroutine psb_d_bjac_precsetr
+  end interface
+
 
 contains
 
   subroutine psb_d_bjac_precdescr(prec,iout,root)
     use psb_penv_mod
     use psb_error_mod
-    implicit none 
+    implicit none
 
     class(psb_d_bjac_prec_type), intent(in) :: prec
     integer(psb_ipk_), intent(in), optional    :: iout
@@ -154,19 +168,19 @@ contains
     call psb_erractionsave(err_act)
 
     info = psb_success_
-   
-    if (present(iout)) then 
+
+    if (present(iout)) then
       iout_ = iout
     else
-      iout_ = 6 
+      iout_ = 6
     end if
-    if (present(root)) then 
+    if (present(root)) then
       root_ = root
     else
       root_ = psb_root_
     end if
 
-    if (.not.allocated(prec%iprcparm)) then 
+    if (.not.allocated(prec%iprcparm)) then
       info = 1124
       call psb_errpush(info,name,a_err="preconditioner")
       goto 9999
@@ -180,26 +194,26 @@ contains
          &  write(iout_,*) trim(prec%desc_prefix()),' ',&
          & 'Block Jacobi with: ',&
          &  fact_names(prec%iprcparm(psb_f_type_))
-    
+
     call psb_erractionrestore(err_act)
     return
 
 9999 call psb_error_handler(err_act)
 
     return
-    
+
   end subroutine psb_d_bjac_precdescr
 
 
   function psb_d_bjac_sizeof(prec) result(val)
     class(psb_d_bjac_prec_type), intent(in) :: prec
     integer(psb_epk_) :: val
-    
+
     val = 0
-    if (allocated(prec%dv)) then 
+    if (allocated(prec%dv)) then
       val = val + psb_sizeof_dp * prec%dv%get_nrows()
     endif
-    if (allocated(prec%av)) then 
+    if (allocated(prec%av)) then
       val = val + prec%av(psb_l_pr_)%sizeof()
       val = val + prec%av(psb_u_pr_)%sizeof()
     endif
@@ -210,12 +224,12 @@ contains
 
     class(psb_d_bjac_prec_type), intent(in) :: prec
     integer(psb_epk_) :: val
-    
+
     val = 0
-    if (allocated(prec%dv)) then 
+    if (allocated(prec%dv)) then
       val = val + prec%dv%get_nrows()
     endif
-    if (allocated(prec%av)) then 
+    if (allocated(prec%av)) then
       val = val + prec%av(psb_l_pr_)%get_nzeros()
       val = val + prec%av(psb_u_pr_)%get_nzeros()
     endif
@@ -236,18 +250,18 @@ contains
     call psb_erractionsave(err_act)
 
     info = psb_success_
-    if (allocated(prec%av)) then 
-      do i=1,size(prec%av) 
+    if (allocated(prec%av)) then
+      do i=1,size(prec%av)
         call prec%av(i)%free()
       enddo
       deallocate(prec%av,stat=info)
     end if
 
-    if (allocated(prec%dv)) then 
+    if (allocated(prec%dv)) then
       call prec%dv%free(info)
       if (info == 0) deallocate(prec%dv,stat=info)
     end if
-    if (allocated(prec%iprcparm)) then 
+    if (allocated(prec%iprcparm)) then
       deallocate(prec%iprcparm,stat=info)
     end if
     call psb_erractionrestore(err_act)
@@ -283,19 +297,19 @@ contains
          & allocate(psb_d_bjac_prec_type :: precout, stat=info)
     if (info /= 0) goto 9999
     select type(pout => precout)
-    type is (psb_d_bjac_prec_type) 
+    type is (psb_d_bjac_prec_type)
       call pout%set_ctxt(prec%get_ctxt())
 
-      if (allocated(prec%av)) then 
+      if (allocated(prec%av)) then
         allocate(pout%av(size(prec%av)),stat=info)
-        do i=1,size(prec%av) 
+        do i=1,size(prec%av)
           if (info /= psb_success_) exit
           call prec%av(i)%clone(pout%av(i),info)
         enddo
         if (info /= psb_success_) goto 9999
       end if
 
-      if (allocated(prec%dv)) then 
+      if (allocated(prec%dv)) then
         allocate(pout%dv,stat=info)
         if (info == 0) call prec%dv%clone(pout%dv,info)
       end if
@@ -316,7 +330,7 @@ contains
   subroutine psb_d_bjac_allocate_wrk(prec,info,vmold,desc)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_d_bjac_prec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -326,11 +340,11 @@ contains
     ! Local variables
     integer(psb_ipk_) :: err_act, i
     character(len=20)   :: name
-    
+
     info=psb_success_
     name = 'psb_d_allocate_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
     if (allocated(prec%wrk)) then
       if (size(prec%wrk)<2) then
@@ -343,11 +357,11 @@ contains
     if (info /= 0) then
       info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate");   goto 9999
     end if
-    
+
     if (.not.allocated(prec%wrk)) then
-      if (.not.present(desc)) then 
+      if (.not.present(desc)) then
         info = psb_err_internal_error_; call psb_errpush(info,name,a_err="no desc?");   goto 9999
-      end if      
+      end if
       allocate(prec%wrk(2),stat=info)
       do i=1, 2
         if (info == 0)   call psb_geall(prec%wrk(i),desc,info)
@@ -357,19 +371,19 @@ contains
     if (info /= 0) then
       info = psb_err_internal_error_; call psb_errpush(info,name,a_err="allocate");   goto 9999
     end if
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_d_bjac_allocate_wrk
 
   subroutine psb_d_bjac_free_wrk(prec,info)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_d_bjac_prec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -378,14 +392,14 @@ contains
     integer(psb_ipk_) :: err_act
     integer(psb_ipk_) :: i
     character(len=20) :: name
-    
+
     info=psb_success_
     name = 'psb_d_allocate_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
 
-    info = psb_success_ 
+    info = psb_success_
     if (allocated(prec%wrk)) then
       do i=1,size(prec%wrk)
         if (info == 0) call prec%wrk(i)%free(info)
@@ -395,29 +409,29 @@ contains
     if (info /= 0) then
       info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate");   goto 9999
     end if
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_d_bjac_free_wrk
 
   function psb_d_bjac_is_allocated_wrk(prec) result(res)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_d_bjac_prec_type), intent(in) :: prec
     logical :: res
 
-    ! In the base version we can say yes, because 
+    ! In the base version we can say yes, because
     ! there is nothing to allocate
 
     res = allocated(prec%wrk)
-    
+
   end function psb_d_bjac_is_allocated_wrk
 
-  
+
 end module psb_d_bjacprec
diff --git a/prec/psb_d_invk_fact_mod.f90 b/prec/psb_d_invk_fact_mod.f90
new file mode 100644
index 00000000..2bd97198
--- /dev/null
+++ b/prec/psb_d_invk_fact_mod.f90
@@ -0,0 +1,165 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG4PSBLAS, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! File: psb_d_invk_fact_mod.f90
+!
+! Module: psb_d_invk_fact_mod
+!
+!  This module defines some interfaces used internally by the implementation of
+!  psb_d_invk_solver, but not visible to the end user.
+!
+!
+module psb_d_invk_fact_mod
+
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_invk_fact
+    subroutine psb_d_invk_bld(a,fill1, fill2,lmat,d,umat,desc,info,blck)
+      ! import
+      import psb_dspmat_type, psb_ipk_, psb_dpk_, psb_desc_type
+      ! Arguments
+      type(psb_dspmat_type), intent(in), target   :: a
+      integer(psb_ipk_), intent(in)               :: fill1, fill2
+      type(psb_dspmat_type), intent(inout)        :: lmat, umat
+      real(psb_dpk_), allocatable                 :: d(:)
+      Type(psb_desc_type), Intent(inout)          :: desc
+      integer(psb_ipk_), intent(out)              :: info
+      type(psb_dspmat_type), intent(in), optional :: blck
+    end subroutine
+  end interface
+
+  ! Inner workings
+  interface psb_sparse_invk
+    subroutine psb_dsparse_invk(n,a,z,fill_in,info,inlevs)
+      ! Import
+      import psb_ipk_, psb_dspmat_type
+      ! Arguments
+      integer(psb_ipk_), intent(in)           :: n
+      type(psb_dspmat_type), intent(in)       :: a
+      type(psb_dspmat_type), intent(inout)    :: z
+      integer(psb_ipk_), intent(in)           :: fill_in
+      integer(psb_ipk_), intent(out)          :: info
+      integer(psb_ipk_), intent(in), optional :: inlevs(:)
+    end subroutine psb_dsparse_invk
+  end interface
+
+  interface psb_invk_inv
+    subroutine psb_dinvk_inv(fill_in,i,row,rowlevs,heap,uia1,uia2,uaspk,uplevs,&
+         & nidx,idxs,info)
+
+      use psb_base_mod, only : psb_dspmat_type, psb_dpk_, psb_i_heap, psb_ipk_
+      implicit none
+
+      ! Arguments
+      type(psb_i_heap), intent(inout)               :: heap
+      integer(psb_ipk_), intent(in)                 :: i, fill_in
+      integer(psb_ipk_), intent(inout)              :: nidx,info
+      integer(psb_ipk_), intent(inout)              :: rowlevs(:)
+      integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+      integer(psb_ipk_), intent(in)                 :: uia1(:),uia2(:),uplevs(:)
+      real(psb_dpk_), intent(in)                    :: uaspk(:)
+      real(psb_dpk_), intent(inout)                 :: row(:)
+
+
+    end subroutine psb_dinvk_inv
+  end interface
+
+  interface  psb_invk_copyin
+    subroutine psb_d_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,&
+         & ktrw,trw,info,sign,inlevs)
+      ! Import
+      use psb_base_mod, only : psb_d_csr_sparse_mat, psb_d_coo_sparse_mat,&
+           & psb_dpk_, psb_i_heap, psb_ipk_
+      implicit none
+      ! Arguments
+      type(psb_d_csr_sparse_mat), intent(in)    :: a
+      type(psb_d_coo_sparse_mat), intent(inout) :: trw
+      integer(psb_ipk_), intent(in)             :: i,m,jmin,jmax
+      integer(psb_ipk_), intent(inout)          :: ktrw,info
+      integer(psb_ipk_), intent(inout)          :: rowlevs(:)
+      real(psb_dpk_), intent(inout)             :: row(:)
+      type(psb_i_heap), intent(inout)           :: heap
+      real(psb_dpk_), optional, intent(in)      :: sign
+      integer(psb_ipk_), intent(in), optional   :: inlevs(:)
+    end subroutine psb_d_invk_copyin
+  end interface
+
+  interface psb_invk_copyout
+    subroutine psb_d_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,&
+         &  l2,uia1,uia2,uaspk,info)
+      use psb_base_mod, only : psb_dspmat_type, psb_dpk_, psb_i_heap, psb_ipk_
+      implicit none
+      ! Arguments
+      integer(psb_ipk_), intent(in)                 :: fill_in, i, m, nidx
+      integer(psb_ipk_), intent(inout)              :: l2, info
+      integer(psb_ipk_), intent(inout)              :: rowlevs(:), idxs(:)
+      integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:)
+      real(psb_dpk_), allocatable, intent(inout)    :: uaspk(:)
+      real(psb_dpk_), intent(inout)                 :: row(:)
+    end subroutine psb_d_invk_copyout
+  end interface
+
+end module
diff --git a/prec/psb_d_invt_fact_mod.f90 b/prec/psb_d_invt_fact_mod.f90
new file mode 100644
index 00000000..f38c1c2b
--- /dev/null
+++ b/prec/psb_d_invt_fact_mod.f90
@@ -0,0 +1,168 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG4PSBLAS, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! File: psb_d_invt_fact_mod.f90
+!
+! Module: psb_d_invt_fact_mod
+!
+!  This module defines some interfaces used internally by the implementation of
+!  psb_d_invt_solver, but not visible to the end user.
+!
+!
+module psb_d_invt_fact_mod
+
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_invt_fact
+      subroutine psb_d_invt_bld(a,fillin,invfill,thresh,invthresh,&
+         & lmat,d,umat,desc,info,blck)
+         ! Import
+         import psb_dspmat_type, psb_dpk_, psb_ipk_, psb_desc_type
+         ! Arguments
+         type(psb_dspmat_type), intent(in), target   :: a
+         integer(psb_ipk_), intent(in)               :: fillin,invfill
+         real(psb_dpk_), intent(in)                :: thresh
+         real(psb_dpk_), intent(in)                  :: invthresh
+         type(psb_dspmat_type), intent(inout)        :: lmat, umat
+         real(psb_dpk_), allocatable                 :: d(:)
+         Type(psb_desc_type), Intent(inout)          :: desc
+         integer(psb_ipk_), intent(out)              :: info
+         type(psb_dspmat_type), intent(in), optional :: blck
+      end subroutine psb_d_invt_bld
+  end interface
+
+  ! Interfaces for the inner workings
+
+  interface
+      subroutine psb_dsparse_invt(n,a,z,nzrmax,sp_thresh,info)
+         ! Import
+         import psb_dspmat_type, psb_dpk_, psb_ipk_
+         ! Arguments
+         integer(psb_ipk_), intent(in)        :: n
+         type(psb_dspmat_type), intent(in)    :: a
+         type(psb_dspmat_type), intent(inout) :: z
+         integer(psb_ipk_), intent(in)        :: nzrmax
+         real(psb_dpk_), intent(in)           :: sp_thresh
+         integer(psb_ipk_), intent(out)       :: info
+      end subroutine psb_dsparse_invt
+  end interface
+
+  interface
+     subroutine psb_d_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,&
+          & irwt,ktrw,trw,info,sign)
+       ! Import
+       import psb_d_csr_sparse_mat, psb_d_coo_sparse_mat, psb_dpk_, &
+         & psb_ipk_, psb_i_heap
+       ! Arguments
+       type(psb_d_csr_sparse_mat), intent(in)    :: a
+       type(psb_d_coo_sparse_mat), intent(inout) :: trw
+       integer(psb_ipk_), intent(in)             :: i, m,jmin,jmax,jd
+       integer(psb_ipk_), intent(inout)          :: ktrw,nlw,nup,jmaxup,info
+       integer(psb_ipk_), intent(inout)          :: irwt(:)
+       real(psb_dpk_), intent(inout)          :: nrmi
+       real(psb_dpk_), intent(inout)            :: row(:)
+       type(psb_i_heap), intent(inout)           :: heap
+       real(psb_dpk_), intent(in), optional    :: sign
+    end subroutine psb_d_invt_copyin
+  end interface
+
+  interface
+     subroutine psb_d_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, &
+          & nidx,idxs,l2,ja,irp,val,info)
+      ! Import
+      import psb_dpk_, psb_ipk_
+      ! Arguments
+      integer(psb_ipk_), intent(in)                 :: fill_in,i,m,nidx,nlw,nup,jmaxup
+      integer(psb_ipk_), intent(in)                 :: idxs(:)
+      integer(psb_ipk_), intent(inout)              :: l2, info
+      integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:)
+      real(psb_dpk_), intent(in)                    :: thres,nrmi
+      real(psb_dpk_),allocatable, intent(inout)     :: val(:)
+      real(psb_dpk_), intent(inout)                 :: row(:)
+     end subroutine psb_d_invt_copyout
+  end interface
+
+  interface psb_invt_inv
+    subroutine psb_d_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val, &
+      & nidx,idxs,info)
+      ! import
+      import psb_dpk_, psb_i_heap, psb_ipk_
+      ! Arguments
+      type(psb_i_heap), intent(inout)     :: heap
+      integer(psb_ipk_), intent(in)       :: i
+      integer(psb_ipk_), intent(inout)    :: nidx,info
+      integer(psb_ipk_), intent(inout)    :: irwt(:)
+      real(psb_dpk_), intent(in)          :: thres,nrmi
+      integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+      integer(psb_ipk_), intent(in)       :: ja(:),irp(:)
+      real(psb_dpk_), intent(in)          :: val(:)
+      real(psb_dpk_), intent(inout)       :: row(:)
+    end subroutine
+  end interface
+
+contains
+
+end module psb_d_invt_fact_mod
diff --git a/prec/psb_d_prec_type.f90 b/prec/psb_d_prec_type.f90
index 6ba9d3be..391023bd 100644
--- a/prec/psb_d_prec_type.f90
+++ b/prec/psb_d_prec_type.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,8 +27,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !!	Module to   define PREC_DATA,           !!
 !!      structure for preconditioning.          !!
@@ -54,6 +54,10 @@ module psb_d_prec_type
     procedure, pass(prec)               :: build  => psb_dprecbld
     procedure, pass(prec)               :: init   => psb_dprecinit
     procedure, pass(prec)               :: descr  => psb_dfile_prec_descr
+    procedure, pass(prec)               :: cseti  => psb_dcprecseti
+    procedure, pass(prec)               :: csetc  => psb_dcprecsetc
+    procedure, pass(prec)               :: csetr  => psb_dcprecsetr
+    generic, public                     :: set => cseti, csetc, csetr
     procedure, pass(prec)               :: allocate_wrk => psb_d_allocate_wrk
     procedure, pass(prec)               :: free_wrk => psb_d_free_wrk
     procedure, pass(prec)               :: is_allocated_wrk => psb_d_is_allocated_wrk
@@ -102,7 +106,7 @@ module psb_d_prec_type
     module procedure psb_dprec_sizeof
   end interface
 
-  interface 
+  interface
     subroutine psb_d_apply2_vect(prec,x,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_dprec_type, psb_d_vect_type, psb_dpk_
       type(psb_desc_type),intent(in)       :: desc_data
@@ -114,8 +118,8 @@ module psb_d_prec_type
       real(psb_dpk_),intent(inout), optional, target :: work(:)
     end subroutine psb_d_apply2_vect
   end interface
-  
-  interface 
+
+  interface
     subroutine psb_d_apply1_vect(prec,x,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_dprec_type, psb_d_vect_type, psb_dpk_
         type(psb_desc_type),intent(in)       :: desc_data
@@ -126,7 +130,7 @@ module psb_d_prec_type
       real(psb_dpk_),intent(inout), optional, target :: work(:)
     end subroutine psb_d_apply1_vect
   end interface
-  
+
   interface
     subroutine psb_d_apply2v(prec,x,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_dprec_type, psb_d_vect_type, psb_dpk_
@@ -139,8 +143,8 @@ module psb_d_prec_type
       real(psb_dpk_),intent(inout), optional, target :: work(:)
     end subroutine psb_d_apply2v
   end interface
-  
-  interface 
+
+  interface
     subroutine psb_d_apply1v(prec,x,desc_data,info,trans)
       import :: psb_ipk_, psb_desc_type, psb_dprec_type, psb_d_vect_type, psb_dpk_
       type(psb_desc_type),intent(in)    :: desc_data
@@ -150,56 +154,89 @@ module psb_d_prec_type
       character(len=1), optional        :: trans
     end subroutine psb_d_apply1v
   end interface
-  
+
+  interface
+  subroutine psb_dcprecseti(prec,what,val,info,ilev,ilmax,pos,idx)
+    import :: psb_dprec_type, psb_dspmat_type, psb_desc_type, psb_dpk_, &
+      & psb_ipk_
+    class(psb_dprec_type), intent(inout)   :: prec
+    character(len=*), intent(in)             :: what
+    integer(psb_ipk_), intent(in)            :: val
+    integer(psb_ipk_), intent(out)           :: info
+    integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+    character(len=*), optional, intent(in)   :: pos
+  end subroutine psb_dcprecseti
+  subroutine psb_dcprecsetr(prec,what,val,info,ilev,ilmax,pos,idx)
+    import :: psb_dprec_type, psb_dspmat_type, psb_desc_type, psb_dpk_, &
+      & psb_ipk_
+    class(psb_dprec_type), intent(inout)   :: prec
+    character(len=*), intent(in)             :: what
+    real(psb_dpk_), intent(in)             :: val
+    integer(psb_ipk_), intent(out)           :: info
+    integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+    character(len=*), optional, intent(in)   :: pos
+  end subroutine psb_dcprecsetr
+  subroutine psb_dcprecsetc(prec,what,string,info,ilev,ilmax,pos,idx)
+    import :: psb_dprec_type, psb_dspmat_type, psb_desc_type, psb_dpk_, &
+      & psb_ipk_
+    class(psb_dprec_type), intent(inout)   :: prec
+    character(len=*), intent(in)             :: what
+    character(len=*), intent(in)             :: string
+    integer(psb_ipk_), intent(out)           :: info
+    integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+    character(len=*), optional, intent(in)   :: pos
+  end subroutine psb_dcprecsetc
+end interface
+
 contains
 
   subroutine psb_dfile_prec_descr(prec,iout, root)
     use psb_base_mod
-    implicit none 
+    implicit none
     class(psb_dprec_type), intent(in) :: prec
     integer(psb_ipk_), intent(in), optional    :: iout
     integer(psb_ipk_), intent(in), optional    :: root
     integer(psb_ipk_) :: iout_,info
-    character(len=20) :: name='prec_descr' 
-    
-    if (present(iout)) then 
+    character(len=20) :: name='prec_descr'
+
+    if (present(iout)) then
       iout_ = iout
     else
-      iout_ = 6 
+      iout_ = 6
     end if
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = 1124
       call psb_errpush(info,name,a_err="preconditioner")
     end if
     call prec%prec%descr(iout=iout,root=root)
-    
+
   end subroutine psb_dfile_prec_descr
 
   subroutine psb_d_prec_dump(prec,info,prefix,head)
-    implicit none 
+    implicit none
     type(psb_dprec_type), intent(in) :: prec
     integer(psb_ipk_), intent(out)             :: info
     character(len=*), intent(in), optional :: prefix,head
-    !  len of prefix_ 
+    !  len of prefix_
 
     info = 0
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = -1
       write(psb_err_unit,*) 'Trying to dump a non-built preconditioner'
       return
     end if
-    
+
     call prec%prec%dump(info,prefix,head)
-    
-    
+
+
   end subroutine psb_d_prec_dump
 
   subroutine psb_d_allocate_wrk(prec,info,vmold,desc)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_dprec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -209,33 +246,33 @@ contains
     ! Local variables
     integer(psb_ipk_) :: err_act
     character(len=20)   :: name
-    
+
     info=psb_success_
     name = 'psb_d_allocate_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = -1
       write(psb_err_unit,*) 'Trying to allocate wrk to a non-built preconditioner'
       return
     end if
-    
+
     call prec%prec%allocate_wrk(info,vmold=vmold,desc=desc)
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_d_allocate_wrk
 
   subroutine psb_d_free_wrk(prec,info)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_dprec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -243,47 +280,47 @@ contains
     ! Local variables
     integer(psb_ipk_) :: err_act
     character(len=20)   :: name
-    
+
     info=psb_success_
     name = 'psb_d_free_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = -1
       write(psb_err_unit,*) 'Trying to free a non-built preconditioner'
       return
     end if
-    
+
     call prec%prec%free_wrk(info)
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_d_free_wrk
 
   function psb_d_is_allocated_wrk(prec) result(res)
     implicit none
-    
+
     ! Arguments
     class(psb_dprec_type), intent(in) :: prec
     logical :: res
 
     if (.not.allocated(prec%prec)) then
       res = .false.
-    else 
+    else
       res = prec%prec%is_allocated_wrk()
     end if
-    
+
   end function psb_d_is_allocated_wrk
 
   subroutine psb_d_precfree(p,info)
     use psb_base_mod
-    implicit none 
+    implicit none
     type(psb_dprec_type), intent(inout) :: p
     integer(psb_ipk_), intent(out)                :: info
     integer(psb_ipk_) :: me, err_act,i
@@ -303,14 +340,14 @@ contains
     return
 
 9999 call psb_error_handler(err_act)
-    
+
     return
 
   end subroutine psb_d_precfree
 
   subroutine psb_d_prec_free(prec,info)
     use psb_base_mod
-    implicit none 
+    implicit none
     class(psb_dprec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)         :: info
     integer(psb_ipk_) :: me, err_act,i
@@ -324,7 +361,7 @@ contains
 
     me=-1
 
-    if (allocated(prec%prec)) then 
+    if (allocated(prec%prec)) then
       call prec%prec%free(info)
       if (info /= psb_success_) goto 9999
       deallocate(prec%prec,stat=info)
@@ -343,26 +380,26 @@ contains
     class(psb_dprec_type), intent(in) :: prec
     integer(psb_epk_) :: val
     integer(psb_ipk_) :: i
-    
+
     val = 0
-    if (allocated(prec%prec)) then 
+    if (allocated(prec%prec)) then
       val = val + prec%prec%sizeof()
     end if
-    
+
   end function psb_dprec_sizeof
 
   subroutine psb_d_prec_clone(prec,precout,info)
-    implicit none 
+    implicit none
     class(psb_dprec_type), intent(inout) :: prec
     class(psb_dprec_type), intent(inout) :: precout
     integer(psb_ipk_), intent(out)             :: info
 
     info = psb_success_
     call prec%free(info)
-    if (allocated(prec%prec)) then 
+    if (allocated(prec%prec)) then
       call prec%prec%clone(precout%prec,info)
     end if
-    
+
   end subroutine psb_d_prec_clone
 
 end module psb_d_prec_type
diff --git a/prec/psb_prec_const_mod.f90 b/prec/psb_prec_const_mod.f90
index f7b32d2f..73c22e58 100644
--- a/prec/psb_prec_const_mod.f90
+++ b/prec/psb_prec_const_mod.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,8 +27,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !!	Module to   define PREC_DATA,           !!
 !!      structure for preconditioning.          !!
@@ -43,19 +43,23 @@ module psb_prec_const_mod
 
   ! Entries in iprcparm: preconditioner type, factorization type,
   ! prolongation type, restriction type, renumbering algorithm,
-  ! number of overlap layers, pointer to SuperLU factors, 
-  ! levels of fill in for ILU(N), 
+  ! number of overlap layers, pointer to SuperLU factors,
+  ! levels of fill in for ILU(N),
   integer(psb_ipk_), parameter :: psb_p_type_=1, psb_f_type_=2
   integer(psb_ipk_), parameter :: psb_ilu_fill_in_=8
+  integer(psb_ipk_), parameter :: psb_ilu_ialg_=9
   !Renumbering. SEE BELOW
   integer(psb_ipk_), parameter :: psb_renum_none_=0, psb_renum_glb_=1, psb_renum_gps_=2
   integer(psb_ipk_), parameter :: psb_ifpsz=10
   ! Entries in rprcparm: ILU(E) epsilon, smoother omega
+  integer(psb_ipk_), parameter :: psb_ilu_scale_=7
   integer(psb_ipk_), parameter :: psb_fact_eps_=1
-  integer(psb_ipk_), parameter :: psb_rfpsz=4
-  ! Factorization types: none, ILU(N), ILU(E)
-  integer(psb_ipk_), parameter :: psb_f_none_=0,psb_f_ilu_n_=1
-  ! Fields for sparse matrices ensembles: 
+  integer(psb_ipk_), parameter :: psb_rfpsz=8
+  ! Factorization types: none, ILU(0), ILU(N), ILU(N,E)
+  integer(psb_ipk_), parameter :: psb_f_none_=0,psb_f_ilu_n_=1,psb_f_ilu_k_=2,psb_f_ilu_t_=3
+  ! Approximate Inverse factorization type: AINV, INVT, INVK
+  integer(psb_ipk_), parameter :: psb_f_ainv_=4, psb_f_invt_=5, psb_f_invk_=6
+  ! Fields for sparse matrices ensembles:
   integer(psb_ipk_), parameter :: psb_l_pr_=1, psb_u_pr_=2, psb_bp_ilu_avsz=2
   integer(psb_ipk_), parameter :: psb_max_avsz=psb_bp_ilu_avsz
 
@@ -65,11 +69,21 @@ module psb_prec_const_mod
   integer(psb_ipk_), parameter :: psb_ilu_scale_none_    = 0
   integer(psb_ipk_), parameter :: psb_ilu_scale_maxval_  = 1
   integer(psb_ipk_), parameter :: psb_ilu_scale_diag_    = 2
-  integer(psb_ipk_), parameter :: psb_ilu_scale_arwsum_  = 3 
+  integer(psb_ipk_), parameter :: psb_ilu_scale_arwsum_  = 3
   integer(psb_ipk_), parameter :: psb_ilu_scale_aclsum_  = 4
   integer(psb_ipk_), parameter :: psb_ilu_scale_arcsum_  = 5
 
-  
+  ! Numerical parameters relative to Approximate Inverse Preconditioners
+  integer, parameter   :: psb_inv_fillin_     = 3
+  integer, parameter   :: psb_ainv_alg_       = psb_inv_fillin_ + 1
+  integer, parameter   :: psb_inv_thresh_     = 3
+  integer, parameter   :: psb_ainv_llk_       = psb_inv_thresh_ + 1
+  integer, parameter   :: psb_ainv_s_llk_     = psb_ainv_llk_ + 1
+  integer, parameter   :: psb_ainv_s_ft_llk_  = psb_ainv_s_llk_ + 1
+  integer, parameter   :: psb_ainv_llk_noth_  = psb_ainv_s_ft_llk_  + 1
+  integer, parameter   :: psb_ainv_mlk_       = psb_ainv_llk_noth_  + 1
+  integer, parameter   :: psb_ainv_lmx_       = psb_ainv_mlk_
+
 
   interface psb_check_def
     module procedure psb_icheck_def, psb_scheck_def, psb_dcheck_def
@@ -87,9 +101,9 @@ contains
     select case(iprec)
     case(psb_noprec_)
       pr_to_str='NOPREC'
-    case(psb_diag_)         
+    case(psb_diag_)
       pr_to_str='DIAG'
-    case(psb_bjac_)         
+    case(psb_bjac_)
       pr_to_str='BJAC'
     case default
       pr_to_str='???'
@@ -125,7 +139,7 @@ contains
     integer(psb_ipk_), intent(inout) :: ip
     integer(psb_ipk_), intent(in)    :: id
     character(len=*), intent(in) :: name
-    interface 
+    interface
       function is_legal(i)
         import :: psb_ipk_
         integer(psb_ipk_), intent(in) :: i
@@ -133,7 +147,7 @@ contains
       end function is_legal
     end interface
 
-    if (.not.is_legal(ip)) then     
+    if (.not.is_legal(ip)) then
       write(psb_err_unit,*) 'Illegal value for ',name,' :',ip, '. defaulting to ',id
       ip = id
     end if
@@ -143,7 +157,7 @@ contains
     real(psb_spk_), intent(inout) :: ip
     real(psb_spk_), intent(in)    :: id
     character(len=*), intent(in) :: name
-    interface 
+    interface
       function is_legal(i)
         import :: psb_ipk_, psb_spk_
         real(psb_spk_), intent(in) :: i
@@ -151,7 +165,7 @@ contains
       end function is_legal
     end interface
 
-    if (.not.is_legal(ip)) then     
+    if (.not.is_legal(ip)) then
       write(psb_err_unit,*) 'Illegal value for ',name,' :',ip, '. defaulting to ',id
       ip = id
     end if
@@ -161,7 +175,7 @@ contains
     real(psb_dpk_), intent(inout) :: ip
     real(psb_dpk_), intent(in)    :: id
     character(len=*), intent(in) :: name
-    interface 
+    interface
       function is_legal(i)
         import :: psb_ipk_, psb_spk_, psb_dpk_
         real(psb_dpk_), intent(in) :: i
@@ -169,7 +183,7 @@ contains
       end function is_legal
     end interface
 
-    if (.not.is_legal(ip)) then     
+    if (.not.is_legal(ip)) then
       write(psb_err_unit,*) 'Illegal value for ',name,' :',ip, '. defaulting to ',id
       ip = id
     end if
diff --git a/prec/psb_s_ainv_fact_mod.f90 b/prec/psb_s_ainv_fact_mod.f90
new file mode 100644
index 00000000..bc7f1d12
--- /dev/null
+++ b/prec/psb_s_ainv_fact_mod.f90
@@ -0,0 +1,98 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from MLD2P4, original copyright below.
+!
+!
+!                             MLD2P4  version 2.2
+!    MultiLevel Domain Decomposition Parallel Preconditioners Package
+!               based on PSBLAS (Parallel Sparse BLAS version 3.5)
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the MLD2P4 group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+!
+! File: psb_s_ainv_fact_mod.f90
+!
+! Module: psb_s_ainv_fact_mod
+!
+!  This module defines some interfaces used internally by the implementation of
+!  psb_s_ainv_solver, but not visible to the end user.
+!
+!
+module psb_s_ainv_fact_mod
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_ainv_fact
+    subroutine psb_s_ainv_bld(a,alg,fillin,thresh,wmat,d,zmat,desc,info,blck,iscale)
+      import psb_sspmat_type, psb_spk_, psb_ipk_, psb_desc_type
+      type(psb_sspmat_type), intent(in), target   :: a
+      integer(psb_ipk_), intent(in)                 :: fillin,alg
+      real(psb_spk_), intent(in)                  :: thresh
+      type(psb_sspmat_type), intent(inout)        :: wmat, zmat
+      real(psb_spk_), allocatable                  :: d(:)
+      Type(psb_desc_type), Intent(in)               :: desc
+      integer(psb_ipk_), intent(out)                :: info
+      type(psb_sspmat_type), intent(in), optional :: blck
+      integer(psb_ipk_), intent(in), optional       :: iscale
+    end subroutine psb_s_ainv_bld
+  end interface
+
+end module psb_s_ainv_fact_mod
diff --git a/prec/psb_s_ainv_tools_mod.f90 b/prec/psb_s_ainv_tools_mod.f90
new file mode 100644
index 00000000..caa50164
--- /dev/null
+++ b/prec/psb_s_ainv_tools_mod.f90
@@ -0,0 +1,132 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+!
+!
+!
+!
+module psb_s_ainv_tools_mod
+
+  interface sp_drop
+    subroutine psb_s_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info)
+      use psb_base_mod, only : psb_spk_, psb_ipk_
+      implicit none
+      real(psb_spk_), intent(in)      :: sp_thresh
+      integer(psb_ipk_), intent(in)     :: idiag, nzrmax
+      integer(psb_ipk_), intent(inout)  :: nz
+      integer(psb_ipk_), intent(inout)  :: iz(:)
+      real(psb_spk_), intent(inout)    :: valz(:)
+      integer(psb_ipk_), intent(out)    :: info
+    end subroutine psb_s_sp_drop
+  end interface
+
+  interface rwclip
+    subroutine psb_s_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax)
+      use psb_base_mod, only : psb_spk_, psb_ipk_
+
+      implicit none
+      integer(psb_ipk_), intent(inout)  :: nz
+      integer(psb_ipk_), intent(inout)  :: ia(*), ja(*)
+      real(psb_spk_), intent(inout)    :: val(*)
+      integer(psb_ipk_), intent(in)     :: imin,imax,jmin,jmax
+    end subroutine psb_s_rwclip
+  end interface
+
+  interface sparsify
+    subroutine psb_s_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info, &
+         & istart,iheap,ikr)
+      use psb_base_mod, only : psb_spk_, psb_ipk_, psb_i_heap
+      implicit none
+
+      real(psb_spk_), intent(in)            :: sp_thresh
+      integer(psb_ipk_), intent(in)           :: idiag, n, nzrmax
+      real(psb_spk_), intent(inout)          :: zw(:)
+      integer(psb_ipk_), intent(out)          :: nz
+      integer(psb_ipk_), intent(out)          :: iz(:)
+      real(psb_spk_), intent(out)            :: valz(:)
+      integer(psb_ipk_), intent(out)          :: info
+      integer(psb_ipk_), intent(in), optional :: istart
+      type(psb_i_heap), optional              :: iheap
+      integer(psb_ipk_), optional             :: ikr(:)
+    end subroutine psb_s_sparsify
+    subroutine psb_s_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info)
+      use psb_base_mod, only : psb_spk_, psb_ipk_
+      implicit none
+
+      real(psb_spk_), intent(in)     :: sp_thresh
+      integer(psb_ipk_), intent(in)    :: idiag, n, nzrmax
+      real(psb_spk_), intent(inout)   :: zw(:)
+      integer(psb_ipk_), intent(out)   :: nz
+      integer(psb_ipk_), intent(out)   :: iz(:)
+      real(psb_spk_), intent(out)      :: valz(:)
+      integer(psb_ipk_), intent(out)   :: info
+      integer(psb_ipk_), intent(inout) :: lhead, listv(:)
+      integer(psb_ipk_)                :: ikr(:)
+    end subroutine psb_s_sparsify_list
+
+  end interface
+
+end module psb_s_ainv_tools_mod
diff --git a/prec/psb_s_biconjg_mod.F90 b/prec/psb_s_biconjg_mod.F90
new file mode 100644
index 00000000..bc2aaefc
--- /dev/null
+++ b/prec/psb_s_biconjg_mod.F90
@@ -0,0 +1,367 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+module psb_s_biconjg_mod
+
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_sparse_biconjg
+    module procedure psb_ssparse_biconjg
+  end interface
+
+  abstract interface
+    subroutine psb_ssparse_biconjg_variant(n,a,p,z,w,nzrmax,sp_thresh,info)
+      use psb_base_mod, only : psb_s_csr_sparse_mat, psb_s_csc_sparse_mat, &
+           &  psb_spk_, psb_ipk_
+      !
+      implicit none
+      integer(psb_ipk_), intent(in)               :: n
+      type(psb_s_csr_sparse_mat), intent(in)    :: a
+      type(psb_s_csc_sparse_mat), intent(inout) :: z,w
+      integer(psb_ipk_), intent(in)               :: nzrmax
+      real(psb_spk_), intent(in)                :: sp_thresh
+      real(psb_spk_), intent(out)                :: p(:)
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_ssparse_biconjg_variant
+  end interface
+
+
+  procedure(psb_ssparse_biconjg_variant) :: psb_ssparse_biconjg_llk,&
+       & psb_ssparse_biconjg_s_llk,  psb_ssparse_biconjg_s_ft_llk,&
+       &  psb_ssparse_biconjg_llk_noth, psb_ssparse_biconjg_mlk
+
+#if defined(HAVE_TUMA_SAINV)
+  procedure(psb_ssparse_biconjg_variant)  ::  psb_ssparse_tuma_sainv,&
+       & psb_ssparse_tuma_lainv
+#endif
+
+contains
+
+  subroutine psb_ssparse_biconjg(alg,n,acsr,p,z,w,nzrmax,sp_thresh,info)
+    use psb_base_mod
+    use psb_prec_const_mod
+    integer(psb_ipk_), intent(in)            :: alg,n
+    type(psb_s_csr_sparse_mat), intent(in) :: acsr
+    type(psb_sspmat_type), intent(out)     :: z, w
+    integer(psb_ipk_), intent(in)            :: nzrmax
+    real(psb_spk_), intent(in)             :: sp_thresh
+    real(psb_spk_), intent(out)             :: p(:)
+    integer(psb_ipk_), intent(out)           :: info
+
+    type(psb_s_csc_sparse_mat)             :: zcsc,wcsc
+    integer(psb_ipk_) :: i,j,k,nrm
+    integer(psb_ipk_) :: err_act
+    character(len=20)  :: name='psb_sparse_biconjg'
+    integer(psb_ipk_), parameter :: variant=1
+
+
+    info = psb_success_
+    call psb_erractionsave(err_act)
+    if (psb_errstatus_fatal()) then
+      info = psb_err_internal_error_; goto 9999
+    end if
+
+    if (size(p)<n) then
+      write(psb_err_unit,*) 'Size of P wrong'
+      info = psb_err_internal_error_
+      call psb_errpush(psb_err_internal_error_,name,a_err='Allocate')
+      goto 9999
+    end if
+
+    if (nzrmax <= 0) write(psb_out_unit,'("Out nzrmax = ",i0)') nzrmax
+
+    select case(alg)
+    case (psb_ainv_llk_)
+      call psb_ssparse_biconjg_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_s_llk_)
+      call psb_ssparse_biconjg_s_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_mlk_)
+      call psb_ssparse_biconjg_mlk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_s_ft_llk_)
+      call psb_ssparse_biconjg_s_ft_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_llk_noth_)
+      call psb_ssparse_biconjg_llk_noth(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+!#if defined(HAVE_TUMA_SAINV)
+!    case (psb_ainv_s_tuma_)
+!      call psb_ssparse_tuma_sainv(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+!    case (psb_ainv_l_tuma_)
+!      call psb_ssparse_tuma_lainv(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+!#endif
+    case default
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='Invalid alg')
+      goto 9999
+    end select
+
+    if (info /= 0) then
+      info = psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='sparse_orth')
+      goto 9999
+    end if
+
+    call z%mv_from(zcsc)
+    call z%cscnv(info,type='CSR')
+    call w%mv_from(wcsc)
+    call w%transp()
+    call w%cscnv(info,type='CSR')
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_ssparse_biconjg
+
+
+  subroutine psb_s_spmspv(alpha,a,nx,ix,vx,beta,ny,iy,vy, info)
+    !
+    !  y = A x  sparse-sparse mode, A in CSC
+    !
+    use psb_base_mod
+    implicit none
+    integer(psb_ipk_), intent(in)            :: nx, ix(:)
+    real(psb_spk_), intent(in)              :: alpha, beta, vx(:)
+    integer(psb_ipk_), intent(inout)         :: ny, iy(:)
+    real(psb_spk_), intent(inout)           :: vy(:)
+    type(psb_s_csc_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(out)           :: info
+
+    integer(psb_ipk_) :: i,j,k,m,n, nv, na, iszy
+    integer(psb_ipk_), allocatable        :: iv(:)
+    real(psb_spk_), allocatable           :: vv(:)
+
+    info = 0
+! !$    write(0,*) 'd_spmspv ',alpha,beta
+    if (beta == -sone) then
+      do i=1, ny
+        vy(i) = -vy(i)
+      end do
+    else if (beta == szero) then
+      do i=1, ny
+        vy(i) = szero
+      end do
+    else if (beta /= sone) then
+      do i=1, ny
+        vy(i) = vy(i) * beta
+      end do
+    end if
+    if (alpha == szero)  return
+    iszy = min(size(iy),size(vy))
+    m = a%get_nrows()
+    n = a%get_ncols()
+
+    if ((ny > m) .or. (nx > n)) then
+      write(0,*) 'Wrong input spmspv rows: ',m,ny,&
+           & ' cols: ',n,nx
+      info = -4
+      return
+    end if
+
+    allocate(iv(m), vv(m), stat=info)
+    if (info /= 0) then
+      write(0,*) 'Allocation error in spmspv'
+      info = -999
+      return
+    endif
+
+    do i = 1, nx
+      j  = ix(i)
+      ! Access column J of A
+      k  = a%icp(j)
+      na = a%icp(j+1) - a%icp(j)
+      call psb_nspaxpby(nv,iv,vv,&
+           & (alpha*vx(i)), na, a%ia(k:k+na-1), a%val(k:k+na-1),&
+           & sone, ny, iy, vy, info)
+
+      if (info /= 0) then
+        write(0,*) 'Internal error in spmspv from nspaxpby'
+        info = -998
+        return
+      endif
+      if (nv > iszy) then
+        write(0,*) 'Error in spmspv: out of memory for output'
+        info = -997
+        return
+      endif
+      ny = nv
+      iy(1:ny) = iv(1:ny)
+      vy(1:ny) = vv(1:ny)
+    end do
+  end subroutine psb_s_spmspv
+
+
+  subroutine psb_s_spvspm(alpha,a,nx,ix,vx,beta,ny,iy,vy, info)
+    !
+    !  y = x A  sparse-sparse mode, A in CSR
+    !
+    use psb_base_mod
+    implicit none
+    integer(psb_ipk_), intent(in)            :: nx, ix(:)
+    real(psb_spk_), intent(in)              :: alpha, beta, vx(:)
+    integer(psb_ipk_), intent(inout)         :: ny, iy(:)
+    real(psb_spk_), intent(inout)           :: vy(:)
+    type(psb_s_csr_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(out)           :: info
+
+    integer(psb_ipk_)              :: i,j,k,m,n, nv, na, iszy
+    integer(psb_ipk_), allocatable :: iv(:)
+    real(psb_spk_), allocatable   :: vv(:)
+
+    info = 0
+! !$    write(0,*) 'd_spvspm ',alpha,beta
+    if (beta == -sone) then
+      do i=1, ny
+        vy(i) = -vy(i)
+      end do
+    else if (beta == szero) then
+      do i=1, ny
+        vy(i) = szero
+      end do
+    else if (beta /= sone) then
+      do i=1, ny
+        vy(i) = vy(i) * beta
+      end do
+    end if
+    if (alpha == szero)  return
+    iszy = min(size(iy),size(vy))
+    m = a%get_nrows()
+    n = a%get_ncols()
+
+    if ((ny > m) .or. (nx > n)) then
+      write(0,*) 'Wrong input spmspv rows: ',m,ny,&
+           & ' cols: ',n,nx
+      info = -4
+      return
+    end if
+
+    allocate(iv(m), vv(m), stat=info)
+    if (info /= 0) then
+      write(0,*) 'Allocation error in spmspv'
+      info = -999
+      return
+    endif
+
+    do i = 1, nx
+      j  = ix(i)
+      ! Access column J of A
+      k  = a%irp(j)
+      na = a%irp(j+1) - a%irp(j)
+      call psb_nspaxpby(nv,iv,vv,&
+           & (alpha*vx(i)), na, a%ja(k:k+na-1), a%val(k:k+na-1),&
+           & sone, ny, iy, vy, info)
+
+      if (info /= 0) then
+        write(0,*) 'Internal error in spmspv from nspaxpby'
+        info = -998
+        return
+      endif
+      if (nv > iszy) then
+        write(0,*) 'Error in spmspv: out of memory for output'
+        info = -997
+        return
+      endif
+      ny = nv
+      iy(1:ny) = iv(1:ny)
+      vy(1:ny) = vv(1:ny)
+    end do
+  end subroutine psb_s_spvspm
+
+end module psb_s_biconjg_mod
diff --git a/prec/psb_s_bjacprec.f90 b/prec/psb_s_bjacprec.f90
index 3e757f6b..1ddaa695 100644
--- a/prec/psb_s_bjacprec.f90
+++ b/prec/psb_s_bjacprec.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,15 +27,19 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 module psb_s_bjacprec
 
   use psb_s_base_prec_mod
   use psb_s_ilu_fact_mod
-  
+  use psb_s_ainv_fact_mod
+  use psb_s_invk_fact_mod
+  use psb_s_invt_fact_mod
+
   type, extends(psb_s_base_prec_type)   :: psb_s_bjac_prec_type
     integer(psb_ipk_), allocatable      :: iprcparm(:)
+    real(psb_spk_), allocatable       :: rprcparm(:)
     type(psb_sspmat_type), allocatable  :: av(:)
     type(psb_s_vect_type), allocatable  :: dv, wrk(:)
   contains
@@ -44,6 +48,7 @@ module psb_s_bjacprec
     procedure, pass(prec) :: precbld   => psb_s_bjac_precbld
     procedure, pass(prec) :: precinit  => psb_s_bjac_precinit
     procedure, pass(prec) :: precseti  => psb_s_bjac_precseti
+    procedure, pass(prec) :: precsetr  => psb_s_bjac_precsetr
     procedure, pass(prec) :: precdescr => psb_s_bjac_precdescr
     procedure, pass(prec) :: dump      => psb_s_bjac_dump
     procedure, pass(prec) :: clone     => psb_s_bjac_clone
@@ -55,15 +60,14 @@ module psb_s_bjacprec
     procedure, pass(prec) :: is_allocated_wrk => psb_s_bjac_is_allocated_wrk
   end type psb_s_bjac_prec_type
 
-  private :: psb_s_bjac_sizeof, psb_s_bjac_precdescr, psb_s_bjac_get_nzeros
- 
-
   character(len=15), parameter, private :: &
-       &  fact_names(0:2)=(/'None          ','ILU(n)        ',&
-       &  'ILU(eps)      '/)
+       &  fact_names(0:6)=(/'None          ','ILU(0)        ',&
+       &  'ILU(n)        ','ILU(eps)      ','AINV(eps)     ',&
+       &  'INVT          ','INVK          '/)
 
-  
-  interface  
+  private :: psb_s_bjac_sizeof, psb_s_bjac_precdescr, psb_s_bjac_get_nzeros
+
+  interface
     subroutine psb_s_bjac_dump(prec,info,prefix,head)
       import :: psb_ipk_, psb_desc_type, psb_s_bjac_prec_type, psb_s_vect_type, psb_spk_
       class(psb_s_bjac_prec_type), intent(in) :: prec
@@ -72,7 +76,7 @@ module psb_s_bjacprec
     end subroutine psb_s_bjac_dump
   end interface
 
-  interface  
+  interface
     subroutine psb_s_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_s_bjac_prec_type, psb_s_vect_type, psb_spk_
       type(psb_desc_type),intent(in)    :: desc_data
@@ -89,7 +93,7 @@ module psb_s_bjacprec
   interface
     subroutine psb_s_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_s_bjac_prec_type, psb_s_vect_type, psb_spk_
-      
+
       type(psb_desc_type),intent(in)    :: desc_data
       class(psb_s_bjac_prec_type), intent(inout)  :: prec
       real(psb_spk_),intent(in)         :: alpha,beta
@@ -100,7 +104,7 @@ module psb_s_bjacprec
       real(psb_spk_),intent(inout), optional, target :: work(:)
     end subroutine psb_s_bjac_apply
   end interface
-  
+
   interface
     subroutine psb_s_bjac_precinit(prec,info)
       import :: psb_ipk_, psb_desc_type, psb_s_bjac_prec_type, psb_s_vect_type, psb_spk_
@@ -108,7 +112,7 @@ module psb_s_bjacprec
       integer(psb_ipk_), intent(out)                     :: info
     end subroutine psb_s_bjac_precinit
   end interface
-  
+
   interface
     subroutine psb_s_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
       import :: psb_ipk_, psb_desc_type, psb_s_bjac_prec_type, psb_s_vect_type, psb_spk_, &
@@ -123,24 +127,34 @@ module psb_s_bjacprec
       class(psb_i_base_vect_type), intent(in), optional  :: imold
     end subroutine psb_s_bjac_precbld
   end interface
-  
+
   interface
     subroutine psb_s_bjac_precseti(prec,what,val,info)
       import :: psb_ipk_, psb_desc_type, psb_s_bjac_prec_type, psb_s_vect_type, psb_spk_
       class(psb_s_bjac_prec_type),intent(inout) :: prec
-      integer(psb_ipk_), intent(in)                      :: what 
-      integer(psb_ipk_), intent(in)                      :: val 
+      integer(psb_ipk_), intent(in)                      :: what
+      integer(psb_ipk_), intent(in)                      :: val
       integer(psb_ipk_), intent(out)                     :: info
     end subroutine psb_s_bjac_precseti
   end interface
-  
+
+  interface
+    subroutine psb_s_bjac_precsetr(prec,what,val,info)
+      import :: psb_ipk_, psb_desc_type, psb_s_bjac_prec_type, psb_s_vect_type, psb_spk_
+      class(psb_s_bjac_prec_type),intent(inout) :: prec
+      integer(psb_ipk_), intent(in)                      :: what
+      real(psb_spk_), intent(in)                      :: val
+      integer(psb_ipk_), intent(out)                     :: info
+    end subroutine psb_s_bjac_precsetr
+  end interface
+
 
 contains
 
   subroutine psb_s_bjac_precdescr(prec,iout,root)
     use psb_penv_mod
     use psb_error_mod
-    implicit none 
+    implicit none
 
     class(psb_s_bjac_prec_type), intent(in) :: prec
     integer(psb_ipk_), intent(in), optional    :: iout
@@ -154,19 +168,19 @@ contains
     call psb_erractionsave(err_act)
 
     info = psb_success_
-   
-    if (present(iout)) then 
+
+    if (present(iout)) then
       iout_ = iout
     else
-      iout_ = 6 
+      iout_ = 6
     end if
-    if (present(root)) then 
+    if (present(root)) then
       root_ = root
     else
       root_ = psb_root_
     end if
 
-    if (.not.allocated(prec%iprcparm)) then 
+    if (.not.allocated(prec%iprcparm)) then
       info = 1124
       call psb_errpush(info,name,a_err="preconditioner")
       goto 9999
@@ -180,26 +194,26 @@ contains
          &  write(iout_,*) trim(prec%desc_prefix()),' ',&
          & 'Block Jacobi with: ',&
          &  fact_names(prec%iprcparm(psb_f_type_))
-    
+
     call psb_erractionrestore(err_act)
     return
 
 9999 call psb_error_handler(err_act)
 
     return
-    
+
   end subroutine psb_s_bjac_precdescr
 
 
   function psb_s_bjac_sizeof(prec) result(val)
     class(psb_s_bjac_prec_type), intent(in) :: prec
     integer(psb_epk_) :: val
-    
+
     val = 0
-    if (allocated(prec%dv)) then 
+    if (allocated(prec%dv)) then
       val = val + psb_sizeof_sp * prec%dv%get_nrows()
     endif
-    if (allocated(prec%av)) then 
+    if (allocated(prec%av)) then
       val = val + prec%av(psb_l_pr_)%sizeof()
       val = val + prec%av(psb_u_pr_)%sizeof()
     endif
@@ -210,12 +224,12 @@ contains
 
     class(psb_s_bjac_prec_type), intent(in) :: prec
     integer(psb_epk_) :: val
-    
+
     val = 0
-    if (allocated(prec%dv)) then 
+    if (allocated(prec%dv)) then
       val = val + prec%dv%get_nrows()
     endif
-    if (allocated(prec%av)) then 
+    if (allocated(prec%av)) then
       val = val + prec%av(psb_l_pr_)%get_nzeros()
       val = val + prec%av(psb_u_pr_)%get_nzeros()
     endif
@@ -236,18 +250,18 @@ contains
     call psb_erractionsave(err_act)
 
     info = psb_success_
-    if (allocated(prec%av)) then 
-      do i=1,size(prec%av) 
+    if (allocated(prec%av)) then
+      do i=1,size(prec%av)
         call prec%av(i)%free()
       enddo
       deallocate(prec%av,stat=info)
     end if
 
-    if (allocated(prec%dv)) then 
+    if (allocated(prec%dv)) then
       call prec%dv%free(info)
       if (info == 0) deallocate(prec%dv,stat=info)
     end if
-    if (allocated(prec%iprcparm)) then 
+    if (allocated(prec%iprcparm)) then
       deallocate(prec%iprcparm,stat=info)
     end if
     call psb_erractionrestore(err_act)
@@ -283,19 +297,19 @@ contains
          & allocate(psb_s_bjac_prec_type :: precout, stat=info)
     if (info /= 0) goto 9999
     select type(pout => precout)
-    type is (psb_s_bjac_prec_type) 
+    type is (psb_s_bjac_prec_type)
       call pout%set_ctxt(prec%get_ctxt())
 
-      if (allocated(prec%av)) then 
+      if (allocated(prec%av)) then
         allocate(pout%av(size(prec%av)),stat=info)
-        do i=1,size(prec%av) 
+        do i=1,size(prec%av)
           if (info /= psb_success_) exit
           call prec%av(i)%clone(pout%av(i),info)
         enddo
         if (info /= psb_success_) goto 9999
       end if
 
-      if (allocated(prec%dv)) then 
+      if (allocated(prec%dv)) then
         allocate(pout%dv,stat=info)
         if (info == 0) call prec%dv%clone(pout%dv,info)
       end if
@@ -316,7 +330,7 @@ contains
   subroutine psb_s_bjac_allocate_wrk(prec,info,vmold,desc)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_s_bjac_prec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -326,11 +340,11 @@ contains
     ! Local variables
     integer(psb_ipk_) :: err_act, i
     character(len=20)   :: name
-    
+
     info=psb_success_
     name = 'psb_s_allocate_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
     if (allocated(prec%wrk)) then
       if (size(prec%wrk)<2) then
@@ -343,11 +357,11 @@ contains
     if (info /= 0) then
       info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate");   goto 9999
     end if
-    
+
     if (.not.allocated(prec%wrk)) then
-      if (.not.present(desc)) then 
+      if (.not.present(desc)) then
         info = psb_err_internal_error_; call psb_errpush(info,name,a_err="no desc?");   goto 9999
-      end if      
+      end if
       allocate(prec%wrk(2),stat=info)
       do i=1, 2
         if (info == 0)   call psb_geall(prec%wrk(i),desc,info)
@@ -357,19 +371,19 @@ contains
     if (info /= 0) then
       info = psb_err_internal_error_; call psb_errpush(info,name,a_err="allocate");   goto 9999
     end if
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_s_bjac_allocate_wrk
 
   subroutine psb_s_bjac_free_wrk(prec,info)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_s_bjac_prec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -378,14 +392,14 @@ contains
     integer(psb_ipk_) :: err_act
     integer(psb_ipk_) :: i
     character(len=20) :: name
-    
+
     info=psb_success_
     name = 'psb_s_allocate_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
 
-    info = psb_success_ 
+    info = psb_success_
     if (allocated(prec%wrk)) then
       do i=1,size(prec%wrk)
         if (info == 0) call prec%wrk(i)%free(info)
@@ -395,29 +409,29 @@ contains
     if (info /= 0) then
       info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate");   goto 9999
     end if
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_s_bjac_free_wrk
 
   function psb_s_bjac_is_allocated_wrk(prec) result(res)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_s_bjac_prec_type), intent(in) :: prec
     logical :: res
 
-    ! In the base version we can say yes, because 
+    ! In the base version we can say yes, because
     ! there is nothing to allocate
 
     res = allocated(prec%wrk)
-    
+
   end function psb_s_bjac_is_allocated_wrk
 
-  
+
 end module psb_s_bjacprec
diff --git a/prec/psb_s_invk_fact_mod.f90 b/prec/psb_s_invk_fact_mod.f90
new file mode 100644
index 00000000..6b0d3553
--- /dev/null
+++ b/prec/psb_s_invk_fact_mod.f90
@@ -0,0 +1,165 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG4PSBLAS, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! File: psb_s_invk_fact_mod.f90
+!
+! Module: psb_s_invk_fact_mod
+!
+!  This module defines some interfaces used internally by the implementation of
+!  psb_s_invk_solver, but not visible to the end user.
+!
+!
+module psb_s_invk_fact_mod
+
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_invk_fact
+    subroutine psb_s_invk_bld(a,fill1, fill2,lmat,d,umat,desc,info,blck)
+      ! import
+      import psb_sspmat_type, psb_ipk_, psb_spk_, psb_desc_type
+      ! Arguments
+      type(psb_sspmat_type), intent(in), target   :: a
+      integer(psb_ipk_), intent(in)               :: fill1, fill2
+      type(psb_sspmat_type), intent(inout)        :: lmat, umat
+      real(psb_spk_), allocatable                 :: d(:)
+      Type(psb_desc_type), Intent(inout)          :: desc
+      integer(psb_ipk_), intent(out)              :: info
+      type(psb_sspmat_type), intent(in), optional :: blck
+    end subroutine
+  end interface
+
+  ! Inner workings
+  interface psb_sparse_invk
+    subroutine psb_ssparse_invk(n,a,z,fill_in,info,inlevs)
+      ! Import
+      import psb_ipk_, psb_sspmat_type
+      ! Arguments
+      integer(psb_ipk_), intent(in)           :: n
+      type(psb_sspmat_type), intent(in)       :: a
+      type(psb_sspmat_type), intent(inout)    :: z
+      integer(psb_ipk_), intent(in)           :: fill_in
+      integer(psb_ipk_), intent(out)          :: info
+      integer(psb_ipk_), intent(in), optional :: inlevs(:)
+    end subroutine psb_ssparse_invk
+  end interface
+
+  interface psb_invk_inv
+    subroutine psb_sinvk_inv(fill_in,i,row,rowlevs,heap,uia1,uia2,uaspk,uplevs,&
+         & nidx,idxs,info)
+
+      use psb_base_mod, only : psb_sspmat_type, psb_spk_, psb_i_heap, psb_ipk_
+      implicit none
+
+      ! Arguments
+      type(psb_i_heap), intent(inout)               :: heap
+      integer(psb_ipk_), intent(in)                 :: i, fill_in
+      integer(psb_ipk_), intent(inout)              :: nidx,info
+      integer(psb_ipk_), intent(inout)              :: rowlevs(:)
+      integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+      integer(psb_ipk_), intent(in)                 :: uia1(:),uia2(:),uplevs(:)
+      real(psb_spk_), intent(in)                    :: uaspk(:)
+      real(psb_spk_), intent(inout)                 :: row(:)
+
+
+    end subroutine psb_sinvk_inv
+  end interface
+
+  interface  psb_invk_copyin
+    subroutine psb_s_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,&
+         & ktrw,trw,info,sign,inlevs)
+      ! Import
+      use psb_base_mod, only : psb_s_csr_sparse_mat, psb_s_coo_sparse_mat,&
+           & psb_spk_, psb_i_heap, psb_ipk_
+      implicit none
+      ! Arguments
+      type(psb_s_csr_sparse_mat), intent(in)    :: a
+      type(psb_s_coo_sparse_mat), intent(inout) :: trw
+      integer(psb_ipk_), intent(in)             :: i,m,jmin,jmax
+      integer(psb_ipk_), intent(inout)          :: ktrw,info
+      integer(psb_ipk_), intent(inout)          :: rowlevs(:)
+      real(psb_spk_), intent(inout)             :: row(:)
+      type(psb_i_heap), intent(inout)           :: heap
+      real(psb_spk_), optional, intent(in)      :: sign
+      integer(psb_ipk_), intent(in), optional   :: inlevs(:)
+    end subroutine psb_s_invk_copyin
+  end interface
+
+  interface psb_invk_copyout
+    subroutine psb_s_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,&
+         &  l2,uia1,uia2,uaspk,info)
+      use psb_base_mod, only : psb_sspmat_type, psb_spk_, psb_i_heap, psb_ipk_
+      implicit none
+      ! Arguments
+      integer(psb_ipk_), intent(in)                 :: fill_in, i, m, nidx
+      integer(psb_ipk_), intent(inout)              :: l2, info
+      integer(psb_ipk_), intent(inout)              :: rowlevs(:), idxs(:)
+      integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:)
+      real(psb_spk_), allocatable, intent(inout)    :: uaspk(:)
+      real(psb_spk_), intent(inout)                 :: row(:)
+    end subroutine psb_s_invk_copyout
+  end interface
+
+end module
diff --git a/prec/psb_s_invt_fact_mod.f90 b/prec/psb_s_invt_fact_mod.f90
new file mode 100644
index 00000000..2c9ce38c
--- /dev/null
+++ b/prec/psb_s_invt_fact_mod.f90
@@ -0,0 +1,168 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG4PSBLAS, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! File: psb_s_invt_fact_mod.f90
+!
+! Module: psb_s_invt_fact_mod
+!
+!  This module defines some interfaces used internally by the implementation of
+!  psb_s_invt_solver, but not visible to the end user.
+!
+!
+module psb_s_invt_fact_mod
+
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_invt_fact
+      subroutine psb_s_invt_bld(a,fillin,invfill,thresh,invthresh,&
+         & lmat,d,umat,desc,info,blck)
+         ! Import
+         import psb_sspmat_type, psb_spk_, psb_ipk_, psb_desc_type
+         ! Arguments
+         type(psb_sspmat_type), intent(in), target   :: a
+         integer(psb_ipk_), intent(in)               :: fillin,invfill
+         real(psb_spk_), intent(in)                :: thresh
+         real(psb_spk_), intent(in)                  :: invthresh
+         type(psb_sspmat_type), intent(inout)        :: lmat, umat
+         real(psb_spk_), allocatable                 :: d(:)
+         Type(psb_desc_type), Intent(inout)          :: desc
+         integer(psb_ipk_), intent(out)              :: info
+         type(psb_sspmat_type), intent(in), optional :: blck
+      end subroutine psb_s_invt_bld
+  end interface
+
+  ! Interfaces for the inner workings
+
+  interface
+      subroutine psb_ssparse_invt(n,a,z,nzrmax,sp_thresh,info)
+         ! Import
+         import psb_sspmat_type, psb_spk_, psb_ipk_
+         ! Arguments
+         integer(psb_ipk_), intent(in)        :: n
+         type(psb_sspmat_type), intent(in)    :: a
+         type(psb_sspmat_type), intent(inout) :: z
+         integer(psb_ipk_), intent(in)        :: nzrmax
+         real(psb_spk_), intent(in)           :: sp_thresh
+         integer(psb_ipk_), intent(out)       :: info
+      end subroutine psb_ssparse_invt
+  end interface
+
+  interface
+     subroutine psb_s_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,&
+          & irwt,ktrw,trw,info,sign)
+       ! Import
+       import psb_s_csr_sparse_mat, psb_s_coo_sparse_mat, psb_spk_, &
+         & psb_ipk_, psb_i_heap
+       ! Arguments
+       type(psb_s_csr_sparse_mat), intent(in)    :: a
+       type(psb_s_coo_sparse_mat), intent(inout) :: trw
+       integer(psb_ipk_), intent(in)             :: i, m,jmin,jmax,jd
+       integer(psb_ipk_), intent(inout)          :: ktrw,nlw,nup,jmaxup,info
+       integer(psb_ipk_), intent(inout)          :: irwt(:)
+       real(psb_spk_), intent(inout)          :: nrmi
+       real(psb_spk_), intent(inout)            :: row(:)
+       type(psb_i_heap), intent(inout)           :: heap
+       real(psb_spk_), intent(in), optional    :: sign
+    end subroutine psb_s_invt_copyin
+  end interface
+
+  interface
+     subroutine psb_s_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, &
+          & nidx,idxs,l2,ja,irp,val,info)
+      ! Import
+      import psb_spk_, psb_ipk_
+      ! Arguments
+      integer(psb_ipk_), intent(in)                 :: fill_in,i,m,nidx,nlw,nup,jmaxup
+      integer(psb_ipk_), intent(in)                 :: idxs(:)
+      integer(psb_ipk_), intent(inout)              :: l2, info
+      integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:)
+      real(psb_spk_), intent(in)                    :: thres,nrmi
+      real(psb_spk_),allocatable, intent(inout)     :: val(:)
+      real(psb_spk_), intent(inout)                 :: row(:)
+     end subroutine psb_s_invt_copyout
+  end interface
+
+  interface psb_invt_inv
+    subroutine psb_s_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val, &
+      & nidx,idxs,info)
+      ! import
+      import psb_spk_, psb_i_heap, psb_ipk_
+      ! Arguments
+      type(psb_i_heap), intent(inout)     :: heap
+      integer(psb_ipk_), intent(in)       :: i
+      integer(psb_ipk_), intent(inout)    :: nidx,info
+      integer(psb_ipk_), intent(inout)    :: irwt(:)
+      real(psb_spk_), intent(in)          :: thres,nrmi
+      integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+      integer(psb_ipk_), intent(in)       :: ja(:),irp(:)
+      real(psb_spk_), intent(in)          :: val(:)
+      real(psb_spk_), intent(inout)       :: row(:)
+    end subroutine
+  end interface
+
+contains
+
+end module psb_s_invt_fact_mod
diff --git a/prec/psb_s_prec_type.f90 b/prec/psb_s_prec_type.f90
index b6c43c8a..764e9109 100644
--- a/prec/psb_s_prec_type.f90
+++ b/prec/psb_s_prec_type.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,8 +27,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !!	Module to   define PREC_DATA,           !!
 !!      structure for preconditioning.          !!
@@ -54,6 +54,10 @@ module psb_s_prec_type
     procedure, pass(prec)               :: build  => psb_sprecbld
     procedure, pass(prec)               :: init   => psb_sprecinit
     procedure, pass(prec)               :: descr  => psb_sfile_prec_descr
+    procedure, pass(prec)               :: cseti  => psb_scprecseti
+    procedure, pass(prec)               :: csetc  => psb_scprecsetc
+    procedure, pass(prec)               :: csetr  => psb_scprecsetr
+    generic, public                     :: set => cseti, csetc, csetr
     procedure, pass(prec)               :: allocate_wrk => psb_s_allocate_wrk
     procedure, pass(prec)               :: free_wrk => psb_s_free_wrk
     procedure, pass(prec)               :: is_allocated_wrk => psb_s_is_allocated_wrk
@@ -102,7 +106,7 @@ module psb_s_prec_type
     module procedure psb_sprec_sizeof
   end interface
 
-  interface 
+  interface
     subroutine psb_s_apply2_vect(prec,x,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_sprec_type, psb_s_vect_type, psb_spk_
       type(psb_desc_type),intent(in)       :: desc_data
@@ -114,8 +118,8 @@ module psb_s_prec_type
       real(psb_spk_),intent(inout), optional, target :: work(:)
     end subroutine psb_s_apply2_vect
   end interface
-  
-  interface 
+
+  interface
     subroutine psb_s_apply1_vect(prec,x,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_sprec_type, psb_s_vect_type, psb_spk_
         type(psb_desc_type),intent(in)       :: desc_data
@@ -126,7 +130,7 @@ module psb_s_prec_type
       real(psb_spk_),intent(inout), optional, target :: work(:)
     end subroutine psb_s_apply1_vect
   end interface
-  
+
   interface
     subroutine psb_s_apply2v(prec,x,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_sprec_type, psb_s_vect_type, psb_spk_
@@ -139,8 +143,8 @@ module psb_s_prec_type
       real(psb_spk_),intent(inout), optional, target :: work(:)
     end subroutine psb_s_apply2v
   end interface
-  
-  interface 
+
+  interface
     subroutine psb_s_apply1v(prec,x,desc_data,info,trans)
       import :: psb_ipk_, psb_desc_type, psb_sprec_type, psb_s_vect_type, psb_spk_
       type(psb_desc_type),intent(in)    :: desc_data
@@ -150,56 +154,89 @@ module psb_s_prec_type
       character(len=1), optional        :: trans
     end subroutine psb_s_apply1v
   end interface
-  
+
+  interface
+  subroutine psb_scprecseti(prec,what,val,info,ilev,ilmax,pos,idx)
+    import :: psb_sprec_type, psb_sspmat_type, psb_desc_type, psb_spk_, &
+      & psb_ipk_
+    class(psb_sprec_type), intent(inout)   :: prec
+    character(len=*), intent(in)             :: what
+    integer(psb_ipk_), intent(in)            :: val
+    integer(psb_ipk_), intent(out)           :: info
+    integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+    character(len=*), optional, intent(in)   :: pos
+  end subroutine psb_scprecseti
+  subroutine psb_scprecsetr(prec,what,val,info,ilev,ilmax,pos,idx)
+    import :: psb_sprec_type, psb_sspmat_type, psb_desc_type, psb_spk_, &
+      & psb_ipk_
+    class(psb_sprec_type), intent(inout)   :: prec
+    character(len=*), intent(in)             :: what
+    real(psb_spk_), intent(in)             :: val
+    integer(psb_ipk_), intent(out)           :: info
+    integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+    character(len=*), optional, intent(in)   :: pos
+  end subroutine psb_scprecsetr
+  subroutine psb_scprecsetc(prec,what,string,info,ilev,ilmax,pos,idx)
+    import :: psb_sprec_type, psb_sspmat_type, psb_desc_type, psb_spk_, &
+      & psb_ipk_
+    class(psb_sprec_type), intent(inout)   :: prec
+    character(len=*), intent(in)             :: what
+    character(len=*), intent(in)             :: string
+    integer(psb_ipk_), intent(out)           :: info
+    integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+    character(len=*), optional, intent(in)   :: pos
+  end subroutine psb_scprecsetc
+end interface
+
 contains
 
   subroutine psb_sfile_prec_descr(prec,iout, root)
     use psb_base_mod
-    implicit none 
+    implicit none
     class(psb_sprec_type), intent(in) :: prec
     integer(psb_ipk_), intent(in), optional    :: iout
     integer(psb_ipk_), intent(in), optional    :: root
     integer(psb_ipk_) :: iout_,info
-    character(len=20) :: name='prec_descr' 
-    
-    if (present(iout)) then 
+    character(len=20) :: name='prec_descr'
+
+    if (present(iout)) then
       iout_ = iout
     else
-      iout_ = 6 
+      iout_ = 6
     end if
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = 1124
       call psb_errpush(info,name,a_err="preconditioner")
     end if
     call prec%prec%descr(iout=iout,root=root)
-    
+
   end subroutine psb_sfile_prec_descr
 
   subroutine psb_s_prec_dump(prec,info,prefix,head)
-    implicit none 
+    implicit none
     type(psb_sprec_type), intent(in) :: prec
     integer(psb_ipk_), intent(out)             :: info
     character(len=*), intent(in), optional :: prefix,head
-    !  len of prefix_ 
+    !  len of prefix_
 
     info = 0
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = -1
       write(psb_err_unit,*) 'Trying to dump a non-built preconditioner'
       return
     end if
-    
+
     call prec%prec%dump(info,prefix,head)
-    
-    
+
+
   end subroutine psb_s_prec_dump
 
   subroutine psb_s_allocate_wrk(prec,info,vmold,desc)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_sprec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -209,33 +246,33 @@ contains
     ! Local variables
     integer(psb_ipk_) :: err_act
     character(len=20)   :: name
-    
+
     info=psb_success_
     name = 'psb_s_allocate_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = -1
       write(psb_err_unit,*) 'Trying to allocate wrk to a non-built preconditioner'
       return
     end if
-    
+
     call prec%prec%allocate_wrk(info,vmold=vmold,desc=desc)
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_s_allocate_wrk
 
   subroutine psb_s_free_wrk(prec,info)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_sprec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -243,47 +280,47 @@ contains
     ! Local variables
     integer(psb_ipk_) :: err_act
     character(len=20)   :: name
-    
+
     info=psb_success_
     name = 'psb_s_free_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = -1
       write(psb_err_unit,*) 'Trying to free a non-built preconditioner'
       return
     end if
-    
+
     call prec%prec%free_wrk(info)
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_s_free_wrk
 
   function psb_s_is_allocated_wrk(prec) result(res)
     implicit none
-    
+
     ! Arguments
     class(psb_sprec_type), intent(in) :: prec
     logical :: res
 
     if (.not.allocated(prec%prec)) then
       res = .false.
-    else 
+    else
       res = prec%prec%is_allocated_wrk()
     end if
-    
+
   end function psb_s_is_allocated_wrk
 
   subroutine psb_s_precfree(p,info)
     use psb_base_mod
-    implicit none 
+    implicit none
     type(psb_sprec_type), intent(inout) :: p
     integer(psb_ipk_), intent(out)                :: info
     integer(psb_ipk_) :: me, err_act,i
@@ -303,14 +340,14 @@ contains
     return
 
 9999 call psb_error_handler(err_act)
-    
+
     return
 
   end subroutine psb_s_precfree
 
   subroutine psb_s_prec_free(prec,info)
     use psb_base_mod
-    implicit none 
+    implicit none
     class(psb_sprec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)         :: info
     integer(psb_ipk_) :: me, err_act,i
@@ -324,7 +361,7 @@ contains
 
     me=-1
 
-    if (allocated(prec%prec)) then 
+    if (allocated(prec%prec)) then
       call prec%prec%free(info)
       if (info /= psb_success_) goto 9999
       deallocate(prec%prec,stat=info)
@@ -343,26 +380,26 @@ contains
     class(psb_sprec_type), intent(in) :: prec
     integer(psb_epk_) :: val
     integer(psb_ipk_) :: i
-    
+
     val = 0
-    if (allocated(prec%prec)) then 
+    if (allocated(prec%prec)) then
       val = val + prec%prec%sizeof()
     end if
-    
+
   end function psb_sprec_sizeof
 
   subroutine psb_s_prec_clone(prec,precout,info)
-    implicit none 
+    implicit none
     class(psb_sprec_type), intent(inout) :: prec
     class(psb_sprec_type), intent(inout) :: precout
     integer(psb_ipk_), intent(out)             :: info
 
     info = psb_success_
     call prec%free(info)
-    if (allocated(prec%prec)) then 
+    if (allocated(prec%prec)) then
       call prec%prec%clone(precout%prec,info)
     end if
-    
+
   end subroutine psb_s_prec_clone
 
 end module psb_s_prec_type
diff --git a/prec/psb_z_ainv_fact_mod.f90 b/prec/psb_z_ainv_fact_mod.f90
new file mode 100644
index 00000000..490fe132
--- /dev/null
+++ b/prec/psb_z_ainv_fact_mod.f90
@@ -0,0 +1,98 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from MLD2P4, original copyright below.
+!
+!
+!                             MLD2P4  version 2.2
+!    MultiLevel Domain Decomposition Parallel Preconditioners Package
+!               based on PSBLAS (Parallel Sparse BLAS version 3.5)
+!
+!    (C) Copyright 2008-2018
+!
+!        Salvatore Filippone
+!        Pasqua D'Ambra
+!        Daniela di Serafino
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the MLD2P4 group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+!
+! File: psb_z_ainv_fact_mod.f90
+!
+! Module: psb_z_ainv_fact_mod
+!
+!  This module defines some interfaces used internally by the implementation of
+!  psb_z_ainv_solver, but not visible to the end user.
+!
+!
+module psb_z_ainv_fact_mod
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_ainv_fact
+    subroutine psb_z_ainv_bld(a,alg,fillin,thresh,wmat,d,zmat,desc,info,blck,iscale)
+      import psb_zspmat_type, psb_dpk_, psb_ipk_, psb_desc_type
+      type(psb_zspmat_type), intent(in), target   :: a
+      integer(psb_ipk_), intent(in)                 :: fillin,alg
+      real(psb_dpk_), intent(in)                  :: thresh
+      type(psb_zspmat_type), intent(inout)        :: wmat, zmat
+      complex(psb_dpk_), allocatable                  :: d(:)
+      Type(psb_desc_type), Intent(in)               :: desc
+      integer(psb_ipk_), intent(out)                :: info
+      type(psb_zspmat_type), intent(in), optional :: blck
+      integer(psb_ipk_), intent(in), optional       :: iscale
+    end subroutine psb_z_ainv_bld
+  end interface
+
+end module psb_z_ainv_fact_mod
diff --git a/prec/psb_z_ainv_tools_mod.f90 b/prec/psb_z_ainv_tools_mod.f90
new file mode 100644
index 00000000..f611c2a7
--- /dev/null
+++ b/prec/psb_z_ainv_tools_mod.f90
@@ -0,0 +1,132 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+!
+!
+!
+!
+module psb_z_ainv_tools_mod
+
+  interface sp_drop
+    subroutine psb_z_sp_drop(idiag,nzrmax,sp_thresh,nz,iz,valz,info)
+      use psb_base_mod, only : psb_dpk_, psb_ipk_
+      implicit none
+      real(psb_dpk_), intent(in)      :: sp_thresh
+      integer(psb_ipk_), intent(in)     :: idiag, nzrmax
+      integer(psb_ipk_), intent(inout)  :: nz
+      integer(psb_ipk_), intent(inout)  :: iz(:)
+      complex(psb_dpk_), intent(inout)    :: valz(:)
+      integer(psb_ipk_), intent(out)    :: info
+    end subroutine psb_z_sp_drop
+  end interface
+
+  interface rwclip
+    subroutine psb_z_rwclip(nz,ia,ja,val,imin,imax,jmin,jmax)
+      use psb_base_mod, only : psb_dpk_, psb_ipk_
+
+      implicit none
+      integer(psb_ipk_), intent(inout)  :: nz
+      integer(psb_ipk_), intent(inout)  :: ia(*), ja(*)
+      complex(psb_dpk_), intent(inout)    :: val(*)
+      integer(psb_ipk_), intent(in)     :: imin,imax,jmin,jmax
+    end subroutine psb_z_rwclip
+  end interface
+
+  interface sparsify
+    subroutine psb_z_sparsify(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,info, &
+         & istart,iheap,ikr)
+      use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_i_heap
+      implicit none
+
+      real(psb_dpk_), intent(in)            :: sp_thresh
+      integer(psb_ipk_), intent(in)           :: idiag, n, nzrmax
+      complex(psb_dpk_), intent(inout)          :: zw(:)
+      integer(psb_ipk_), intent(out)          :: nz
+      integer(psb_ipk_), intent(out)          :: iz(:)
+      complex(psb_dpk_), intent(out)            :: valz(:)
+      integer(psb_ipk_), intent(out)          :: info
+      integer(psb_ipk_), intent(in), optional :: istart
+      type(psb_i_heap), optional              :: iheap
+      integer(psb_ipk_), optional             :: ikr(:)
+    end subroutine psb_z_sparsify
+    subroutine psb_z_sparsify_list(idiag,nzrmax,sp_thresh,n,zw,nz,iz,valz,lhead,listv,ikr,info)
+      use psb_base_mod, only : psb_dpk_, psb_ipk_
+      implicit none
+
+      real(psb_dpk_), intent(in)     :: sp_thresh
+      integer(psb_ipk_), intent(in)    :: idiag, n, nzrmax
+      complex(psb_dpk_), intent(inout)   :: zw(:)
+      integer(psb_ipk_), intent(out)   :: nz
+      integer(psb_ipk_), intent(out)   :: iz(:)
+      complex(psb_dpk_), intent(out)      :: valz(:)
+      integer(psb_ipk_), intent(out)   :: info
+      integer(psb_ipk_), intent(inout) :: lhead, listv(:)
+      integer(psb_ipk_)                :: ikr(:)
+    end subroutine psb_z_sparsify_list
+
+  end interface
+
+end module psb_z_ainv_tools_mod
diff --git a/prec/psb_z_biconjg_mod.F90 b/prec/psb_z_biconjg_mod.F90
new file mode 100644
index 00000000..b40485e7
--- /dev/null
+++ b/prec/psb_z_biconjg_mod.F90
@@ -0,0 +1,367 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG-AINV, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+module psb_z_biconjg_mod
+
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_sparse_biconjg
+    module procedure psb_zsparse_biconjg
+  end interface
+
+  abstract interface
+    subroutine psb_zsparse_biconjg_variant(n,a,p,z,w,nzrmax,sp_thresh,info)
+      use psb_base_mod, only : psb_z_csr_sparse_mat, psb_z_csc_sparse_mat, &
+           &  psb_dpk_, psb_ipk_
+      !
+      implicit none
+      integer(psb_ipk_), intent(in)               :: n
+      type(psb_z_csr_sparse_mat), intent(in)    :: a
+      type(psb_z_csc_sparse_mat), intent(inout) :: z,w
+      integer(psb_ipk_), intent(in)               :: nzrmax
+      real(psb_dpk_), intent(in)                :: sp_thresh
+      complex(psb_dpk_), intent(out)                :: p(:)
+      integer(psb_ipk_), intent(out)              :: info
+    end subroutine psb_zsparse_biconjg_variant
+  end interface
+
+
+  procedure(psb_zsparse_biconjg_variant) :: psb_zsparse_biconjg_llk,&
+       & psb_zsparse_biconjg_s_llk,  psb_zsparse_biconjg_s_ft_llk,&
+       &  psb_zsparse_biconjg_llk_noth, psb_zsparse_biconjg_mlk
+
+#if defined(HAVE_TUMA_SAINV)
+  procedure(psb_zsparse_biconjg_variant)  ::  psb_zsparse_tuma_sainv,&
+       & psb_zsparse_tuma_lainv
+#endif
+
+contains
+
+  subroutine psb_zsparse_biconjg(alg,n,acsr,p,z,w,nzrmax,sp_thresh,info)
+    use psb_base_mod
+    use psb_prec_const_mod
+    integer(psb_ipk_), intent(in)            :: alg,n
+    type(psb_z_csr_sparse_mat), intent(in) :: acsr
+    type(psb_zspmat_type), intent(out)     :: z, w
+    integer(psb_ipk_), intent(in)            :: nzrmax
+    real(psb_dpk_), intent(in)             :: sp_thresh
+    complex(psb_dpk_), intent(out)             :: p(:)
+    integer(psb_ipk_), intent(out)           :: info
+
+    type(psb_z_csc_sparse_mat)             :: zcsc,wcsc
+    integer(psb_ipk_) :: i,j,k,nrm
+    integer(psb_ipk_) :: err_act
+    character(len=20)  :: name='psb_sparse_biconjg'
+    integer(psb_ipk_), parameter :: variant=1
+
+
+    info = psb_success_
+    call psb_erractionsave(err_act)
+    if (psb_errstatus_fatal()) then
+      info = psb_err_internal_error_; goto 9999
+    end if
+
+    if (size(p)<n) then
+      write(psb_err_unit,*) 'Size of P wrong'
+      info = psb_err_internal_error_
+      call psb_errpush(psb_err_internal_error_,name,a_err='Allocate')
+      goto 9999
+    end if
+
+    if (nzrmax <= 0) write(psb_out_unit,'("Out nzrmax = ",i0)') nzrmax
+
+    select case(alg)
+    case (psb_ainv_llk_)
+      call psb_zsparse_biconjg_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_s_llk_)
+      call psb_zsparse_biconjg_s_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_mlk_)
+      call psb_zsparse_biconjg_mlk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_s_ft_llk_)
+      call psb_zsparse_biconjg_s_ft_llk(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+    case (psb_ainv_llk_noth_)
+      call psb_zsparse_biconjg_llk_noth(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+!#if defined(HAVE_TUMA_SAINV)
+!    case (psb_ainv_s_tuma_)
+!      call psb_zsparse_tuma_sainv(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+!    case (psb_ainv_l_tuma_)
+!      call psb_zsparse_tuma_lainv(n,acsr,p,zcsc,wcsc,nzrmax,sp_thresh,info)
+!#endif
+    case default
+      info = psb_err_internal_error_
+      call psb_errpush(info,name,a_err='Invalid alg')
+      goto 9999
+    end select
+
+    if (info /= 0) then
+      info = psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='sparse_orth')
+      goto 9999
+    end if
+
+    call z%mv_from(zcsc)
+    call z%cscnv(info,type='CSR')
+    call w%mv_from(wcsc)
+    call w%transp()
+    call w%cscnv(info,type='CSR')
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+  return
+end subroutine psb_zsparse_biconjg
+
+
+  subroutine psb_z_spmspv(alpha,a,nx,ix,vx,beta,ny,iy,vy, info)
+    !
+    !  y = A x  sparse-sparse mode, A in CSC
+    !
+    use psb_base_mod
+    implicit none
+    integer(psb_ipk_), intent(in)            :: nx, ix(:)
+    complex(psb_dpk_), intent(in)              :: alpha, beta, vx(:)
+    integer(psb_ipk_), intent(inout)         :: ny, iy(:)
+    complex(psb_dpk_), intent(inout)           :: vy(:)
+    type(psb_z_csc_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(out)           :: info
+
+    integer(psb_ipk_) :: i,j,k,m,n, nv, na, iszy
+    integer(psb_ipk_), allocatable        :: iv(:)
+    complex(psb_dpk_), allocatable           :: vv(:)
+
+    info = 0
+! !$    write(0,*) 'd_spmspv ',alpha,beta
+    if (beta == -zone) then
+      do i=1, ny
+        vy(i) = -vy(i)
+      end do
+    else if (beta == zzero) then
+      do i=1, ny
+        vy(i) = zzero
+      end do
+    else if (beta /= zone) then
+      do i=1, ny
+        vy(i) = vy(i) * beta
+      end do
+    end if
+    if (alpha == zzero)  return
+    iszy = min(size(iy),size(vy))
+    m = a%get_nrows()
+    n = a%get_ncols()
+
+    if ((ny > m) .or. (nx > n)) then
+      write(0,*) 'Wrong input spmspv rows: ',m,ny,&
+           & ' cols: ',n,nx
+      info = -4
+      return
+    end if
+
+    allocate(iv(m), vv(m), stat=info)
+    if (info /= 0) then
+      write(0,*) 'Allocation error in spmspv'
+      info = -999
+      return
+    endif
+
+    do i = 1, nx
+      j  = ix(i)
+      ! Access column J of A
+      k  = a%icp(j)
+      na = a%icp(j+1) - a%icp(j)
+      call psb_nspaxpby(nv,iv,vv,&
+           & (alpha*vx(i)), na, a%ia(k:k+na-1), a%val(k:k+na-1),&
+           & zone, ny, iy, vy, info)
+
+      if (info /= 0) then
+        write(0,*) 'Internal error in spmspv from nspaxpby'
+        info = -998
+        return
+      endif
+      if (nv > iszy) then
+        write(0,*) 'Error in spmspv: out of memory for output'
+        info = -997
+        return
+      endif
+      ny = nv
+      iy(1:ny) = iv(1:ny)
+      vy(1:ny) = vv(1:ny)
+    end do
+  end subroutine psb_z_spmspv
+
+
+  subroutine psb_z_spvspm(alpha,a,nx,ix,vx,beta,ny,iy,vy, info)
+    !
+    !  y = x A  sparse-sparse mode, A in CSR
+    !
+    use psb_base_mod
+    implicit none
+    integer(psb_ipk_), intent(in)            :: nx, ix(:)
+    complex(psb_dpk_), intent(in)              :: alpha, beta, vx(:)
+    integer(psb_ipk_), intent(inout)         :: ny, iy(:)
+    complex(psb_dpk_), intent(inout)           :: vy(:)
+    type(psb_z_csr_sparse_mat), intent(in) :: a
+    integer(psb_ipk_), intent(out)           :: info
+
+    integer(psb_ipk_)              :: i,j,k,m,n, nv, na, iszy
+    integer(psb_ipk_), allocatable :: iv(:)
+    complex(psb_dpk_), allocatable   :: vv(:)
+
+    info = 0
+! !$    write(0,*) 'd_spvspm ',alpha,beta
+    if (beta == -zone) then
+      do i=1, ny
+        vy(i) = -vy(i)
+      end do
+    else if (beta == zzero) then
+      do i=1, ny
+        vy(i) = zzero
+      end do
+    else if (beta /= zone) then
+      do i=1, ny
+        vy(i) = vy(i) * beta
+      end do
+    end if
+    if (alpha == zzero)  return
+    iszy = min(size(iy),size(vy))
+    m = a%get_nrows()
+    n = a%get_ncols()
+
+    if ((ny > m) .or. (nx > n)) then
+      write(0,*) 'Wrong input spmspv rows: ',m,ny,&
+           & ' cols: ',n,nx
+      info = -4
+      return
+    end if
+
+    allocate(iv(m), vv(m), stat=info)
+    if (info /= 0) then
+      write(0,*) 'Allocation error in spmspv'
+      info = -999
+      return
+    endif
+
+    do i = 1, nx
+      j  = ix(i)
+      ! Access column J of A
+      k  = a%irp(j)
+      na = a%irp(j+1) - a%irp(j)
+      call psb_nspaxpby(nv,iv,vv,&
+           & (alpha*vx(i)), na, a%ja(k:k+na-1), a%val(k:k+na-1),&
+           & zone, ny, iy, vy, info)
+
+      if (info /= 0) then
+        write(0,*) 'Internal error in spmspv from nspaxpby'
+        info = -998
+        return
+      endif
+      if (nv > iszy) then
+        write(0,*) 'Error in spmspv: out of memory for output'
+        info = -997
+        return
+      endif
+      ny = nv
+      iy(1:ny) = iv(1:ny)
+      vy(1:ny) = vv(1:ny)
+    end do
+  end subroutine psb_z_spvspm
+
+end module psb_z_biconjg_mod
diff --git a/prec/psb_z_bjacprec.f90 b/prec/psb_z_bjacprec.f90
index 97751de8..2c1acf73 100644
--- a/prec/psb_z_bjacprec.f90
+++ b/prec/psb_z_bjacprec.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,15 +27,19 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 module psb_z_bjacprec
 
   use psb_z_base_prec_mod
   use psb_z_ilu_fact_mod
-  
+  use psb_z_ainv_fact_mod
+  use psb_z_invk_fact_mod
+  use psb_z_invt_fact_mod
+
   type, extends(psb_z_base_prec_type)   :: psb_z_bjac_prec_type
     integer(psb_ipk_), allocatable      :: iprcparm(:)
+    real(psb_dpk_), allocatable       :: rprcparm(:)
     type(psb_zspmat_type), allocatable  :: av(:)
     type(psb_z_vect_type), allocatable  :: dv, wrk(:)
   contains
@@ -44,6 +48,7 @@ module psb_z_bjacprec
     procedure, pass(prec) :: precbld   => psb_z_bjac_precbld
     procedure, pass(prec) :: precinit  => psb_z_bjac_precinit
     procedure, pass(prec) :: precseti  => psb_z_bjac_precseti
+    procedure, pass(prec) :: precsetr  => psb_z_bjac_precsetr
     procedure, pass(prec) :: precdescr => psb_z_bjac_precdescr
     procedure, pass(prec) :: dump      => psb_z_bjac_dump
     procedure, pass(prec) :: clone     => psb_z_bjac_clone
@@ -55,15 +60,14 @@ module psb_z_bjacprec
     procedure, pass(prec) :: is_allocated_wrk => psb_z_bjac_is_allocated_wrk
   end type psb_z_bjac_prec_type
 
-  private :: psb_z_bjac_sizeof, psb_z_bjac_precdescr, psb_z_bjac_get_nzeros
- 
-
   character(len=15), parameter, private :: &
-       &  fact_names(0:2)=(/'None          ','ILU(n)        ',&
-       &  'ILU(eps)      '/)
+       &  fact_names(0:6)=(/'None          ','ILU(0)        ',&
+       &  'ILU(n)        ','ILU(eps)      ','AINV(eps)     ',&
+       &  'INVT          ','INVK          '/)
 
-  
-  interface  
+  private :: psb_z_bjac_sizeof, psb_z_bjac_precdescr, psb_z_bjac_get_nzeros
+
+  interface
     subroutine psb_z_bjac_dump(prec,info,prefix,head)
       import :: psb_ipk_, psb_desc_type, psb_z_bjac_prec_type, psb_z_vect_type, psb_dpk_
       class(psb_z_bjac_prec_type), intent(in) :: prec
@@ -72,7 +76,7 @@ module psb_z_bjacprec
     end subroutine psb_z_bjac_dump
   end interface
 
-  interface  
+  interface
     subroutine psb_z_bjac_apply_vect(alpha,prec,x,beta,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_z_bjac_prec_type, psb_z_vect_type, psb_dpk_
       type(psb_desc_type),intent(in)    :: desc_data
@@ -89,7 +93,7 @@ module psb_z_bjacprec
   interface
     subroutine psb_z_bjac_apply(alpha,prec,x,beta,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_z_bjac_prec_type, psb_z_vect_type, psb_dpk_
-      
+
       type(psb_desc_type),intent(in)    :: desc_data
       class(psb_z_bjac_prec_type), intent(inout)  :: prec
       complex(psb_dpk_),intent(in)         :: alpha,beta
@@ -100,7 +104,7 @@ module psb_z_bjacprec
       complex(psb_dpk_),intent(inout), optional, target :: work(:)
     end subroutine psb_z_bjac_apply
   end interface
-  
+
   interface
     subroutine psb_z_bjac_precinit(prec,info)
       import :: psb_ipk_, psb_desc_type, psb_z_bjac_prec_type, psb_z_vect_type, psb_dpk_
@@ -108,7 +112,7 @@ module psb_z_bjacprec
       integer(psb_ipk_), intent(out)                     :: info
     end subroutine psb_z_bjac_precinit
   end interface
-  
+
   interface
     subroutine psb_z_bjac_precbld(a,desc_a,prec,info,amold,vmold,imold)
       import :: psb_ipk_, psb_desc_type, psb_z_bjac_prec_type, psb_z_vect_type, psb_dpk_, &
@@ -123,24 +127,34 @@ module psb_z_bjacprec
       class(psb_i_base_vect_type), intent(in), optional  :: imold
     end subroutine psb_z_bjac_precbld
   end interface
-  
+
   interface
     subroutine psb_z_bjac_precseti(prec,what,val,info)
       import :: psb_ipk_, psb_desc_type, psb_z_bjac_prec_type, psb_z_vect_type, psb_dpk_
       class(psb_z_bjac_prec_type),intent(inout) :: prec
-      integer(psb_ipk_), intent(in)                      :: what 
-      integer(psb_ipk_), intent(in)                      :: val 
+      integer(psb_ipk_), intent(in)                      :: what
+      integer(psb_ipk_), intent(in)                      :: val
       integer(psb_ipk_), intent(out)                     :: info
     end subroutine psb_z_bjac_precseti
   end interface
-  
+
+  interface
+    subroutine psb_z_bjac_precsetr(prec,what,val,info)
+      import :: psb_ipk_, psb_desc_type, psb_z_bjac_prec_type, psb_z_vect_type, psb_dpk_
+      class(psb_z_bjac_prec_type),intent(inout) :: prec
+      integer(psb_ipk_), intent(in)                      :: what
+      real(psb_dpk_), intent(in)                      :: val
+      integer(psb_ipk_), intent(out)                     :: info
+    end subroutine psb_z_bjac_precsetr
+  end interface
+
 
 contains
 
   subroutine psb_z_bjac_precdescr(prec,iout,root)
     use psb_penv_mod
     use psb_error_mod
-    implicit none 
+    implicit none
 
     class(psb_z_bjac_prec_type), intent(in) :: prec
     integer(psb_ipk_), intent(in), optional    :: iout
@@ -154,19 +168,19 @@ contains
     call psb_erractionsave(err_act)
 
     info = psb_success_
-   
-    if (present(iout)) then 
+
+    if (present(iout)) then
       iout_ = iout
     else
-      iout_ = 6 
+      iout_ = 6
     end if
-    if (present(root)) then 
+    if (present(root)) then
       root_ = root
     else
       root_ = psb_root_
     end if
 
-    if (.not.allocated(prec%iprcparm)) then 
+    if (.not.allocated(prec%iprcparm)) then
       info = 1124
       call psb_errpush(info,name,a_err="preconditioner")
       goto 9999
@@ -180,26 +194,26 @@ contains
          &  write(iout_,*) trim(prec%desc_prefix()),' ',&
          & 'Block Jacobi with: ',&
          &  fact_names(prec%iprcparm(psb_f_type_))
-    
+
     call psb_erractionrestore(err_act)
     return
 
 9999 call psb_error_handler(err_act)
 
     return
-    
+
   end subroutine psb_z_bjac_precdescr
 
 
   function psb_z_bjac_sizeof(prec) result(val)
     class(psb_z_bjac_prec_type), intent(in) :: prec
     integer(psb_epk_) :: val
-    
+
     val = 0
-    if (allocated(prec%dv)) then 
+    if (allocated(prec%dv)) then
       val = val + (2*psb_sizeof_dp) * prec%dv%get_nrows()
     endif
-    if (allocated(prec%av)) then 
+    if (allocated(prec%av)) then
       val = val + prec%av(psb_l_pr_)%sizeof()
       val = val + prec%av(psb_u_pr_)%sizeof()
     endif
@@ -210,12 +224,12 @@ contains
 
     class(psb_z_bjac_prec_type), intent(in) :: prec
     integer(psb_epk_) :: val
-    
+
     val = 0
-    if (allocated(prec%dv)) then 
+    if (allocated(prec%dv)) then
       val = val + prec%dv%get_nrows()
     endif
-    if (allocated(prec%av)) then 
+    if (allocated(prec%av)) then
       val = val + prec%av(psb_l_pr_)%get_nzeros()
       val = val + prec%av(psb_u_pr_)%get_nzeros()
     endif
@@ -236,18 +250,18 @@ contains
     call psb_erractionsave(err_act)
 
     info = psb_success_
-    if (allocated(prec%av)) then 
-      do i=1,size(prec%av) 
+    if (allocated(prec%av)) then
+      do i=1,size(prec%av)
         call prec%av(i)%free()
       enddo
       deallocate(prec%av,stat=info)
     end if
 
-    if (allocated(prec%dv)) then 
+    if (allocated(prec%dv)) then
       call prec%dv%free(info)
       if (info == 0) deallocate(prec%dv,stat=info)
     end if
-    if (allocated(prec%iprcparm)) then 
+    if (allocated(prec%iprcparm)) then
       deallocate(prec%iprcparm,stat=info)
     end if
     call psb_erractionrestore(err_act)
@@ -283,19 +297,19 @@ contains
          & allocate(psb_z_bjac_prec_type :: precout, stat=info)
     if (info /= 0) goto 9999
     select type(pout => precout)
-    type is (psb_z_bjac_prec_type) 
+    type is (psb_z_bjac_prec_type)
       call pout%set_ctxt(prec%get_ctxt())
 
-      if (allocated(prec%av)) then 
+      if (allocated(prec%av)) then
         allocate(pout%av(size(prec%av)),stat=info)
-        do i=1,size(prec%av) 
+        do i=1,size(prec%av)
           if (info /= psb_success_) exit
           call prec%av(i)%clone(pout%av(i),info)
         enddo
         if (info /= psb_success_) goto 9999
       end if
 
-      if (allocated(prec%dv)) then 
+      if (allocated(prec%dv)) then
         allocate(pout%dv,stat=info)
         if (info == 0) call prec%dv%clone(pout%dv,info)
       end if
@@ -316,7 +330,7 @@ contains
   subroutine psb_z_bjac_allocate_wrk(prec,info,vmold,desc)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_z_bjac_prec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -326,11 +340,11 @@ contains
     ! Local variables
     integer(psb_ipk_) :: err_act, i
     character(len=20)   :: name
-    
+
     info=psb_success_
     name = 'psb_z_allocate_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
     if (allocated(prec%wrk)) then
       if (size(prec%wrk)<2) then
@@ -343,11 +357,11 @@ contains
     if (info /= 0) then
       info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate");   goto 9999
     end if
-    
+
     if (.not.allocated(prec%wrk)) then
-      if (.not.present(desc)) then 
+      if (.not.present(desc)) then
         info = psb_err_internal_error_; call psb_errpush(info,name,a_err="no desc?");   goto 9999
-      end if      
+      end if
       allocate(prec%wrk(2),stat=info)
       do i=1, 2
         if (info == 0)   call psb_geall(prec%wrk(i),desc,info)
@@ -357,19 +371,19 @@ contains
     if (info /= 0) then
       info = psb_err_internal_error_; call psb_errpush(info,name,a_err="allocate");   goto 9999
     end if
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_z_bjac_allocate_wrk
 
   subroutine psb_z_bjac_free_wrk(prec,info)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_z_bjac_prec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -378,14 +392,14 @@ contains
     integer(psb_ipk_) :: err_act
     integer(psb_ipk_) :: i
     character(len=20) :: name
-    
+
     info=psb_success_
     name = 'psb_z_allocate_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
 
-    info = psb_success_ 
+    info = psb_success_
     if (allocated(prec%wrk)) then
       do i=1,size(prec%wrk)
         if (info == 0) call prec%wrk(i)%free(info)
@@ -395,29 +409,29 @@ contains
     if (info /= 0) then
       info = psb_err_internal_error_; call psb_errpush(info,name,a_err="deallocate");   goto 9999
     end if
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_z_bjac_free_wrk
 
   function psb_z_bjac_is_allocated_wrk(prec) result(res)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_z_bjac_prec_type), intent(in) :: prec
     logical :: res
 
-    ! In the base version we can say yes, because 
+    ! In the base version we can say yes, because
     ! there is nothing to allocate
 
     res = allocated(prec%wrk)
-    
+
   end function psb_z_bjac_is_allocated_wrk
 
-  
+
 end module psb_z_bjacprec
diff --git a/prec/psb_z_invk_fact_mod.f90 b/prec/psb_z_invk_fact_mod.f90
new file mode 100644
index 00000000..0a1e5faf
--- /dev/null
+++ b/prec/psb_z_invk_fact_mod.f90
@@ -0,0 +1,165 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG4PSBLAS, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! File: psb_z_invk_fact_mod.f90
+!
+! Module: psb_z_invk_fact_mod
+!
+!  This module defines some interfaces used internally by the implementation of
+!  psb_z_invk_solver, but not visible to the end user.
+!
+!
+module psb_z_invk_fact_mod
+
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_invk_fact
+    subroutine psb_z_invk_bld(a,fill1, fill2,lmat,d,umat,desc,info,blck)
+      ! import
+      import psb_zspmat_type, psb_ipk_, psb_dpk_, psb_desc_type
+      ! Arguments
+      type(psb_zspmat_type), intent(in), target   :: a
+      integer(psb_ipk_), intent(in)               :: fill1, fill2
+      type(psb_zspmat_type), intent(inout)        :: lmat, umat
+      complex(psb_dpk_), allocatable                 :: d(:)
+      Type(psb_desc_type), Intent(inout)          :: desc
+      integer(psb_ipk_), intent(out)              :: info
+      type(psb_zspmat_type), intent(in), optional :: blck
+    end subroutine
+  end interface
+
+  ! Inner workings
+  interface psb_sparse_invk
+    subroutine psb_zsparse_invk(n,a,z,fill_in,info,inlevs)
+      ! Import
+      import psb_ipk_, psb_zspmat_type
+      ! Arguments
+      integer(psb_ipk_), intent(in)           :: n
+      type(psb_zspmat_type), intent(in)       :: a
+      type(psb_zspmat_type), intent(inout)    :: z
+      integer(psb_ipk_), intent(in)           :: fill_in
+      integer(psb_ipk_), intent(out)          :: info
+      integer(psb_ipk_), intent(in), optional :: inlevs(:)
+    end subroutine psb_zsparse_invk
+  end interface
+
+  interface psb_invk_inv
+    subroutine psb_zinvk_inv(fill_in,i,row,rowlevs,heap,uia1,uia2,uaspk,uplevs,&
+         & nidx,idxs,info)
+
+      use psb_base_mod, only : psb_zspmat_type, psb_dpk_, psb_i_heap, psb_ipk_
+      implicit none
+
+      ! Arguments
+      type(psb_i_heap), intent(inout)               :: heap
+      integer(psb_ipk_), intent(in)                 :: i, fill_in
+      integer(psb_ipk_), intent(inout)              :: nidx,info
+      integer(psb_ipk_), intent(inout)              :: rowlevs(:)
+      integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+      integer(psb_ipk_), intent(in)                 :: uia1(:),uia2(:),uplevs(:)
+      complex(psb_dpk_), intent(in)                    :: uaspk(:)
+      complex(psb_dpk_), intent(inout)                 :: row(:)
+
+
+    end subroutine psb_zinvk_inv
+  end interface
+
+  interface  psb_invk_copyin
+    subroutine psb_z_invk_copyin(i,m,a,jmin,jmax,row,rowlevs,heap,&
+         & ktrw,trw,info,sign,inlevs)
+      ! Import
+      use psb_base_mod, only : psb_z_csr_sparse_mat, psb_z_coo_sparse_mat,&
+           & psb_dpk_, psb_i_heap, psb_ipk_
+      implicit none
+      ! Arguments
+      type(psb_z_csr_sparse_mat), intent(in)    :: a
+      type(psb_z_coo_sparse_mat), intent(inout) :: trw
+      integer(psb_ipk_), intent(in)             :: i,m,jmin,jmax
+      integer(psb_ipk_), intent(inout)          :: ktrw,info
+      integer(psb_ipk_), intent(inout)          :: rowlevs(:)
+      complex(psb_dpk_), intent(inout)             :: row(:)
+      type(psb_i_heap), intent(inout)           :: heap
+      real(psb_dpk_), optional, intent(in)      :: sign
+      integer(psb_ipk_), intent(in), optional   :: inlevs(:)
+    end subroutine psb_z_invk_copyin
+  end interface
+
+  interface psb_invk_copyout
+    subroutine psb_z_invk_copyout(fill_in,i,m,row,rowlevs,nidx,idxs,&
+         &  l2,uia1,uia2,uaspk,info)
+      use psb_base_mod, only : psb_zspmat_type, psb_dpk_, psb_i_heap, psb_ipk_
+      implicit none
+      ! Arguments
+      integer(psb_ipk_), intent(in)                 :: fill_in, i, m, nidx
+      integer(psb_ipk_), intent(inout)              :: l2, info
+      integer(psb_ipk_), intent(inout)              :: rowlevs(:), idxs(:)
+      integer(psb_ipk_), allocatable, intent(inout) :: uia1(:), uia2(:)
+      complex(psb_dpk_), allocatable, intent(inout)    :: uaspk(:)
+      complex(psb_dpk_), intent(inout)                 :: row(:)
+    end subroutine psb_z_invk_copyout
+  end interface
+
+end module
diff --git a/prec/psb_z_invt_fact_mod.f90 b/prec/psb_z_invt_fact_mod.f90
new file mode 100644
index 00000000..1cdf32f4
--- /dev/null
+++ b/prec/psb_z_invt_fact_mod.f90
@@ -0,0 +1,168 @@
+!
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!    Moved here from AMG4PSBLAS, original copyright below.
+!
+!
+!                       AMG-AINV: Approximate Inverse plugin for
+!                             AMG4PSBLAS version 1.0
+!
+!    (C) Copyright 2020
+!
+!                        Salvatore Filippone  University of Rome Tor Vergata
+!
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the AMG4PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! File: psb_z_invt_fact_mod.f90
+!
+! Module: psb_z_invt_fact_mod
+!
+!  This module defines some interfaces used internally by the implementation of
+!  psb_z_invt_solver, but not visible to the end user.
+!
+!
+module psb_z_invt_fact_mod
+
+  use psb_base_mod
+  use psb_prec_const_mod
+
+  interface psb_invt_fact
+      subroutine psb_z_invt_bld(a,fillin,invfill,thresh,invthresh,&
+         & lmat,d,umat,desc,info,blck)
+         ! Import
+         import psb_zspmat_type, psb_dpk_, psb_ipk_, psb_desc_type
+         ! Arguments
+         type(psb_zspmat_type), intent(in), target   :: a
+         integer(psb_ipk_), intent(in)               :: fillin,invfill
+         real(psb_dpk_), intent(in)                :: thresh
+         real(psb_dpk_), intent(in)                  :: invthresh
+         type(psb_zspmat_type), intent(inout)        :: lmat, umat
+         complex(psb_dpk_), allocatable                 :: d(:)
+         Type(psb_desc_type), Intent(inout)          :: desc
+         integer(psb_ipk_), intent(out)              :: info
+         type(psb_zspmat_type), intent(in), optional :: blck
+      end subroutine psb_z_invt_bld
+  end interface
+
+  ! Interfaces for the inner workings
+
+  interface
+      subroutine psb_zsparse_invt(n,a,z,nzrmax,sp_thresh,info)
+         ! Import
+         import psb_zspmat_type, psb_dpk_, psb_ipk_
+         ! Arguments
+         integer(psb_ipk_), intent(in)        :: n
+         type(psb_zspmat_type), intent(in)    :: a
+         type(psb_zspmat_type), intent(inout) :: z
+         integer(psb_ipk_), intent(in)        :: nzrmax
+         real(psb_dpk_), intent(in)           :: sp_thresh
+         integer(psb_ipk_), intent(out)       :: info
+      end subroutine psb_zsparse_invt
+  end interface
+
+  interface
+     subroutine psb_z_invt_copyin(i,m,a,jd,jmin,jmax,nlw,nup,jmaxup,nrmi,row,heap,&
+          & irwt,ktrw,trw,info,sign)
+       ! Import
+       import psb_z_csr_sparse_mat, psb_z_coo_sparse_mat, psb_dpk_, &
+         & psb_ipk_, psb_i_heap
+       ! Arguments
+       type(psb_z_csr_sparse_mat), intent(in)    :: a
+       type(psb_z_coo_sparse_mat), intent(inout) :: trw
+       integer(psb_ipk_), intent(in)             :: i, m,jmin,jmax,jd
+       integer(psb_ipk_), intent(inout)          :: ktrw,nlw,nup,jmaxup,info
+       integer(psb_ipk_), intent(inout)          :: irwt(:)
+       real(psb_dpk_), intent(inout)          :: nrmi
+       complex(psb_dpk_), intent(inout)            :: row(:)
+       type(psb_i_heap), intent(inout)           :: heap
+       real(psb_dpk_), intent(in), optional    :: sign
+    end subroutine psb_z_invt_copyin
+  end interface
+
+  interface
+     subroutine psb_z_invt_copyout(fill_in,thres,i,m,nlw,nup,jmaxup,nrmi,row, &
+          & nidx,idxs,l2,ja,irp,val,info)
+      ! Import
+      import psb_dpk_, psb_ipk_
+      ! Arguments
+      integer(psb_ipk_), intent(in)                 :: fill_in,i,m,nidx,nlw,nup,jmaxup
+      integer(psb_ipk_), intent(in)                 :: idxs(:)
+      integer(psb_ipk_), intent(inout)              :: l2, info
+      integer(psb_ipk_), allocatable, intent(inout) :: ja(:),irp(:)
+      real(psb_dpk_), intent(in)                    :: thres,nrmi
+      complex(psb_dpk_),allocatable, intent(inout)     :: val(:)
+      complex(psb_dpk_), intent(inout)                 :: row(:)
+     end subroutine psb_z_invt_copyout
+  end interface
+
+  interface psb_invt_inv
+    subroutine psb_z_invt_inv(thres,i,nrmi,row,heap,irwt,ja,irp,val, &
+      & nidx,idxs,info)
+      ! import
+      import psb_dpk_, psb_i_heap, psb_ipk_
+      ! Arguments
+      type(psb_i_heap), intent(inout)     :: heap
+      integer(psb_ipk_), intent(in)       :: i
+      integer(psb_ipk_), intent(inout)    :: nidx,info
+      integer(psb_ipk_), intent(inout)    :: irwt(:)
+      real(psb_dpk_), intent(in)          :: thres,nrmi
+      integer(psb_ipk_), allocatable, intent(inout) :: idxs(:)
+      integer(psb_ipk_), intent(in)       :: ja(:),irp(:)
+      complex(psb_dpk_), intent(in)          :: val(:)
+      complex(psb_dpk_), intent(inout)       :: row(:)
+    end subroutine
+  end interface
+
+contains
+
+end module psb_z_invt_fact_mod
diff --git a/prec/psb_z_prec_type.f90 b/prec/psb_z_prec_type.f90
index 7b15c8f2..f40e8004 100644
--- a/prec/psb_z_prec_type.f90
+++ b/prec/psb_z_prec_type.f90
@@ -1,9 +1,9 @@
-!   
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,8 +27,8 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 !!	Module to   define PREC_DATA,           !!
 !!      structure for preconditioning.          !!
@@ -54,6 +54,10 @@ module psb_z_prec_type
     procedure, pass(prec)               :: build  => psb_zprecbld
     procedure, pass(prec)               :: init   => psb_zprecinit
     procedure, pass(prec)               :: descr  => psb_zfile_prec_descr
+    procedure, pass(prec)               :: cseti  => psb_zcprecseti
+    procedure, pass(prec)               :: csetc  => psb_zcprecsetc
+    procedure, pass(prec)               :: csetr  => psb_zcprecsetr
+    generic, public                     :: set => cseti, csetc, csetr
     procedure, pass(prec)               :: allocate_wrk => psb_z_allocate_wrk
     procedure, pass(prec)               :: free_wrk => psb_z_free_wrk
     procedure, pass(prec)               :: is_allocated_wrk => psb_z_is_allocated_wrk
@@ -102,7 +106,7 @@ module psb_z_prec_type
     module procedure psb_zprec_sizeof
   end interface
 
-  interface 
+  interface
     subroutine psb_z_apply2_vect(prec,x,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_zprec_type, psb_z_vect_type, psb_dpk_
       type(psb_desc_type),intent(in)       :: desc_data
@@ -114,8 +118,8 @@ module psb_z_prec_type
       complex(psb_dpk_),intent(inout), optional, target :: work(:)
     end subroutine psb_z_apply2_vect
   end interface
-  
-  interface 
+
+  interface
     subroutine psb_z_apply1_vect(prec,x,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_zprec_type, psb_z_vect_type, psb_dpk_
         type(psb_desc_type),intent(in)       :: desc_data
@@ -126,7 +130,7 @@ module psb_z_prec_type
       complex(psb_dpk_),intent(inout), optional, target :: work(:)
     end subroutine psb_z_apply1_vect
   end interface
-  
+
   interface
     subroutine psb_z_apply2v(prec,x,y,desc_data,info,trans,work)
       import :: psb_ipk_, psb_desc_type, psb_zprec_type, psb_z_vect_type, psb_dpk_
@@ -139,8 +143,8 @@ module psb_z_prec_type
       complex(psb_dpk_),intent(inout), optional, target :: work(:)
     end subroutine psb_z_apply2v
   end interface
-  
-  interface 
+
+  interface
     subroutine psb_z_apply1v(prec,x,desc_data,info,trans)
       import :: psb_ipk_, psb_desc_type, psb_zprec_type, psb_z_vect_type, psb_dpk_
       type(psb_desc_type),intent(in)    :: desc_data
@@ -150,56 +154,89 @@ module psb_z_prec_type
       character(len=1), optional        :: trans
     end subroutine psb_z_apply1v
   end interface
-  
+
+  interface
+  subroutine psb_zcprecseti(prec,what,val,info,ilev,ilmax,pos,idx)
+    import :: psb_zprec_type, psb_zspmat_type, psb_desc_type, psb_dpk_, &
+      & psb_ipk_
+    class(psb_zprec_type), intent(inout)   :: prec
+    character(len=*), intent(in)             :: what
+    integer(psb_ipk_), intent(in)            :: val
+    integer(psb_ipk_), intent(out)           :: info
+    integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+    character(len=*), optional, intent(in)   :: pos
+  end subroutine psb_zcprecseti
+  subroutine psb_zcprecsetr(prec,what,val,info,ilev,ilmax,pos,idx)
+    import :: psb_zprec_type, psb_zspmat_type, psb_desc_type, psb_dpk_, &
+      & psb_ipk_
+    class(psb_zprec_type), intent(inout)   :: prec
+    character(len=*), intent(in)             :: what
+    real(psb_dpk_), intent(in)             :: val
+    integer(psb_ipk_), intent(out)           :: info
+    integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+    character(len=*), optional, intent(in)   :: pos
+  end subroutine psb_zcprecsetr
+  subroutine psb_zcprecsetc(prec,what,string,info,ilev,ilmax,pos,idx)
+    import :: psb_zprec_type, psb_zspmat_type, psb_desc_type, psb_dpk_, &
+      & psb_ipk_
+    class(psb_zprec_type), intent(inout)   :: prec
+    character(len=*), intent(in)             :: what
+    character(len=*), intent(in)             :: string
+    integer(psb_ipk_), intent(out)           :: info
+    integer(psb_ipk_), optional, intent(in)  :: ilev,ilmax,idx
+    character(len=*), optional, intent(in)   :: pos
+  end subroutine psb_zcprecsetc
+end interface
+
 contains
 
   subroutine psb_zfile_prec_descr(prec,iout, root)
     use psb_base_mod
-    implicit none 
+    implicit none
     class(psb_zprec_type), intent(in) :: prec
     integer(psb_ipk_), intent(in), optional    :: iout
     integer(psb_ipk_), intent(in), optional    :: root
     integer(psb_ipk_) :: iout_,info
-    character(len=20) :: name='prec_descr' 
-    
-    if (present(iout)) then 
+    character(len=20) :: name='prec_descr'
+
+    if (present(iout)) then
       iout_ = iout
     else
-      iout_ = 6 
+      iout_ = 6
     end if
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = 1124
       call psb_errpush(info,name,a_err="preconditioner")
     end if
     call prec%prec%descr(iout=iout,root=root)
-    
+
   end subroutine psb_zfile_prec_descr
 
   subroutine psb_z_prec_dump(prec,info,prefix,head)
-    implicit none 
+    implicit none
     type(psb_zprec_type), intent(in) :: prec
     integer(psb_ipk_), intent(out)             :: info
     character(len=*), intent(in), optional :: prefix,head
-    !  len of prefix_ 
+    !  len of prefix_
 
     info = 0
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = -1
       write(psb_err_unit,*) 'Trying to dump a non-built preconditioner'
       return
     end if
-    
+
     call prec%prec%dump(info,prefix,head)
-    
-    
+
+
   end subroutine psb_z_prec_dump
 
   subroutine psb_z_allocate_wrk(prec,info,vmold,desc)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_zprec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -209,33 +246,33 @@ contains
     ! Local variables
     integer(psb_ipk_) :: err_act
     character(len=20)   :: name
-    
+
     info=psb_success_
     name = 'psb_z_allocate_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = -1
       write(psb_err_unit,*) 'Trying to allocate wrk to a non-built preconditioner'
       return
     end if
-    
+
     call prec%prec%allocate_wrk(info,vmold=vmold,desc=desc)
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_z_allocate_wrk
 
   subroutine psb_z_free_wrk(prec,info)
     use psb_base_mod
     implicit none
-    
+
     ! Arguments
     class(psb_zprec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)        :: info
@@ -243,47 +280,47 @@ contains
     ! Local variables
     integer(psb_ipk_) :: err_act
     character(len=20)   :: name
-    
+
     info=psb_success_
     name = 'psb_z_free_wrk'
     call psb_erractionsave(err_act)
-    
+
     if (psb_get_errstatus().ne.0) goto 9999
 
-    if (.not.allocated(prec%prec)) then 
+    if (.not.allocated(prec%prec)) then
       info = -1
       write(psb_err_unit,*) 'Trying to free a non-built preconditioner'
       return
     end if
-    
+
     call prec%prec%free_wrk(info)
-    
+
     call psb_erractionrestore(err_act)
     return
-    
+
 9999 call psb_error_handler(err_act)
     return
-    
+
   end subroutine psb_z_free_wrk
 
   function psb_z_is_allocated_wrk(prec) result(res)
     implicit none
-    
+
     ! Arguments
     class(psb_zprec_type), intent(in) :: prec
     logical :: res
 
     if (.not.allocated(prec%prec)) then
       res = .false.
-    else 
+    else
       res = prec%prec%is_allocated_wrk()
     end if
-    
+
   end function psb_z_is_allocated_wrk
 
   subroutine psb_z_precfree(p,info)
     use psb_base_mod
-    implicit none 
+    implicit none
     type(psb_zprec_type), intent(inout) :: p
     integer(psb_ipk_), intent(out)                :: info
     integer(psb_ipk_) :: me, err_act,i
@@ -303,14 +340,14 @@ contains
     return
 
 9999 call psb_error_handler(err_act)
-    
+
     return
 
   end subroutine psb_z_precfree
 
   subroutine psb_z_prec_free(prec,info)
     use psb_base_mod
-    implicit none 
+    implicit none
     class(psb_zprec_type), intent(inout) :: prec
     integer(psb_ipk_), intent(out)         :: info
     integer(psb_ipk_) :: me, err_act,i
@@ -324,7 +361,7 @@ contains
 
     me=-1
 
-    if (allocated(prec%prec)) then 
+    if (allocated(prec%prec)) then
       call prec%prec%free(info)
       if (info /= psb_success_) goto 9999
       deallocate(prec%prec,stat=info)
@@ -343,26 +380,26 @@ contains
     class(psb_zprec_type), intent(in) :: prec
     integer(psb_epk_) :: val
     integer(psb_ipk_) :: i
-    
+
     val = 0
-    if (allocated(prec%prec)) then 
+    if (allocated(prec%prec)) then
       val = val + prec%prec%sizeof()
     end if
-    
+
   end function psb_zprec_sizeof
 
   subroutine psb_z_prec_clone(prec,precout,info)
-    implicit none 
+    implicit none
     class(psb_zprec_type), intent(inout) :: prec
     class(psb_zprec_type), intent(inout) :: precout
     integer(psb_ipk_), intent(out)             :: info
 
     info = psb_success_
     call prec%free(info)
-    if (allocated(prec%prec)) then 
+    if (allocated(prec%prec)) then
       call prec%prec%clone(precout%prec,info)
     end if
-    
+
   end subroutine psb_z_prec_clone
 
 end module psb_z_prec_type
diff --git a/test/pargen/psb_d_pde2d.f90 b/test/pargen/psb_d_pde2d.f90
index edd5034f..1ccd7f32 100644
--- a/test/pargen/psb_d_pde2d.f90
+++ b/test/pargen/psb_d_pde2d.f90
@@ -1,9 +1,9 @@
-!    
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,23 +27,23 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 ! File: psb_d_pde2d.f90
 !
 ! Program: psb_d_pde2d
 ! This sample program solves a linear system obtained by discretizing a
-! PDE with Dirichlet BCs. 
-! 
+! PDE with Dirichlet BCs.
+!
 !
 ! The PDE is a general second order equation in 2d
 !
-!   a1 dd(u)  a2 dd(u)   b1 d(u)   b2 d(u) 
+!   a1 dd(u)  a2 dd(u)   b1 d(u)   b2 d(u)
 ! -   ------ -  ------   -----  +  ------  + c u = f
-!      dxdx     dydy        dx       dy    
+!      dxdx     dydy        dx       dy
 !
 ! with Dirichlet boundary conditions
-!   u = g 
+!   u = g
 !
 !  on the unit square  0<=x,y<=1.
 !
@@ -63,31 +63,31 @@ module psb_d_pde2d_mod
        &  psb_dspmat_type, psb_d_vect_type, dzero,&
        &  psb_d_base_sparse_mat, psb_d_base_vect_type, psb_i_base_vect_type
 
-  interface 
+  interface
     function d_func_2d(x,y) result(val)
       import :: psb_dpk_
       real(psb_dpk_), intent(in) :: x,y
       real(psb_dpk_) :: val
     end function d_func_2d
-  end interface 
+  end interface
 
   interface psb_gen_pde2d
     module procedure  psb_d_gen_pde2d
   end interface psb_gen_pde2d
-  
+
 contains
-  
+
   function d_null_func_2d(x,y) result(val)
 
     real(psb_dpk_), intent(in) :: x,y
     real(psb_dpk_) :: val
-    
+
     val = dzero
 
   end function d_null_func_2d
 
   !
-  ! functions parametrizing the differential equation 
+  ! functions parametrizing the differential equation
   !
 
   !
@@ -101,48 +101,48 @@ contains
   !
   function b1(x,y)
     use psb_base_mod, only : psb_dpk_, done, dzero
-    implicit none 
+    implicit none
     real(psb_dpk_) :: b1
     real(psb_dpk_), intent(in) :: x,y
     b1=dzero
   end function b1
   function b2(x,y)
     use psb_base_mod, only : psb_dpk_, done, dzero
-    implicit none 
+    implicit none
     real(psb_dpk_) ::  b2
     real(psb_dpk_), intent(in) :: x,y
     b2=dzero
   end function b2
   function c(x,y)
     use psb_base_mod, only : psb_dpk_, done, dzero
-    implicit none 
+    implicit none
     real(psb_dpk_) ::  c
     real(psb_dpk_), intent(in) :: x,y
     c=0.d0
   end function c
   function a1(x,y)
     use psb_base_mod, only : psb_dpk_, done, dzero
-    implicit none 
-    real(psb_dpk_) ::  a1   
+    implicit none
+    real(psb_dpk_) ::  a1
     real(psb_dpk_), intent(in) :: x,y
     a1=done/80
   end function a1
   function a2(x,y)
     use psb_base_mod, only : psb_dpk_, done, dzero
-    implicit none 
+    implicit none
     real(psb_dpk_) ::  a2
     real(psb_dpk_), intent(in) :: x,y
     a2=done/80
   end function a2
   function g(x,y)
     use psb_base_mod, only : psb_dpk_, done, dzero
-    implicit none 
+    implicit none
     real(psb_dpk_) ::  g
     real(psb_dpk_), intent(in) :: x,y
     g = dzero
     if (x == done) then
       g = done
-    else if (x == dzero) then 
+    else if (x == dzero) then
       g = exp(-y**2)
     end if
   end function g
@@ -150,7 +150,7 @@ contains
 
   !
   !  subroutine to allocate and fill in the coefficient matrix and
-  !  the rhs. 
+  !  the rhs.
   !
   subroutine psb_d_gen_pde2d(ctxt,idim,a,bv,xv,desc_a,afmt,info,&
        & f,amold,vmold,imold,partition,nrl,iv)
@@ -158,13 +158,13 @@ contains
     use psb_util_mod
     !
     !   Discretizes the partial differential equation
-    ! 
-    !   a1 dd(u)  a2 dd(u)    b1 d(u)  b2 d(u) 
+    !
+    !   a1 dd(u)  a2 dd(u)    b1 d(u)  b2 d(u)
     ! -   ------ -  ------ +  -----  +  ------  + c u = f
-    !      dxdx     dydy         dx       dy     
+    !      dxdx     dydy         dx       dy
     !
     ! with Dirichlet boundary conditions
-    !   u = g 
+    !   u = g
     !
     !  on the unit square  0<=x,y<=1.
     !
@@ -222,7 +222,7 @@ contains
     call psb_info(ctxt, iam, np)
 
 
-    if (present(f)) then 
+    if (present(f)) then
       f_ => f
     else
       f_ => d_null_func_2d
@@ -242,9 +242,9 @@ contains
     else
       partition_ = 3
     end if
-    
+
     ! initialize array descriptor and sparse matrix storage. provide an
-    ! estimate of the number of non zeroes 
+    ! estimate of the number of non zeroes
 
     m   = (1_psb_lpk_)*idim*idim
     n   = m
@@ -253,8 +253,8 @@ contains
     t0 = psb_wtime()
     select case(partition_)
     case(1)
-      ! A BLOCK partition 
-      if (present(nrl)) then 
+      ! A BLOCK partition
+      if (present(nrl)) then
         nr = nrl
       else
         !
@@ -265,46 +265,46 @@ contains
       end if
 
       nt = nr
-      call psb_sum(ctxt,nt) 
-      if (nt /= m) then 
+      call psb_sum(ctxt,nt)
+      if (nt /= m) then
         write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
         info = -1
         call psb_barrier(ctxt)
         call psb_abort(ctxt)
-        return    
+        return
       end if
 
       !
       ! First example  of use of CDALL: specify for each process a number of
       ! contiguous rows
-      ! 
+      !
       call psb_cdall(ctxt,desc_a,info,nl=nr)
       myidx = desc_a%get_global_indices()
       nlr = size(myidx)
 
     case(2)
       ! A  partition  defined by the user through IV
-      
-      if (present(iv)) then 
+
+      if (present(iv)) then
         if (size(iv) /= m) then
           write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
           info = -1
           call psb_barrier(ctxt)
           call psb_abort(ctxt)
-          return    
+          return
         end if
       else
         write(psb_err_unit,*) iam, 'Initialization error: IV not present'
         info = -1
         call psb_barrier(ctxt)
         call psb_abort(ctxt)
-        return    
+        return
       end if
 
       !
       ! Second example  of use of CDALL: specify for each row the
-      ! process that owns it 
-      ! 
+      ! process that owns it
+      !
       call psb_cdall(ctxt,desc_a,info,vg=iv)
       myidx = desc_a%get_global_indices()
       nlr = size(myidx)
@@ -319,7 +319,7 @@ contains
       npy = npdims(2)
 
       allocate(bndx(0:npx),bndy(0:npy))
-      ! We can reuse idx2ijk for process indices as well. 
+      ! We can reuse idx2ijk for process indices as well.
       call idx2ijk(iamx,iamy,iam,npx,npy,base=0)
       ! Now let's split the 2D square in rectangles
       call dist1Didx(bndx,idim,npx)
@@ -327,7 +327,7 @@ contains
       call dist1Didx(bndy,idim,npy)
       myny = bndy(iamy+1)-bndy(iamy)
 
-      ! How many indices do I own? 
+      ! How many indices do I own?
       nlr = mynx*myny
       allocate(myidx(nlr))
       ! Now, let's generate the list of indices I own
@@ -349,9 +349,8 @@ contains
       !
       ! Third example  of use of CDALL: specify for each process
       ! the set of global indices it owns.
-      ! 
+      !
       call psb_cdall(ctxt,desc_a,info,vl=myidx)
-      
     case default
       write(psb_err_unit,*) iam, 'Initialization error: should not get here'
       info = -1
@@ -360,9 +359,9 @@ contains
       return
     end select
 
-    
+
     if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
-    ! define  rhs from boundary conditions; also build initial guess 
+    ! define  rhs from boundary conditions; also build initial guess
     if (info == psb_success_) call psb_geall(xv,desc_a,info)
     if (info == psb_success_) call psb_geall(bv,desc_a,info)
 
@@ -377,12 +376,12 @@ contains
     end if
 
     ! we build an auxiliary matrix consisting of one row at a
-    ! time; just a small matrix. might be extended to generate 
-    ! a bunch of rows per call. 
-    ! 
+    ! time; just a small matrix. might be extended to generate
+    ! a bunch of rows per call.
+    !
     allocate(val(20*nb),irow(20*nb),&
          &icol(20*nb),stat=info)
-    if (info /= psb_success_ ) then 
+    if (info /= psb_success_ ) then
       info=psb_err_alloc_dealloc_
       call psb_errpush(info,name)
       goto 9999
@@ -395,11 +394,11 @@ contains
     call psb_barrier(ctxt)
     t1 = psb_wtime()
     do ii=1, nlr,nb
-      ib = min(nb,nlr-ii+1) 
+      ib = min(nb,nlr-ii+1)
       icoeff = 1
       do k=1,ib
         i=ii+k-1
-        ! local matrix pointer 
+        ! local matrix pointer
         glob_row=myidx(i)
         ! compute gridpoint coordinates
         call idx2ijk(ix,iy,glob_row,idim,idim)
@@ -409,11 +408,11 @@ contains
 
         zt(k) = f_(x,y)
         ! internal point: build discretization
-        !   
+        !
         !  term depending on   (x-1,y)
         !
         val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2
-        if (ix == 1) then 
+        if (ix == 1) then
           zt(k) = g(dzero,y)*(-val(icoeff)) + zt(k)
         else
           call ijk2idx(icol(icoeff),ix-1,iy,idim,idim)
@@ -422,7 +421,7 @@ contains
         endif
         !  term depending on     (x,y-1)
         val(icoeff)  = -a2(x,y)/sqdeltah-b2(x,y)/deltah2
-        if (iy == 1) then 
+        if (iy == 1) then
           zt(k) = g(x,dzero)*(-val(icoeff))   + zt(k)
         else
           call ijk2idx(icol(icoeff),ix,iy-1,idim,idim)
@@ -434,10 +433,10 @@ contains
         val(icoeff)=(2*done)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y)
         call ijk2idx(icol(icoeff),ix,iy,idim,idim)
         irow(icoeff) = glob_row
-        icoeff       = icoeff+1                  
+        icoeff       = icoeff+1
         !  term depending on     (x,y+1)
         val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2
-        if (iy == idim) then 
+        if (iy == idim) then
           zt(k) = g(x,done)*(-val(icoeff))   + zt(k)
         else
           call ijk2idx(icol(icoeff),ix,iy+1,idim,idim)
@@ -446,7 +445,7 @@ contains
         endif
         !  term depending on     (x+1,y)
         val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2
-        if (ix==idim) then 
+        if (ix==idim) then
           zt(k) = g(done,y)*(-val(icoeff))   + zt(k)
         else
           call ijk2idx(icol(icoeff),ix+1,iy,idim,idim)
@@ -480,8 +479,8 @@ contains
     tcdasb = psb_wtime()-t1
     call psb_barrier(ctxt)
     t1 = psb_wtime()
-    if (info == psb_success_) then 
-      if (present(amold)) then 
+    if (info == psb_success_) then
+      if (present(amold)) then
         call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,mold=amold)
       else
         call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=afmt)
@@ -504,7 +503,7 @@ contains
     end if
     tasb = psb_wtime()-t1
     call psb_barrier(ctxt)
-    ttot = psb_wtime() - t0 
+    ttot = psb_wtime() - t0
 
     call psb_amx(ctxt,talc)
     call psb_amx(ctxt,tgen)
@@ -545,9 +544,9 @@ program psb_d_pde2d
   integer(psb_ipk_) :: idim
   integer(psb_epk_) :: system_size
 
-  ! miscellaneous 
+  ! miscellaneous
   real(psb_dpk_), parameter :: one = done
-  real(psb_dpk_) :: t1, t2, tprec 
+  real(psb_dpk_) :: t1, t2, tprec
 
   ! sparse matrix and preconditioner
   type(psb_dspmat_type) :: a
@@ -565,6 +564,14 @@ program psb_d_pde2d
   integer(psb_epk_) :: amatsize, precsize, descsize, d2size
   real(psb_dpk_)   :: err, eps
 
+  ! Parameters for solvers in Block-Jacobi preconditioner
+  type ainvparms
+    character(len=12) :: alg, orth_alg, ilu_alg, ilut_scale
+    integer(psb_ipk_) :: fill, inv_fill
+    real(psb_dpk_)    :: thresh, inv_thresh
+  end type ainvparms
+  type(ainvparms)     :: parms
+
   ! other variables
   integer(psb_ipk_) :: info, i
   character(len=20) :: name,ch_err
@@ -576,7 +583,7 @@ program psb_d_pde2d
   call psb_init(ctxt)
   call psb_info(ctxt,iam,np)
 
-  if (iam < 0) then 
+  if (iam < 0) then
     ! This should not happen, but just in case
     call psb_exit(ctxt)
     stop
@@ -587,21 +594,21 @@ program psb_d_pde2d
   !
   ! Hello world
   !
-  if (iam == psb_root_) then 
+  if (iam == psb_root_) then
     write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
     write(*,*) 'This is the ',trim(name),' sample program'
   end if
   !
   !  get parameters
   !
-  call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart)
+  call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms)
 
   !
-  !  allocate and fill in the coefficient matrix, rhs and initial guess 
+  !  allocate and fill in the coefficient matrix, rhs and initial guess
   !
   call psb_barrier(ctxt)
   t1 = psb_wtime()
-  call psb_gen_pde2d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart)  
+  call psb_gen_pde2d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart)
   call psb_barrier(ctxt)
   t2 = psb_wtime() - t1
   if(info /= psb_success_) then
@@ -614,9 +621,43 @@ program psb_d_pde2d
   if (iam == psb_root_) write(psb_out_unit,'(" ")')
   !
   !  prepare the preconditioner.
-  !  
+  !
   if(iam == psb_root_) write(psb_out_unit,'("Setting preconditioner to : ",a)')ptype
   call prec%init(ctxt,ptype,info)
+  !
+  ! Set the options for the BJAC preconditioner
+  !
+  if (psb_toupper(ptype) == "BJAC") then
+      call prec%set('sub_solve',       parms%alg,   info)
+      select case (psb_toupper(parms%alg))
+      case ("ILU")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('ilu_alg',         parms%ilu_alg,    info)
+      case ("ILUT")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('sub_iluthrs',     parms%thresh,     info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+      case ("AINV")
+        call prec%set('inv_thresh',      parms%inv_thresh, info)
+        call prec%set('inv_fillin',      parms%inv_fill,   info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+        call prec%set('ainv_alg',        parms%orth_alg,   info)
+      case ("INVK")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('inv_fillin',      parms%inv_fill,   info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+      case ("INVT")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('inv_fillin',      parms%inv_fill,   info)
+        call prec%set('sub_iluthrs',     parms%thresh,     info)
+        call prec%set('inv_thresh',      parms%inv_thresh, info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+      case default
+        ! Do nothing, use default setting in the init routine
+      end select
+  else
+    ! nothing to set for NONE or DIAG preconditioner
+  end if
 
   call psb_barrier(ctxt)
   t1 = psb_wtime()
@@ -636,14 +677,14 @@ program psb_d_pde2d
   if (iam == psb_root_) write(psb_out_unit,'(" ")')
   call prec%descr()
   !
-  ! iterative method parameters 
+  ! iterative method parameters
   !
   if(iam == psb_root_) write(psb_out_unit,'("Calling iterative method ",a)')kmethd
   call psb_barrier(ctxt)
-  t1 = psb_wtime()  
+  t1 = psb_wtime()
   eps   = 1.d-6
-  call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& 
-       & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst)     
+  call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,&
+       & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst)
 
   if(info /= psb_success_) then
     info=psb_err_from_subroutine_
@@ -673,14 +714,14 @@ program psb_d_pde2d
     write(psb_out_unit,'("Convergence indicator on exit : ",es12.5)')err
     write(psb_out_unit,'("Info  on exit                 : ",i12)')info
     write(psb_out_unit,'("Total memory occupation for      A: ",i12)')amatsize
-    write(psb_out_unit,'("Total memory occupation for   PREC: ",i12)')precsize    
+    write(psb_out_unit,'("Total memory occupation for   PREC: ",i12)')precsize
     write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
     write(psb_out_unit,'("Storage format for               A: ",a)') a%get_fmt()
     write(psb_out_unit,'("Storage format for          DESC_A: ",a)') desc_a%get_fmt()
   end if
 
 
-  !  
+  !
   !  cleanup storage and exit
   !
   call psb_gefree(bv,desc_a,info)
@@ -706,13 +747,14 @@ contains
   !
   ! get iteration parameters from standard input
   !
-  subroutine  get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart)
+  subroutine  get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms)
     type(psb_ctxt_type) :: ctxt
     character(len=*) :: kmethd, ptype, afmt
     integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart
     integer(psb_ipk_) :: np, iam
     integer(psb_ipk_) :: ip, inp_unit
     character(len=1024)   :: filename
+    type(ainvparms)   :: parms
 
     call psb_info(ctxt, iam, np)
 
@@ -741,12 +783,12 @@ contains
         if (ip >= 4) then
           read(inp_unit,*) ipart
         else
-          ipart = 3        
+          ipart = 3
         endif
         if (ip >= 5) then
           read(inp_unit,*) istopc
         else
-          istopc=1        
+          istopc=1
         endif
         if (ip >= 6) then
           read(inp_unit,*) itmax
@@ -763,8 +805,27 @@ contains
         else
           irst=1
         endif
+        if (ip >= 9) then
+          read(inp_unit,*) parms%alg
+          read(inp_unit,*) parms%ilu_alg
+          read(inp_unit,*) parms%ilut_scale
+          read(inp_unit,*) parms%fill
+          read(inp_unit,*) parms%inv_fill
+          read(inp_unit,*) parms%thresh
+          read(inp_unit,*) parms%inv_thresh
+          read(inp_unit,*) parms%orth_alg
+        else
+          parms%alg =  'ILU' ! Block Solver ILU,ILUT,INVK,AINVT,AORTH
+          parms%ilu_alg = 'NONE' ! If ILU : MILU or NONE othewise ignored
+          parms%ilut_scale = 'NONE' ! If ILUT: NONE, MAXVAL, DIAG, ARWSUM, ACLSUM, ARCSUM
+          parms%fill = 0     ! Level of fill for forward factorization
+          parms%inv_fill = 1 ! Level of fill for inverse factorization (only INVK)
+          parms%thresh = 1E-1_psb_dpk_ ! Threshold for forward factorization
+          parms%inv_thresh = 1E-1_psb_dpk_ ! Threshold for inverse factorization
+          parms%orth_alg = 'LLK'  ! What orthogonalization algorithm?
+        endif
 
-        write(psb_out_unit,'("Solving matrix       : ell1")')      
+        write(psb_out_unit,'("Solving matrix       : ell1")')
         write(psb_out_unit,'("Grid dimensions      : ",i5," x ",i5)')idim,idim
         write(psb_out_unit,'("Number of processors : ",i0)') np
         select case(ipart)
@@ -777,11 +838,40 @@ contains
           write(psb_out_unit,'("Unknown data distrbution, defaulting to 2D")')
         end select
         write(psb_out_unit,'("Preconditioner       : ",a)') ptype
+        if( psb_toupper(ptype) == "BJAC" ) then
+          write(psb_out_unit,'("Block subsolver      : ",a)') parms%alg
+          select case (psb_toupper(parms%alg))
+            case ('ILU')
+              write(psb_out_unit,'("Fill in       : ",i0)') parms%fill
+              write(psb_out_unit,'("MILU          : ",a)') parms%ilu_alg
+            case ('ILUT')
+              write(psb_out_unit,'("Fill in       : ",i0)') parms%fill
+              write(psb_out_unit,'("Threshold     : ",es12.5)') parms%thresh
+              write(psb_out_unit,'("Scaling       : ",a)') parms%ilut_scale
+            case ('INVK')
+              write(psb_out_unit,'("Fill in            : ",i0)') parms%fill
+              write(psb_out_unit,'("Invese Fill in     : ",i0)') parms%inv_fill
+              write(psb_out_unit,'("Scaling            : ",a)') parms%ilut_scale
+            case ('INVT')
+              write(psb_out_unit,'("Fill in            : ",i0)') parms%fill
+              write(psb_out_unit,'("Threshold          : ",es12.5)') parms%thresh
+              write(psb_out_unit,'("Invese Fill in     : ",i0)') parms%inv_fill
+              write(psb_out_unit,'("Inverse Threshold  : ",es12.5)') parms%inv_thresh
+              write(psb_out_unit,'("Scaling            : ",a)') parms%ilut_scale
+            case ('AINV','AORTH')
+              write(psb_out_unit,'("Inverse Threshold  : ",es12.5)') parms%inv_thresh
+              write(psb_out_unit,'("Invese Fill in     : ",i0)') parms%inv_fill
+              write(psb_out_unit,'("Orthogonalization  : ",a)') parms%orth_alg
+              write(psb_out_unit,'("Scaling            : ",a)') parms%ilut_scale
+            case default
+              write(psb_out_unit,'("Unknown diagonal solver")')
+            end select
+        end if
         write(psb_out_unit,'("Iterative method     : ",a)') kmethd
         write(psb_out_unit,'(" ")')
       else
         ! wrong number of parameter, print an error message and exit
-        call pr_usage(izero)      
+        call pr_usage(izero)
         call psb_abort(ctxt)
         stop 1
       endif
@@ -800,20 +890,27 @@ contains
     call psb_bcast(ctxt,itmax)
     call psb_bcast(ctxt,itrace)
     call psb_bcast(ctxt,irst)
-
+    call psb_bcast(ctxt,parms%alg)
+    call psb_bcast(ctxt,parms%fill)
+    call psb_bcast(ctxt,parms%inv_fill)
+    call psb_bcast(ctxt,parms%thresh)
+    call psb_bcast(ctxt,parms%inv_thresh)
+    call psb_bcast(ctxt,parms%orth_alg)
+    call psb_bcast(ctxt,parms%ilut_scale)
+    
     return
 
   end subroutine get_parms
   !
-  !  print an error message 
-  !  
+  !  print an error message
+  !
   subroutine pr_usage(iout)
     integer(psb_ipk_) :: iout
     write(iout,*)'incorrect parameter(s) found'
     write(iout,*)' usage:  pde2d90 methd prec dim &
-         &[ipart istop itmax itrace]'  
+         &[ipart istop itmax itrace]'
     write(iout,*)' where:'
-    write(iout,*)'     methd:    cgstab cgs rgmres bicgstabl' 
+    write(iout,*)'     methd:    cgstab cgs rgmres bicgstabl'
     write(iout,*)'     prec :    bjac diag none'
     write(iout,*)'     dim       number of points along each axis'
     write(iout,*)'               the size of the resulting linear '
@@ -821,11 +918,9 @@ contains
     write(iout,*)'     ipart     data partition  1  3      '
     write(iout,*)'     istop     stopping criterion  1, 2  '
     write(iout,*)'     itmax     maximum number of iterations [500] '
-    write(iout,*)'     itrace    <=0  (no tracing, default) or '  
+    write(iout,*)'     itrace    <=0  (no tracing, default) or '
     write(iout,*)'               >= 1 do tracing every itrace'
-    write(iout,*)'               iterations ' 
+    write(iout,*)'               iterations '
   end subroutine pr_usage
 
 end program psb_d_pde2d
-
-
diff --git a/test/pargen/psb_d_pde3d.f90 b/test/pargen/psb_d_pde3d.f90
index 8cc14086..4630d946 100644
--- a/test/pargen/psb_d_pde3d.f90
+++ b/test/pargen/psb_d_pde3d.f90
@@ -1,9 +1,9 @@
-!    
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,23 +27,23 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 ! File: psb_d_pde3d.f90
 !
 ! Program: psb_d_pde3d
 ! This sample program solves a linear system obtained by discretizing a
-! PDE with Dirichlet BCs. 
-! 
+! PDE with Dirichlet BCs.
+!
 !
 ! The PDE is a general second order equation in 3d
 !
-!   a1 dd(u)  a2 dd(u)    a3 dd(u)    b1 d(u)   b2 d(u)  b3 d(u)  
+!   a1 dd(u)  a2 dd(u)    a3 dd(u)    b1 d(u)   b2 d(u)  b3 d(u)
 ! -   ------ -  ------ -  ------ +  -----  +  ------  +  ------ + c u = f
-!      dxdx     dydy       dzdz        dx       dy         dz   
+!      dxdx     dydy       dzdz        dx       dy         dz
 !
 ! with Dirichlet boundary conditions
-!   u = g 
+!   u = g
 !
 !  on the unit cube  0<=x,y,z<=1.
 !
@@ -60,37 +60,37 @@
 !
 module psb_d_pde3d_mod
 
- 
+
   use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,&
        &  psb_dspmat_type, psb_d_vect_type, dzero,&
        &  psb_d_base_sparse_mat, psb_d_base_vect_type, &
        &  psb_i_base_vect_type, psb_l_base_vect_type
 
-  interface 
+  interface
     function d_func_3d(x,y,z) result(val)
       import :: psb_dpk_
       real(psb_dpk_), intent(in) :: x,y,z
       real(psb_dpk_) :: val
     end function d_func_3d
-  end interface 
+  end interface
 
   interface psb_gen_pde3d
     module procedure  psb_d_gen_pde3d
   end interface psb_gen_pde3d
-  
+
 contains
 
   function d_null_func_3d(x,y,z) result(val)
 
     real(psb_dpk_), intent(in) :: x,y,z
     real(psb_dpk_) :: val
-    
+
     val = dzero
 
   end function d_null_func_3d
   !
-  ! functions parametrizing the differential equation 
-  !  
+  ! functions parametrizing the differential equation
+  !
 
   !
   ! Note: b1, b2 and b3 are the coefficients of the first
@@ -103,70 +103,70 @@ contains
   !
   function b1(x,y,z)
     use psb_base_mod, only : psb_dpk_, done, dzero
-    implicit none 
+    implicit none
     real(psb_dpk_) :: b1
     real(psb_dpk_), intent(in) :: x,y,z
     b1=dzero
   end function b1
   function b2(x,y,z)
     use psb_base_mod, only : psb_dpk_, done, dzero
-    implicit none 
+    implicit none
     real(psb_dpk_) ::  b2
     real(psb_dpk_), intent(in) :: x,y,z
     b2=dzero
   end function b2
   function b3(x,y,z)
     use psb_base_mod, only : psb_dpk_, done, dzero
-    implicit none 
+    implicit none
     real(psb_dpk_) ::  b3
-    real(psb_dpk_), intent(in) :: x,y,z      
+    real(psb_dpk_), intent(in) :: x,y,z
     b3=dzero
   end function b3
   function c(x,y,z)
     use psb_base_mod, only : psb_dpk_, done, dzero
-    implicit none 
+    implicit none
     real(psb_dpk_) ::  c
-    real(psb_dpk_), intent(in) :: x,y,z      
+    real(psb_dpk_), intent(in) :: x,y,z
     c=dzero
   end function c
   function a1(x,y,z)
     use psb_base_mod, only : psb_dpk_, done, dzero
-    implicit none 
-    real(psb_dpk_) ::  a1   
+    implicit none
+    real(psb_dpk_) ::  a1
     real(psb_dpk_), intent(in) :: x,y,z
     a1=done/80
   end function a1
   function a2(x,y,z)
     use psb_base_mod, only : psb_dpk_, done, dzero
-    implicit none 
+    implicit none
     real(psb_dpk_) ::  a2
     real(psb_dpk_), intent(in) :: x,y,z
     a2=done/80
   end function a2
   function a3(x,y,z)
     use psb_base_mod, only : psb_dpk_, done, dzero
-    implicit none 
+    implicit none
     real(psb_dpk_) ::  a3
     real(psb_dpk_), intent(in) :: x,y,z
     a3=done/80
   end function a3
   function g(x,y,z)
     use psb_base_mod, only : psb_dpk_, done, dzero
-    implicit none 
+    implicit none
     real(psb_dpk_) ::  g
     real(psb_dpk_), intent(in) :: x,y,z
     g = dzero
     if (x == done) then
       g = done
-    else if (x == dzero) then 
+    else if (x == dzero) then
       g = exp(y**2-z**2)
     end if
   end function g
 
-  
+
   !
   !  subroutine to allocate and fill in the coefficient matrix and
-  !  the rhs. 
+  !  the rhs.
   !
   subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,&
        & f,amold,vmold,imold,partition,nrl,iv)
@@ -174,13 +174,13 @@ contains
     use psb_util_mod
     !
     !   Discretizes the partial differential equation
-    ! 
-    !   a1 dd(u)  a2 dd(u)    a3 dd(u)    b1 d(u)   b2 d(u)  b3 d(u)  
+    !
+    !   a1 dd(u)  a2 dd(u)    a3 dd(u)    b1 d(u)   b2 d(u)  b3 d(u)
     ! -   ------ -  ------ -  ------ +  -----  +  ------  +  ------ + c u = f
-    !      dxdx     dydy       dzdz        dx       dy         dz   
+    !      dxdx     dydy       dzdz        dx       dy         dz
     !
     ! with Dirichlet boundary conditions
-    !   u = g 
+    !   u = g
     !
     !  on the unit cube  0<=x,y,z<=1.
     !
@@ -197,7 +197,7 @@ contains
     character(len=*)      :: afmt
     procedure(d_func_3d), optional :: f
     class(psb_d_base_sparse_mat), optional :: amold
-    class(psb_d_base_vect_type), optional :: vmold 
+    class(psb_d_base_vect_type), optional :: vmold
     class(psb_i_base_vect_type), optional :: imold
     integer(psb_ipk_), optional :: partition, nrl,iv(:)
 
@@ -238,7 +238,7 @@ contains
     call psb_info(ctxt, iam, np)
 
 
-    if (present(f)) then 
+    if (present(f)) then
       f_ => f
     else
       f_ => d_null_func_3d
@@ -258,10 +258,10 @@ contains
     else
       partition_ = 3
     end if
-    
+
     ! initialize array descriptor and sparse matrix storage. provide an
-    ! estimate of the number of non zeroes 
-    
+    ! estimate of the number of non zeroes
+
     m   = (1_psb_lpk_*idim)*idim*idim
     n   = m
     nnz = ((n*7)/(np))
@@ -269,8 +269,8 @@ contains
     t0 = psb_wtime()
     select case(partition_)
     case(1)
-      ! A BLOCK partition 
-      if (present(nrl)) then 
+      ! A BLOCK partition
+      if (present(nrl)) then
         nr = nrl
       else
         !
@@ -281,46 +281,46 @@ contains
       end if
 
       nt = nr
-      call psb_sum(ctxt,nt) 
-      if (nt /= m) then 
+      call psb_sum(ctxt,nt)
+      if (nt /= m) then
         write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
         info = -1
         call psb_barrier(ctxt)
         call psb_abort(ctxt)
-        return    
+        return
       end if
 
       !
       ! First example  of use of CDALL: specify for each process a number of
       ! contiguous rows
-      ! 
+      !
       call psb_cdall(ctxt,desc_a,info,nl=nr)
       myidx = desc_a%get_global_indices()
       nlr = size(myidx)
 
     case(2)
       ! A  partition  defined by the user through IV
-      
-      if (present(iv)) then 
+
+      if (present(iv)) then
         if (size(iv) /= m) then
           write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
           info = -1
           call psb_barrier(ctxt)
           call psb_abort(ctxt)
-          return    
+          return
         end if
       else
         write(psb_err_unit,*) iam, 'Initialization error: IV not present'
         info = -1
         call psb_barrier(ctxt)
         call psb_abort(ctxt)
-        return    
+        return
       end if
 
       !
       ! Second example  of use of CDALL: specify for each row the
-      ! process that owns it 
-      ! 
+      ! process that owns it
+      !
       call psb_cdall(ctxt,desc_a,info,vg=iv)
       myidx = desc_a%get_global_indices()
       nlr = size(myidx)
@@ -336,7 +336,7 @@ contains
       npz = npdims(3)
 
       allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
-      ! We can reuse idx2ijk for process indices as well. 
+      ! We can reuse idx2ijk for process indices as well.
       call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
       ! Now let's split the 3D cube in hexahedra
       call dist1Didx(bndx,idim,npx)
@@ -346,7 +346,7 @@ contains
       call dist1Didx(bndz,idim,npz)
       mynz = bndz(iamz+1)-bndz(iamz)
 
-      ! How many indices do I own? 
+      ! How many indices do I own?
       nlr = mynx*myny*mynz
       allocate(myidx(nlr))
       ! Now, let's generate the list of indices I own
@@ -370,9 +370,8 @@ contains
       !
       ! Third example  of use of CDALL: specify for each process
       ! the set of global indices it owns.
-      ! 
+      !
       call psb_cdall(ctxt,desc_a,info,vl=myidx)
-      
     case default
       write(psb_err_unit,*) iam, 'Initialization error: should not get here'
       info = -1
@@ -381,9 +380,9 @@ contains
       return
     end select
 
-    
+
     if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
-    ! define  rhs from boundary conditions; also build initial guess 
+    ! define  rhs from boundary conditions; also build initial guess
     if (info == psb_success_) call psb_geall(xv,desc_a,info)
     if (info == psb_success_) call psb_geall(bv,desc_a,info)
 
@@ -398,12 +397,12 @@ contains
     end if
 
     ! we build an auxiliary matrix consisting of one row at a
-    ! time; just a small matrix. might be extended to generate 
-    ! a bunch of rows per call. 
-    ! 
+    ! time; just a small matrix. might be extended to generate
+    ! a bunch of rows per call.
+    !
     allocate(val(20*nb),irow(20*nb),&
          &icol(20*nb),stat=info)
-    if (info /= psb_success_ ) then 
+    if (info /= psb_success_ ) then
       info=psb_err_alloc_dealloc_
       call psb_errpush(info,name)
       goto 9999
@@ -416,11 +415,11 @@ contains
     call psb_barrier(ctxt)
     t1 = psb_wtime()
     do ii=1, nlr,nb
-      ib = min(nb,nlr-ii+1) 
+      ib = min(nb,nlr-ii+1)
       icoeff = 1
       do k=1,ib
         i=ii+k-1
-        ! local matrix pointer 
+        ! local matrix pointer
         glob_row=myidx(i)
         ! compute gridpoint coordinates
         call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
@@ -430,11 +429,11 @@ contains
         z = (iz-1)*deltah
         zt(k) = f_(x,y,z)
         ! internal point: build discretization
-        !   
+        !
         !  term depending on   (x-1,y,z)
         !
         val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
-        if (ix == 1) then 
+        if (ix == 1) then
           zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
         else
           call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
@@ -443,19 +442,19 @@ contains
         endif
         !  term depending on     (x,y-1,z)
         val(icoeff)  = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
-        if (iy == 1) then 
+        if (iy == 1) then
           zt(k) = g(x,dzero,z)*(-val(icoeff))   + zt(k)
         else
-          call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)          
+          call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
           irow(icoeff) = glob_row
           icoeff       = icoeff+1
         endif
         !  term depending on     (x,y,z-1)
         val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
-        if (iz == 1) then 
+        if (iz == 1) then
           zt(k) = g(x,y,dzero)*(-val(icoeff))   + zt(k)
         else
-          call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)          
+          call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
           irow(icoeff) = glob_row
           icoeff       = icoeff+1
         endif
@@ -463,33 +462,33 @@ contains
         !  term depending on     (x,y,z)
         val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
              & + c(x,y,z)
-        call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)          
+        call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
         irow(icoeff) = glob_row
-        icoeff       = icoeff+1                  
+        icoeff       = icoeff+1
         !  term depending on     (x,y,z+1)
         val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
-        if (iz == idim) then 
+        if (iz == idim) then
           zt(k) = g(x,y,done)*(-val(icoeff))   + zt(k)
         else
-          call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)          
+          call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
           irow(icoeff) = glob_row
           icoeff       = icoeff+1
         endif
         !  term depending on     (x,y+1,z)
         val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
-        if (iy == idim) then 
+        if (iy == idim) then
           zt(k) = g(x,done,z)*(-val(icoeff))   + zt(k)
         else
-          call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)          
+          call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
           irow(icoeff) = glob_row
           icoeff       = icoeff+1
         endif
         !  term depending on     (x+1,y,z)
         val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
-        if (ix==idim) then 
+        if (ix==idim) then
           zt(k) = g(done,y,z)*(-val(icoeff))   + zt(k)
         else
-          call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)          
+          call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
           irow(icoeff) = glob_row
           icoeff       = icoeff+1
         endif
@@ -520,8 +519,8 @@ contains
     tcdasb = psb_wtime()-t1
     call psb_barrier(ctxt)
     t1 = psb_wtime()
-    if (info == psb_success_) then 
-      if (present(amold)) then 
+    if (info == psb_success_) then
+      if (present(amold)) then
         call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,mold=amold)
       else
         call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=afmt)
@@ -544,7 +543,7 @@ contains
     end if
     tasb = psb_wtime()-t1
     call psb_barrier(ctxt)
-    ttot = psb_wtime() - t0 
+    ttot = psb_wtime() - t0
 
     call psb_amx(ctxt,talc)
     call psb_amx(ctxt,tgen)
@@ -586,9 +585,9 @@ program psb_d_pde3d
   integer(psb_ipk_) :: idim
   integer(psb_epk_) :: system_size
 
-  ! miscellaneous 
+  ! miscellaneous
   real(psb_dpk_), parameter :: one = done
-  real(psb_dpk_) :: t1, t2, tprec 
+  real(psb_dpk_) :: t1, t2, tprec
 
   ! sparse matrix and preconditioner
   type(psb_dspmat_type) :: a
@@ -606,6 +605,14 @@ program psb_d_pde3d
   integer(psb_epk_) :: amatsize, precsize, descsize, d2size
   real(psb_dpk_)   :: err, eps
 
+  ! Parameters for solvers in Block-Jacobi preconditioner
+  type ainvparms
+    character(len=12) :: alg, orth_alg, ilu_alg, ilut_scale
+    integer(psb_ipk_) :: fill, inv_fill
+    real(psb_dpk_)    :: thresh, inv_thresh
+  end type ainvparms
+  type(ainvparms)     :: parms
+
   ! other variables
   integer(psb_ipk_) :: info, i
   character(len=20) :: name,ch_err
@@ -617,7 +624,7 @@ program psb_d_pde3d
   call psb_init(ctxt)
   call psb_info(ctxt,iam,np)
 
-  if (iam < 0) then 
+  if (iam < 0) then
     ! This should not happen, but just in case
     call psb_exit(ctxt)
     stop
@@ -628,21 +635,20 @@ program psb_d_pde3d
   !
   ! Hello world
   !
-  if (iam == psb_root_) then 
+  if (iam == psb_root_) then
     write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
     write(*,*) 'This is the ',trim(name),' sample program'
   end if
   !
   !  get parameters
   !
-  call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart)
-
+  call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms)
   !
-  !  allocate and fill in the coefficient matrix, rhs and initial guess 
+  !  allocate and fill in the coefficient matrix, rhs and initial guess
   !
   call psb_barrier(ctxt)
   t1 = psb_wtime()
-  call psb_gen_pde3d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart)  
+  call psb_gen_pde3d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart)
   call psb_barrier(ctxt)
   t2 = psb_wtime() - t1
   if(info /= psb_success_) then
@@ -655,9 +661,43 @@ program psb_d_pde3d
   if (iam == psb_root_) write(psb_out_unit,'(" ")')
   !
   !  prepare the preconditioner.
-  !  
+  !
   if(iam == psb_root_) write(psb_out_unit,'("Setting preconditioner to : ",a)')ptype
   call prec%init(ctxt,ptype,info)
+  !
+  ! Set the options for the BJAC preconditioner
+  !
+  if (psb_toupper(ptype) == "BJAC") then
+      call prec%set('sub_solve',       parms%alg,   info)
+      select case (psb_toupper(parms%alg))
+      case ("ILU")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('ilu_alg',         parms%ilu_alg,    info)
+      case ("ILUT")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('sub_iluthrs',     parms%thresh,     info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+      case ("AINV")
+        call prec%set('inv_thresh',      parms%inv_thresh, info)
+        call prec%set('inv_fillin',      parms%inv_fill,   info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+        call prec%set('ainv_alg',        parms%orth_alg,   info)
+      case ("INVK")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('inv_fillin',      parms%inv_fill,   info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+      case ("INVT")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('inv_fillin',      parms%inv_fill,   info)
+        call prec%set('sub_iluthrs',     parms%thresh,     info)
+        call prec%set('inv_thresh',      parms%inv_thresh, info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+      case default
+        ! Do nothing, use default setting in the init routine
+      end select
+  else
+    ! nothing to set for NONE or DIAG preconditioner
+  end if
 
   call psb_barrier(ctxt)
   t1 = psb_wtime()
@@ -677,14 +717,14 @@ program psb_d_pde3d
   if (iam == psb_root_) write(psb_out_unit,'(" ")')
   call prec%descr()
   !
-  ! iterative method parameters 
+  ! iterative method parameters
   !
   if(iam == psb_root_) write(psb_out_unit,'("Calling iterative method ",a)')kmethd
   call psb_barrier(ctxt)
-  t1 = psb_wtime()  
+  t1 = psb_wtime()
   eps   = 1.d-6
-  call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& 
-       & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst)     
+  call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,&
+       & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst)
 
   if(info /= psb_success_) then
     info=psb_err_from_subroutine_
@@ -714,14 +754,14 @@ program psb_d_pde3d
     write(psb_out_unit,'("Convergence indicator on exit : ",es12.5)')err
     write(psb_out_unit,'("Info  on exit                 : ",i12)')info
     write(psb_out_unit,'("Total memory occupation for      A: ",i12)')amatsize
-    write(psb_out_unit,'("Total memory occupation for   PREC: ",i12)')precsize    
+    write(psb_out_unit,'("Total memory occupation for   PREC: ",i12)')precsize
     write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
     write(psb_out_unit,'("Storage format for               A: ",a)') a%get_fmt()
     write(psb_out_unit,'("Storage format for          DESC_A: ",a)') desc_a%get_fmt()
   end if
 
 
-  !  
+  !
   !  cleanup storage and exit
   !
   call psb_gefree(bv,desc_a,info)
@@ -747,13 +787,15 @@ contains
   !
   ! get iteration parameters from standard input
   !
-  subroutine  get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart)
+  subroutine  get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,&
+       & itmax,itrace,irst,ipart,parms)
     type(psb_ctxt_type) :: ctxt
     character(len=*) :: kmethd, ptype, afmt
     integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart
     integer(psb_ipk_) :: np, iam
     integer(psb_ipk_) :: ip, inp_unit
     character(len=1024)   :: filename
+    type(ainvparms)   :: parms
 
     call psb_info(ctxt, iam, np)
 
@@ -782,12 +824,12 @@ contains
         if (ip >= 4) then
           read(inp_unit,*) ipart
         else
-          ipart = 3        
+          ipart = 3
         endif
         if (ip >= 5) then
           read(inp_unit,*) istopc
         else
-          istopc=1        
+          istopc=1
         endif
         if (ip >= 6) then
           read(inp_unit,*) itmax
@@ -804,8 +846,27 @@ contains
         else
           irst=1
         endif
+        if (ip >= 9) then
+          read(inp_unit,*) parms%alg
+          read(inp_unit,*) parms%ilu_alg
+          read(inp_unit,*) parms%ilut_scale
+          read(inp_unit,*) parms%fill
+          read(inp_unit,*) parms%inv_fill
+          read(inp_unit,*) parms%thresh
+          read(inp_unit,*) parms%inv_thresh
+          read(inp_unit,*) parms%orth_alg
+        else
+          parms%alg =  'ILU' ! Block Solver ILU,ILUT,INVK,AINVT,AORTH
+          parms%ilu_alg = 'NONE' ! If ILU : MILU or NONE othewise ignored
+          parms%ilut_scale = 'NONE' ! If ILUT: NONE, MAXVAL, DIAG, ARWSUM, ACLSUM, ARCSUM
+          parms%fill = 0     ! Level of fill for forward factorization
+          parms%inv_fill = 1 ! Level of fill for inverse factorization (only INVK)
+          parms%thresh = 1E-1_psb_dpk_ ! Threshold for forward factorization
+          parms%inv_thresh = 1E-1_psb_dpk_ ! Threshold for inverse factorization
+          parms%orth_alg = 'LLK'  ! What orthogonalization algorithm?
+        endif
 
-        write(psb_out_unit,'("Solving matrix       : ell1")')      
+        write(psb_out_unit,'("Solving matrix       : ell1")')
         write(psb_out_unit,&
              & '("Grid dimensions      : ",i4," x ",i4," x ",i4)') &
              & idim,idim,idim
@@ -820,11 +881,40 @@ contains
           write(psb_out_unit,'("Unknown data distrbution, defaulting to 3D")')
         end select
         write(psb_out_unit,'("Preconditioner       : ",a)') ptype
+        if( psb_toupper(ptype) == "BJAC" ) then
+          write(psb_out_unit,'("Block subsolver      : ",a)') parms%alg
+          select case (psb_toupper(parms%alg))
+          case ('ILU')
+            write(psb_out_unit,'("Fill in              : ",i0)') parms%fill
+            write(psb_out_unit,'("MILU                 : ",a)') parms%ilu_alg
+          case ('ILUT')
+            write(psb_out_unit,'("Fill in              : ",i0)') parms%fill
+            write(psb_out_unit,'("Threshold            : ",es12.5)') parms%thresh
+            write(psb_out_unit,'("Scaling              : ",a)') parms%ilut_scale
+          case ('INVK')
+            write(psb_out_unit,'("Fill in              : ",i0)') parms%fill
+            write(psb_out_unit,'("Invese Fill in       : ",i0)') parms%inv_fill
+            write(psb_out_unit,'("Scaling              : ",a)') parms%ilut_scale
+          case ('INVT')
+            write(psb_out_unit,'("Fill in              : ",i0)') parms%fill
+            write(psb_out_unit,'("Threshold            : ",es12.5)') parms%thresh
+            write(psb_out_unit,'("Invese Fill in       : ",i0)') parms%inv_fill
+            write(psb_out_unit,'("Inverse Threshold    : ",es12.5)') parms%inv_thresh
+            write(psb_out_unit,'("Scaling              : ",a)') parms%ilut_scale
+          case ('AINV','AORTH')
+            write(psb_out_unit,'("Inverse Threshold    : ",es12.5)') parms%inv_thresh
+            write(psb_out_unit,'("Invese Fill in       : ",i0)') parms%inv_fill
+            write(psb_out_unit,'("Orthogonalization    : ",a)') parms%orth_alg
+            write(psb_out_unit,'("Scaling              : ",a)') parms%ilut_scale
+          case default
+              write(psb_out_unit,'("Unknown diagonal solver")')
+          end select
+        end if
         write(psb_out_unit,'("Iterative method     : ",a)') kmethd
         write(psb_out_unit,'(" ")')
       else
         ! wrong number of parameter, print an error message and exit
-        call pr_usage(izero)      
+        call pr_usage(izero)
         call psb_abort(ctxt)
         stop 1
       endif
@@ -843,20 +933,27 @@ contains
     call psb_bcast(ctxt,itmax)
     call psb_bcast(ctxt,itrace)
     call psb_bcast(ctxt,irst)
+    call psb_bcast(ctxt,parms%alg)
+    call psb_bcast(ctxt,parms%fill)
+    call psb_bcast(ctxt,parms%inv_fill)
+    call psb_bcast(ctxt,parms%thresh)
+    call psb_bcast(ctxt,parms%inv_thresh)
+    call psb_bcast(ctxt,parms%orth_alg)
+    call psb_bcast(ctxt,parms%ilut_scale)
 
     return
 
   end subroutine get_parms
   !
-  !  print an error message 
-  !  
+  !  print an error message
+  !
   subroutine pr_usage(iout)
     integer(psb_ipk_) :: iout
     write(iout,*)'incorrect parameter(s) found'
     write(iout,*)' usage:  pde3d90 methd prec dim &
-         &[istop itmax itrace]'  
+         &[istop itmax itrace]'
     write(iout,*)' where:'
-    write(iout,*)'     methd:    cgstab cgs rgmres bicgstabl' 
+    write(iout,*)'     methd:    cgstab cgs rgmres bicgstabl'
     write(iout,*)'     prec :    bjac diag none'
     write(iout,*)'     dim       number of points along each axis'
     write(iout,*)'               the size of the resulting linear '
@@ -864,11 +961,9 @@ contains
     write(iout,*)'     ipart     data partition  1  3      '
     write(iout,*)'     istop     stopping criterion  1, 2  '
     write(iout,*)'     itmax     maximum number of iterations [500] '
-    write(iout,*)'     itrace    <=0  (no tracing, default) or '  
+    write(iout,*)'     itrace    <=0  (no tracing, default) or '
     write(iout,*)'               >= 1 do tracing every itrace'
-    write(iout,*)'               iterations ' 
+    write(iout,*)'               iterations '
   end subroutine pr_usage
 
 end program psb_d_pde3d
-
-
diff --git a/test/pargen/psb_s_pde2d.f90 b/test/pargen/psb_s_pde2d.f90
index 63cb4860..f055e5e6 100644
--- a/test/pargen/psb_s_pde2d.f90
+++ b/test/pargen/psb_s_pde2d.f90
@@ -1,9 +1,9 @@
-!    
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,23 +27,23 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 ! File: psb_s_pde2d.f90
 !
 ! Program: psb_s_pde2d
 ! This sample program solves a linear system obtained by discretizing a
-! PDE with Dirichlet BCs. 
-! 
+! PDE with Dirichlet BCs.
+!
 !
 ! The PDE is a general second order equation in 2d
 !
-!   a1 dd(u)  a2 dd(u)   b1 d(u)   b2 d(u) 
+!   a1 dd(u)  a2 dd(u)   b1 d(u)   b2 d(u)
 ! -   ------ -  ------   -----  +  ------  + c u = f
-!      dxdx     dydy        dx       dy    
+!      dxdx     dydy        dx       dy
 !
 ! with Dirichlet boundary conditions
-!   u = g 
+!   u = g
 !
 !  on the unit square  0<=x,y<=1.
 !
@@ -63,31 +63,31 @@ module psb_s_pde2d_mod
        &  psb_sspmat_type, psb_s_vect_type, szero,&
        &  psb_s_base_sparse_mat, psb_s_base_vect_type, psb_i_base_vect_type
 
-  interface 
+  interface
     function s_func_2d(x,y) result(val)
       import :: psb_spk_
       real(psb_spk_), intent(in) :: x,y
       real(psb_spk_) :: val
     end function s_func_2d
-  end interface 
+  end interface
 
   interface psb_gen_pde2d
     module procedure  psb_s_gen_pde2d
   end interface psb_gen_pde2d
-  
+
 contains
-  
+
   function s_null_func_2d(x,y) result(val)
 
     real(psb_spk_), intent(in) :: x,y
     real(psb_spk_) :: val
-    
+
     val = szero
 
   end function s_null_func_2d
 
   !
-  ! functions parametrizing the differential equation 
+  ! functions parametrizing the differential equation
   !
 
   !
@@ -101,48 +101,48 @@ contains
   !
   function b1(x,y)
     use psb_base_mod, only : psb_spk_, sone, szero
-    implicit none 
+    implicit none
     real(psb_spk_) :: b1
     real(psb_spk_), intent(in) :: x,y
     b1=szero
   end function b1
   function b2(x,y)
     use psb_base_mod, only : psb_spk_, sone, szero
-    implicit none 
+    implicit none
     real(psb_spk_) ::  b2
     real(psb_spk_), intent(in) :: x,y
     b2=szero
   end function b2
   function c(x,y)
     use psb_base_mod, only : psb_spk_, sone, szero
-    implicit none 
+    implicit none
     real(psb_spk_) ::  c
     real(psb_spk_), intent(in) :: x,y
     c=0.d0
   end function c
   function a1(x,y)
     use psb_base_mod, only : psb_spk_, sone, szero
-    implicit none 
-    real(psb_spk_) ::  a1   
+    implicit none
+    real(psb_spk_) ::  a1
     real(psb_spk_), intent(in) :: x,y
     a1=sone/80
   end function a1
   function a2(x,y)
     use psb_base_mod, only : psb_spk_, sone, szero
-    implicit none 
+    implicit none
     real(psb_spk_) ::  a2
     real(psb_spk_), intent(in) :: x,y
     a2=sone/80
   end function a2
   function g(x,y)
     use psb_base_mod, only : psb_spk_, sone, szero
-    implicit none 
+    implicit none
     real(psb_spk_) ::  g
     real(psb_spk_), intent(in) :: x,y
     g = szero
     if (x == sone) then
       g = sone
-    else if (x == szero) then 
+    else if (x == szero) then
       g = exp(-y**2)
     end if
   end function g
@@ -150,7 +150,7 @@ contains
 
   !
   !  subroutine to allocate and fill in the coefficient matrix and
-  !  the rhs. 
+  !  the rhs.
   !
   subroutine psb_s_gen_pde2d(ctxt,idim,a,bv,xv,desc_a,afmt,info,&
        & f,amold,vmold,imold,partition,nrl,iv)
@@ -158,13 +158,13 @@ contains
     use psb_util_mod
     !
     !   Discretizes the partial differential equation
-    ! 
-    !   a1 dd(u)  a2 dd(u)    b1 d(u)  b2 d(u) 
+    !
+    !   a1 dd(u)  a2 dd(u)    b1 d(u)  b2 d(u)
     ! -   ------ -  ------ +  -----  +  ------  + c u = f
-    !      dxdx     dydy         dx       dy     
+    !      dxdx     dydy         dx       dy
     !
     ! with Dirichlet boundary conditions
-    !   u = g 
+    !   u = g
     !
     !  on the unit square  0<=x,y<=1.
     !
@@ -222,7 +222,7 @@ contains
     call psb_info(ctxt, iam, np)
 
 
-    if (present(f)) then 
+    if (present(f)) then
       f_ => f
     else
       f_ => s_null_func_2d
@@ -242,9 +242,9 @@ contains
     else
       partition_ = 3
     end if
-    
+
     ! initialize array descriptor and sparse matrix storage. provide an
-    ! estimate of the number of non zeroes 
+    ! estimate of the number of non zeroes
 
     m   = (1_psb_lpk_)*idim*idim
     n   = m
@@ -253,8 +253,8 @@ contains
     t0 = psb_wtime()
     select case(partition_)
     case(1)
-      ! A BLOCK partition 
-      if (present(nrl)) then 
+      ! A BLOCK partition
+      if (present(nrl)) then
         nr = nrl
       else
         !
@@ -265,46 +265,46 @@ contains
       end if
 
       nt = nr
-      call psb_sum(ctxt,nt) 
-      if (nt /= m) then 
+      call psb_sum(ctxt,nt)
+      if (nt /= m) then
         write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
         info = -1
         call psb_barrier(ctxt)
         call psb_abort(ctxt)
-        return    
+        return
       end if
 
       !
       ! First example  of use of CDALL: specify for each process a number of
       ! contiguous rows
-      ! 
+      !
       call psb_cdall(ctxt,desc_a,info,nl=nr)
       myidx = desc_a%get_global_indices()
       nlr = size(myidx)
 
     case(2)
       ! A  partition  defined by the user through IV
-      
-      if (present(iv)) then 
+
+      if (present(iv)) then
         if (size(iv) /= m) then
           write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
           info = -1
           call psb_barrier(ctxt)
           call psb_abort(ctxt)
-          return    
+          return
         end if
       else
         write(psb_err_unit,*) iam, 'Initialization error: IV not present'
         info = -1
         call psb_barrier(ctxt)
         call psb_abort(ctxt)
-        return    
+        return
       end if
 
       !
       ! Second example  of use of CDALL: specify for each row the
-      ! process that owns it 
-      ! 
+      ! process that owns it
+      !
       call psb_cdall(ctxt,desc_a,info,vg=iv)
       myidx = desc_a%get_global_indices()
       nlr = size(myidx)
@@ -319,7 +319,7 @@ contains
       npy = npdims(2)
 
       allocate(bndx(0:npx),bndy(0:npy))
-      ! We can reuse idx2ijk for process indices as well. 
+      ! We can reuse idx2ijk for process indices as well.
       call idx2ijk(iamx,iamy,iam,npx,npy,base=0)
       ! Now let's split the 2D square in rectangles
       call dist1Didx(bndx,idim,npx)
@@ -327,7 +327,7 @@ contains
       call dist1Didx(bndy,idim,npy)
       myny = bndy(iamy+1)-bndy(iamy)
 
-      ! How many indices do I own? 
+      ! How many indices do I own?
       nlr = mynx*myny
       allocate(myidx(nlr))
       ! Now, let's generate the list of indices I own
@@ -349,9 +349,8 @@ contains
       !
       ! Third example  of use of CDALL: specify for each process
       ! the set of global indices it owns.
-      ! 
+      !
       call psb_cdall(ctxt,desc_a,info,vl=myidx)
-      
     case default
       write(psb_err_unit,*) iam, 'Initialization error: should not get here'
       info = -1
@@ -360,9 +359,9 @@ contains
       return
     end select
 
-    
+
     if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
-    ! define  rhs from boundary conditions; also build initial guess 
+    ! define  rhs from boundary conditions; also build initial guess
     if (info == psb_success_) call psb_geall(xv,desc_a,info)
     if (info == psb_success_) call psb_geall(bv,desc_a,info)
 
@@ -377,12 +376,12 @@ contains
     end if
 
     ! we build an auxiliary matrix consisting of one row at a
-    ! time; just a small matrix. might be extended to generate 
-    ! a bunch of rows per call. 
-    ! 
+    ! time; just a small matrix. might be extended to generate
+    ! a bunch of rows per call.
+    !
     allocate(val(20*nb),irow(20*nb),&
          &icol(20*nb),stat=info)
-    if (info /= psb_success_ ) then 
+    if (info /= psb_success_ ) then
       info=psb_err_alloc_dealloc_
       call psb_errpush(info,name)
       goto 9999
@@ -395,11 +394,11 @@ contains
     call psb_barrier(ctxt)
     t1 = psb_wtime()
     do ii=1, nlr,nb
-      ib = min(nb,nlr-ii+1) 
+      ib = min(nb,nlr-ii+1)
       icoeff = 1
       do k=1,ib
         i=ii+k-1
-        ! local matrix pointer 
+        ! local matrix pointer
         glob_row=myidx(i)
         ! compute gridpoint coordinates
         call idx2ijk(ix,iy,glob_row,idim,idim)
@@ -409,11 +408,11 @@ contains
 
         zt(k) = f_(x,y)
         ! internal point: build discretization
-        !   
+        !
         !  term depending on   (x-1,y)
         !
         val(icoeff) = -a1(x,y)/sqdeltah-b1(x,y)/deltah2
-        if (ix == 1) then 
+        if (ix == 1) then
           zt(k) = g(szero,y)*(-val(icoeff)) + zt(k)
         else
           call ijk2idx(icol(icoeff),ix-1,iy,idim,idim)
@@ -422,7 +421,7 @@ contains
         endif
         !  term depending on     (x,y-1)
         val(icoeff)  = -a2(x,y)/sqdeltah-b2(x,y)/deltah2
-        if (iy == 1) then 
+        if (iy == 1) then
           zt(k) = g(x,szero)*(-val(icoeff))   + zt(k)
         else
           call ijk2idx(icol(icoeff),ix,iy-1,idim,idim)
@@ -434,10 +433,10 @@ contains
         val(icoeff)=(2*sone)*(a1(x,y) + a2(x,y))/sqdeltah + c(x,y)
         call ijk2idx(icol(icoeff),ix,iy,idim,idim)
         irow(icoeff) = glob_row
-        icoeff       = icoeff+1                  
+        icoeff       = icoeff+1
         !  term depending on     (x,y+1)
         val(icoeff)=-a2(x,y)/sqdeltah+b2(x,y)/deltah2
-        if (iy == idim) then 
+        if (iy == idim) then
           zt(k) = g(x,sone)*(-val(icoeff))   + zt(k)
         else
           call ijk2idx(icol(icoeff),ix,iy+1,idim,idim)
@@ -446,7 +445,7 @@ contains
         endif
         !  term depending on     (x+1,y)
         val(icoeff)=-a1(x,y)/sqdeltah+b1(x,y)/deltah2
-        if (ix==idim) then 
+        if (ix==idim) then
           zt(k) = g(sone,y)*(-val(icoeff))   + zt(k)
         else
           call ijk2idx(icol(icoeff),ix+1,iy,idim,idim)
@@ -480,8 +479,8 @@ contains
     tcdasb = psb_wtime()-t1
     call psb_barrier(ctxt)
     t1 = psb_wtime()
-    if (info == psb_success_) then 
-      if (present(amold)) then 
+    if (info == psb_success_) then
+      if (present(amold)) then
         call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,mold=amold)
       else
         call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=afmt)
@@ -504,7 +503,7 @@ contains
     end if
     tasb = psb_wtime()-t1
     call psb_barrier(ctxt)
-    ttot = psb_wtime() - t0 
+    ttot = psb_wtime() - t0
 
     call psb_amx(ctxt,talc)
     call psb_amx(ctxt,tgen)
@@ -545,9 +544,9 @@ program psb_s_pde2d
   integer(psb_ipk_) :: idim
   integer(psb_epk_) :: system_size
 
-  ! miscellaneous 
+  ! miscellaneous
   real(psb_spk_), parameter :: one = sone
-  real(psb_dpk_) :: t1, t2, tprec 
+  real(psb_dpk_) :: t1, t2, tprec
 
   ! sparse matrix and preconditioner
   type(psb_sspmat_type) :: a
@@ -565,6 +564,14 @@ program psb_s_pde2d
   integer(psb_epk_) :: amatsize, precsize, descsize, d2size
   real(psb_spk_)   :: err, eps
 
+  ! Parameters for solvers in Block-Jacobi preconditioner
+  type ainvparms
+    character(len=12) :: alg, orth_alg, ilu_alg, ilut_scale
+    integer(psb_ipk_) :: fill, inv_fill
+    real(psb_spk_)    :: thresh, inv_thresh
+  end type ainvparms
+  type(ainvparms)     :: parms
+
   ! other variables
   integer(psb_ipk_) :: info, i
   character(len=20) :: name,ch_err
@@ -576,7 +583,7 @@ program psb_s_pde2d
   call psb_init(ctxt)
   call psb_info(ctxt,iam,np)
 
-  if (iam < 0) then 
+  if (iam < 0) then
     ! This should not happen, but just in case
     call psb_exit(ctxt)
     stop
@@ -587,21 +594,21 @@ program psb_s_pde2d
   !
   ! Hello world
   !
-  if (iam == psb_root_) then 
+  if (iam == psb_root_) then
     write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
     write(*,*) 'This is the ',trim(name),' sample program'
   end if
   !
   !  get parameters
   !
-  call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart)
+  call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms)
 
   !
-  !  allocate and fill in the coefficient matrix, rhs and initial guess 
+  !  allocate and fill in the coefficient matrix, rhs and initial guess
   !
   call psb_barrier(ctxt)
   t1 = psb_wtime()
-  call psb_gen_pde2d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart)  
+  call psb_gen_pde2d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart)
   call psb_barrier(ctxt)
   t2 = psb_wtime() - t1
   if(info /= psb_success_) then
@@ -614,9 +621,43 @@ program psb_s_pde2d
   if (iam == psb_root_) write(psb_out_unit,'(" ")')
   !
   !  prepare the preconditioner.
-  !  
+  !
   if(iam == psb_root_) write(psb_out_unit,'("Setting preconditioner to : ",a)')ptype
   call prec%init(ctxt,ptype,info)
+  !
+  ! Set the options for the BJAC preconditioner
+  !
+  if (psb_toupper(ptype) == "BJAC") then
+      call prec%set('sub_solve',       parms%alg,   info)
+      select case (psb_toupper(parms%alg))
+      case ("ILU")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('ilu_alg',         parms%ilu_alg,    info)
+      case ("ILUT")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('sub_iluthrs',     parms%thresh,     info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+      case ("AINV")
+        call prec%set('inv_thresh',      parms%inv_thresh, info)
+        call prec%set('inv_fillin',      parms%inv_fill,   info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+        call prec%set('ainv_alg',        parms%orth_alg,   info)
+      case ("INVK")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('inv_fillin',      parms%inv_fill,   info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+      case ("INVT")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('inv_fillin',      parms%inv_fill,   info)
+        call prec%set('sub_iluthrs',     parms%thresh,     info)
+        call prec%set('inv_thresh',      parms%inv_thresh, info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+      case default
+        ! Do nothing, use default setting in the init routine
+      end select
+  else
+    ! nothing to set for NONE or DIAG preconditioner
+  end if
 
   call psb_barrier(ctxt)
   t1 = psb_wtime()
@@ -636,14 +677,14 @@ program psb_s_pde2d
   if (iam == psb_root_) write(psb_out_unit,'(" ")')
   call prec%descr()
   !
-  ! iterative method parameters 
+  ! iterative method parameters
   !
   if(iam == psb_root_) write(psb_out_unit,'("Calling iterative method ",a)')kmethd
   call psb_barrier(ctxt)
-  t1 = psb_wtime()  
+  t1 = psb_wtime()
   eps   = 1.d-6
-  call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& 
-       & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst)     
+  call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,&
+       & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst)
 
   if(info /= psb_success_) then
     info=psb_err_from_subroutine_
@@ -673,14 +714,14 @@ program psb_s_pde2d
     write(psb_out_unit,'("Convergence indicator on exit : ",es12.5)')err
     write(psb_out_unit,'("Info  on exit                 : ",i12)')info
     write(psb_out_unit,'("Total memory occupation for      A: ",i12)')amatsize
-    write(psb_out_unit,'("Total memory occupation for   PREC: ",i12)')precsize    
+    write(psb_out_unit,'("Total memory occupation for   PREC: ",i12)')precsize
     write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
     write(psb_out_unit,'("Storage format for               A: ",a)') a%get_fmt()
     write(psb_out_unit,'("Storage format for          DESC_A: ",a)') desc_a%get_fmt()
   end if
 
 
-  !  
+  !
   !  cleanup storage and exit
   !
   call psb_gefree(bv,desc_a,info)
@@ -706,13 +747,14 @@ contains
   !
   ! get iteration parameters from standard input
   !
-  subroutine  get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart)
+  subroutine  get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms)
     type(psb_ctxt_type) :: ctxt
     character(len=*) :: kmethd, ptype, afmt
     integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart
     integer(psb_ipk_) :: np, iam
     integer(psb_ipk_) :: ip, inp_unit
     character(len=1024)   :: filename
+    type(ainvparms)   :: parms
 
     call psb_info(ctxt, iam, np)
 
@@ -741,12 +783,12 @@ contains
         if (ip >= 4) then
           read(inp_unit,*) ipart
         else
-          ipart = 3        
+          ipart = 3
         endif
         if (ip >= 5) then
           read(inp_unit,*) istopc
         else
-          istopc=1        
+          istopc=1
         endif
         if (ip >= 6) then
           read(inp_unit,*) itmax
@@ -763,8 +805,27 @@ contains
         else
           irst=1
         endif
+        if (ip >= 9) then
+          read(inp_unit,*) parms%alg
+          read(inp_unit,*) parms%ilu_alg
+          read(inp_unit,*) parms%ilut_scale
+          read(inp_unit,*) parms%fill
+          read(inp_unit,*) parms%inv_fill
+          read(inp_unit,*) parms%thresh
+          read(inp_unit,*) parms%inv_thresh
+          read(inp_unit,*) parms%orth_alg
+        else
+          parms%alg =  'ILU' ! Block Solver ILU,ILUT,INVK,AINVT,AORTH
+          parms%ilu_alg = 'NONE' ! If ILU : MILU or NONE othewise ignored
+          parms%ilut_scale = 'NONE' ! If ILUT: NONE, MAXVAL, DIAG, ARWSUM, ACLSUM, ARCSUM
+          parms%fill = 0     ! Level of fill for forward factorization
+          parms%inv_fill = 1 ! Level of fill for inverse factorization (only INVK)
+          parms%thresh = 1E-1_psb_spk_ ! Threshold for forward factorization
+          parms%inv_thresh = 1E-1_psb_spk_ ! Threshold for inverse factorization
+          parms%orth_alg = 'LLK'  ! What orthogonalization algorithm?
+        endif
 
-        write(psb_out_unit,'("Solving matrix       : ell1")')      
+        write(psb_out_unit,'("Solving matrix       : ell1")')
         write(psb_out_unit,'("Grid dimensions      : ",i5," x ",i5)')idim,idim
         write(psb_out_unit,'("Number of processors : ",i0)') np
         select case(ipart)
@@ -777,11 +838,40 @@ contains
           write(psb_out_unit,'("Unknown data distrbution, defaulting to 2D")')
         end select
         write(psb_out_unit,'("Preconditioner       : ",a)') ptype
+        if( psb_toupper(ptype) == "BJAC" ) then
+          write(psb_out_unit,'("Block subsolver      : ",a)') parms%alg
+          select case (psb_toupper(parms%alg))
+            case ('ILU')
+              write(psb_out_unit,'("Fill in       : ",i0)') parms%fill
+              write(psb_out_unit,'("MILU          : ",a)') parms%ilu_alg
+            case ('ILUT')
+              write(psb_out_unit,'("Fill in       : ",i0)') parms%fill
+              write(psb_out_unit,'("Threshold     : ",es12.5)') parms%thresh
+              write(psb_out_unit,'("Scaling       : ",a)') parms%ilut_scale
+            case ('INVK')
+              write(psb_out_unit,'("Fill in            : ",i0)') parms%fill
+              write(psb_out_unit,'("Invese Fill in     : ",i0)') parms%inv_fill
+              write(psb_out_unit,'("Scaling            : ",a)') parms%ilut_scale
+            case ('INVT')
+              write(psb_out_unit,'("Fill in            : ",i0)') parms%fill
+              write(psb_out_unit,'("Threshold          : ",es12.5)') parms%thresh
+              write(psb_out_unit,'("Invese Fill in     : ",i0)') parms%inv_fill
+              write(psb_out_unit,'("Inverse Threshold  : ",es12.5)') parms%inv_thresh
+              write(psb_out_unit,'("Scaling            : ",a)') parms%ilut_scale
+            case ('AINV','AORTH')
+              write(psb_out_unit,'("Inverse Threshold  : ",es12.5)') parms%inv_thresh
+              write(psb_out_unit,'("Invese Fill in     : ",i0)') parms%inv_fill
+              write(psb_out_unit,'("Orthogonalization  : ",a)') parms%orth_alg
+              write(psb_out_unit,'("Scaling            : ",a)') parms%ilut_scale
+            case default
+              write(psb_out_unit,'("Unknown diagonal solver")')
+            end select
+        end if
         write(psb_out_unit,'("Iterative method     : ",a)') kmethd
         write(psb_out_unit,'(" ")')
       else
         ! wrong number of parameter, print an error message and exit
-        call pr_usage(izero)      
+        call pr_usage(izero)
         call psb_abort(ctxt)
         stop 1
       endif
@@ -800,20 +890,27 @@ contains
     call psb_bcast(ctxt,itmax)
     call psb_bcast(ctxt,itrace)
     call psb_bcast(ctxt,irst)
-
+    call psb_bcast(ctxt,parms%alg)
+    call psb_bcast(ctxt,parms%fill)
+    call psb_bcast(ctxt,parms%inv_fill)
+    call psb_bcast(ctxt,parms%thresh)
+    call psb_bcast(ctxt,parms%inv_thresh)
+    call psb_bcast(ctxt,parms%orth_alg)
+    call psb_bcast(ctxt,parms%ilut_scale)
+    
     return
 
   end subroutine get_parms
   !
-  !  print an error message 
-  !  
+  !  print an error message
+  !
   subroutine pr_usage(iout)
     integer(psb_ipk_) :: iout
     write(iout,*)'incorrect parameter(s) found'
     write(iout,*)' usage:  pde2d90 methd prec dim &
-         &[ipart istop itmax itrace]'  
+         &[ipart istop itmax itrace]'
     write(iout,*)' where:'
-    write(iout,*)'     methd:    cgstab cgs rgmres bicgstabl' 
+    write(iout,*)'     methd:    cgstab cgs rgmres bicgstabl'
     write(iout,*)'     prec :    bjac diag none'
     write(iout,*)'     dim       number of points along each axis'
     write(iout,*)'               the size of the resulting linear '
@@ -821,11 +918,9 @@ contains
     write(iout,*)'     ipart     data partition  1  3      '
     write(iout,*)'     istop     stopping criterion  1, 2  '
     write(iout,*)'     itmax     maximum number of iterations [500] '
-    write(iout,*)'     itrace    <=0  (no tracing, default) or '  
+    write(iout,*)'     itrace    <=0  (no tracing, default) or '
     write(iout,*)'               >= 1 do tracing every itrace'
-    write(iout,*)'               iterations ' 
+    write(iout,*)'               iterations '
   end subroutine pr_usage
 
 end program psb_s_pde2d
-
-
diff --git a/test/pargen/psb_s_pde3d.f90 b/test/pargen/psb_s_pde3d.f90
index 90b4d042..0bc77248 100644
--- a/test/pargen/psb_s_pde3d.f90
+++ b/test/pargen/psb_s_pde3d.f90
@@ -1,9 +1,9 @@
-!    
+!
 !                Parallel Sparse BLAS  version 3.5
 !      (C) Copyright 2006-2018
-!        Salvatore Filippone    
-!        Alfredo Buttari      
-!   
+!        Salvatore Filippone
+!        Alfredo Buttari
+!
 !    Redistribution and use in source and binary forms, with or without
 !    modification, are permitted provided that the following conditions
 !    are met:
@@ -15,7 +15,7 @@
 !      3. The name of the PSBLAS group or the names of its contributors may
 !         not be used to endorse or promote products derived from this
 !         software without specific written permission.
-!   
+!
 !    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 !    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 !    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
@@ -27,23 +27,23 @@
 !    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 !    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 !    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
+!
+!
 ! File: psb_s_pde3d.f90
 !
 ! Program: psb_s_pde3d
 ! This sample program solves a linear system obtained by discretizing a
-! PDE with Dirichlet BCs. 
-! 
+! PDE with Dirichlet BCs.
+!
 !
 ! The PDE is a general second order equation in 3d
 !
-!   a1 dd(u)  a2 dd(u)    a3 dd(u)    b1 d(u)   b2 d(u)  b3 d(u)  
+!   a1 dd(u)  a2 dd(u)    a3 dd(u)    b1 d(u)   b2 d(u)  b3 d(u)
 ! -   ------ -  ------ -  ------ +  -----  +  ------  +  ------ + c u = f
-!      dxdx     dydy       dzdz        dx       dy         dz   
+!      dxdx     dydy       dzdz        dx       dy         dz
 !
 ! with Dirichlet boundary conditions
-!   u = g 
+!   u = g
 !
 !  on the unit cube  0<=x,y,z<=1.
 !
@@ -60,37 +60,37 @@
 !
 module psb_s_pde3d_mod
 
- 
+
   use psb_base_mod, only : psb_spk_, psb_ipk_, psb_lpk_, psb_desc_type,&
        &  psb_sspmat_type, psb_s_vect_type, szero,&
        &  psb_s_base_sparse_mat, psb_s_base_vect_type, &
        &  psb_i_base_vect_type, psb_l_base_vect_type
 
-  interface 
+  interface
     function s_func_3d(x,y,z) result(val)
       import :: psb_spk_
       real(psb_spk_), intent(in) :: x,y,z
       real(psb_spk_) :: val
     end function s_func_3d
-  end interface 
+  end interface
 
   interface psb_gen_pde3d
     module procedure  psb_s_gen_pde3d
   end interface psb_gen_pde3d
-  
+
 contains
 
   function s_null_func_3d(x,y,z) result(val)
 
     real(psb_spk_), intent(in) :: x,y,z
     real(psb_spk_) :: val
-    
+
     val = szero
 
   end function s_null_func_3d
   !
-  ! functions parametrizing the differential equation 
-  !  
+  ! functions parametrizing the differential equation
+  !
 
   !
   ! Note: b1, b2 and b3 are the coefficients of the first
@@ -103,70 +103,70 @@ contains
   !
   function b1(x,y,z)
     use psb_base_mod, only : psb_spk_, sone, szero
-    implicit none 
+    implicit none
     real(psb_spk_) :: b1
     real(psb_spk_), intent(in) :: x,y,z
     b1=szero
   end function b1
   function b2(x,y,z)
     use psb_base_mod, only : psb_spk_, sone, szero
-    implicit none 
+    implicit none
     real(psb_spk_) ::  b2
     real(psb_spk_), intent(in) :: x,y,z
     b2=szero
   end function b2
   function b3(x,y,z)
     use psb_base_mod, only : psb_spk_, sone, szero
-    implicit none 
+    implicit none
     real(psb_spk_) ::  b3
-    real(psb_spk_), intent(in) :: x,y,z      
+    real(psb_spk_), intent(in) :: x,y,z
     b3=szero
   end function b3
   function c(x,y,z)
     use psb_base_mod, only : psb_spk_, sone, szero
-    implicit none 
+    implicit none
     real(psb_spk_) ::  c
-    real(psb_spk_), intent(in) :: x,y,z      
+    real(psb_spk_), intent(in) :: x,y,z
     c=szero
   end function c
   function a1(x,y,z)
     use psb_base_mod, only : psb_spk_, sone, szero
-    implicit none 
-    real(psb_spk_) ::  a1   
+    implicit none
+    real(psb_spk_) ::  a1
     real(psb_spk_), intent(in) :: x,y,z
     a1=sone/80
   end function a1
   function a2(x,y,z)
     use psb_base_mod, only : psb_spk_, sone, szero
-    implicit none 
+    implicit none
     real(psb_spk_) ::  a2
     real(psb_spk_), intent(in) :: x,y,z
     a2=sone/80
   end function a2
   function a3(x,y,z)
     use psb_base_mod, only : psb_spk_, sone, szero
-    implicit none 
+    implicit none
     real(psb_spk_) ::  a3
     real(psb_spk_), intent(in) :: x,y,z
     a3=sone/80
   end function a3
   function g(x,y,z)
     use psb_base_mod, only : psb_spk_, sone, szero
-    implicit none 
+    implicit none
     real(psb_spk_) ::  g
     real(psb_spk_), intent(in) :: x,y,z
     g = szero
     if (x == sone) then
       g = sone
-    else if (x == szero) then 
+    else if (x == szero) then
       g = exp(y**2-z**2)
     end if
   end function g
 
-  
+
   !
   !  subroutine to allocate and fill in the coefficient matrix and
-  !  the rhs. 
+  !  the rhs.
   !
   subroutine psb_s_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,&
        & f,amold,vmold,imold,partition,nrl,iv)
@@ -174,13 +174,13 @@ contains
     use psb_util_mod
     !
     !   Discretizes the partial differential equation
-    ! 
-    !   a1 dd(u)  a2 dd(u)    a3 dd(u)    b1 d(u)   b2 d(u)  b3 d(u)  
+    !
+    !   a1 dd(u)  a2 dd(u)    a3 dd(u)    b1 d(u)   b2 d(u)  b3 d(u)
     ! -   ------ -  ------ -  ------ +  -----  +  ------  +  ------ + c u = f
-    !      dxdx     dydy       dzdz        dx       dy         dz   
+    !      dxdx     dydy       dzdz        dx       dy         dz
     !
     ! with Dirichlet boundary conditions
-    !   u = g 
+    !   u = g
     !
     !  on the unit cube  0<=x,y,z<=1.
     !
@@ -197,7 +197,7 @@ contains
     character(len=*)      :: afmt
     procedure(s_func_3d), optional :: f
     class(psb_s_base_sparse_mat), optional :: amold
-    class(psb_s_base_vect_type), optional :: vmold 
+    class(psb_s_base_vect_type), optional :: vmold
     class(psb_i_base_vect_type), optional :: imold
     integer(psb_ipk_), optional :: partition, nrl,iv(:)
 
@@ -238,7 +238,7 @@ contains
     call psb_info(ctxt, iam, np)
 
 
-    if (present(f)) then 
+    if (present(f)) then
       f_ => f
     else
       f_ => s_null_func_3d
@@ -258,10 +258,10 @@ contains
     else
       partition_ = 3
     end if
-    
+
     ! initialize array descriptor and sparse matrix storage. provide an
-    ! estimate of the number of non zeroes 
-    
+    ! estimate of the number of non zeroes
+
     m   = (1_psb_lpk_*idim)*idim*idim
     n   = m
     nnz = ((n*7)/(np))
@@ -269,8 +269,8 @@ contains
     t0 = psb_wtime()
     select case(partition_)
     case(1)
-      ! A BLOCK partition 
-      if (present(nrl)) then 
+      ! A BLOCK partition
+      if (present(nrl)) then
         nr = nrl
       else
         !
@@ -281,46 +281,46 @@ contains
       end if
 
       nt = nr
-      call psb_sum(ctxt,nt) 
-      if (nt /= m) then 
+      call psb_sum(ctxt,nt)
+      if (nt /= m) then
         write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
         info = -1
         call psb_barrier(ctxt)
         call psb_abort(ctxt)
-        return    
+        return
       end if
 
       !
       ! First example  of use of CDALL: specify for each process a number of
       ! contiguous rows
-      ! 
+      !
       call psb_cdall(ctxt,desc_a,info,nl=nr)
       myidx = desc_a%get_global_indices()
       nlr = size(myidx)
 
     case(2)
       ! A  partition  defined by the user through IV
-      
-      if (present(iv)) then 
+
+      if (present(iv)) then
         if (size(iv) /= m) then
           write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
           info = -1
           call psb_barrier(ctxt)
           call psb_abort(ctxt)
-          return    
+          return
         end if
       else
         write(psb_err_unit,*) iam, 'Initialization error: IV not present'
         info = -1
         call psb_barrier(ctxt)
         call psb_abort(ctxt)
-        return    
+        return
       end if
 
       !
       ! Second example  of use of CDALL: specify for each row the
-      ! process that owns it 
-      ! 
+      ! process that owns it
+      !
       call psb_cdall(ctxt,desc_a,info,vg=iv)
       myidx = desc_a%get_global_indices()
       nlr = size(myidx)
@@ -336,7 +336,7 @@ contains
       npz = npdims(3)
 
       allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
-      ! We can reuse idx2ijk for process indices as well. 
+      ! We can reuse idx2ijk for process indices as well.
       call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
       ! Now let's split the 3D cube in hexahedra
       call dist1Didx(bndx,idim,npx)
@@ -346,7 +346,7 @@ contains
       call dist1Didx(bndz,idim,npz)
       mynz = bndz(iamz+1)-bndz(iamz)
 
-      ! How many indices do I own? 
+      ! How many indices do I own?
       nlr = mynx*myny*mynz
       allocate(myidx(nlr))
       ! Now, let's generate the list of indices I own
@@ -370,9 +370,8 @@ contains
       !
       ! Third example  of use of CDALL: specify for each process
       ! the set of global indices it owns.
-      ! 
+      !
       call psb_cdall(ctxt,desc_a,info,vl=myidx)
-      
     case default
       write(psb_err_unit,*) iam, 'Initialization error: should not get here'
       info = -1
@@ -381,9 +380,9 @@ contains
       return
     end select
 
-    
+
     if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
-    ! define  rhs from boundary conditions; also build initial guess 
+    ! define  rhs from boundary conditions; also build initial guess
     if (info == psb_success_) call psb_geall(xv,desc_a,info)
     if (info == psb_success_) call psb_geall(bv,desc_a,info)
 
@@ -398,12 +397,12 @@ contains
     end if
 
     ! we build an auxiliary matrix consisting of one row at a
-    ! time; just a small matrix. might be extended to generate 
-    ! a bunch of rows per call. 
-    ! 
+    ! time; just a small matrix. might be extended to generate
+    ! a bunch of rows per call.
+    !
     allocate(val(20*nb),irow(20*nb),&
          &icol(20*nb),stat=info)
-    if (info /= psb_success_ ) then 
+    if (info /= psb_success_ ) then
       info=psb_err_alloc_dealloc_
       call psb_errpush(info,name)
       goto 9999
@@ -416,11 +415,11 @@ contains
     call psb_barrier(ctxt)
     t1 = psb_wtime()
     do ii=1, nlr,nb
-      ib = min(nb,nlr-ii+1) 
+      ib = min(nb,nlr-ii+1)
       icoeff = 1
       do k=1,ib
         i=ii+k-1
-        ! local matrix pointer 
+        ! local matrix pointer
         glob_row=myidx(i)
         ! compute gridpoint coordinates
         call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
@@ -430,11 +429,11 @@ contains
         z = (iz-1)*deltah
         zt(k) = f_(x,y,z)
         ! internal point: build discretization
-        !   
+        !
         !  term depending on   (x-1,y,z)
         !
         val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
-        if (ix == 1) then 
+        if (ix == 1) then
           zt(k) = g(szero,y,z)*(-val(icoeff)) + zt(k)
         else
           call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
@@ -443,19 +442,19 @@ contains
         endif
         !  term depending on     (x,y-1,z)
         val(icoeff)  = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
-        if (iy == 1) then 
+        if (iy == 1) then
           zt(k) = g(x,szero,z)*(-val(icoeff))   + zt(k)
         else
-          call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)          
+          call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
           irow(icoeff) = glob_row
           icoeff       = icoeff+1
         endif
         !  term depending on     (x,y,z-1)
         val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
-        if (iz == 1) then 
+        if (iz == 1) then
           zt(k) = g(x,y,szero)*(-val(icoeff))   + zt(k)
         else
-          call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)          
+          call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
           irow(icoeff) = glob_row
           icoeff       = icoeff+1
         endif
@@ -463,33 +462,33 @@ contains
         !  term depending on     (x,y,z)
         val(icoeff)=(2*sone)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
              & + c(x,y,z)
-        call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)          
+        call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
         irow(icoeff) = glob_row
-        icoeff       = icoeff+1                  
+        icoeff       = icoeff+1
         !  term depending on     (x,y,z+1)
         val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
-        if (iz == idim) then 
+        if (iz == idim) then
           zt(k) = g(x,y,sone)*(-val(icoeff))   + zt(k)
         else
-          call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)          
+          call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
           irow(icoeff) = glob_row
           icoeff       = icoeff+1
         endif
         !  term depending on     (x,y+1,z)
         val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
-        if (iy == idim) then 
+        if (iy == idim) then
           zt(k) = g(x,sone,z)*(-val(icoeff))   + zt(k)
         else
-          call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)          
+          call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
           irow(icoeff) = glob_row
           icoeff       = icoeff+1
         endif
         !  term depending on     (x+1,y,z)
         val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
-        if (ix==idim) then 
+        if (ix==idim) then
           zt(k) = g(sone,y,z)*(-val(icoeff))   + zt(k)
         else
-          call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)          
+          call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
           irow(icoeff) = glob_row
           icoeff       = icoeff+1
         endif
@@ -520,8 +519,8 @@ contains
     tcdasb = psb_wtime()-t1
     call psb_barrier(ctxt)
     t1 = psb_wtime()
-    if (info == psb_success_) then 
-      if (present(amold)) then 
+    if (info == psb_success_) then
+      if (present(amold)) then
         call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,mold=amold)
       else
         call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=afmt)
@@ -544,7 +543,7 @@ contains
     end if
     tasb = psb_wtime()-t1
     call psb_barrier(ctxt)
-    ttot = psb_wtime() - t0 
+    ttot = psb_wtime() - t0
 
     call psb_amx(ctxt,talc)
     call psb_amx(ctxt,tgen)
@@ -586,9 +585,9 @@ program psb_s_pde3d
   integer(psb_ipk_) :: idim
   integer(psb_epk_) :: system_size
 
-  ! miscellaneous 
+  ! miscellaneous
   real(psb_spk_), parameter :: one = sone
-  real(psb_dpk_) :: t1, t2, tprec 
+  real(psb_dpk_) :: t1, t2, tprec
 
   ! sparse matrix and preconditioner
   type(psb_sspmat_type) :: a
@@ -606,6 +605,14 @@ program psb_s_pde3d
   integer(psb_epk_) :: amatsize, precsize, descsize, d2size
   real(psb_spk_)   :: err, eps
 
+  ! Parameters for solvers in Block-Jacobi preconditioner
+  type ainvparms
+    character(len=12) :: alg, orth_alg, ilu_alg, ilut_scale
+    integer(psb_ipk_) :: fill, inv_fill
+    real(psb_spk_)    :: thresh, inv_thresh
+  end type ainvparms
+  type(ainvparms)     :: parms
+
   ! other variables
   integer(psb_ipk_) :: info, i
   character(len=20) :: name,ch_err
@@ -617,7 +624,7 @@ program psb_s_pde3d
   call psb_init(ctxt)
   call psb_info(ctxt,iam,np)
 
-  if (iam < 0) then 
+  if (iam < 0) then
     ! This should not happen, but just in case
     call psb_exit(ctxt)
     stop
@@ -628,21 +635,20 @@ program psb_s_pde3d
   !
   ! Hello world
   !
-  if (iam == psb_root_) then 
+  if (iam == psb_root_) then
     write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
     write(*,*) 'This is the ',trim(name),' sample program'
   end if
   !
   !  get parameters
   !
-  call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart)
-
+  call get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart,parms)
   !
-  !  allocate and fill in the coefficient matrix, rhs and initial guess 
+  !  allocate and fill in the coefficient matrix, rhs and initial guess
   !
   call psb_barrier(ctxt)
   t1 = psb_wtime()
-  call psb_gen_pde3d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart)  
+  call psb_gen_pde3d(ctxt,idim,a,bv,xxv,desc_a,afmt,info,partition=ipart)
   call psb_barrier(ctxt)
   t2 = psb_wtime() - t1
   if(info /= psb_success_) then
@@ -655,9 +661,43 @@ program psb_s_pde3d
   if (iam == psb_root_) write(psb_out_unit,'(" ")')
   !
   !  prepare the preconditioner.
-  !  
+  !
   if(iam == psb_root_) write(psb_out_unit,'("Setting preconditioner to : ",a)')ptype
   call prec%init(ctxt,ptype,info)
+  !
+  ! Set the options for the BJAC preconditioner
+  !
+  if (psb_toupper(ptype) == "BJAC") then
+      call prec%set('sub_solve',       parms%alg,   info)
+      select case (psb_toupper(parms%alg))
+      case ("ILU")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('ilu_alg',         parms%ilu_alg,    info)
+      case ("ILUT")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('sub_iluthrs',     parms%thresh,     info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+      case ("AINV")
+        call prec%set('inv_thresh',      parms%inv_thresh, info)
+        call prec%set('inv_fillin',      parms%inv_fill,   info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+        call prec%set('ainv_alg',        parms%orth_alg,   info)
+      case ("INVK")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('inv_fillin',      parms%inv_fill,   info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+      case ("INVT")
+        call prec%set('sub_fillin',      parms%fill,       info)
+        call prec%set('inv_fillin',      parms%inv_fill,   info)
+        call prec%set('sub_iluthrs',     parms%thresh,     info)
+        call prec%set('inv_thresh',      parms%inv_thresh, info)
+        call prec%set('ilut_scale',      parms%ilut_scale, info)
+      case default
+        ! Do nothing, use default setting in the init routine
+      end select
+  else
+    ! nothing to set for NONE or DIAG preconditioner
+  end if
 
   call psb_barrier(ctxt)
   t1 = psb_wtime()
@@ -677,14 +717,14 @@ program psb_s_pde3d
   if (iam == psb_root_) write(psb_out_unit,'(" ")')
   call prec%descr()
   !
-  ! iterative method parameters 
+  ! iterative method parameters
   !
   if(iam == psb_root_) write(psb_out_unit,'("Calling iterative method ",a)')kmethd
   call psb_barrier(ctxt)
-  t1 = psb_wtime()  
+  t1 = psb_wtime()
   eps   = 1.d-6
-  call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,& 
-       & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst)     
+  call psb_krylov(kmethd,a,prec,bv,xxv,eps,desc_a,info,&
+       & itmax=itmax,iter=iter,err=err,itrace=itrace,istop=istopc,irst=irst)
 
   if(info /= psb_success_) then
     info=psb_err_from_subroutine_
@@ -714,14 +754,14 @@ program psb_s_pde3d
     write(psb_out_unit,'("Convergence indicator on exit : ",es12.5)')err
     write(psb_out_unit,'("Info  on exit                 : ",i12)')info
     write(psb_out_unit,'("Total memory occupation for      A: ",i12)')amatsize
-    write(psb_out_unit,'("Total memory occupation for   PREC: ",i12)')precsize    
+    write(psb_out_unit,'("Total memory occupation for   PREC: ",i12)')precsize
     write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
     write(psb_out_unit,'("Storage format for               A: ",a)') a%get_fmt()
     write(psb_out_unit,'("Storage format for          DESC_A: ",a)') desc_a%get_fmt()
   end if
 
 
-  !  
+  !
   !  cleanup storage and exit
   !
   call psb_gefree(bv,desc_a,info)
@@ -747,13 +787,15 @@ contains
   !
   ! get iteration parameters from standard input
   !
-  subroutine  get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,itmax,itrace,irst,ipart)
+  subroutine  get_parms(ctxt,kmethd,ptype,afmt,idim,istopc,&
+       & itmax,itrace,irst,ipart,parms)
     type(psb_ctxt_type) :: ctxt
     character(len=*) :: kmethd, ptype, afmt
     integer(psb_ipk_) :: idim, istopc,itmax,itrace,irst,ipart
     integer(psb_ipk_) :: np, iam
     integer(psb_ipk_) :: ip, inp_unit
     character(len=1024)   :: filename
+    type(ainvparms)   :: parms
 
     call psb_info(ctxt, iam, np)
 
@@ -782,12 +824,12 @@ contains
         if (ip >= 4) then
           read(inp_unit,*) ipart
         else
-          ipart = 3        
+          ipart = 3
         endif
         if (ip >= 5) then
           read(inp_unit,*) istopc
         else
-          istopc=1        
+          istopc=1
         endif
         if (ip >= 6) then
           read(inp_unit,*) itmax
@@ -804,8 +846,27 @@ contains
         else
           irst=1
         endif
+        if (ip >= 9) then
+          read(inp_unit,*) parms%alg
+          read(inp_unit,*) parms%ilu_alg
+          read(inp_unit,*) parms%ilut_scale
+          read(inp_unit,*) parms%fill
+          read(inp_unit,*) parms%inv_fill
+          read(inp_unit,*) parms%thresh
+          read(inp_unit,*) parms%inv_thresh
+          read(inp_unit,*) parms%orth_alg
+        else
+          parms%alg =  'ILU' ! Block Solver ILU,ILUT,INVK,AINVT,AORTH
+          parms%ilu_alg = 'NONE' ! If ILU : MILU or NONE othewise ignored
+          parms%ilut_scale = 'NONE' ! If ILUT: NONE, MAXVAL, DIAG, ARWSUM, ACLSUM, ARCSUM
+          parms%fill = 0     ! Level of fill for forward factorization
+          parms%inv_fill = 1 ! Level of fill for inverse factorization (only INVK)
+          parms%thresh = 1E-1_psb_spk_ ! Threshold for forward factorization
+          parms%inv_thresh = 1E-1_psb_spk_ ! Threshold for inverse factorization
+          parms%orth_alg = 'LLK'  ! What orthogonalization algorithm?
+        endif
 
-        write(psb_out_unit,'("Solving matrix       : ell1")')      
+        write(psb_out_unit,'("Solving matrix       : ell1")')
         write(psb_out_unit,&
              & '("Grid dimensions      : ",i4," x ",i4," x ",i4)') &
              & idim,idim,idim
@@ -820,11 +881,40 @@ contains
           write(psb_out_unit,'("Unknown data distrbution, defaulting to 3D")')
         end select
         write(psb_out_unit,'("Preconditioner       : ",a)') ptype
+        if( psb_toupper(ptype) == "BJAC" ) then
+          write(psb_out_unit,'("Block subsolver      : ",a)') parms%alg
+          select case (psb_toupper(parms%alg))
+          case ('ILU')
+            write(psb_out_unit,'("Fill in              : ",i0)') parms%fill
+            write(psb_out_unit,'("MILU                 : ",a)') parms%ilu_alg
+          case ('ILUT')
+            write(psb_out_unit,'("Fill in              : ",i0)') parms%fill
+            write(psb_out_unit,'("Threshold            : ",es12.5)') parms%thresh
+            write(psb_out_unit,'("Scaling              : ",a)') parms%ilut_scale
+          case ('INVK')
+            write(psb_out_unit,'("Fill in              : ",i0)') parms%fill
+            write(psb_out_unit,'("Invese Fill in       : ",i0)') parms%inv_fill
+            write(psb_out_unit,'("Scaling              : ",a)') parms%ilut_scale
+          case ('INVT')
+            write(psb_out_unit,'("Fill in              : ",i0)') parms%fill
+            write(psb_out_unit,'("Threshold            : ",es12.5)') parms%thresh
+            write(psb_out_unit,'("Invese Fill in       : ",i0)') parms%inv_fill
+            write(psb_out_unit,'("Inverse Threshold    : ",es12.5)') parms%inv_thresh
+            write(psb_out_unit,'("Scaling              : ",a)') parms%ilut_scale
+          case ('AINV','AORTH')
+            write(psb_out_unit,'("Inverse Threshold    : ",es12.5)') parms%inv_thresh
+            write(psb_out_unit,'("Invese Fill in       : ",i0)') parms%inv_fill
+            write(psb_out_unit,'("Orthogonalization    : ",a)') parms%orth_alg
+            write(psb_out_unit,'("Scaling              : ",a)') parms%ilut_scale
+          case default
+              write(psb_out_unit,'("Unknown diagonal solver")')
+          end select
+        end if
         write(psb_out_unit,'("Iterative method     : ",a)') kmethd
         write(psb_out_unit,'(" ")')
       else
         ! wrong number of parameter, print an error message and exit
-        call pr_usage(izero)      
+        call pr_usage(izero)
         call psb_abort(ctxt)
         stop 1
       endif
@@ -843,20 +933,27 @@ contains
     call psb_bcast(ctxt,itmax)
     call psb_bcast(ctxt,itrace)
     call psb_bcast(ctxt,irst)
+    call psb_bcast(ctxt,parms%alg)
+    call psb_bcast(ctxt,parms%fill)
+    call psb_bcast(ctxt,parms%inv_fill)
+    call psb_bcast(ctxt,parms%thresh)
+    call psb_bcast(ctxt,parms%inv_thresh)
+    call psb_bcast(ctxt,parms%orth_alg)
+    call psb_bcast(ctxt,parms%ilut_scale)
 
     return
 
   end subroutine get_parms
   !
-  !  print an error message 
-  !  
+  !  print an error message
+  !
   subroutine pr_usage(iout)
     integer(psb_ipk_) :: iout
     write(iout,*)'incorrect parameter(s) found'
     write(iout,*)' usage:  pde3d90 methd prec dim &
-         &[istop itmax itrace]'  
+         &[istop itmax itrace]'
     write(iout,*)' where:'
-    write(iout,*)'     methd:    cgstab cgs rgmres bicgstabl' 
+    write(iout,*)'     methd:    cgstab cgs rgmres bicgstabl'
     write(iout,*)'     prec :    bjac diag none'
     write(iout,*)'     dim       number of points along each axis'
     write(iout,*)'               the size of the resulting linear '
@@ -864,11 +961,9 @@ contains
     write(iout,*)'     ipart     data partition  1  3      '
     write(iout,*)'     istop     stopping criterion  1, 2  '
     write(iout,*)'     itmax     maximum number of iterations [500] '
-    write(iout,*)'     itrace    <=0  (no tracing, default) or '  
+    write(iout,*)'     itrace    <=0  (no tracing, default) or '
     write(iout,*)'               >= 1 do tracing every itrace'
-    write(iout,*)'               iterations ' 
+    write(iout,*)'               iterations '
   end subroutine pr_usage
 
 end program psb_s_pde3d
-
-
diff --git a/test/pargen/runs/ppde.inp b/test/pargen/runs/ppde.inp
index f6fe33eb..f4b45430 100644
--- a/test/pargen/runs/ppde.inp
+++ b/test/pargen/runs/ppde.inp
@@ -1,4 +1,4 @@
-8                      Number of entries below this
+17                     Number of entries below this
 BICGSTAB               Iterative method BICGSTAB CGS  BICG BICGSTABL RGMRES FCG CGR
 BJAC                   Preconditioner NONE  DIAG  BJAC 
 CSR                    Storage format for matrix A:  CSR COO 
@@ -8,5 +8,11 @@ CSR                    Storage format for matrix A:  CSR COO
 0100                   MAXIT
 05                     ITRACE
 002                    IRST    restart for RGMRES  and BiCGSTABL
-
-
+ILU                    Block Solver ILU,ILUT,INVK,AINVT,AORTH
+NONE                   If ILU : MILU or NONE othewise ignored
+NONE                   Scaling if ILUT: NONE, MAXVAL otherwise ignored
+0                      Level of fill for forward factorization
+1                      Level of fill for inverse factorization (only INVK)
+1E-1                   Threshold for forward factorization
+1E-1                   Threshold for inverse factorization (Only INVK, AINVT)
+LLK                    What orthogonalization algorithm? (Only AINVT)
diff --git a/util/Makefile b/util/Makefile
index 9809a34f..4f4a134a 100644
--- a/util/Makefile
+++ b/util/Makefile
@@ -10,7 +10,8 @@ HERE=.
 BASEOBJS= psb_blockpart_mod.o psb_metispart_mod.o psb_partidx_mod.o \
       psb_hbio_mod.o psb_mmio_mod.o psb_mat_dist_mod.o \
       psb_s_mat_dist_mod.o psb_d_mat_dist_mod.o psb_c_mat_dist_mod.o psb_z_mat_dist_mod.o \
-      psb_renum_mod.o psb_gps_mod.o
+      psb_renum_mod.o psb_gps_mod.o \
+      psb_s_renum_mod.o psb_d_renum_mod.o psb_c_renum_mod.o psb_z_renum_mod.o 
 IMPLOBJS= psb_s_hbio_impl.o psb_d_hbio_impl.o \
 	psb_c_hbio_impl.o psb_z_hbio_impl.o \
 	psb_s_mmio_impl.o psb_d_mmio_impl.o \
@@ -40,6 +41,7 @@ $(OBJS): $(MODDIR)/$(BASEMODNAME)$(.mod)
 psb_util_mod.o: $(BASEOBJS)
 psb_metispart_mod.o: psb_metis_int.o
 psb_mat_dist_mod.o:  psb_s_mat_dist_mod.o psb_d_mat_dist_mod.o psb_c_mat_dist_mod.o psb_z_mat_dist_mod.o 
+psb_renum_mod.o: psb_s_renum_mod.o psb_d_renum_mod.o psb_c_renum_mod.o psb_z_renum_mod.o
 
 $(IMPLOBJS): $(BASEOBJS)
 
diff --git a/util/psb_c_renum_impl.F90 b/util/psb_c_renum_impl.F90
index 4a1cf220..f57003a1 100644
--- a/util/psb_c_renum_impl.F90
+++ b/util/psb_c_renum_impl.F90
@@ -29,37 +29,61 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
 !    
-subroutine psb_c_mat_renums(alg,mat,info,perm)
+subroutine psb_c_mat_renum(alg,mat,info,perm)
   use psb_base_mod
-  use psb_renum_mod, psb_protect_name => psb_c_mat_renums
+  use psb_renum_mod, psb_protect_name => psb_c_mat_renum
   implicit none 
   character(len=*), intent(in) :: alg
   type(psb_cspmat_type), intent(inout) :: mat
   integer(psb_ipk_), intent(out) :: info
   integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
   
-  integer(psb_ipk_) :: err_act, nr, nc, ialg
+  integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5)
   character(len=20)  :: name
 
   info = psb_success_
   name = 'mat_renum'
   call psb_erractionsave(err_act)
 
+  nr = mat%get_nrows()
+  nc = mat%get_ncols()
+  if (nr /= nc) then 
+    info = psb_err_rectangular_mat_unsupported_
+    ierr(1) = nr; ierr(2) = nc;
+    call psb_errpush(info,name,i_err=ierr)
+    goto 9999
+  end if
+
   info = psb_success_
   select case (psb_toupper(alg))
   case ('GPS')
-    ialg = psb_mat_renum_gps_
-  case ('AMD')
-    ialg = psb_mat_renum_amd_
+
+    call psb_mat_renum_gps(mat,info,perm)
+
+  case('AMD') 
+
+    call psb_mat_renum_amd(mat,info,perm)
+
   case ('NONE', 'ID') 
-    ialg = psb_mat_renum_identity_
+    nr = mat%get_nrows()
+    allocate(perm(nr),stat=info) 
+    if (info == 0) then 
+      do i=1,nr
+        perm(i) = i
+      end do
+    else
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info,name)
+      goto 9999
+    endif
   case default
     write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"'
-    ialg = -1
+    info = psb_err_input_value_invalid_i_
+    ierr(1) = 1; 
+    call psb_errpush(info,name,i_err=ierr)
+    goto 9999 
   end select
-
-  call psb_mat_renum(ialg,mat,info,perm)
-
+  
   if (info /= psb_success_) then 
     info = psb_err_from_subroutine_non_
     call psb_errpush(info,name)
@@ -71,26 +95,219 @@ subroutine psb_c_mat_renums(alg,mat,info,perm)
 
 9999 call psb_error_handler(err_act)
   return
-end subroutine psb_c_mat_renums
-  
-subroutine psb_c_mat_renum(alg,mat,info,perm)
+
+contains
+
+  subroutine psb_mat_renum_gps(a,info,operm)
+    use psb_base_mod
+    use psb_gps_mod
+    implicit none 
+    type(psb_cspmat_type), intent(inout) :: a
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+
+    ! 
+    class(psb_c_base_sparse_mat), allocatable :: aa
+    type(psb_c_csr_sparse_mat)  :: acsr
+    type(psb_c_coo_sparse_mat)  :: acoo
+    
+    integer(psb_ipk_) :: err_act
+    character(len=20)           :: name
+    integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) 
+    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth
+
+    info = psb_success_
+    name = 'mat_renum_gps'
+    call psb_erractionsave(err_act)
+
+    info = psb_success_
+
+    call a%mold(aa)
+    call a%mv_to(aa)
+    call aa%mv_to_fmt(acsr,info)
+    ! Insert call to gps_reduce
+    nr   = acsr%get_nrows()
+    ideg = 0
+    do i=1, nr
+      ideg = max(ideg,acsr%irp(i+1)-acsr%irp(i))
+    end do
+    allocate(ndstk(nr,ideg), iold(nr), perm(nr+1), ndeg(nr),stat=info)
+    if (info /= 0) then
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info, name) 
+      goto 9999
+    end if
+    do i=1, nr
+      iold(i) = i 
+      ndstk(i,:) = 0
+      k  = 0
+      do j=acsr%irp(i),acsr%irp(i+1)-1
+        k = k + 1
+        ndstk(i,k) = acsr%ja(j)
+      end do
+    end do
+    perm = 0
+
+    call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth)
+
+    if (.not.psb_isaperm(nr,perm)) then 
+      write(0,*) 'Something wrong: bad perm from gps_reduce'
+      info = psb_err_from_subroutine_
+      call psb_errpush(info,name) 
+      goto 9999
+    end if
+    ! Move to coordinate to apply renumbering
+    call acsr%mv_to_coo(acoo,info)
+    do i=1, acoo%get_nzeros()
+      acoo%ia(i) = perm(acoo%ia(i))
+      acoo%ja(i) = perm(acoo%ja(i))
+    end do
+    call acoo%fix(info) 
+
+    ! Get back to where we started from
+    call aa%mv_from_coo(acoo,info)
+    call a%mv_from(aa)
+    if (present(operm)) then 
+      call psb_realloc(nr,operm,info)
+      if (info /= psb_success_) then 
+        info = psb_err_alloc_dealloc_
+        call psb_errpush(info,name) 
+        goto 9999
+      end if
+      operm(1:nr) = perm(1:nr)
+    end if
+
+    deallocate(aa)
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+
+  end subroutine psb_mat_renum_gps
+
+
+  subroutine psb_mat_renum_amd(a,info,operm)
+#if defined(HAVE_AMD) 
+    use iso_c_binding
+#endif
+    use psb_base_mod    
+    implicit none 
+    type(psb_cspmat_type), intent(inout) :: a
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+
+    ! 
+#if defined(HAVE_AMD) 
+    interface 
+      function psb_amd_order(n,ap,ai,p)&
+           & result(res) bind(c,name='psb_amd_order')
+        use iso_c_binding
+        integer(c_int) :: res
+        integer(c_int), value :: n
+        integer(c_int) :: ap(*), ai(*), p(*)
+      end function psb_amd_order
+    end interface
+#endif
+
+    type(psb_c_csc_sparse_mat)  :: acsc
+    class(psb_c_base_sparse_mat), allocatable :: aa
+    type(psb_c_coo_sparse_mat)  :: acoo
+
+    integer(psb_ipk_), allocatable :: perm(:) 
+    integer(psb_ipk_) :: err_act
+    character(len=20)           :: name
+    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz
+
+    info = psb_success_
+    name = 'mat_renum_amd'
+    call psb_erractionsave(err_act)
+
+#if defined(HAVE_AMD)  && defined(IPK4)
+
+    info = psb_success_
+    nr   = a%get_nrows()
+    nz   = a%get_nzeros()
+    allocate(perm(nr),stat=info)
+    if (info /= 0) then 
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info,name) 
+      goto 9999
+    end if
+
+
+    allocate(aa, mold=a%a)
+    call a%mv_to(acsc)
+
+    acsc%ia(:)  = acsc%ia(:) - 1
+    acsc%icp(:) = acsc%icp(:) - 1
+
+    info = psb_amd_order(nr,acsc%icp,acsc%ia,perm)
+    if (info /= psb_success_) then 
+      info = psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_amd_order') 
+      goto 9999
+    end if
+
+    perm(:)     = perm(:) + 1
+    acsc%ia(:)  = acsc%ia(:) + 1
+    acsc%icp(:) = acsc%icp(:) + 1
+
+    call acsc%mv_to_coo(acoo,info)
+    do i=1, acoo%get_nzeros()
+      acoo%ia(i) = perm(acoo%ia(i))
+      acoo%ja(i) = perm(acoo%ja(i))
+    end do
+    call acoo%fix(info) 
+
+    ! Get back to where we started from
+    call aa%mv_from_coo(acoo,info)
+    call a%mv_from(aa)
+    if (present(operm)) then 
+      call psb_realloc(nr,operm,info)
+      if (info /= psb_success_) then 
+        info = psb_err_alloc_dealloc_
+        call psb_errpush(info,name) 
+        goto 9999
+      end if
+      operm(1:nr) = perm(1:nr)
+    end if
+
+    deallocate(aa,perm)
+
+#else 
+
+    info = psb_err_missing_aux_lib_
+    call psb_errpush(info, name) 
+    goto 9999
+#endif
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+  end subroutine psb_mat_renum_amd
+
+end subroutine psb_c_mat_renum
+
+subroutine psb_lc_mat_renum(alg,mat,info,perm)
   use psb_base_mod
-  use psb_renum_mod, psb_protect_name => psb_c_mat_renum
+  use psb_renum_mod, psb_protect_name => psb_lc_mat_renum
   implicit none 
-  integer(psb_ipk_), intent(in) :: alg
-  type(psb_cspmat_type), intent(inout) :: mat
+  character(len=*), intent(in) :: alg
+  type(psb_lcspmat_type), intent(inout) :: mat
   integer(psb_ipk_), intent(out) :: info
-  integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
+  integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:)
   
-  integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5)
+  integer(psb_lpk_) :: nr, nc, i
+  integer(psb_ipk_) :: err_act, ierr(5)
   character(len=20)  :: name
 
   info = psb_success_
   name = 'mat_renum'
   call psb_erractionsave(err_act)
 
-  info = psb_success_
-  
   nr = mat%get_nrows()
   nc = mat%get_ncols()
   if (nr /= nc) then 
@@ -100,16 +317,17 @@ subroutine psb_c_mat_renum(alg,mat,info,perm)
     goto 9999
   end if
 
-  select case (alg)
-  case(psb_mat_renum_gps_) 
+  info = psb_success_
+  select case (psb_toupper(alg))
+  case ('GPS')
 
-    call psb_mat_renum_gps(mat,info,perm)
+    call psb_lmat_renum_gps(mat,info,perm)
 
-  case(psb_mat_renum_amd_) 
+  case('AMD') 
 
-    call psb_mat_renum_amd(mat,info,perm)
+    call psb_lmat_renum_amd(mat,info,perm)
 
-  case(psb_mat_renum_identity_) 
+  case ('NONE', 'ID') 
     nr = mat%get_nrows()
     allocate(perm(nr),stat=info) 
     if (info == 0) then 
@@ -122,8 +340,9 @@ subroutine psb_c_mat_renum(alg,mat,info,perm)
       goto 9999
     endif
   case default
+    write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"'
     info = psb_err_input_value_invalid_i_
-    ierr(1) = 1; ierr(2) = alg;
+    ierr(1) = 1; 
     call psb_errpush(info,name,i_err=ierr)
     goto 9999 
   end select
@@ -142,26 +361,26 @@ subroutine psb_c_mat_renum(alg,mat,info,perm)
 
 contains
 
-  subroutine psb_mat_renum_gps(a,info,operm)
+  subroutine psb_lmat_renum_gps(a,info,operm)
     use psb_base_mod
-    use psb_gps_mod
+    use psb_lgps_mod
     implicit none 
-    type(psb_cspmat_type), intent(inout) :: a
+    type(psb_lcspmat_type), intent(inout) :: a
     integer(psb_ipk_), intent(out) :: info
-    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+    integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:)
 
     ! 
-    class(psb_c_base_sparse_mat), allocatable :: aa
-    type(psb_c_csr_sparse_mat)  :: acsr
-    type(psb_c_coo_sparse_mat)  :: acoo
+    class(psb_lc_base_sparse_mat), allocatable :: aa
+    type(psb_lc_csr_sparse_mat)  :: acsr
+    type(psb_lc_coo_sparse_mat)  :: acoo
     
     integer(psb_ipk_) :: err_act
     character(len=20)           :: name
-    integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) 
-    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth
+    integer(psb_lpk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) 
+    integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth
 
     info = psb_success_
-    name = 'mat_renum'
+    name = 'mat_renum_gps'
     call psb_erractionsave(err_act)
 
     info = psb_success_
@@ -192,7 +411,7 @@ contains
     end do
     perm = 0
 
-    call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth)
+    call psb_lgps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth)
 
     if (.not.psb_isaperm(nr,perm)) then 
       write(0,*) 'Something wrong: bad perm from gps_reduce'
@@ -228,18 +447,18 @@ contains
 9999 call psb_error_handler(err_act)
     return
 
-  end subroutine psb_mat_renum_gps
+  end subroutine psb_lmat_renum_gps
 
 
-  subroutine psb_mat_renum_amd(a,info,operm)
+  subroutine psb_lmat_renum_amd(a,info,operm)
 #if defined(HAVE_AMD) 
     use iso_c_binding
 #endif
     use psb_base_mod    
     implicit none 
-    type(psb_cspmat_type), intent(inout) :: a
+    type(psb_lcspmat_type), intent(inout) :: a
     integer(psb_ipk_), intent(out) :: info
-    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+    integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:)
 
     ! 
 #if defined(HAVE_AMD) 
@@ -254,20 +473,20 @@ contains
     end interface
 #endif
 
-    type(psb_c_csc_sparse_mat)  :: acsc
-    class(psb_c_base_sparse_mat), allocatable :: aa
-    type(psb_c_coo_sparse_mat)  :: acoo
+    type(psb_lc_csc_sparse_mat)  :: acsc
+    class(psb_lc_base_sparse_mat), allocatable :: aa
+    type(psb_lc_coo_sparse_mat)  :: acoo
 
     integer(psb_ipk_), allocatable :: perm(:) 
     integer(psb_ipk_) :: err_act
     character(len=20)           :: name
-    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz
+    integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz
 
     info = psb_success_
     name = 'mat_renum_amd'
     call psb_erractionsave(err_act)
 
-#if defined(HAVE_AMD) && defined(IPK4)
+#if defined(HAVE_AMD)  && defined(LPK4)
 
     info = psb_success_
     nr   = a%get_nrows()
@@ -331,11 +550,9 @@ contains
 
 9999 call psb_error_handler(err_act)
     return
+  end subroutine psb_lmat_renum_amd
 
-  end subroutine psb_mat_renum_amd
-
-end subroutine psb_c_mat_renum
-
+end subroutine psb_lc_mat_renum
 
 subroutine psb_c_cmp_bwpf(mat,bwl,bwu,prf,info)
   use psb_base_mod
@@ -386,3 +603,52 @@ subroutine psb_c_cmp_bwpf(mat,bwl,bwu,prf,info)
   end select
   
 end subroutine psb_c_cmp_bwpf
+
+subroutine psb_lc_cmp_bwpf(mat,bwl,bwu,prf,info)
+  use psb_base_mod
+  use psb_renum_mod, psb_protect_name => psb_lc_cmp_bwpf
+  implicit none 
+  type(psb_lcspmat_type), intent(in) :: mat
+  integer(psb_lpk_), intent(out) :: bwl, bwu
+  integer(psb_lpk_), intent(out) :: prf
+  integer(psb_ipk_), intent(out) :: info
+  !
+  integer(psb_lpk_), allocatable :: irow(:), icol(:)
+  complex(psb_spk_), allocatable :: val(:)
+  integer(psb_lpk_) :: nz, i, j, lrbu, lrbl
+  
+  info = psb_success_
+  bwl = 0
+  bwu = 0
+  prf = 0
+  select type (aa=>mat%a)
+  class is (psb_lc_csr_sparse_mat)
+    do i=1, aa%get_nrows()
+      lrbl = 0
+      lrbu = 0
+      do j = aa%irp(i), aa%irp(i+1) - 1
+        lrbl = max(lrbl,i-aa%ja(j))
+        lrbu = max(lrbu,aa%ja(j)-i)
+      end do
+      prf = prf + lrbl+lrbu
+      bwu  = max(bwu,lrbu)
+      bwl  = max(bwl,lrbu)
+    end do
+    
+  class default
+    do i=1, aa%get_nrows()
+      lrbl = 0
+      lrbu = 0
+      call aa%csget(i,i,nz,irow,icol,val,info)
+      if (info /= psb_success_) return
+      do j=1, nz
+        lrbl = max(lrbl,i-icol(j))
+        lrbu = max(lrbu,icol(j)-i)
+      end do
+      prf = prf + lrbl+lrbu
+      bwu  = max(bwu,lrbu)
+      bwl  = max(bwl,lrbu)
+    end do
+  end select
+  
+end subroutine psb_lc_cmp_bwpf
diff --git a/util/psb_c_renum_mod.f90 b/util/psb_c_renum_mod.f90
new file mode 100644
index 00000000..11335484
--- /dev/null
+++ b/util/psb_c_renum_mod.f90
@@ -0,0 +1,69 @@
+!   
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone    
+!        Alfredo Buttari      
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+!    
+module psb_c_renum_mod
+  use psb_base_mod
+  
+  interface psb_mat_renum
+    subroutine psb_c_mat_renum(alg,mat,info,perm)
+      import :: psb_ipk_, psb_cspmat_type
+      character(len=*), intent(in) :: alg
+      type(psb_cspmat_type), intent(inout) :: mat
+      integer(psb_ipk_), intent(out) :: info
+      integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
+    end subroutine psb_c_mat_renum
+    subroutine psb_lc_mat_renum(alg,mat,info,perm)
+      import :: psb_ipk_, psb_lpk_, psb_lcspmat_type
+      character(len=*), intent(in) :: alg
+      type(psb_lcspmat_type), intent(inout) :: mat
+      integer(psb_ipk_), intent(out) :: info
+      integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:)
+    end subroutine psb_lc_mat_renum
+  end interface psb_mat_renum
+  
+  interface psb_cmp_bwpf
+    subroutine psb_c_cmp_bwpf(mat,bwl,bwu,prf,info)
+      import :: psb_ipk_, psb_cspmat_type
+      type(psb_cspmat_type), intent(in) :: mat
+      integer(psb_ipk_), intent(out) :: bwl, bwu
+      integer(psb_ipk_), intent(out) :: prf
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_c_cmp_bwpf
+    subroutine psb_lc_cmp_bwpf(mat,bwl,bwu,prf,info)
+      import :: psb_ipk_, psb_lpk_, psb_lcspmat_type
+      type(psb_lcspmat_type), intent(in) :: mat
+      integer(psb_lpk_), intent(out) :: bwl, bwu
+      integer(psb_lpk_), intent(out) :: prf
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_lc_cmp_bwpf
+  end interface psb_cmp_bwpf
+
+end module psb_c_renum_mod
diff --git a/util/psb_d_renum_impl.F90 b/util/psb_d_renum_impl.F90
index bd4664d8..1c7928f5 100644
--- a/util/psb_d_renum_impl.F90
+++ b/util/psb_d_renum_impl.F90
@@ -29,37 +29,61 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
 !    
-subroutine psb_d_mat_renums(alg,mat,info,perm)
+subroutine psb_d_mat_renum(alg,mat,info,perm)
   use psb_base_mod
-  use psb_renum_mod, psb_protect_name => psb_d_mat_renums
+  use psb_renum_mod, psb_protect_name => psb_d_mat_renum
   implicit none 
   character(len=*), intent(in) :: alg
   type(psb_dspmat_type), intent(inout) :: mat
   integer(psb_ipk_), intent(out) :: info
   integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
   
-  integer(psb_ipk_) :: err_act, nr, nc, ialg
+  integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5)
   character(len=20)  :: name
 
   info = psb_success_
   name = 'mat_renum'
   call psb_erractionsave(err_act)
 
+  nr = mat%get_nrows()
+  nc = mat%get_ncols()
+  if (nr /= nc) then 
+    info = psb_err_rectangular_mat_unsupported_
+    ierr(1) = nr; ierr(2) = nc;
+    call psb_errpush(info,name,i_err=ierr)
+    goto 9999
+  end if
+
   info = psb_success_
   select case (psb_toupper(alg))
   case ('GPS')
-    ialg = psb_mat_renum_gps_
-  case ('AMD')
-    ialg = psb_mat_renum_amd_
+
+    call psb_mat_renum_gps(mat,info,perm)
+
+  case('AMD') 
+
+    call psb_mat_renum_amd(mat,info,perm)
+
   case ('NONE', 'ID') 
-    ialg = psb_mat_renum_identity_
+    nr = mat%get_nrows()
+    allocate(perm(nr),stat=info) 
+    if (info == 0) then 
+      do i=1,nr
+        perm(i) = i
+      end do
+    else
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info,name)
+      goto 9999
+    endif
   case default
     write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"'
-    ialg = -1
+    info = psb_err_input_value_invalid_i_
+    ierr(1) = 1; 
+    call psb_errpush(info,name,i_err=ierr)
+    goto 9999 
   end select
-
-  call psb_mat_renum(ialg,mat,info,perm)
-
+  
   if (info /= psb_success_) then 
     info = psb_err_from_subroutine_non_
     call psb_errpush(info,name)
@@ -71,26 +95,219 @@ subroutine psb_d_mat_renums(alg,mat,info,perm)
 
 9999 call psb_error_handler(err_act)
   return
-end subroutine psb_d_mat_renums
-  
-subroutine psb_d_mat_renum(alg,mat,info,perm)
+
+contains
+
+  subroutine psb_mat_renum_gps(a,info,operm)
+    use psb_base_mod
+    use psb_gps_mod
+    implicit none 
+    type(psb_dspmat_type), intent(inout) :: a
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+
+    ! 
+    class(psb_d_base_sparse_mat), allocatable :: aa
+    type(psb_d_csr_sparse_mat)  :: acsr
+    type(psb_d_coo_sparse_mat)  :: acoo
+    
+    integer(psb_ipk_) :: err_act
+    character(len=20)           :: name
+    integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) 
+    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth
+
+    info = psb_success_
+    name = 'mat_renum_gps'
+    call psb_erractionsave(err_act)
+
+    info = psb_success_
+
+    call a%mold(aa)
+    call a%mv_to(aa)
+    call aa%mv_to_fmt(acsr,info)
+    ! Insert call to gps_reduce
+    nr   = acsr%get_nrows()
+    ideg = 0
+    do i=1, nr
+      ideg = max(ideg,acsr%irp(i+1)-acsr%irp(i))
+    end do
+    allocate(ndstk(nr,ideg), iold(nr), perm(nr+1), ndeg(nr),stat=info)
+    if (info /= 0) then
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info, name) 
+      goto 9999
+    end if
+    do i=1, nr
+      iold(i) = i 
+      ndstk(i,:) = 0
+      k  = 0
+      do j=acsr%irp(i),acsr%irp(i+1)-1
+        k = k + 1
+        ndstk(i,k) = acsr%ja(j)
+      end do
+    end do
+    perm = 0
+
+    call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth)
+
+    if (.not.psb_isaperm(nr,perm)) then 
+      write(0,*) 'Something wrong: bad perm from gps_reduce'
+      info = psb_err_from_subroutine_
+      call psb_errpush(info,name) 
+      goto 9999
+    end if
+    ! Move to coordinate to apply renumbering
+    call acsr%mv_to_coo(acoo,info)
+    do i=1, acoo%get_nzeros()
+      acoo%ia(i) = perm(acoo%ia(i))
+      acoo%ja(i) = perm(acoo%ja(i))
+    end do
+    call acoo%fix(info) 
+
+    ! Get back to where we started from
+    call aa%mv_from_coo(acoo,info)
+    call a%mv_from(aa)
+    if (present(operm)) then 
+      call psb_realloc(nr,operm,info)
+      if (info /= psb_success_) then 
+        info = psb_err_alloc_dealloc_
+        call psb_errpush(info,name) 
+        goto 9999
+      end if
+      operm(1:nr) = perm(1:nr)
+    end if
+
+    deallocate(aa)
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+
+  end subroutine psb_mat_renum_gps
+
+
+  subroutine psb_mat_renum_amd(a,info,operm)
+#if defined(HAVE_AMD) 
+    use iso_c_binding
+#endif
+    use psb_base_mod    
+    implicit none 
+    type(psb_dspmat_type), intent(inout) :: a
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+
+    ! 
+#if defined(HAVE_AMD) 
+    interface 
+      function psb_amd_order(n,ap,ai,p)&
+           & result(res) bind(c,name='psb_amd_order')
+        use iso_c_binding
+        integer(c_int) :: res
+        integer(c_int), value :: n
+        integer(c_int) :: ap(*), ai(*), p(*)
+      end function psb_amd_order
+    end interface
+#endif
+
+    type(psb_d_csc_sparse_mat)  :: acsc
+    class(psb_d_base_sparse_mat), allocatable :: aa
+    type(psb_d_coo_sparse_mat)  :: acoo
+
+    integer(psb_ipk_), allocatable :: perm(:) 
+    integer(psb_ipk_) :: err_act
+    character(len=20)           :: name
+    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz
+
+    info = psb_success_
+    name = 'mat_renum_amd'
+    call psb_erractionsave(err_act)
+
+#if defined(HAVE_AMD)  && defined(IPK4)
+
+    info = psb_success_
+    nr   = a%get_nrows()
+    nz   = a%get_nzeros()
+    allocate(perm(nr),stat=info)
+    if (info /= 0) then 
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info,name) 
+      goto 9999
+    end if
+
+
+    allocate(aa, mold=a%a)
+    call a%mv_to(acsc)
+
+    acsc%ia(:)  = acsc%ia(:) - 1
+    acsc%icp(:) = acsc%icp(:) - 1
+
+    info = psb_amd_order(nr,acsc%icp,acsc%ia,perm)
+    if (info /= psb_success_) then 
+      info = psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_amd_order') 
+      goto 9999
+    end if
+
+    perm(:)     = perm(:) + 1
+    acsc%ia(:)  = acsc%ia(:) + 1
+    acsc%icp(:) = acsc%icp(:) + 1
+
+    call acsc%mv_to_coo(acoo,info)
+    do i=1, acoo%get_nzeros()
+      acoo%ia(i) = perm(acoo%ia(i))
+      acoo%ja(i) = perm(acoo%ja(i))
+    end do
+    call acoo%fix(info) 
+
+    ! Get back to where we started from
+    call aa%mv_from_coo(acoo,info)
+    call a%mv_from(aa)
+    if (present(operm)) then 
+      call psb_realloc(nr,operm,info)
+      if (info /= psb_success_) then 
+        info = psb_err_alloc_dealloc_
+        call psb_errpush(info,name) 
+        goto 9999
+      end if
+      operm(1:nr) = perm(1:nr)
+    end if
+
+    deallocate(aa,perm)
+
+#else 
+
+    info = psb_err_missing_aux_lib_
+    call psb_errpush(info, name) 
+    goto 9999
+#endif
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+  end subroutine psb_mat_renum_amd
+
+end subroutine psb_d_mat_renum
+
+subroutine psb_ld_mat_renum(alg,mat,info,perm)
   use psb_base_mod
-  use psb_renum_mod, psb_protect_name => psb_d_mat_renum
+  use psb_renum_mod, psb_protect_name => psb_ld_mat_renum
   implicit none 
-  integer(psb_ipk_), intent(in) :: alg
-  type(psb_dspmat_type), intent(inout) :: mat
+  character(len=*), intent(in) :: alg
+  type(psb_ldspmat_type), intent(inout) :: mat
   integer(psb_ipk_), intent(out) :: info
-  integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
+  integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:)
   
-  integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5)
+  integer(psb_lpk_) :: nr, nc, i
+  integer(psb_ipk_) :: err_act, ierr(5)
   character(len=20)  :: name
 
   info = psb_success_
   name = 'mat_renum'
   call psb_erractionsave(err_act)
 
-  info = psb_success_
-  
   nr = mat%get_nrows()
   nc = mat%get_ncols()
   if (nr /= nc) then 
@@ -100,16 +317,17 @@ subroutine psb_d_mat_renum(alg,mat,info,perm)
     goto 9999
   end if
 
-  select case (alg)
-  case(psb_mat_renum_gps_) 
+  info = psb_success_
+  select case (psb_toupper(alg))
+  case ('GPS')
 
-    call psb_mat_renum_gps(mat,info,perm)
+    call psb_lmat_renum_gps(mat,info,perm)
 
-  case(psb_mat_renum_amd_) 
+  case('AMD') 
 
-    call psb_mat_renum_amd(mat,info,perm)
+    call psb_lmat_renum_amd(mat,info,perm)
 
-  case(psb_mat_renum_identity_) 
+  case ('NONE', 'ID') 
     nr = mat%get_nrows()
     allocate(perm(nr),stat=info) 
     if (info == 0) then 
@@ -122,8 +340,9 @@ subroutine psb_d_mat_renum(alg,mat,info,perm)
       goto 9999
     endif
   case default
+    write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"'
     info = psb_err_input_value_invalid_i_
-    ierr(1) = 1; ierr(2) = alg;
+    ierr(1) = 1; 
     call psb_errpush(info,name,i_err=ierr)
     goto 9999 
   end select
@@ -142,23 +361,23 @@ subroutine psb_d_mat_renum(alg,mat,info,perm)
 
 contains
 
-  subroutine psb_mat_renum_gps(a,info,operm)
+  subroutine psb_lmat_renum_gps(a,info,operm)
     use psb_base_mod
-    use psb_gps_mod
+    use psb_lgps_mod
     implicit none 
-    type(psb_dspmat_type), intent(inout) :: a
+    type(psb_ldspmat_type), intent(inout) :: a
     integer(psb_ipk_), intent(out) :: info
-    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+    integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:)
 
     ! 
-    class(psb_d_base_sparse_mat), allocatable :: aa
-    type(psb_d_csr_sparse_mat)  :: acsr
-    type(psb_d_coo_sparse_mat)  :: acoo
+    class(psb_ld_base_sparse_mat), allocatable :: aa
+    type(psb_ld_csr_sparse_mat)  :: acsr
+    type(psb_ld_coo_sparse_mat)  :: acoo
     
     integer(psb_ipk_) :: err_act
     character(len=20)           :: name
-    integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) 
-    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth
+    integer(psb_lpk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) 
+    integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth
 
     info = psb_success_
     name = 'mat_renum_gps'
@@ -192,7 +411,7 @@ contains
     end do
     perm = 0
 
-    call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth)
+    call psb_lgps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth)
 
     if (.not.psb_isaperm(nr,perm)) then 
       write(0,*) 'Something wrong: bad perm from gps_reduce'
@@ -228,18 +447,18 @@ contains
 9999 call psb_error_handler(err_act)
     return
 
-  end subroutine psb_mat_renum_gps
+  end subroutine psb_lmat_renum_gps
 
 
-  subroutine psb_mat_renum_amd(a,info,operm)
+  subroutine psb_lmat_renum_amd(a,info,operm)
 #if defined(HAVE_AMD) 
     use iso_c_binding
 #endif
     use psb_base_mod    
     implicit none 
-    type(psb_dspmat_type), intent(inout) :: a
+    type(psb_ldspmat_type), intent(inout) :: a
     integer(psb_ipk_), intent(out) :: info
-    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+    integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:)
 
     ! 
 #if defined(HAVE_AMD) 
@@ -254,20 +473,20 @@ contains
     end interface
 #endif
 
-    type(psb_d_csc_sparse_mat)  :: acsc
-    class(psb_d_base_sparse_mat), allocatable :: aa
-    type(psb_d_coo_sparse_mat)  :: acoo
+    type(psb_ld_csc_sparse_mat)  :: acsc
+    class(psb_ld_base_sparse_mat), allocatable :: aa
+    type(psb_ld_coo_sparse_mat)  :: acoo
 
     integer(psb_ipk_), allocatable :: perm(:) 
     integer(psb_ipk_) :: err_act
     character(len=20)           :: name
-    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz
+    integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz
 
     info = psb_success_
     name = 'mat_renum_amd'
     call psb_erractionsave(err_act)
 
-#if defined(HAVE_AMD)  && defined(IPK4)
+#if defined(HAVE_AMD)  && defined(LPK4)
 
     info = psb_success_
     nr   = a%get_nrows()
@@ -331,10 +550,9 @@ contains
 
 9999 call psb_error_handler(err_act)
     return
-  end subroutine psb_mat_renum_amd
-
-end subroutine psb_d_mat_renum
+  end subroutine psb_lmat_renum_amd
 
+end subroutine psb_ld_mat_renum
 
 subroutine psb_d_cmp_bwpf(mat,bwl,bwu,prf,info)
   use psb_base_mod
@@ -385,3 +603,52 @@ subroutine psb_d_cmp_bwpf(mat,bwl,bwu,prf,info)
   end select
   
 end subroutine psb_d_cmp_bwpf
+
+subroutine psb_ld_cmp_bwpf(mat,bwl,bwu,prf,info)
+  use psb_base_mod
+  use psb_renum_mod, psb_protect_name => psb_ld_cmp_bwpf
+  implicit none 
+  type(psb_ldspmat_type), intent(in) :: mat
+  integer(psb_lpk_), intent(out) :: bwl, bwu
+  integer(psb_lpk_), intent(out) :: prf
+  integer(psb_ipk_), intent(out) :: info
+  !
+  integer(psb_lpk_), allocatable :: irow(:), icol(:)
+  real(psb_dpk_), allocatable :: val(:)
+  integer(psb_lpk_) :: nz, i, j, lrbu, lrbl
+  
+  info = psb_success_
+  bwl = 0
+  bwu = 0
+  prf = 0
+  select type (aa=>mat%a)
+  class is (psb_ld_csr_sparse_mat)
+    do i=1, aa%get_nrows()
+      lrbl = 0
+      lrbu = 0
+      do j = aa%irp(i), aa%irp(i+1) - 1
+        lrbl = max(lrbl,i-aa%ja(j))
+        lrbu = max(lrbu,aa%ja(j)-i)
+      end do
+      prf = prf + lrbl+lrbu
+      bwu  = max(bwu,lrbu)
+      bwl  = max(bwl,lrbu)
+    end do
+    
+  class default
+    do i=1, aa%get_nrows()
+      lrbl = 0
+      lrbu = 0
+      call aa%csget(i,i,nz,irow,icol,val,info)
+      if (info /= psb_success_) return
+      do j=1, nz
+        lrbl = max(lrbl,i-icol(j))
+        lrbu = max(lrbu,icol(j)-i)
+      end do
+      prf = prf + lrbl+lrbu
+      bwu  = max(bwu,lrbu)
+      bwl  = max(bwl,lrbu)
+    end do
+  end select
+  
+end subroutine psb_ld_cmp_bwpf
diff --git a/util/psb_d_renum_mod.f90 b/util/psb_d_renum_mod.f90
new file mode 100644
index 00000000..8e4b4e03
--- /dev/null
+++ b/util/psb_d_renum_mod.f90
@@ -0,0 +1,69 @@
+!   
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone    
+!        Alfredo Buttari      
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+!    
+module psb_d_renum_mod
+  use psb_base_mod
+  
+  interface psb_mat_renum
+    subroutine psb_d_mat_renum(alg,mat,info,perm)
+      import :: psb_ipk_, psb_dspmat_type
+      character(len=*), intent(in) :: alg
+      type(psb_dspmat_type), intent(inout) :: mat
+      integer(psb_ipk_), intent(out) :: info
+      integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
+    end subroutine psb_d_mat_renum
+    subroutine psb_ld_mat_renum(alg,mat,info,perm)
+      import :: psb_ipk_, psb_lpk_, psb_ldspmat_type
+      character(len=*), intent(in) :: alg
+      type(psb_ldspmat_type), intent(inout) :: mat
+      integer(psb_ipk_), intent(out) :: info
+      integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:)
+    end subroutine psb_ld_mat_renum
+  end interface psb_mat_renum
+  
+  interface psb_cmp_bwpf
+    subroutine psb_d_cmp_bwpf(mat,bwl,bwu,prf,info)
+      import :: psb_ipk_, psb_dspmat_type
+      type(psb_dspmat_type), intent(in) :: mat
+      integer(psb_ipk_), intent(out) :: bwl, bwu
+      integer(psb_ipk_), intent(out) :: prf
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_d_cmp_bwpf
+    subroutine psb_ld_cmp_bwpf(mat,bwl,bwu,prf,info)
+      import :: psb_ipk_, psb_lpk_, psb_ldspmat_type
+      type(psb_ldspmat_type), intent(in) :: mat
+      integer(psb_lpk_), intent(out) :: bwl, bwu
+      integer(psb_lpk_), intent(out) :: prf
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_ld_cmp_bwpf
+  end interface psb_cmp_bwpf
+
+end module psb_d_renum_mod
diff --git a/util/psb_gps_mod.f90 b/util/psb_gps_mod.f90
index fc701ffc..7c42d990 100644
--- a/util/psb_gps_mod.f90
+++ b/util/psb_gps_mod.f90
@@ -774,4 +774,744 @@ CONTAINS
     RETURN
   END SUBROUTINE REALLOC
   !
-END MODULE psb_gps_mod
+end module psb_gps_mod
+
+module psb_lgps_mod
+  !
+  use psb_base_mod, only : psb_lpk_
+  public psb_lgps_reduce
+  !
+  ! COMMON /GRA/ N, IDPTH, IDEG
+  !
+  private
+  ! common /CC/ XCC,SIZEG,STPT
+  integer(psb_lpk_), save            :: xcc,n,idpth,ideg
+  integer(psb_lpk_),allocatable,SAVE :: SIZEG(:),STPT(:)
+  !
+  ! COMMON /LVLW/ NHIGH,NLOW,NACUM
+  integer(psb_lpk_),allocatable,target,save :: NHIGH(:),NLOW(:),NACUM(:),AUX(:)
+  integer(psb_lpk_),PARAMETER :: INIT=500
+  !
+CONTAINS
+  !
+!!$  SUBROUTINE psb_gps_reduce(NDSTK, NR, IOLD, RENUM, NDEG, LVL, LVLS1, LVLS2,&
+!!$       & CCSTOR, IBW2, IPF2,NE,IDPTHE,IDEGE)
+  SUBROUTINE psb_lgps_reduce(NDSTK, NR, IDEGE, IOLD, RENUM, NDEG,ibw2,ipf2,IDPTHE)
+    !  SUBROUTINE REDUCE DETERMINES A ROW AND COLUMN PERMUTATION WHICH,
+    !  WHEN APPLIED TO A GIVEN SPARSE MATRIX, PRODUCES A PERMUTED
+    !  MATRIX WITH A SMALLER BANDWIDTH AND PROFILE.
+    !  THE INPUT ARRAY IS A CONNECTION TABLE WHICH REPRESENTS THE
+    !  INDICES OF THE NONZERO ELEMENTS OF THE MATRIX, A.  THE ALGO-
+    !  RITHM IS DESCRIBED IN TERMS OF THE ADJACENCY GRAPH WHICH
+    !  HAS THE CHARACTERISTIC THAT THERE IS AN EDGE (CONNECTION)
+    !  BETWEEN NODES I AND J IF A(I,J) /= 0 AND I /= J.
+    !  DIMENSIONING INFORMATION--THE FOLLOWING INTEGER ARRAYS MUST BE
+    !  DIMENSIONED IN THE CALLING ROUTINE.
+    !    NDSTK(NR,D1)        D1 IS >= MAXIMUM DEGREE OF ALL NODES.
+    !    IOLD(D2)            D2 AND NR ARE >= THE TOTAL NUMBER OF
+    !    RENUM(D2+1)         NODES IN THE GRAPH.
+    !    NDEG(D2)            STORAGE REQUIREMENTS CAN BE SIGNIFICANTLY
+    !    LVL(D2)             DECREASED FOR IBM 360 AND 370 COMPUTERS
+    !    LVLS1(D2)           BY REPLACING INTEGER NDSTK BY
+    !    LVLS2(D2)           INTEGER*2 NDSTK IN SUBROUTINES REDUCE,
+    !    CCSTOR(D2)          DGREE, FNDIAM, TREE AND NUMBER.
+    !  COMMON INFORMATION--THE FOLLOWING COMMON BLOCK MUST BE IN THE
+    !  CALLING ROUTINE.
+    !    COMMON/GRA/N,IDPTH,IDEG
+    !  EXPLANATION OF INPUT VARIABLES--
+    !    NDSTK-     CONNECTION TABLE REPRESENTING GRAPH.
+    !               NDSTK(I,J)=NODE NUMBER OF JTH CONNECTION TO NODE
+    !               NUMBER I.  A CONNECTION OF A NODE TO ITSELF IS NOT
+    !               LISTED.  EXTRA POSITIONS MUST HAVE ZERO FILL.
+    !    NR-        ROW DIMENSION ASSIGNED NDSTK IN CALLING PROGRAM.
+    !    IOLD(I)-   NUMBERING OF ITH NODE UPON INPUT.
+    !               IF NO NUMBERING EXISTS THEN IOLD(I)=I.
+    !    N-         NUMBER OF NODES IN GRAPH (EQUAL TO ORDER OF MATRIX).
+    !    IDEG-      MAXIMUM DEGREE OF ANY NODE IN THE GRAPH.
+    !  EXPLANATION OF OUTPUT VARIABLES--
+    !    RENUM(I)-  THE NEW NUMBER FOR THE ITH NODE.
+    !    NDEG(I)-   THE DEGREE OF THE ITH NODE.
+    !    IBW2-      THE BANDWIDTH AFTER RENUMBERING.
+    !    IPF2-      THE PROFILE AFTER RENUMBERING.
+    !    IDPTH-     NUMBER OF LEVELS IN REDUCE LEVEL STRUCTURE.
+    !  THE FOLLOWING ONLY HAVE MEANING IF THE GRAPH WAS CONNECTED--
+    !    LVL(I)-    INDEX INTO LVLS1 TO THE FIRST NODE IN LEVEL I.
+    !               LVL(I+1)-LVL(I)= NUMBER OF NODES IN ITH LEVEL
+    !    LVLS1-     NODE NUMBERS LISTED BY LEVEL.
+    !    LVLS2(I)-  THE LEVEL ASSIGNED TO NODE I BY REDUCE.
+    !  WORKING STORAGE VARIABLE--
+    !    CCSTOR
+    !  LOCAL STORAGE--
+    !    COMMON/CC/-SUBROUTINES REDUCE, SORT2 AND PIKLVL ASSUME THAT
+    !               THE GRAPH HAS AT MOST 50 CONNECTED COMPONENTS.
+    !               SUBROUTINE FNDIAM ASSUMES THAT THERE ARE AT MOST
+    !               100 NODES IN THE LAST LEVEL.
+    !    COMMON/LVLW/-SUBROUTINES SETUP AND PIKLVL ASSUME THAT THERE
+    !               ARE AT MOST 100 LEVELS.
+    ! USE INTEGER*2 NDSTK  WITH AN IBM 360 OR 370.
+    use psb_base_mod
+    implicit none 
+    INTEGER(psb_lpk_) :: NR, IDEGE, IBW2, IPF2, IDPTHE
+    ! COMMON /GRA/ N, IDPTH, IDEG
+    ! IT IS ASSUMED THAT THE GRAPH HAS AT MOST 50 CONNECTED COMPONENTS.
+    ! COMMON /CC/ XCC, SIZEG(50), STPT(50)
+    ! COMMON /LVLW/ NHIGH(100), NLOW(100), NACUM(100)
+    integer(psb_lpk_) :: stnode, rvnode, stnum, sbnum
+    integer(psb_lpk_) :: ndstk(nr,idege), iold(nr), renum(nr+1), ndeg(nr) 
+    integer(psb_lpk_) :: lvl(nr), lvls1(nr), lvls2(nr), ccstor(nr)    
+    integer(psb_lpk_) :: nflg, info, i, ibw1, ipf1, idflt, isdir, lroot, lowdg
+    integer(psb_lpk_) :: lvlbot, lvln, lvlwth, maxlw, num
+    n     = nr
+    ideg  = idege
+    idpth = 0
+
+    ALLOCATE(SIZEG(NR),STPT(NR), STAT=INFO)
+    IF(INFO /= psb_success_) THEN
+      write(psb_out_unit,*) 'ERROR! MEMORY ALLOCATION # 1 FAILED IN GPS'
+      STOP
+    END IF
+    !
+    ALLOCATE(NHIGH(INIT), NLOW(INIT), NACUM(INIT), AUX(INIT), STAT=INFO)
+    IF(INFO /= psb_success_) THEN
+      write(psb_out_unit,*) 'ERROR! MEMORY ALLOCATION # 2 FAILED IN GPS'
+      STOP
+    END IF
+    !
+    IBW2 = 0
+    IPF2 = 0
+    ! SET RENUM(I)=0 FOR ALL I TO INDICATE NODE I IS UNNUMBERED
+    DO  I=1,N
+      RENUM(I) = 0
+    END DO
+    ! COMPUTE DEGREE OF EACH NODE AND ORIGINAL BANDWIDTH AND PROFILE
+    CALL DGREE(NDSTK, NR, NDEG, IOLD, IBW1, IPF1)
+    ! SBNUM= LOW END OF AVAILABLE NUMBERS FOR RENUMBERING
+    ! STNUM= HIGH END OF AVAILABLE NUMBERS FOR RENUMBERING
+    SBNUM = 1
+    STNUM = N
+    ! NUMBER THE NODES OF DEGREE ZERO
+    DO I=1,N
+      IF (NDEG(I) > 0) CYCLE
+      RENUM(I) = STNUM
+      STNUM = STNUM - 1
+    END DO
+    ! FIND AN UNNUMBERED NODE OF MIN DEGREE TO START ON
+    do 
+      LOWDG = IDEG + 1
+      NFLG = 1
+      ISDIR = 1
+      DO  I=1,N
+        IF (NDEG(I) >= LOWDG) CYCLE
+        IF (RENUM(I) > 0) CYCLE
+        LOWDG = NDEG(I)
+        STNODE = I
+      END DO
+      ! FIND PSEUDO-DIAMETER AND ASSOCIATED LEVEL STRUCTURES.
+      ! STNODE AND RVNODE ARE THE ENDS OF THE DIAM AND LVLS1 AND LVLS2
+      ! ARE THE RESPECTIVE LEVEL STRUCTURES.
+      CALL FNDIAM(STNODE, RVNODE, NDSTK, NR, NDEG, LVL, LVLS1,LVLS2, CCSTOR, IDFLT)
+      IF (.not.(ndeg(stnode) <= ndeg(rvnode))) then 
+        ! NFLG INDICATES THE END TO BEGIN NUMBERING ON
+        NFLG = -1
+        STNODE = RVNODE
+      endif
+      CALL SETUP(LVL, LVLS1, LVLS2)
+      ! FIND ALL THE CONNECTED COMPONENTS  (XCC COUNTS THEM)
+      XCC = 0
+      LROOT = 1
+      LVLN = 1
+      DO  I=1,N
+        IF (LVL(I) /= 0) CYCLE
+        XCC = XCC + 1
+        STPT(XCC) = LROOT
+        CALL TREE(I, NDSTK, NR, LVL, CCSTOR, NDEG, LVLWTH, LVLBOT,LVLN, MAXLW, N)
+        SIZEG(XCC) = LVLBOT + LVLWTH - LROOT
+        LROOT = LVLBOT + LVLWTH
+        LVLN = LROOT
+      END DO
+      if (sort2() /= 0) then 
+        CALL PIKLVL(LVLS1, LVLS2, CCSTOR, IDFLT, ISDIR)
+      endif
+      ! ON RETURN FROM PIKLVL, ISDIR INDICATES THE DIRECTION THE LARGEST
+      ! COMPONENT FELL.  ISDIR IS MODIFIED NOW TO INDICATE THE NUMBERING
+      ! DIRECTION.  NUM IS SET TO THE PROPER VALUE FOR THIS DIRECTION.
+      ISDIR = ISDIR*NFLG
+      NUM = SBNUM
+      IF (ISDIR < 0) NUM = STNUM
+      CALL NUMBER(STNODE, NUM, NDSTK, LVLS2, NDEG, RENUM, LVLS1,LVL,&
+           & NR, NFLG, IBW2, IPF2, CCSTOR, ISDIR)
+      ! UPDATE STNUM OR SBNUM AFTER NUMBERING
+      IF (ISDIR < 0) STNUM = NUM
+      IF (ISDIR > 0) SBNUM = NUM
+      IF (.not.(sbnum <= stnum)) exit
+    end do
+    IF (IBW2 > IBW1) then 
+      ! IF ORIGINAL NUMBERING IS BETTER THAN NEW ONE, SET UP TO RETURN IT
+      DO I=1,N
+        RENUM(I) = IOLD(I)
+      END DO
+      IBW2 = IBW1
+      IPF2 = IPF1
+      !
+    endif
+    DEALLOCATE(SIZEG,STPT,NHIGH,NLOW,AUX,NACUM)      
+    idpthe = idpth
+    RETURN
+  end subroutine psb_lgps_reduce
+  !
+  SUBROUTINE DGREE(NDSTK, NR, NDEG, IOLD, IBW1, IPF1)
+    !  DGREE COMPUTES THE DEGREE OF EACH NODE IN NDSTK AND STORES
+    !  IT IN THE ARRAY NDEG.  THE BANDWIDTH AND PROFILE FOR THE ORIGINAL
+    !  OR INPUT RENUMBERING OF THE GRAPH IS COMPUTED ALSO.
+    ! USE INTEGER*2 NDSTK  WITH AN IBM 360 OR 370.
+    implicit none 
+    INTEGER(psb_lpk_) :: NR, IBW1, IPF1, NDSTK(NR,IDEG), NDEG(N), IOLD(N)
+    ! COMMON /GRA/ N, IDPTH, IDEG
+    integer(psb_lpk_) :: i, itst, j, idif, irw 
+
+    IBW1 = 0
+    IPF1 = 0
+    DO  I=1,N
+      NDEG(I) = 0
+      IRW = 0
+      DO J=1,IDEG
+        ITST = NDSTK(I,J)
+        IF(ITST <= 0) EXIT
+        NDEG(I) = NDEG(I) + 1
+        IDIF = IOLD(I) - IOLD(ITST)
+        IF (IRW < IDIF) IRW = IDIF
+      END DO
+      IPF1 = IPF1 + IRW
+      IF (IRW > IBW1) IBW1 = IRW
+    END DO
+    RETURN
+  END SUBROUTINE DGREE
+  !
+  SUBROUTINE FNDIAM(SND1, SND2, NDSTK, NR, NDEG, LVL, LVLS1,LVLS2, IWK, IDFLT)
+    !  FNDIAM IS THE CONTROL PROCEDURE FOR FINDING THE PSEUDO-DIAMETER OF
+    !  NDSTK AS WELL AS THE LEVEL STRUCTURE FROM EACH END
+    !  SND1-        ON INPUT THIS IS THE NODE NUMBER OF THE FIRST
+    !               ATTEMPT AT FINDING A DIAMETER.  ON OUTPUT IT
+    !               CONTAINS THE ACTUAL NUMBER USED.
+    !  SND2-        ON OUTPUT CONTAINS OTHER END OF DIAMETER
+    !  LVLS1-       ARRAY CONTAINING LEVEL STRUCTURE WITH SND1 AS ROOT
+    !  LVLS2-       ARRAY CONTAINING LEVEL STRUCTURE WITH SND2 AS ROOT
+    !  IDFLT-       FLAG USED IN PICKING FINAL LEVEL STRUCTURE, SET
+    !               =1 IF WIDTH OF LVLS1 <= WIDTH OF LVLS2, OTHERWISE =2
+    !  LVL,IWK-     WORKING STORAGE
+    ! USE INTEGER*2 NDSTK  WITH AN IBM 360 OR 370.
+    implicit none 
+    INTEGER(psb_lpk_) :: FLAG, SND, SND1, SND2, NR, idflt
+    ! COMMON /GRA/ N, IDPTH, IDEG
+    ! IT IS ASSUMED THAT THE LAST LEVEL HAS AT MOST 100 NODES.
+    ! COMMON /CC/ NDLST(100)
+    integer(psb_lpk_),POINTER :: NDLST(:)
+    integer(psb_lpk_) :: NDSTK(NR,IDEG), NDEG(1), LVL(N), LVLS1(N), LVLS2(N),IWK(N)
+    integer(psb_lpk_) :: i, j, mtw2, ndxn, ndxl, inow, lvlbot,lvln,lvlwth
+    integer(psb_lpk_) :: k,mtw1, maxlw
+    !
+    NDLST => AUX
+    !
+    FLAG = 0
+    MTW2 = N
+    SND = SND1
+    ! ZERO LVL TO INDICATE ALL NODES ARE AVAILABLE TO TREE
+10  DO 20 I=1,N
+       LVL(I) = 0
+20  END DO
+    LVLN = 1
+    ! DROP A TREE FROM SND
+    CALL TREE(SND, NDSTK, NR, LVL, IWK, NDEG, LVLWTH, LVLBOT,LVLN, MAXLW, MTW2)
+    IF (FLAG >= 1) GO TO 50
+    FLAG = 1
+30  IDPTH = LVLN - 1
+    MTW1 = MAXLW
+    ! COPY LEVEL STRUCTURE INTO LVLS1
+    DO 40 I=1,N
+       LVLS1(I) = LVL(I)
+40  END DO
+    NDXN = 1
+    NDXL = 0
+    MTW2 = N
+    ! SORT LAST LEVEL BY DEGREE  AND STORE IN NDLST
+    CALL SORTDG(NDLST, IWK(LVLBOT), NDXL, LVLWTH, NDEG)
+    SND = NDLST(1)
+    GO TO 10
+50  IF (IDPTH >= LVLN-1) GO TO 60
+    ! START AGAIN WITH NEW STARTING NODE
+    SND1 = SND
+    GO TO 30
+60  IF (MAXLW >= MTW2) GO TO 80
+    MTW2 = MAXLW
+    SND2 = SND
+    ! STORE NARROWEST REVERSE LEVEL STRUCTURE IN LVLS2
+    DO 70 I=1,N
+       LVLS2(I) = LVL(I)
+70  END DO
+80  IF (NDXN == NDXL) GO TO 90
+    ! TRY NEXT NODE IN NDLST
+    NDXN = NDXN + 1
+    SND = NDLST(NDXN)
+    GO TO 10
+90  IDFLT = 1
+    IF (MTW2 <= MTW1) IDFLT = 2
+    NULLIFY(NDLST)
+    RETURN
+  END SUBROUTINE FNDIAM
+  !
+  SUBROUTINE TREE(IROOT, NDSTK, NR, LVL, IWK, NDEG, LVLWTH, LVLBOT, LVLN, MAXLW, IBORT)
+    !  TREE DROPS A TREE IN NDSTK FROM IROOT
+    !  LVL-         ARRAY INDICATING AVAILABLE NODES IN NDSTK WITH ZERO
+    !               ENTRIES. TREE ENTERS LEVEL NUMBERS ASSIGNED
+    !               DURING EXECUTION OF THIS PROCEDURE
+    !  IWK-         ON OUTPUT CONTAINS NODE NUMBERS USED IN TREE
+    !               ARRANGED BY LEVELS (IWK(LVLN) CONTAINS IROOT
+    !               AND IWK(LVLBOT+LVLWTH-1) CONTAINS LAST NODE ENTERED)
+    !  LVLWTH-      ON OUTPUT CONTAINS WIDTH OF LAST LEVEL
+    !  LVLBOT-      ON OUTPUT CONTAINS INDEX INTO IWK OF FIRST
+    !               NODE IN LAST LEVEL
+    !  MAXLW-       ON OUTPUT CONTAINS THE MAXIMUM LEVEL WIDTH
+    !  LVLN-        ON INPUT THE FIRST AVAILABLE LOCATION IN IWK
+    !               USUALLY ONE BUT IF IWK IS USED TO STORE PREVIOUS
+    !               CONNECTED COMPONENTS, LVLN IS NEXT AVAILABLE LOCATION.
+    !               ON OUTPUT THE TOTAL NUMBER OF LEVELS + 1
+    !  IBORT-       INPUT PARAM WHICH TRIGGERS EARLY RETURN IF
+    !               MAXLW BECOMES >= IBORT
+    ! USE INTEGER*2 NDSTK  WITH AN IBM 360 OR 370.
+    implicit none 
+    integer(psb_lpk_) :: IROOT, NR, NDSTK(NR,*), LVL(*), IWK(*), NDEG(*)
+    integer(psb_lpk_) :: LVLWTH, LVLBOT, LVLN, MAXLW, IBORT
+    integer(psb_lpk_) :: itest, iwknow, itop, lvltop,j , inow, ndrow
+    MAXLW = 0
+    ITOP = LVLN
+    INOW = LVLN
+    LVLBOT = LVLN
+    LVLTOP = LVLN + 1
+    LVLN = 1
+    LVL(IROOT) = 1
+    IWK(ITOP) = IROOT
+10  LVLN = LVLN + 1
+20  IWKNOW = IWK(INOW)
+    NDROW = NDEG(IWKNOW)
+    DO 30 J=1,NDROW
+       ITEST = NDSTK(IWKNOW,J)
+       IF (LVL(ITEST) /= 0) CYCLE
+       LVL(ITEST) = LVLN
+       ITOP = ITOP + 1
+       IWK(ITOP) = ITEST
+30  END DO
+    INOW = INOW + 1
+    IF (INOW < LVLTOP) GO TO 20
+    LVLWTH = LVLTOP - LVLBOT
+    IF (MAXLW < LVLWTH) MAXLW = LVLWTH
+    IF (MAXLW >= IBORT) RETURN
+    IF (ITOP < LVLTOP) RETURN
+    LVLBOT = INOW
+    LVLTOP = ITOP + 1
+    GO TO 10
+  END SUBROUTINE TREE
+  !
+  SUBROUTINE SORTDG(STK1, STK2, X1, X2, NDEG)
+    ! SORTDG SORTS STK2 BY DEGREE OF THE NODE AND ADDS IT TO THE END
+    ! OF STK1 IN ORDER OF LOWEST TO HIGHEST DEGREE.  X1 AND X2 ARE THE
+    ! NUMBER OF NODES IN STK1 AND STK2 RESPECTIVELY.
+    implicit none 
+    INTEGER(psb_lpk_) :: X1, X2, STK1, STK2, TEMP,NDEG
+    ! COMMON /GRA/ N, IDPTH, IDEG
+    DIMENSION NDEG(N), STK1(X1+X2), STK2(X2)
+    integer(psb_lpk_) :: ind,itest,i,j,istk2,jstk2
+    IND = X2
+10  ITEST = 0
+    IND = IND - 1
+    IF (IND < 1) GO TO 30
+    DO 20 I=1,IND
+       J = I + 1
+       ISTK2 = STK2(I)
+       JSTK2 = STK2(J)
+       IF (NDEG(ISTK2) <= NDEG(JSTK2)) CYCLE
+       ITEST = 1
+       TEMP = STK2(I)
+       STK2(I) = STK2(J)
+       STK2(J) = TEMP
+20  END DO
+    IF (ITEST == 1) GO TO 10
+30  DO 40 I=1,X2
+       X1 = X1 + 1
+       STK1(X1) = STK2(I)
+40  END DO
+    RETURN
+  END SUBROUTINE SORTDG
+  !
+  SUBROUTINE SETUP(LVL, LVLS1, LVLS2)
+    ! SETUP COMPUTES THE REVERSE LEVELING INFO FROM LVLS2 AND STORES
+    ! IT INTO LVLS2.  NACUM(I) IS INITIALIZED TO NODES/ITH LEVEL FOR NODES
+    ! ON THE PSEUDO-DIAMETER OF THE GRAPH.  LVL IS INITIALIZED TO NON-
+    ! ZERO FOR NODES ON THE PSEUDO-DIAM AND NODES IN A DIFFERENT
+    ! COMPONENT OF THE GRAPH.
+    !      COMMON /GRA/ N, IDPTH, IDEG
+    ! IT IS ASSUMED THAT THERE ARE AT MOST 100 LEVELS.
+    ! COMMON /LVLW/ NHIGH(100), NLOW(100), NACUM(100)
+    use psb_base_mod
+    implicit none 
+    integer(psb_lpk_) :: LVL(N), LVLS1(N), LVLS2(N)
+    integer(psb_lpk_) :: SZ,i,itemp
+    !-----------------------------------------------------
+    SZ=SIZE(NACUM)
+    IF(SZ < IDPTH) THEN
+       write(psb_out_unit,*) 'GPS_SETUP: on fly reallocation of NACUM'
+       CALL REALLOC(NACUM,SZ,IDPTH) 
+    END IF
+    !-----------------------------------------------------
+    DO 10 I=1,IDPTH
+       NACUM(I) = 0
+10  END DO
+    DO 30 I=1,N
+       LVL(I) = 1
+       LVLS2(I) = IDPTH + 1 - LVLS2(I)
+       ITEMP = LVLS2(I)
+       IF (ITEMP > IDPTH) CYCLE
+       IF (ITEMP /= LVLS1(I)) GO TO 20
+       NACUM(ITEMP) = NACUM(ITEMP) + 1
+       CYCLE
+20     LVL(I) = 0
+30  END DO
+    RETURN
+  END SUBROUTINE SETUP
+  !
+  FUNCTION SORT2() result(val)
+    implicit none 
+    INTEGER(psb_lpk_) ::  val
+    ! SORT2 SORTS SIZEG AND STPT INTO DESCENDING ORDER ACCORDING TO
+    ! VALUES OF SIZEG. XCC=NUMBER OF ENTRIES IN EACH ARRAY
+    INTEGER(psb_lpk_) :: TEMP,ind,itest,i,j
+    ! IT IS ASSUMED THAT THE GRAPH HAS AT MOST 50 CONNECTED COMPONENTS.
+    !COMMON /CC/ XCC, SIZEG(50), STPT(50)
+
+    VAL = 0
+    IF (XCC == 0) RETURN
+    VAL = 1
+    IND = XCC
+10  ITEST = 0
+    IND = IND - 1
+    IF (IND < 1) RETURN
+    DO 20 I=1,IND
+       J = I + 1
+       IF (SIZEG(I) >= SIZEG(J)) CYCLE
+       ITEST = 1
+       TEMP = SIZEG(I)
+       SIZEG(I) = SIZEG(J)
+       SIZEG(J) = TEMP
+       TEMP = STPT(I)
+       STPT(I) = STPT(J)
+       STPT(J) = TEMP
+20  END DO
+    IF (ITEST == 1) GO TO 10
+    RETURN
+  END FUNCTION SORT2
+  !
+  SUBROUTINE PIKLVL(LVLS1, LVLS2, CCSTOR, IDFLT, ISDIR)
+    use psb_base_mod
+    implicit none 
+    ! PIKLVL CHOOSES THE LEVEL STRUCTURE  USED IN NUMBERING GRAPH
+    ! LVLS1-       ON INPUT CONTAINS FORWARD LEVELING INFO
+    ! LVLS2-       ON INPUT CONTAINS REVERSE LEVELING INFO
+    !              ON OUTPUT THE FINAL LEVEL STRUCTURE CHOSEN
+    ! CCSTOR-      ON INPUT CONTAINS CONNECTED COMPONENT INFO
+    ! IDFLT-       ON INPUT =1 IF WDTH LVLS1 <= WDTH LVLS2, =2 OTHERWISE
+    ! NHIGH        KEEPS TRACK OF LEVEL WIDTHS FOR HIGH NUMBERING
+    ! NLOW-        KEEPS TRACK OF LEVEL WIDTHS FOR LOW NUMBERING
+    ! NACUM-       KEEPS TRACK OF LEVEL WIDTHS FOR CHOSEN LEVEL STRUCTURE
+    ! XCC-         NUMBER OF CONNECTED COMPONENTS
+    ! SIZEG(I)-    SIZE OF ITH CONNECTED COMPONENT
+    ! STPT(I)-     INDEX INTO CCSTORE OF 1ST NODE IN ITH CON COMPT
+    ! ISDIR-       FLAG WHICH INDICATES WHICH WAY THE LARGEST CONNECTED
+    !              COMPONENT FELL.  =+1 IF LOW AND -1 IF HIGH
+    ! COMMON /GRA/ N, IDPTH, IDEG
+    ! IT IS ASSUMED THAT THE GRAPH HAS AT MOST 50 COMPONENTS AND
+    ! THAT THERE ARE AT MOST 100 LEVELS.
+    ! COMMON /LVLW/ NHIGH(100), NLOW(100), NACUM(100)
+    ! COMMON /CC/ XCC, SIZEG(50), STPT(50)
+    INTEGER(psb_lpk_) :: LVLS1(N), LVLS2(N), CCSTOR(N)
+    integer(psb_lpk_) :: SZ, ENDC,i,j,max1,max2,inode
+    integer(psb_lpk_) :: lvlnh, it, k, lvlnl,idflt,isdir
+    ! FOR EACH CONNECTED COMPONENT DO
+    DO 80 I=1,XCC
+       J = STPT(I)
+       ENDC= SIZEG(I) + J - 1
+       ! SET NHIGH AND NLOW EQUAL TO NACUM
+       !-----------------------------------------------------
+       SZ=SIZE(NHIGH)
+       IF(SZ < IDPTH) THEN
+          write(psb_out_unit,*) 'GPS_PIKLVL: on fly reallocation of NHIGH'
+          CALL REALLOC(NHIGH,SZ,IDPTH) 
+       END IF
+       SZ=SIZE(NLOW)
+       IF(SZ < IDPTH) THEN
+          write(psb_out_unit,*) 'GPS_PIKLVL: on fly reallocation of NLOW'
+          CALL REALLOC(NLOW,SZ,IDPTH) 
+       END IF
+       !-----------------------------------------------------
+       DO 10 K=1,IDPTH
+          NHIGH(K) = NACUM(K)
+          NLOW(K) = NACUM(K)
+10     END DO
+       ! UPDATE NHIGH AND NLOW FOR EACH NODE IN CONNECTED COMPONENT
+       DO 20 K=J,ENDC
+          INODE = CCSTOR(K)
+          LVLNH = LVLS1(INODE)
+          NHIGH(LVLNH) = NHIGH(LVLNH) + 1
+          LVLNL = LVLS2(INODE)
+          NLOW(LVLNL) = NLOW(LVLNL) + 1
+20     END DO
+       MAX1 = 0
+       MAX2 = 0
+       ! SET MAX1=LARGEST NEW NUMBER IN NHIGH
+       ! SET MAX2=LARGEST NEW NUMBER IN NLOW
+       DO 30 K=1,IDPTH
+          IF (2*NACUM(K) == NLOW(K)+NHIGH(K)) CYCLE
+          IF (NHIGH(K) > MAX1) MAX1 = NHIGH(K)
+          IF (NLOW(K) > MAX2) MAX2 = NLOW(K)
+30     END DO
+       ! SET IT= NUMBER OF LEVEL STRUCTURE TO BE USED
+       IT = 1
+       IF (MAX1 > MAX2) IT = 2
+       IF (MAX1 == MAX2) IT = IDFLT
+       IF (IT == 2) GO TO 60
+       IF (I == 1) ISDIR = -1
+       ! COPY LVLS1 INTO LVLS2 FOR EACH NODE IN CONNECTED COMPONENT
+       DO 40 K=J,ENDC
+          INODE = CCSTOR(K)
+          LVLS2(INODE) = LVLS1(INODE)
+40     END DO
+       ! UPDATE NACUM TO BE THE SAME AS NHIGH
+       DO 50 K=1,IDPTH
+          NACUM(K) = NHIGH(K)
+50     END DO
+       CYCLE
+       ! UPDATE NACUM TO BE THE SAME AS NLOW
+60     DO 70 K=1,IDPTH
+          NACUM(K) = NLOW(K)
+70     END DO
+80  END DO
+    RETURN
+  END SUBROUTINE PIKLVL
+  !
+  SUBROUTINE NUMBER(SND, NUM, NDSTK, LVLS2, NDEG, RENUM, LVLST,LSTPT,&
+       & NR, NFLG, IBW2, IPF2, IPFA, ISDIR)
+    use psb_base_mod
+    implicit none 
+    !  NUMBER PRODUCES THE NUMBERING OF THE GRAPH FOR MIN BANDWIDTH
+    !  SND-         ON INPUT THE NODE TO BEGIN NUMBERING ON
+    !  NUM-         ON INPUT AND OUTPUT, THE NEXT AVAILABLE NUMBER
+    !  LVLS2-       THE LEVEL STRUCTURE TO BE USED IN NUMBERING
+    !  RENUM-       THE ARRAY USED TO STORE THE NEW NUMBERING
+    !  LVLST-       ON OUTPUT CONTAINS LEVEL STRUCTURE
+    !  LSTPT(I)-    ON OUTPUT, INDEX INTO LVLST TO FIRST NODE IN ITH LVL
+    !               LSTPT(I+1) - LSTPT(I) = NUMBER OF NODES IN ITH LVL
+    !  NFLG-        =+1 IF SND IS FORWARD END OF PSEUDO-DIAM
+    !               =-1 IF SND IS REVERSE END OF PSEUDO-DIAM
+    !  IBW2-        BANDWIDTH OF NEW NUMBERING COMPUTED BY NUMBER
+    !  IPF2-        PROFILE OF NEW NUMBERING COMPUTED BY NUMBER
+    !  IPFA-        WORKING STORAGE USED TO COMPUTE PROFILE AND BANDWIDTH
+    !  ISDIR-       INDICATES STEP DIRECTION USED IN NUMBERING(+1 OR -1)
+    ! USE INTEGER*2 NDSTK  WITH AN IBM 360 OR 370.
+    INTEGER(psb_lpk_) :: SND, NUM, XA, XB, XC, XD, CX, ENDC, TEST, NR, ISDIR
+    ! COMMON /GRA/ N, IDPTH, IDEG
+    ! THE STORAGE IN COMMON BLOCKS CC AND LVLW IS NOW FREE AND CAN
+    ! BE USED FOR STACKS.
+    !COMMON /LVLW/ STKA(100), STKB(100), STKC(100)
+    !COMMON /CC/ STKD(100)
+    INTEGER(psb_lpk_) :: IPFA(N), NDSTK(NR,IDEG), LVLS2(N),&
+         & NDEG(N), RENUM(N+1), LVLST(N),LSTPT(N),ipf2,ibw2,nflg, nbw
+    integer(psb_lpk_),POINTER :: STKA(:),STKB(:),STKC(:),STKD(:)
+    integer(psb_lpk_) :: SZ1,SZ2,i,j,nstpt,lvln, lst, lnd, inx, max, ipro,&
+         & lvlnl, k, it
+    !
+    STKA => NHIGH
+    STKB => NLOW
+    STKC => NACUM
+    STKD => AUX
+    !
+    ! SET UP LVLST AND LSTPT FROM LVLS2
+    DO 10 I=1,N
+       IPFA(I) = 0
+10  END DO
+    NSTPT = 1
+    DO 30 I=1,IDPTH
+       LSTPT(I) = NSTPT
+       DO 20 J=1,N
+          IF (LVLS2(J) /= I) CYCLE
+          LVLST(NSTPT) = J
+          NSTPT = NSTPT + 1
+20     END DO
+30  END DO
+    LSTPT(IDPTH+1) = NSTPT
+    ! STKA, STKB, STKC AND STKD ARE STACKS WITH POINTERS
+    ! XA,XB,XC, AND XD.  CX IS A SPECIAL POINTER INTO STKC WHICH
+    ! INDICATES THE PARTICULAR NODE BEING PROCESSED.
+    ! LVLN KEEPS TRACK OF THE LEVEL WE ARE WORKING AT.
+    ! INITIALLY STKC CONTAINS ONLY THE INITIAL NODE, SND.
+    LVLN = 0
+    IF (NFLG < 0) LVLN = IDPTH + 1
+    XC = 1
+    STKC(XC) = SND
+40  CX = 1
+    XD = 0
+    LVLN = LVLN + NFLG
+    LST = LSTPT(LVLN)
+    LND = LSTPT(LVLN+1) - 1
+    ! BEGIN PROCESSING NODE STKC(CX)
+50  IPRO = STKC(CX)
+    RENUM(IPRO) = NUM
+    NUM = NUM + ISDIR
+    ENDC = NDEG(IPRO)
+    XA = 0
+    XB = 0
+    ! CHECK ALL ADJACENT NODES
+    DO 80 I=1,ENDC
+       TEST = NDSTK(IPRO,I)
+       INX = RENUM(TEST)
+       ! ONLY NODES NOT NUMBERED OR ALREADY ON A STACK ARE ADDED
+       IF (INX == 0) GO TO 60
+       IF (INX < 0) CYCLE
+       ! DO PRELIMINARY BANDWIDTH AND PROFILE CALCULATIONS
+       NBW = (RENUM(IPRO)-INX)*ISDIR
+       IF (ISDIR > 0) INX = RENUM(IPRO)
+       IF (IPFA(INX) < NBW) IPFA(INX) = NBW
+       CYCLE
+60     RENUM(TEST) = -1
+       ! PUT NODES ON SAME LEVEL ON STKA, ALL OTHERS ON STKB
+       IF (LVLS2(TEST) == LVLS2(IPRO)) GO TO 70
+       XB = XB + 1
+       STKB(XB) = TEST
+       CYCLE
+70     XA = XA + 1
+       STKA(XA) = TEST
+80  END DO
+    ! SORT STKA AND STKB INTO INCREASING DEGREE AND ADD STKA TO STKC
+    ! AND STKB TO STKD
+    IF (XA == 0) GO TO 100
+    IF (XA == 1) GO TO 90
+    !-----------------------------------------------------------------
+    SZ1=SIZE(STKC)
+    SZ2=XC+XA
+    IF(SZ1 < SZ2) THEN
+       write(psb_out_unit,*) 'GPS_NUMBER - Check #1: on fly reallocation of STKC'
+       CALL REALLOC(NACUM,SZ1,SZ2)
+       STKC => NACUM
+    END IF
+    !-----------------------------------------------------------------
+    CALL SORTDG(STKC, STKA, XC, XA, NDEG)
+    GO TO 100
+90  XC = XC + 1
+    !-----------------------------------------------------------------
+    SZ1=SIZE(STKC)
+    SZ2=XC
+    IF(SZ1 < SZ2) THEN
+       write(psb_out_unit,*) 'GPS_NUMBER - Check #2: on fly reallocation of STKC'
+       SZ2=SZ2+INIT
+       CALL REALLOC(NACUM,SZ1,SZ2)
+       STKC => NACUM
+    END IF
+    !-----------------------------------------------------------------
+    STKC(XC) = STKA(XA)
+100 IF (XB == 0) GO TO 120
+    IF (XB == 1) GO TO 110
+    !-----------------------------------------------------------------
+    SZ1=SIZE(STKD)
+    SZ2=XD+XB
+    IF(SZ1 < SZ2) THEN
+       write(psb_out_unit,*) 'GPS_NUMBER - Check #3: on fly reallocation of STKD'
+       CALL REALLOC(AUX,SZ1,SZ2)
+       STKD => AUX
+    END IF
+    !-----------------------------------------------------------------
+    CALL SORTDG(STKD, STKB, XD, XB, NDEG)
+    GO TO 120
+110 XD = XD + 1
+    !-----------------------------------------------------------------
+    SZ1=SIZE(STKD)
+    SZ2=XD
+    IF(SZ1 < SZ2) THEN
+       write(psb_out_unit,*) 'GPS_NUMBER - Check #4: on fly reallocation of STKD'
+       SZ2=SZ2+INIT
+       CALL REALLOC(AUX,SZ1,SZ2)
+       STKD => AUX
+    END IF
+    !-----------------------------------------------------------------
+    STKD(XD) = STKB(XB)
+    ! BE SURE TO PROCESS ALL NODES IN STKC
+120 CX = CX + 1
+    IF (XC >= CX) GO TO 50
+    ! WHEN STKC IS EXHAUSTED LOOK FOR MIN DEGREE NODE IN SAME LEVEL
+    ! WHICH HAS NOT BEEN PROCESSED
+    MAX = IDEG + 1
+    SND = N + 1
+    DO 130 I=LST,LND
+       TEST = LVLST(I)
+       IF (RENUM(TEST) /= 0) CYCLE
+       IF (NDEG(TEST) >= MAX) CYCLE
+       RENUM(SND) = 0
+       RENUM(TEST) = -1
+       MAX = NDEG(TEST)
+       SND = TEST
+130 END DO
+    IF (SND == N+1) GO TO 140
+    XC = XC + 1
+    !-----------------------------------------------------------------
+    SZ1=SIZE(STKC)
+    SZ2=XC
+    IF(SZ1 < SZ2) THEN
+       write(psb_out_unit,*) 'GPS_NUMBER - Check #5: on fly reallocation of STKC'
+       SZ2=SZ2+INIT
+       CALL REALLOC(NACUM,SZ1,SZ2)
+       STKC => NACUM
+    END IF
+    !-----------------------------------------------------------------
+    STKC(XC) = SND
+    GO TO 50
+    ! IF STKD IS EMPTY WE ARE DONE, OTHERWISE COPY STKD ONTO STKC
+    ! AND BEGIN PROCESSING NEW STKC
+140 IF (XD == 0) GO TO 160
+    !-----------------------------------------------------------------
+    SZ1=SIZE(STKC)
+    SZ2=XD
+    IF(SZ1 < SZ2) THEN
+       write(psb_out_unit,*) 'GPS_NUMBER - Check #6: on fly reallocation of STKC'
+       SZ2=SZ2+INIT
+       CALL REALLOC(NACUM,SZ1,SZ2)
+       STKC => NACUM
+    END IF
+    !-----------------------------------------------------------------
+    DO 150 I=1,XD
+       STKC(I) = STKD(I)
+150 END DO
+    XC = XD
+    GO TO 40
+    ! DO FINAL BANDWIDTH AND PROFILE CALCULATIONS
+160 DO 170 I=1,N
+       IF (IPFA(I) > IBW2) IBW2 = IPFA(I)
+       IPF2 = IPF2 + IPFA(I)
+170 END DO
+    !
+    RETURN
+  END SUBROUTINE NUMBER
+  !
+  ! ---------------------------------------------------------
+  SUBROUTINE REALLOC(VET,SZ1,SZ2)
+    use psb_base_mod  
+    ! PERFORM ON FLY REALLOCATION OF POINTER VET INCREASING
+    ! ITS SIZE FROM SZ1 TO SZ2
+    IMPLICIT NONE
+    integer(psb_lpk_),allocatable  :: VET(:)
+    integer(psb_lpk_) :: SZ1,SZ2
+    integer(psb_ipk_) :: info 
+    call psb_realloc(sz2,vet,info)
+    IF(INFO /= psb_success_) THEN
+       write(psb_out_unit,*) 'Error! Memory allocation failure in REALLOC'
+       STOP
+    END IF
+    RETURN
+  END SUBROUTINE REALLOC
+  !
+end module psb_lgps_mod
diff --git a/util/psb_renum_mod.f90 b/util/psb_renum_mod.f90
index 82d51712..9a060cc6 100644
--- a/util/psb_renum_mod.f90
+++ b/util/psb_renum_mod.f90
@@ -32,101 +32,8 @@
 module psb_renum_mod
   use psb_base_mod
 
-  integer(psb_ipk_), parameter :: psb_mat_renum_identity_ = 0
-  integer(psb_ipk_), parameter :: psb_mat_renum_gps_ = 456
-  integer(psb_ipk_), parameter :: psb_mat_renum_amd_ = psb_mat_renum_gps_ + 1 
-  
-  
-  interface psb_mat_renum
-    subroutine psb_d_mat_renums(alg,mat,info,perm)
-      import :: psb_ipk_, psb_dspmat_type
-      character(len=*), intent(in) :: alg
-      type(psb_dspmat_type), intent(inout) :: mat
-      integer(psb_ipk_), intent(out) :: info
-      integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
-    end subroutine psb_d_mat_renums
-    subroutine psb_d_mat_renum(alg,mat,info,perm)
-      import :: psb_ipk_, psb_dspmat_type
-      integer(psb_ipk_), intent(in) :: alg
-      type(psb_dspmat_type), intent(inout) :: mat
-      integer(psb_ipk_), intent(out) :: info
-      integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
-    end subroutine psb_d_mat_renum
-    subroutine psb_s_mat_renums(alg,mat,info,perm)
-      import :: psb_ipk_, psb_sspmat_type
-      character(len=*), intent(in) :: alg
-      type(psb_sspmat_type), intent(inout) :: mat
-      integer(psb_ipk_), intent(out) :: info
-      integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
-    end subroutine psb_s_mat_renums
-    subroutine psb_s_mat_renum(alg,mat,info,perm)
-      import :: psb_ipk_, psb_sspmat_type
-      integer(psb_ipk_), intent(in) :: alg
-      type(psb_sspmat_type), intent(inout) :: mat
-      integer(psb_ipk_), intent(out) :: info
-      integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
-    end subroutine psb_s_mat_renum
-    subroutine psb_z_mat_renums(alg,mat,info,perm)
-      import :: psb_ipk_, psb_zspmat_type
-      character(len=*), intent(in) :: alg
-      type(psb_zspmat_type), intent(inout) :: mat
-      integer(psb_ipk_), intent(out) :: info
-      integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
-    end subroutine psb_z_mat_renums
-    subroutine psb_z_mat_renum(alg,mat,info,perm)
-      import :: psb_ipk_, psb_zspmat_type
-      integer(psb_ipk_), intent(in) :: alg
-      type(psb_zspmat_type), intent(inout) :: mat
-      integer(psb_ipk_), intent(out) :: info
-      integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
-    end subroutine psb_z_mat_renum
-    subroutine psb_c_mat_renums(alg,mat,info,perm)
-      import :: psb_ipk_, psb_cspmat_type
-      character(len=*), intent(in) :: alg
-      type(psb_cspmat_type), intent(inout) :: mat
-      integer(psb_ipk_), intent(out) :: info
-      integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
-    end subroutine psb_c_mat_renums
-    subroutine psb_c_mat_renum(alg,mat,info,perm)
-      import :: psb_ipk_, psb_cspmat_type
-      integer(psb_ipk_), intent(in) :: alg
-      type(psb_cspmat_type), intent(inout) :: mat
-      integer(psb_ipk_), intent(out) :: info
-      integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
-    end subroutine psb_c_mat_renum
-  end interface psb_mat_renum
-
-  
-  interface psb_cmp_bwpf
-    subroutine psb_s_cmp_bwpf(mat,bwl,bwu,prf,info)
-      import :: psb_ipk_, psb_sspmat_type
-      type(psb_sspmat_type), intent(in) :: mat
-      integer(psb_ipk_), intent(out) :: bwl, bwu
-      integer(psb_ipk_), intent(out) :: prf
-      integer(psb_ipk_), intent(out) :: info
-    end subroutine psb_s_cmp_bwpf
-    subroutine psb_d_cmp_bwpf(mat,bwl,bwu,prf,info)
-      import :: psb_ipk_, psb_dspmat_type
-      type(psb_dspmat_type), intent(in) :: mat
-      integer(psb_ipk_), intent(out) :: bwl, bwu
-      integer(psb_ipk_), intent(out) :: prf
-      integer(psb_ipk_), intent(out) :: info
-    end subroutine psb_d_cmp_bwpf
-    subroutine psb_c_cmp_bwpf(mat,bwl,bwu,prf,info)
-      import :: psb_ipk_, psb_cspmat_type
-      type(psb_cspmat_type), intent(in) :: mat
-      integer(psb_ipk_), intent(out) :: bwl, bwu
-      integer(psb_ipk_), intent(out) :: prf
-      integer(psb_ipk_), intent(out) :: info
-    end subroutine psb_c_cmp_bwpf
-    subroutine psb_z_cmp_bwpf(mat,bwl,bwu,prf,info)
-      import :: psb_ipk_, psb_zspmat_type
-      type(psb_zspmat_type), intent(in) :: mat
-      integer(psb_ipk_), intent(out) :: bwl, bwu
-      integer(psb_ipk_), intent(out) :: prf
-      integer(psb_ipk_), intent(out) :: info
-    end subroutine psb_z_cmp_bwpf
-  end interface psb_cmp_bwpf
-
-
+  use psb_s_renum_mod
+  use psb_c_renum_mod
+  use psb_d_renum_mod
+  use psb_z_renum_mod
 end module psb_renum_mod
diff --git a/util/psb_s_renum_impl.F90 b/util/psb_s_renum_impl.F90
index 008bbbb0..6bb37a3c 100644
--- a/util/psb_s_renum_impl.F90
+++ b/util/psb_s_renum_impl.F90
@@ -29,37 +29,61 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
 !    
-subroutine psb_s_mat_renums(alg,mat,info,perm)
+subroutine psb_s_mat_renum(alg,mat,info,perm)
   use psb_base_mod
-  use psb_renum_mod, psb_protect_name => psb_s_mat_renums
+  use psb_renum_mod, psb_protect_name => psb_s_mat_renum
   implicit none 
   character(len=*), intent(in) :: alg
   type(psb_sspmat_type), intent(inout) :: mat
   integer(psb_ipk_), intent(out) :: info
   integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
   
-  integer(psb_ipk_) :: err_act, nr, nc, ialg
+  integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5)
   character(len=20)  :: name
 
   info = psb_success_
   name = 'mat_renum'
   call psb_erractionsave(err_act)
 
+  nr = mat%get_nrows()
+  nc = mat%get_ncols()
+  if (nr /= nc) then 
+    info = psb_err_rectangular_mat_unsupported_
+    ierr(1) = nr; ierr(2) = nc;
+    call psb_errpush(info,name,i_err=ierr)
+    goto 9999
+  end if
+
   info = psb_success_
   select case (psb_toupper(alg))
   case ('GPS')
-    ialg = psb_mat_renum_gps_
-  case ('AMD')
-    ialg = psb_mat_renum_amd_
+
+    call psb_mat_renum_gps(mat,info,perm)
+
+  case('AMD') 
+
+    call psb_mat_renum_amd(mat,info,perm)
+
   case ('NONE', 'ID') 
-    ialg = psb_mat_renum_identity_
+    nr = mat%get_nrows()
+    allocate(perm(nr),stat=info) 
+    if (info == 0) then 
+      do i=1,nr
+        perm(i) = i
+      end do
+    else
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info,name)
+      goto 9999
+    endif
   case default
     write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"'
-    ialg = -1
+    info = psb_err_input_value_invalid_i_
+    ierr(1) = 1; 
+    call psb_errpush(info,name,i_err=ierr)
+    goto 9999 
   end select
-
-  call psb_mat_renum(ialg,mat,info,perm)
-
+  
   if (info /= psb_success_) then 
     info = psb_err_from_subroutine_non_
     call psb_errpush(info,name)
@@ -72,26 +96,218 @@ subroutine psb_s_mat_renums(alg,mat,info,perm)
 9999 call psb_error_handler(err_act)
   return
 
-end subroutine psb_s_mat_renums
-  
-subroutine psb_s_mat_renum(alg,mat,info,perm)
+contains
+
+  subroutine psb_mat_renum_gps(a,info,operm)
+    use psb_base_mod
+    use psb_gps_mod
+    implicit none 
+    type(psb_sspmat_type), intent(inout) :: a
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+
+    ! 
+    class(psb_s_base_sparse_mat), allocatable :: aa
+    type(psb_s_csr_sparse_mat)  :: acsr
+    type(psb_s_coo_sparse_mat)  :: acoo
+    
+    integer(psb_ipk_) :: err_act
+    character(len=20)           :: name
+    integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) 
+    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth
+
+    info = psb_success_
+    name = 'mat_renum_gps'
+    call psb_erractionsave(err_act)
+
+    info = psb_success_
+
+    call a%mold(aa)
+    call a%mv_to(aa)
+    call aa%mv_to_fmt(acsr,info)
+    ! Insert call to gps_reduce
+    nr   = acsr%get_nrows()
+    ideg = 0
+    do i=1, nr
+      ideg = max(ideg,acsr%irp(i+1)-acsr%irp(i))
+    end do
+    allocate(ndstk(nr,ideg), iold(nr), perm(nr+1), ndeg(nr),stat=info)
+    if (info /= 0) then
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info, name) 
+      goto 9999
+    end if
+    do i=1, nr
+      iold(i) = i 
+      ndstk(i,:) = 0
+      k  = 0
+      do j=acsr%irp(i),acsr%irp(i+1)-1
+        k = k + 1
+        ndstk(i,k) = acsr%ja(j)
+      end do
+    end do
+    perm = 0
+
+    call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth)
+
+    if (.not.psb_isaperm(nr,perm)) then 
+      write(0,*) 'Something wrong: bad perm from gps_reduce'
+      info = psb_err_from_subroutine_
+      call psb_errpush(info,name) 
+      goto 9999
+    end if
+    ! Move to coordinate to apply renumbering
+    call acsr%mv_to_coo(acoo,info)
+    do i=1, acoo%get_nzeros()
+      acoo%ia(i) = perm(acoo%ia(i))
+      acoo%ja(i) = perm(acoo%ja(i))
+    end do
+    call acoo%fix(info) 
+
+    ! Get back to where we started from
+    call aa%mv_from_coo(acoo,info)
+    call a%mv_from(aa)
+    if (present(operm)) then 
+      call psb_realloc(nr,operm,info)
+      if (info /= psb_success_) then 
+        info = psb_err_alloc_dealloc_
+        call psb_errpush(info,name) 
+        goto 9999
+      end if
+      operm(1:nr) = perm(1:nr)
+    end if
+
+    deallocate(aa)
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+
+  end subroutine psb_mat_renum_gps
+
+
+  subroutine psb_mat_renum_amd(a,info,operm)
+#if defined(HAVE_AMD) 
+    use iso_c_binding
+#endif
+    use psb_base_mod    
+    implicit none 
+    type(psb_sspmat_type), intent(inout) :: a
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+
+    ! 
+#if defined(HAVE_AMD) 
+    interface 
+      function psb_amd_order(n,ap,ai,p)&
+           & result(res) bind(c,name='psb_amd_order')
+        use iso_c_binding
+        integer(c_int) :: res
+        integer(c_int), value :: n
+        integer(c_int) :: ap(*), ai(*), p(*)
+      end function psb_amd_order
+    end interface
+#endif
+
+    type(psb_s_csc_sparse_mat)  :: acsc
+    class(psb_s_base_sparse_mat), allocatable :: aa
+    type(psb_s_coo_sparse_mat)  :: acoo
+
+    integer(psb_ipk_), allocatable :: perm(:) 
+    integer(psb_ipk_) :: err_act
+    character(len=20)           :: name
+    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz
+
+    info = psb_success_
+    name = 'mat_renum_amd'
+    call psb_erractionsave(err_act)
+
+#if defined(HAVE_AMD)  && defined(IPK4)
+
+    info = psb_success_
+    nr   = a%get_nrows()
+    nz   = a%get_nzeros()
+    allocate(perm(nr),stat=info)
+    if (info /= 0) then 
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info,name) 
+      goto 9999
+    end if
+
+
+    allocate(aa, mold=a%a)
+    call a%mv_to(acsc)
+
+    acsc%ia(:)  = acsc%ia(:) - 1
+    acsc%icp(:) = acsc%icp(:) - 1
+
+    info = psb_amd_order(nr,acsc%icp,acsc%ia,perm)
+    if (info /= psb_success_) then 
+      info = psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_amd_order') 
+      goto 9999
+    end if
+
+    perm(:)     = perm(:) + 1
+    acsc%ia(:)  = acsc%ia(:) + 1
+    acsc%icp(:) = acsc%icp(:) + 1
+
+    call acsc%mv_to_coo(acoo,info)
+    do i=1, acoo%get_nzeros()
+      acoo%ia(i) = perm(acoo%ia(i))
+      acoo%ja(i) = perm(acoo%ja(i))
+    end do
+    call acoo%fix(info) 
+
+    ! Get back to where we started from
+    call aa%mv_from_coo(acoo,info)
+    call a%mv_from(aa)
+    if (present(operm)) then 
+      call psb_realloc(nr,operm,info)
+      if (info /= psb_success_) then 
+        info = psb_err_alloc_dealloc_
+        call psb_errpush(info,name) 
+        goto 9999
+      end if
+      operm(1:nr) = perm(1:nr)
+    end if
+
+    deallocate(aa,perm)
+
+#else 
+
+    info = psb_err_missing_aux_lib_
+    call psb_errpush(info, name) 
+    goto 9999
+#endif
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+  end subroutine psb_mat_renum_amd
+
+end subroutine psb_s_mat_renum
+
+subroutine psb_ls_mat_renum(alg,mat,info,perm)
   use psb_base_mod
-  use psb_renum_mod, psb_protect_name => psb_s_mat_renum
+  use psb_renum_mod, psb_protect_name => psb_ls_mat_renum
   implicit none 
-  integer(psb_ipk_), intent(in) :: alg
-  type(psb_sspmat_type), intent(inout) :: mat
+  character(len=*), intent(in) :: alg
+  type(psb_lsspmat_type), intent(inout) :: mat
   integer(psb_ipk_), intent(out) :: info
-  integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
+  integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:)
   
-  integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5)
+  integer(psb_lpk_) :: nr, nc, i
+  integer(psb_ipk_) :: err_act, ierr(5)
   character(len=20)  :: name
 
   info = psb_success_
   name = 'mat_renum'
   call psb_erractionsave(err_act)
 
-  info = psb_success_
-  
   nr = mat%get_nrows()
   nc = mat%get_ncols()
   if (nr /= nc) then 
@@ -101,16 +317,17 @@ subroutine psb_s_mat_renum(alg,mat,info,perm)
     goto 9999
   end if
 
-  select case (alg)
-  case(psb_mat_renum_gps_) 
+  info = psb_success_
+  select case (psb_toupper(alg))
+  case ('GPS')
 
-    call psb_mat_renum_gps(mat,info,perm)
+    call psb_lmat_renum_gps(mat,info,perm)
 
-  case(psb_mat_renum_amd_) 
+  case('AMD') 
 
-    call psb_mat_renum_amd(mat,info,perm)
+    call psb_lmat_renum_amd(mat,info,perm)
 
-  case(psb_mat_renum_identity_) 
+  case ('NONE', 'ID') 
     nr = mat%get_nrows()
     allocate(perm(nr),stat=info) 
     if (info == 0) then 
@@ -123,8 +340,9 @@ subroutine psb_s_mat_renum(alg,mat,info,perm)
       goto 9999
     endif
   case default
+    write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"'
     info = psb_err_input_value_invalid_i_
-    ierr(1) = 1; ierr(2) = alg;
+    ierr(1) = 1; 
     call psb_errpush(info,name,i_err=ierr)
     goto 9999 
   end select
@@ -143,26 +361,26 @@ subroutine psb_s_mat_renum(alg,mat,info,perm)
 
 contains
 
-  subroutine psb_mat_renum_gps(a,info,operm)
+  subroutine psb_lmat_renum_gps(a,info,operm)
     use psb_base_mod
-    use psb_gps_mod
+    use psb_lgps_mod
     implicit none 
-    type(psb_sspmat_type), intent(inout) :: a
+    type(psb_lsspmat_type), intent(inout) :: a
     integer(psb_ipk_), intent(out) :: info
-    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+    integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:)
 
     ! 
-    class(psb_s_base_sparse_mat), allocatable :: aa
-    type(psb_s_csr_sparse_mat)  :: acsr
-    type(psb_s_coo_sparse_mat)  :: acoo
+    class(psb_ls_base_sparse_mat), allocatable :: aa
+    type(psb_ls_csr_sparse_mat)  :: acsr
+    type(psb_ls_coo_sparse_mat)  :: acoo
     
     integer(psb_ipk_) :: err_act
     character(len=20)           :: name
-    integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) 
-    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth
+    integer(psb_lpk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) 
+    integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth
 
     info = psb_success_
-    name = 'mat_renum'
+    name = 'mat_renum_gps'
     call psb_erractionsave(err_act)
 
     info = psb_success_
@@ -193,7 +411,7 @@ contains
     end do
     perm = 0
 
-    call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth)
+    call psb_lgps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth)
 
     if (.not.psb_isaperm(nr,perm)) then 
       write(0,*) 'Something wrong: bad perm from gps_reduce'
@@ -229,18 +447,18 @@ contains
 9999 call psb_error_handler(err_act)
     return
 
-  end subroutine psb_mat_renum_gps
+  end subroutine psb_lmat_renum_gps
 
 
-  subroutine psb_mat_renum_amd(a,info,operm)
+  subroutine psb_lmat_renum_amd(a,info,operm)
 #if defined(HAVE_AMD) 
     use iso_c_binding
 #endif
     use psb_base_mod    
     implicit none 
-    type(psb_sspmat_type), intent(inout) :: a
+    type(psb_lsspmat_type), intent(inout) :: a
     integer(psb_ipk_), intent(out) :: info
-    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+    integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:)
 
     ! 
 #if defined(HAVE_AMD) 
@@ -255,20 +473,20 @@ contains
     end interface
 #endif
 
-    type(psb_s_csc_sparse_mat)  :: acsc
-    class(psb_s_base_sparse_mat), allocatable :: aa
-    type(psb_s_coo_sparse_mat)  :: acoo
+    type(psb_ls_csc_sparse_mat)  :: acsc
+    class(psb_ls_base_sparse_mat), allocatable :: aa
+    type(psb_ls_coo_sparse_mat)  :: acoo
 
     integer(psb_ipk_), allocatable :: perm(:) 
     integer(psb_ipk_) :: err_act
     character(len=20)           :: name
-    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz
+    integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz
 
     info = psb_success_
     name = 'mat_renum_amd'
     call psb_erractionsave(err_act)
 
-#if defined(HAVE_AMD)  && defined(IPK4)
+#if defined(HAVE_AMD)  && defined(LPK4)
 
     info = psb_success_
     nr   = a%get_nrows()
@@ -332,11 +550,9 @@ contains
 
 9999 call psb_error_handler(err_act)
     return
+  end subroutine psb_lmat_renum_amd
 
-  end subroutine psb_mat_renum_amd
-
-end subroutine psb_s_mat_renum
-
+end subroutine psb_ls_mat_renum
 
 subroutine psb_s_cmp_bwpf(mat,bwl,bwu,prf,info)
   use psb_base_mod
@@ -387,3 +603,52 @@ subroutine psb_s_cmp_bwpf(mat,bwl,bwu,prf,info)
   end select
   
 end subroutine psb_s_cmp_bwpf
+
+subroutine psb_ls_cmp_bwpf(mat,bwl,bwu,prf,info)
+  use psb_base_mod
+  use psb_renum_mod, psb_protect_name => psb_ls_cmp_bwpf
+  implicit none 
+  type(psb_lsspmat_type), intent(in) :: mat
+  integer(psb_lpk_), intent(out) :: bwl, bwu
+  integer(psb_lpk_), intent(out) :: prf
+  integer(psb_ipk_), intent(out) :: info
+  !
+  integer(psb_lpk_), allocatable :: irow(:), icol(:)
+  real(psb_spk_), allocatable :: val(:)
+  integer(psb_lpk_) :: nz, i, j, lrbu, lrbl
+  
+  info = psb_success_
+  bwl = 0
+  bwu = 0
+  prf = 0
+  select type (aa=>mat%a)
+  class is (psb_ls_csr_sparse_mat)
+    do i=1, aa%get_nrows()
+      lrbl = 0
+      lrbu = 0
+      do j = aa%irp(i), aa%irp(i+1) - 1
+        lrbl = max(lrbl,i-aa%ja(j))
+        lrbu = max(lrbu,aa%ja(j)-i)
+      end do
+      prf = prf + lrbl+lrbu
+      bwu  = max(bwu,lrbu)
+      bwl  = max(bwl,lrbu)
+    end do
+    
+  class default
+    do i=1, aa%get_nrows()
+      lrbl = 0
+      lrbu = 0
+      call aa%csget(i,i,nz,irow,icol,val,info)
+      if (info /= psb_success_) return
+      do j=1, nz
+        lrbl = max(lrbl,i-icol(j))
+        lrbu = max(lrbu,icol(j)-i)
+      end do
+      prf = prf + lrbl+lrbu
+      bwu  = max(bwu,lrbu)
+      bwl  = max(bwl,lrbu)
+    end do
+  end select
+  
+end subroutine psb_ls_cmp_bwpf
diff --git a/util/psb_s_renum_mod.f90 b/util/psb_s_renum_mod.f90
new file mode 100644
index 00000000..1e048df5
--- /dev/null
+++ b/util/psb_s_renum_mod.f90
@@ -0,0 +1,69 @@
+!   
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone    
+!        Alfredo Buttari      
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+!    
+module psb_s_renum_mod
+  use psb_base_mod
+  
+  interface psb_mat_renum
+    subroutine psb_s_mat_renum(alg,mat,info,perm)
+      import :: psb_ipk_, psb_sspmat_type
+      character(len=*), intent(in) :: alg
+      type(psb_sspmat_type), intent(inout) :: mat
+      integer(psb_ipk_), intent(out) :: info
+      integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
+    end subroutine psb_s_mat_renum
+    subroutine psb_ls_mat_renum(alg,mat,info,perm)
+      import :: psb_ipk_, psb_lpk_, psb_lsspmat_type
+      character(len=*), intent(in) :: alg
+      type(psb_lsspmat_type), intent(inout) :: mat
+      integer(psb_ipk_), intent(out) :: info
+      integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:)
+    end subroutine psb_ls_mat_renum
+  end interface psb_mat_renum
+  
+  interface psb_cmp_bwpf
+    subroutine psb_s_cmp_bwpf(mat,bwl,bwu,prf,info)
+      import :: psb_ipk_, psb_sspmat_type
+      type(psb_sspmat_type), intent(in) :: mat
+      integer(psb_ipk_), intent(out) :: bwl, bwu
+      integer(psb_ipk_), intent(out) :: prf
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_s_cmp_bwpf
+    subroutine psb_ls_cmp_bwpf(mat,bwl,bwu,prf,info)
+      import :: psb_ipk_, psb_lpk_, psb_lsspmat_type
+      type(psb_lsspmat_type), intent(in) :: mat
+      integer(psb_lpk_), intent(out) :: bwl, bwu
+      integer(psb_lpk_), intent(out) :: prf
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_ls_cmp_bwpf
+  end interface psb_cmp_bwpf
+
+end module psb_s_renum_mod
diff --git a/util/psb_z_renum_impl.F90 b/util/psb_z_renum_impl.F90
index aa8f6b72..a27d4523 100644
--- a/util/psb_z_renum_impl.F90
+++ b/util/psb_z_renum_impl.F90
@@ -29,37 +29,61 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
 !    
-subroutine psb_z_mat_renums(alg,mat,info,perm)
+subroutine psb_z_mat_renum(alg,mat,info,perm)
   use psb_base_mod
-  use psb_renum_mod, psb_protect_name => psb_z_mat_renums
+  use psb_renum_mod, psb_protect_name => psb_z_mat_renum
   implicit none 
   character(len=*), intent(in) :: alg
   type(psb_zspmat_type), intent(inout) :: mat
   integer(psb_ipk_), intent(out) :: info
   integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
   
-  integer(psb_ipk_) :: err_act, nr, nc, ialg
+  integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5)
   character(len=20)  :: name
 
   info = psb_success_
   name = 'mat_renum'
   call psb_erractionsave(err_act)
 
+  nr = mat%get_nrows()
+  nc = mat%get_ncols()
+  if (nr /= nc) then 
+    info = psb_err_rectangular_mat_unsupported_
+    ierr(1) = nr; ierr(2) = nc;
+    call psb_errpush(info,name,i_err=ierr)
+    goto 9999
+  end if
+
   info = psb_success_
   select case (psb_toupper(alg))
   case ('GPS')
-    ialg = psb_mat_renum_gps_
-  case ('AMD')
-    ialg = psb_mat_renum_amd_
+
+    call psb_mat_renum_gps(mat,info,perm)
+
+  case('AMD') 
+
+    call psb_mat_renum_amd(mat,info,perm)
+
   case ('NONE', 'ID') 
-    ialg = psb_mat_renum_identity_
+    nr = mat%get_nrows()
+    allocate(perm(nr),stat=info) 
+    if (info == 0) then 
+      do i=1,nr
+        perm(i) = i
+      end do
+    else
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info,name)
+      goto 9999
+    endif
   case default
     write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"'
-    ialg = -1
+    info = psb_err_input_value_invalid_i_
+    ierr(1) = 1; 
+    call psb_errpush(info,name,i_err=ierr)
+    goto 9999 
   end select
-
-  call psb_mat_renum(ialg,mat,info,perm)
-
+  
   if (info /= psb_success_) then 
     info = psb_err_from_subroutine_non_
     call psb_errpush(info,name)
@@ -72,26 +96,218 @@ subroutine psb_z_mat_renums(alg,mat,info,perm)
 9999 call psb_error_handler(err_act)
   return
 
-end subroutine psb_z_mat_renums
-  
-subroutine psb_z_mat_renum(alg,mat,info,perm)
+contains
+
+  subroutine psb_mat_renum_gps(a,info,operm)
+    use psb_base_mod
+    use psb_gps_mod
+    implicit none 
+    type(psb_zspmat_type), intent(inout) :: a
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+
+    ! 
+    class(psb_z_base_sparse_mat), allocatable :: aa
+    type(psb_z_csr_sparse_mat)  :: acsr
+    type(psb_z_coo_sparse_mat)  :: acoo
+    
+    integer(psb_ipk_) :: err_act
+    character(len=20)           :: name
+    integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) 
+    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth
+
+    info = psb_success_
+    name = 'mat_renum_gps'
+    call psb_erractionsave(err_act)
+
+    info = psb_success_
+
+    call a%mold(aa)
+    call a%mv_to(aa)
+    call aa%mv_to_fmt(acsr,info)
+    ! Insert call to gps_reduce
+    nr   = acsr%get_nrows()
+    ideg = 0
+    do i=1, nr
+      ideg = max(ideg,acsr%irp(i+1)-acsr%irp(i))
+    end do
+    allocate(ndstk(nr,ideg), iold(nr), perm(nr+1), ndeg(nr),stat=info)
+    if (info /= 0) then
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info, name) 
+      goto 9999
+    end if
+    do i=1, nr
+      iold(i) = i 
+      ndstk(i,:) = 0
+      k  = 0
+      do j=acsr%irp(i),acsr%irp(i+1)-1
+        k = k + 1
+        ndstk(i,k) = acsr%ja(j)
+      end do
+    end do
+    perm = 0
+
+    call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth)
+
+    if (.not.psb_isaperm(nr,perm)) then 
+      write(0,*) 'Something wrong: bad perm from gps_reduce'
+      info = psb_err_from_subroutine_
+      call psb_errpush(info,name) 
+      goto 9999
+    end if
+    ! Move to coordinate to apply renumbering
+    call acsr%mv_to_coo(acoo,info)
+    do i=1, acoo%get_nzeros()
+      acoo%ia(i) = perm(acoo%ia(i))
+      acoo%ja(i) = perm(acoo%ja(i))
+    end do
+    call acoo%fix(info) 
+
+    ! Get back to where we started from
+    call aa%mv_from_coo(acoo,info)
+    call a%mv_from(aa)
+    if (present(operm)) then 
+      call psb_realloc(nr,operm,info)
+      if (info /= psb_success_) then 
+        info = psb_err_alloc_dealloc_
+        call psb_errpush(info,name) 
+        goto 9999
+      end if
+      operm(1:nr) = perm(1:nr)
+    end if
+
+    deallocate(aa)
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+
+  end subroutine psb_mat_renum_gps
+
+
+  subroutine psb_mat_renum_amd(a,info,operm)
+#if defined(HAVE_AMD) 
+    use iso_c_binding
+#endif
+    use psb_base_mod    
+    implicit none 
+    type(psb_zspmat_type), intent(inout) :: a
+    integer(psb_ipk_), intent(out) :: info
+    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+
+    ! 
+#if defined(HAVE_AMD) 
+    interface 
+      function psb_amd_order(n,ap,ai,p)&
+           & result(res) bind(c,name='psb_amd_order')
+        use iso_c_binding
+        integer(c_int) :: res
+        integer(c_int), value :: n
+        integer(c_int) :: ap(*), ai(*), p(*)
+      end function psb_amd_order
+    end interface
+#endif
+
+    type(psb_z_csc_sparse_mat)  :: acsc
+    class(psb_z_base_sparse_mat), allocatable :: aa
+    type(psb_z_coo_sparse_mat)  :: acoo
+
+    integer(psb_ipk_), allocatable :: perm(:) 
+    integer(psb_ipk_) :: err_act
+    character(len=20)           :: name
+    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz
+
+    info = psb_success_
+    name = 'mat_renum_amd'
+    call psb_erractionsave(err_act)
+
+#if defined(HAVE_AMD)  && defined(IPK4)
+
+    info = psb_success_
+    nr   = a%get_nrows()
+    nz   = a%get_nzeros()
+    allocate(perm(nr),stat=info)
+    if (info /= 0) then 
+      info = psb_err_alloc_dealloc_
+      call psb_errpush(info,name) 
+      goto 9999
+    end if
+
+
+    allocate(aa, mold=a%a)
+    call a%mv_to(acsc)
+
+    acsc%ia(:)  = acsc%ia(:) - 1
+    acsc%icp(:) = acsc%icp(:) - 1
+
+    info = psb_amd_order(nr,acsc%icp,acsc%ia,perm)
+    if (info /= psb_success_) then 
+      info = psb_err_from_subroutine_
+      call psb_errpush(info,name,a_err='psb_amd_order') 
+      goto 9999
+    end if
+
+    perm(:)     = perm(:) + 1
+    acsc%ia(:)  = acsc%ia(:) + 1
+    acsc%icp(:) = acsc%icp(:) + 1
+
+    call acsc%mv_to_coo(acoo,info)
+    do i=1, acoo%get_nzeros()
+      acoo%ia(i) = perm(acoo%ia(i))
+      acoo%ja(i) = perm(acoo%ja(i))
+    end do
+    call acoo%fix(info) 
+
+    ! Get back to where we started from
+    call aa%mv_from_coo(acoo,info)
+    call a%mv_from(aa)
+    if (present(operm)) then 
+      call psb_realloc(nr,operm,info)
+      if (info /= psb_success_) then 
+        info = psb_err_alloc_dealloc_
+        call psb_errpush(info,name) 
+        goto 9999
+      end if
+      operm(1:nr) = perm(1:nr)
+    end if
+
+    deallocate(aa,perm)
+
+#else 
+
+    info = psb_err_missing_aux_lib_
+    call psb_errpush(info, name) 
+    goto 9999
+#endif
+
+    call psb_erractionrestore(err_act)
+    return
+
+9999 call psb_error_handler(err_act)
+    return
+  end subroutine psb_mat_renum_amd
+
+end subroutine psb_z_mat_renum
+
+subroutine psb_lz_mat_renum(alg,mat,info,perm)
   use psb_base_mod
-  use psb_renum_mod, psb_protect_name => psb_z_mat_renum
+  use psb_renum_mod, psb_protect_name => psb_lz_mat_renum
   implicit none 
-  integer(psb_ipk_), intent(in) :: alg
-  type(psb_zspmat_type), intent(inout) :: mat
+  character(len=*), intent(in) :: alg
+  type(psb_lzspmat_type), intent(inout) :: mat
   integer(psb_ipk_), intent(out) :: info
-  integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
+  integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:)
   
-  integer(psb_ipk_) :: err_act, nr, nc, i, ierr(5)
+  integer(psb_lpk_) :: nr, nc, i
+  integer(psb_ipk_) :: err_act, ierr(5)
   character(len=20)  :: name
 
   info = psb_success_
   name = 'mat_renum'
   call psb_erractionsave(err_act)
 
-  info = psb_success_
-  
   nr = mat%get_nrows()
   nc = mat%get_ncols()
   if (nr /= nc) then 
@@ -101,16 +317,17 @@ subroutine psb_z_mat_renum(alg,mat,info,perm)
     goto 9999
   end if
 
-  select case (alg)
-  case(psb_mat_renum_gps_) 
+  info = psb_success_
+  select case (psb_toupper(alg))
+  case ('GPS')
 
-    call psb_mat_renum_gps(mat,info,perm)
+    call psb_lmat_renum_gps(mat,info,perm)
 
-  case(psb_mat_renum_amd_) 
+  case('AMD') 
 
-    call psb_mat_renum_amd(mat,info,perm)
+    call psb_lmat_renum_amd(mat,info,perm)
 
-  case(psb_mat_renum_identity_) 
+  case ('NONE', 'ID') 
     nr = mat%get_nrows()
     allocate(perm(nr),stat=info) 
     if (info == 0) then 
@@ -123,8 +340,9 @@ subroutine psb_z_mat_renum(alg,mat,info,perm)
       goto 9999
     endif
   case default
+    write(0,*) 'Unknown algorithm "',psb_toupper(alg),'"'
     info = psb_err_input_value_invalid_i_
-    ierr(1) = 1; ierr(2) = alg;
+    ierr(1) = 1; 
     call psb_errpush(info,name,i_err=ierr)
     goto 9999 
   end select
@@ -143,26 +361,26 @@ subroutine psb_z_mat_renum(alg,mat,info,perm)
 
 contains
 
-  subroutine psb_mat_renum_gps(a,info,operm)
+  subroutine psb_lmat_renum_gps(a,info,operm)
     use psb_base_mod
-    use psb_gps_mod
+    use psb_lgps_mod
     implicit none 
-    type(psb_zspmat_type), intent(inout) :: a
+    type(psb_lzspmat_type), intent(inout) :: a
     integer(psb_ipk_), intent(out) :: info
-    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+    integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:)
 
     ! 
-    class(psb_z_base_sparse_mat), allocatable :: aa
-    type(psb_z_csr_sparse_mat)  :: acsr
-    type(psb_z_coo_sparse_mat)  :: acoo
+    class(psb_lz_base_sparse_mat), allocatable :: aa
+    type(psb_lz_csr_sparse_mat)  :: acsr
+    type(psb_lz_coo_sparse_mat)  :: acoo
     
     integer(psb_ipk_) :: err_act
     character(len=20)           :: name
-    integer(psb_ipk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) 
-    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth
+    integer(psb_lpk_), allocatable :: ndstk(:,:), iold(:), ndeg(:), perm(:) 
+    integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth
 
     info = psb_success_
-    name = 'mat_renum'
+    name = 'mat_renum_gps'
     call psb_erractionsave(err_act)
 
     info = psb_success_
@@ -193,7 +411,7 @@ contains
     end do
     perm = 0
 
-    call psb_gps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth)
+    call psb_lgps_reduce(ndstk,nr,ideg,iold,perm,ndeg,ibw,ipf,idpth)
 
     if (.not.psb_isaperm(nr,perm)) then 
       write(0,*) 'Something wrong: bad perm from gps_reduce'
@@ -228,18 +446,19 @@ contains
 
 9999 call psb_error_handler(err_act)
     return
-  end subroutine psb_mat_renum_gps
 
+  end subroutine psb_lmat_renum_gps
 
-  subroutine psb_mat_renum_amd(a,info,operm)
+
+  subroutine psb_lmat_renum_amd(a,info,operm)
 #if defined(HAVE_AMD) 
     use iso_c_binding
 #endif
     use psb_base_mod    
     implicit none 
-    type(psb_zspmat_type), intent(inout) :: a
+    type(psb_lzspmat_type), intent(inout) :: a
     integer(psb_ipk_), intent(out) :: info
-    integer(psb_ipk_), allocatable, optional, intent(out) :: operm(:)
+    integer(psb_lpk_), allocatable, optional, intent(out) :: operm(:)
 
     ! 
 #if defined(HAVE_AMD) 
@@ -254,20 +473,20 @@ contains
     end interface
 #endif
 
-    type(psb_z_csc_sparse_mat)  :: acsc
-    class(psb_z_base_sparse_mat), allocatable :: aa
-    type(psb_z_coo_sparse_mat)  :: acoo
+    type(psb_lz_csc_sparse_mat)  :: acsc
+    class(psb_lz_base_sparse_mat), allocatable :: aa
+    type(psb_lz_coo_sparse_mat)  :: acoo
 
     integer(psb_ipk_), allocatable :: perm(:) 
     integer(psb_ipk_) :: err_act
     character(len=20)           :: name
-    integer(psb_ipk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz
+    integer(psb_lpk_) :: i, j, k, ideg, nr, ibw, ipf, idpth, nz
 
     info = psb_success_
     name = 'mat_renum_amd'
     call psb_erractionsave(err_act)
 
-#if defined(HAVE_AMD)  && defined(IPK4)
+#if defined(HAVE_AMD)  && defined(LPK4)
 
     info = psb_success_
     nr   = a%get_nrows()
@@ -331,10 +550,9 @@ contains
 
 9999 call psb_error_handler(err_act)
     return
-  end subroutine psb_mat_renum_amd
-
-end subroutine psb_z_mat_renum
+  end subroutine psb_lmat_renum_amd
 
+end subroutine psb_lz_mat_renum
 
 subroutine psb_z_cmp_bwpf(mat,bwl,bwu,prf,info)
   use psb_base_mod
@@ -385,3 +603,52 @@ subroutine psb_z_cmp_bwpf(mat,bwl,bwu,prf,info)
   end select
   
 end subroutine psb_z_cmp_bwpf
+
+subroutine psb_lz_cmp_bwpf(mat,bwl,bwu,prf,info)
+  use psb_base_mod
+  use psb_renum_mod, psb_protect_name => psb_lz_cmp_bwpf
+  implicit none 
+  type(psb_lzspmat_type), intent(in) :: mat
+  integer(psb_lpk_), intent(out) :: bwl, bwu
+  integer(psb_lpk_), intent(out) :: prf
+  integer(psb_ipk_), intent(out) :: info
+  !
+  integer(psb_lpk_), allocatable :: irow(:), icol(:)
+  complex(psb_dpk_), allocatable :: val(:)
+  integer(psb_lpk_) :: nz, i, j, lrbu, lrbl
+  
+  info = psb_success_
+  bwl = 0
+  bwu = 0
+  prf = 0
+  select type (aa=>mat%a)
+  class is (psb_lz_csr_sparse_mat)
+    do i=1, aa%get_nrows()
+      lrbl = 0
+      lrbu = 0
+      do j = aa%irp(i), aa%irp(i+1) - 1
+        lrbl = max(lrbl,i-aa%ja(j))
+        lrbu = max(lrbu,aa%ja(j)-i)
+      end do
+      prf = prf + lrbl+lrbu
+      bwu  = max(bwu,lrbu)
+      bwl  = max(bwl,lrbu)
+    end do
+    
+  class default
+    do i=1, aa%get_nrows()
+      lrbl = 0
+      lrbu = 0
+      call aa%csget(i,i,nz,irow,icol,val,info)
+      if (info /= psb_success_) return
+      do j=1, nz
+        lrbl = max(lrbl,i-icol(j))
+        lrbu = max(lrbu,icol(j)-i)
+      end do
+      prf = prf + lrbl+lrbu
+      bwu  = max(bwu,lrbu)
+      bwl  = max(bwl,lrbu)
+    end do
+  end select
+  
+end subroutine psb_lz_cmp_bwpf
diff --git a/util/psb_z_renum_mod.f90 b/util/psb_z_renum_mod.f90
new file mode 100644
index 00000000..0bb56c35
--- /dev/null
+++ b/util/psb_z_renum_mod.f90
@@ -0,0 +1,69 @@
+!   
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone    
+!        Alfredo Buttari      
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+!    
+module psb_z_renum_mod
+  use psb_base_mod
+  
+  interface psb_mat_renum
+    subroutine psb_z_mat_renum(alg,mat,info,perm)
+      import :: psb_ipk_, psb_zspmat_type
+      character(len=*), intent(in) :: alg
+      type(psb_zspmat_type), intent(inout) :: mat
+      integer(psb_ipk_), intent(out) :: info
+      integer(psb_ipk_), allocatable, optional, intent(out) :: perm(:)
+    end subroutine psb_z_mat_renum
+    subroutine psb_lz_mat_renum(alg,mat,info,perm)
+      import :: psb_ipk_, psb_lpk_, psb_lzspmat_type
+      character(len=*), intent(in) :: alg
+      type(psb_lzspmat_type), intent(inout) :: mat
+      integer(psb_ipk_), intent(out) :: info
+      integer(psb_lpk_), allocatable, optional, intent(out) :: perm(:)
+    end subroutine psb_lz_mat_renum
+  end interface psb_mat_renum
+  
+  interface psb_cmp_bwpf
+    subroutine psb_z_cmp_bwpf(mat,bwl,bwu,prf,info)
+      import :: psb_ipk_, psb_zspmat_type
+      type(psb_zspmat_type), intent(in) :: mat
+      integer(psb_ipk_), intent(out) :: bwl, bwu
+      integer(psb_ipk_), intent(out) :: prf
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_z_cmp_bwpf
+    subroutine psb_lz_cmp_bwpf(mat,bwl,bwu,prf,info)
+      import :: psb_ipk_, psb_lpk_, psb_lzspmat_type
+      type(psb_lzspmat_type), intent(in) :: mat
+      integer(psb_lpk_), intent(out) :: bwl, bwu
+      integer(psb_lpk_), intent(out) :: prf
+      integer(psb_ipk_), intent(out) :: info
+    end subroutine psb_lz_cmp_bwpf
+  end interface psb_cmp_bwpf
+
+end module psb_z_renum_mod