diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90
index 38b740a7..3fe001c8 100644
--- a/base/modules/auxil/psi_c_serial_mod.f90
+++ b/base/modules/auxil/psi_c_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_c_serial_mod
     end subroutine psi_caxpbyv2
   end interface psb_geaxpby
 
-  interface psi_abgdxyz
-    subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  interface psi_upd_xyz
+    subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
       import :: psb_ipk_, psb_spk_
       implicit none
       integer(psb_ipk_), intent(in)      :: m
@@ -109,8 +109,8 @@ module psi_c_serial_mod
       complex(psb_spk_), intent (inout)    ::  z(:)
       complex(psb_spk_), intent (in)       :: alpha, beta,gamma,delta
       integer(psb_ipk_), intent(out)     :: info
-    end subroutine psi_cabgdxyz
-  end interface psi_abgdxyz
+    end subroutine psi_c_upd_xyz
+  end interface psi_upd_xyz
   
   interface psi_xyzw
     subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90
index 1d65c5f6..a08263df 100644
--- a/base/modules/auxil/psi_d_serial_mod.f90
+++ b/base/modules/auxil/psi_d_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_d_serial_mod
     end subroutine psi_daxpbyv2
   end interface psb_geaxpby
 
-  interface psi_abgdxyz
-    subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  interface psi_upd_xyz
+    subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
       import :: psb_ipk_, psb_dpk_
       implicit none
       integer(psb_ipk_), intent(in)      :: m
@@ -109,8 +109,8 @@ module psi_d_serial_mod
       real(psb_dpk_), intent (inout)    ::  z(:)
       real(psb_dpk_), intent (in)       :: alpha, beta,gamma,delta
       integer(psb_ipk_), intent(out)     :: info
-    end subroutine psi_dabgdxyz
-  end interface psi_abgdxyz
+    end subroutine psi_d_upd_xyz
+  end interface psi_upd_xyz
   
   interface psi_xyzw
     subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90
index 6f4e8c06..1f1bebd7 100644
--- a/base/modules/auxil/psi_e_serial_mod.f90
+++ b/base/modules/auxil/psi_e_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_e_serial_mod
     end subroutine psi_eaxpbyv2
   end interface psb_geaxpby
 
-  interface psi_abgdxyz
-    subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  interface psi_upd_xyz
+    subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
       import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
       implicit none
       integer(psb_ipk_), intent(in)      :: m
@@ -109,8 +109,8 @@ module psi_e_serial_mod
       integer(psb_epk_), intent (inout)    ::  z(:)
       integer(psb_epk_), intent (in)       :: alpha, beta,gamma,delta
       integer(psb_ipk_), intent(out)     :: info
-    end subroutine psi_eabgdxyz
-  end interface psi_abgdxyz
+    end subroutine psi_e_upd_xyz
+  end interface psi_upd_xyz
   
   interface psi_xyzw
     subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90
index ffa14059..770d3256 100644
--- a/base/modules/auxil/psi_i2_serial_mod.f90
+++ b/base/modules/auxil/psi_i2_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_i2_serial_mod
     end subroutine psi_i2axpbyv2
   end interface psb_geaxpby
 
-  interface psi_abgdxyz
-    subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  interface psi_upd_xyz
+    subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
       import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
       implicit none
       integer(psb_ipk_), intent(in)      :: m
@@ -109,8 +109,8 @@ module psi_i2_serial_mod
       integer(psb_i2pk_), intent (inout)    ::  z(:)
       integer(psb_i2pk_), intent (in)       :: alpha, beta,gamma,delta
       integer(psb_ipk_), intent(out)     :: info
-    end subroutine psi_i2abgdxyz
-  end interface psi_abgdxyz
+    end subroutine psi_i2_upd_xyz
+  end interface psi_upd_xyz
   
   interface psi_xyzw
     subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90
index 5661fdbf..3583cccc 100644
--- a/base/modules/auxil/psi_m_serial_mod.f90
+++ b/base/modules/auxil/psi_m_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_m_serial_mod
     end subroutine psi_maxpbyv2
   end interface psb_geaxpby
 
-  interface psi_abgdxyz
-    subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  interface psi_upd_xyz
+    subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
       import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
       implicit none
       integer(psb_ipk_), intent(in)      :: m
@@ -109,8 +109,8 @@ module psi_m_serial_mod
       integer(psb_mpk_), intent (inout)    ::  z(:)
       integer(psb_mpk_), intent (in)       :: alpha, beta,gamma,delta
       integer(psb_ipk_), intent(out)     :: info
-    end subroutine psi_mabgdxyz
-  end interface psi_abgdxyz
+    end subroutine psi_m_upd_xyz
+  end interface psi_upd_xyz
   
   interface psi_xyzw
     subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90
index 5cc17d58..3e0c6d91 100644
--- a/base/modules/auxil/psi_s_serial_mod.f90
+++ b/base/modules/auxil/psi_s_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_s_serial_mod
     end subroutine psi_saxpbyv2
   end interface psb_geaxpby
 
-  interface psi_abgdxyz
-    subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  interface psi_upd_xyz
+    subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
       import :: psb_ipk_, psb_spk_
       implicit none
       integer(psb_ipk_), intent(in)      :: m
@@ -109,8 +109,8 @@ module psi_s_serial_mod
       real(psb_spk_), intent (inout)    ::  z(:)
       real(psb_spk_), intent (in)       :: alpha, beta,gamma,delta
       integer(psb_ipk_), intent(out)     :: info
-    end subroutine psi_sabgdxyz
-  end interface psi_abgdxyz
+    end subroutine psi_s_upd_xyz
+  end interface psi_upd_xyz
   
   interface psi_xyzw
     subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90
index 8a3f053d..a8ea734e 100644
--- a/base/modules/auxil/psi_z_serial_mod.f90
+++ b/base/modules/auxil/psi_z_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_z_serial_mod
     end subroutine psi_zaxpbyv2
   end interface psb_geaxpby
 
-  interface psi_abgdxyz
-    subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  interface psi_upd_xyz
+    subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
       import :: psb_ipk_, psb_dpk_
       implicit none
       integer(psb_ipk_), intent(in)      :: m
@@ -109,8 +109,8 @@ module psi_z_serial_mod
       complex(psb_dpk_), intent (inout)    ::  z(:)
       complex(psb_dpk_), intent (in)       :: alpha, beta,gamma,delta
       integer(psb_ipk_), intent(out)     :: info
-    end subroutine psi_zabgdxyz
-  end interface psi_abgdxyz
+    end subroutine psi_z_upd_xyz
+  end interface psi_upd_xyz
   
   interface psi_xyzw
     subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/psblas/psb_c_psblas_mod.F90 b/base/modules/psblas/psb_c_psblas_mod.F90
index 7f7f937c..591dec09 100644
--- a/base/modules/psblas/psb_c_psblas_mod.F90
+++ b/base/modules/psblas/psb_c_psblas_mod.F90
@@ -143,8 +143,8 @@ module psb_c_psblas_mod
     end subroutine psb_caxpby
   end interface
 
-  interface psb_abgdxyz
-    subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+  interface psb_upd_xyz
+    subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
          & desc_a, info)
       import :: psb_desc_type, psb_spk_, psb_ipk_, &
            & psb_c_vect_type, psb_cspmat_type
@@ -154,8 +154,8 @@ module psb_c_psblas_mod
       complex(psb_spk_), intent (in)        :: alpha, beta, gamma, delta
       type(psb_desc_type), intent (in)      :: desc_a
       integer(psb_ipk_), intent(out)        :: info
-    end subroutine psb_cabgdxyz_vect
-  end interface psb_abgdxyz
+    end subroutine psb_c_upd_xyz_vect
+  end interface psb_upd_xyz
   
   interface psb_geamax
     function psb_camax(x, desc_a, info, jx,global)
diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90
index 12090956..b200bc8a 100644
--- a/base/modules/psblas/psb_d_psblas_mod.F90
+++ b/base/modules/psblas/psb_d_psblas_mod.F90
@@ -143,8 +143,8 @@ module psb_d_psblas_mod
     end subroutine psb_daxpby
   end interface
 
-  interface psb_abgdxyz
-    subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+  interface psb_upd_xyz
+    subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
          & desc_a, info)
       import :: psb_desc_type, psb_dpk_, psb_ipk_, &
            & psb_d_vect_type, psb_dspmat_type
@@ -154,8 +154,8 @@ module psb_d_psblas_mod
       real(psb_dpk_), intent (in)        :: alpha, beta, gamma, delta
       type(psb_desc_type), intent (in)      :: desc_a
       integer(psb_ipk_), intent(out)        :: info
-    end subroutine psb_dabgdxyz_vect
-  end interface psb_abgdxyz
+    end subroutine psb_d_upd_xyz_vect
+  end interface psb_upd_xyz
   
   interface psb_geamax
     function psb_damax(x, desc_a, info, jx,global)
diff --git a/base/modules/psblas/psb_s_psblas_mod.F90 b/base/modules/psblas/psb_s_psblas_mod.F90
index 7a7ce783..a60da025 100644
--- a/base/modules/psblas/psb_s_psblas_mod.F90
+++ b/base/modules/psblas/psb_s_psblas_mod.F90
@@ -143,8 +143,8 @@ module psb_s_psblas_mod
     end subroutine psb_saxpby
   end interface
 
-  interface psb_abgdxyz
-    subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+  interface psb_upd_xyz
+    subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
          & desc_a, info)
       import :: psb_desc_type, psb_spk_, psb_ipk_, &
            & psb_s_vect_type, psb_sspmat_type
@@ -154,8 +154,8 @@ module psb_s_psblas_mod
       real(psb_spk_), intent (in)        :: alpha, beta, gamma, delta
       type(psb_desc_type), intent (in)      :: desc_a
       integer(psb_ipk_), intent(out)        :: info
-    end subroutine psb_sabgdxyz_vect
-  end interface psb_abgdxyz
+    end subroutine psb_s_upd_xyz_vect
+  end interface psb_upd_xyz
   
   interface psb_geamax
     function psb_samax(x, desc_a, info, jx,global)
diff --git a/base/modules/psblas/psb_z_psblas_mod.F90 b/base/modules/psblas/psb_z_psblas_mod.F90
index bcfe9caa..241df2b9 100644
--- a/base/modules/psblas/psb_z_psblas_mod.F90
+++ b/base/modules/psblas/psb_z_psblas_mod.F90
@@ -143,8 +143,8 @@ module psb_z_psblas_mod
     end subroutine psb_zaxpby
   end interface
 
-  interface psb_abgdxyz
-    subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+  interface psb_upd_xyz
+    subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
          & desc_a, info)
       import :: psb_desc_type, psb_dpk_, psb_ipk_, &
            & psb_z_vect_type, psb_zspmat_type
@@ -154,8 +154,8 @@ module psb_z_psblas_mod
       complex(psb_dpk_), intent (in)        :: alpha, beta, gamma, delta
       type(psb_desc_type), intent (in)      :: desc_a
       integer(psb_ipk_), intent(out)        :: info
-    end subroutine psb_zabgdxyz_vect
-  end interface psb_abgdxyz
+    end subroutine psb_z_upd_xyz_vect
+  end interface psb_upd_xyz
   
   interface psb_geamax
     function psb_zamax(x, desc_a, info, jx,global)
diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90
index 41bab5ab..4dac86d6 100644
--- a/base/modules/serial/psb_c_base_vect_mod.F90
+++ b/base/modules/serial/psb_c_base_vect_mod.F90
@@ -155,7 +155,7 @@ module psb_c_base_vect_mod
     procedure, pass(z) :: axpby_v2  => c_base_axpby_v2
     procedure, pass(z) :: axpby_a2  => c_base_axpby_a2
     generic, public    :: axpby    => axpby_v, axpby_a, axpby_v2, axpby_a2
-    procedure, pass(z) :: abgdxyz  => c_base_abgdxyz
+    procedure, pass(z) :: upd_xyz  => c_base_upd_xyz
     procedure, pass(w) :: xyzw     => c_base_xyzw
     
     !
@@ -1130,12 +1130,12 @@ contains
   end subroutine c_base_axpby_a2
 
   !
-  ! ABGDXYZ is invoked via Z, hence the structure below.
+  ! UPD_XYZ is invoked via Z, hence the structure below.
   !
   !
-  !> Function  base_abgdxyz
+  !> Function  base_upd_xyz
   !! \memberof  psb_c_base_vect_type
-  !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
+  !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
   !! \param m    Number of entries to be considered
   !! \param alpha scalar alpha
   !! \param beta scalar beta 
@@ -1146,7 +1146,7 @@ contains
   !! \param z     The class(base_vect) to be added
   !! \param info   return code
   !!
-  subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  subroutine c_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
     use psi_serial_mod
     implicit none
     integer(psb_ipk_), intent(in)               :: m
@@ -1159,11 +1159,11 @@ contains
     if (x%is_dev().and.(alpha/=czero)) call x%sync()
     if (y%is_dev().and.(beta/=czero))   call y%sync()
     if (z%is_dev().and.(delta/=czero))  call z%sync()
-    call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
+    call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
     call y%set_host()
     call z%set_host()
         
-  end subroutine c_base_abgdxyz
+  end subroutine c_base_upd_xyz
 
   subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
     use psi_serial_mod
diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90
index 865f9456..1e9510f2 100644
--- a/base/modules/serial/psb_c_vect_mod.F90
+++ b/base/modules/serial/psb_c_vect_mod.F90
@@ -102,7 +102,7 @@ module psb_c_vect_mod
     procedure, pass(z) :: axpby_v2  => c_vect_axpby_v2
     procedure, pass(z) :: axpby_a2  => c_vect_axpby_a2
     generic, public    :: axpby    => axpby_v, axpby_a, axpby_v2, axpby_a2
-    procedure, pass(z) :: abgdxyz  => c_vect_abgdxyz
+    procedure, pass(z) :: upd_xyz  => c_vect_upd_xyz
     procedure, pass(z) :: xyzw     => c_vect_xyzw
     
     procedure, pass(y) :: mlt_v    => c_vect_mlt_v
@@ -774,7 +774,7 @@ contains
 
   end subroutine c_vect_axpby_a2
 
-  subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
+  subroutine c_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
     use psi_serial_mod
     implicit none
     integer(psb_ipk_), intent(in)            :: m
@@ -785,9 +785,9 @@ contains
     integer(psb_ipk_), intent(out)   :: info
 
     if (allocated(z%v)) &
-         call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
+         call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
     
-  end subroutine c_vect_abgdxyz
+  end subroutine c_vect_upd_xyz
 
   subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
     use psi_serial_mod
diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90
index 1ad1ffa5..f07b5aed 100644
--- a/base/modules/serial/psb_d_base_vect_mod.F90
+++ b/base/modules/serial/psb_d_base_vect_mod.F90
@@ -155,7 +155,7 @@ module psb_d_base_vect_mod
     procedure, pass(z) :: axpby_v2  => d_base_axpby_v2
     procedure, pass(z) :: axpby_a2  => d_base_axpby_a2
     generic, public    :: axpby    => axpby_v, axpby_a, axpby_v2, axpby_a2
-    procedure, pass(z) :: abgdxyz  => d_base_abgdxyz
+    procedure, pass(z) :: upd_xyz  => d_base_upd_xyz
     procedure, pass(w) :: xyzw     => d_base_xyzw
     
     !
@@ -1137,12 +1137,12 @@ contains
   end subroutine d_base_axpby_a2
 
   !
-  ! ABGDXYZ is invoked via Z, hence the structure below.
+  ! UPD_XYZ is invoked via Z, hence the structure below.
   !
   !
-  !> Function  base_abgdxyz
+  !> Function  base_upd_xyz
   !! \memberof  psb_d_base_vect_type
-  !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
+  !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
   !! \param m    Number of entries to be considered
   !! \param alpha scalar alpha
   !! \param beta scalar beta 
@@ -1153,7 +1153,7 @@ contains
   !! \param z     The class(base_vect) to be added
   !! \param info   return code
   !!
-  subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  subroutine d_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
     use psi_serial_mod
     implicit none
     integer(psb_ipk_), intent(in)               :: m
@@ -1166,11 +1166,11 @@ contains
     if (x%is_dev().and.(alpha/=dzero)) call x%sync()
     if (y%is_dev().and.(beta/=dzero))   call y%sync()
     if (z%is_dev().and.(delta/=dzero))  call z%sync()
-    call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
+    call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
     call y%set_host()
     call z%set_host()
         
-  end subroutine d_base_abgdxyz
+  end subroutine d_base_upd_xyz
 
   subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
     use psi_serial_mod
diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90
index 55dd8230..ae3062dd 100644
--- a/base/modules/serial/psb_d_vect_mod.F90
+++ b/base/modules/serial/psb_d_vect_mod.F90
@@ -102,7 +102,7 @@ module psb_d_vect_mod
     procedure, pass(z) :: axpby_v2  => d_vect_axpby_v2
     procedure, pass(z) :: axpby_a2  => d_vect_axpby_a2
     generic, public    :: axpby    => axpby_v, axpby_a, axpby_v2, axpby_a2
-    procedure, pass(z) :: abgdxyz  => d_vect_abgdxyz
+    procedure, pass(z) :: upd_xyz  => d_vect_upd_xyz
     procedure, pass(z) :: xyzw     => d_vect_xyzw
     
     procedure, pass(y) :: mlt_v    => d_vect_mlt_v
@@ -781,7 +781,7 @@ contains
 
   end subroutine d_vect_axpby_a2
 
-  subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
+  subroutine d_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
     use psi_serial_mod
     implicit none
     integer(psb_ipk_), intent(in)            :: m
@@ -792,9 +792,9 @@ contains
     integer(psb_ipk_), intent(out)   :: info
 
     if (allocated(z%v)) &
-         call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
+         call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
     
-  end subroutine d_vect_abgdxyz
+  end subroutine d_vect_upd_xyz
 
   subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
     use psi_serial_mod
diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90
index 26b82c31..596cd634 100644
--- a/base/modules/serial/psb_s_base_vect_mod.F90
+++ b/base/modules/serial/psb_s_base_vect_mod.F90
@@ -155,7 +155,7 @@ module psb_s_base_vect_mod
     procedure, pass(z) :: axpby_v2  => s_base_axpby_v2
     procedure, pass(z) :: axpby_a2  => s_base_axpby_a2
     generic, public    :: axpby    => axpby_v, axpby_a, axpby_v2, axpby_a2
-    procedure, pass(z) :: abgdxyz  => s_base_abgdxyz
+    procedure, pass(z) :: upd_xyz  => s_base_upd_xyz
     procedure, pass(w) :: xyzw     => s_base_xyzw
     
     !
@@ -1137,12 +1137,12 @@ contains
   end subroutine s_base_axpby_a2
 
   !
-  ! ABGDXYZ is invoked via Z, hence the structure below.
+  ! UPD_XYZ is invoked via Z, hence the structure below.
   !
   !
-  !> Function  base_abgdxyz
+  !> Function  base_upd_xyz
   !! \memberof  psb_s_base_vect_type
-  !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
+  !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
   !! \param m    Number of entries to be considered
   !! \param alpha scalar alpha
   !! \param beta scalar beta 
@@ -1153,7 +1153,7 @@ contains
   !! \param z     The class(base_vect) to be added
   !! \param info   return code
   !!
-  subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  subroutine s_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
     use psi_serial_mod
     implicit none
     integer(psb_ipk_), intent(in)               :: m
@@ -1166,11 +1166,11 @@ contains
     if (x%is_dev().and.(alpha/=szero)) call x%sync()
     if (y%is_dev().and.(beta/=szero))   call y%sync()
     if (z%is_dev().and.(delta/=szero))  call z%sync()
-    call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
+    call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
     call y%set_host()
     call z%set_host()
         
-  end subroutine s_base_abgdxyz
+  end subroutine s_base_upd_xyz
 
   subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
     use psi_serial_mod
diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90
index a50b2a0a..cad4659c 100644
--- a/base/modules/serial/psb_s_vect_mod.F90
+++ b/base/modules/serial/psb_s_vect_mod.F90
@@ -102,7 +102,7 @@ module psb_s_vect_mod
     procedure, pass(z) :: axpby_v2  => s_vect_axpby_v2
     procedure, pass(z) :: axpby_a2  => s_vect_axpby_a2
     generic, public    :: axpby    => axpby_v, axpby_a, axpby_v2, axpby_a2
-    procedure, pass(z) :: abgdxyz  => s_vect_abgdxyz
+    procedure, pass(z) :: upd_xyz  => s_vect_upd_xyz
     procedure, pass(z) :: xyzw     => s_vect_xyzw
     
     procedure, pass(y) :: mlt_v    => s_vect_mlt_v
@@ -781,7 +781,7 @@ contains
 
   end subroutine s_vect_axpby_a2
 
-  subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
+  subroutine s_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
     use psi_serial_mod
     implicit none
     integer(psb_ipk_), intent(in)            :: m
@@ -792,9 +792,9 @@ contains
     integer(psb_ipk_), intent(out)   :: info
 
     if (allocated(z%v)) &
-         call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
+         call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
     
-  end subroutine s_vect_abgdxyz
+  end subroutine s_vect_upd_xyz
 
   subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
     use psi_serial_mod
diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90
index a3afc9c1..1bbdfba1 100644
--- a/base/modules/serial/psb_z_base_vect_mod.F90
+++ b/base/modules/serial/psb_z_base_vect_mod.F90
@@ -155,7 +155,7 @@ module psb_z_base_vect_mod
     procedure, pass(z) :: axpby_v2  => z_base_axpby_v2
     procedure, pass(z) :: axpby_a2  => z_base_axpby_a2
     generic, public    :: axpby    => axpby_v, axpby_a, axpby_v2, axpby_a2
-    procedure, pass(z) :: abgdxyz  => z_base_abgdxyz
+    procedure, pass(z) :: upd_xyz  => z_base_upd_xyz
     procedure, pass(w) :: xyzw     => z_base_xyzw
     
     !
@@ -1130,12 +1130,12 @@ contains
   end subroutine z_base_axpby_a2
 
   !
-  ! ABGDXYZ is invoked via Z, hence the structure below.
+  ! UPD_XYZ is invoked via Z, hence the structure below.
   !
   !
-  !> Function  base_abgdxyz
+  !> Function  base_upd_xyz
   !! \memberof  psb_z_base_vect_type
-  !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
+  !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
   !! \param m    Number of entries to be considered
   !! \param alpha scalar alpha
   !! \param beta scalar beta 
@@ -1146,7 +1146,7 @@ contains
   !! \param z     The class(base_vect) to be added
   !! \param info   return code
   !!
-  subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  subroutine z_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
     use psi_serial_mod
     implicit none
     integer(psb_ipk_), intent(in)               :: m
@@ -1159,11 +1159,11 @@ contains
     if (x%is_dev().and.(alpha/=zzero)) call x%sync()
     if (y%is_dev().and.(beta/=zzero))   call y%sync()
     if (z%is_dev().and.(delta/=zzero))  call z%sync()
-    call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
+    call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
     call y%set_host()
     call z%set_host()
         
-  end subroutine z_base_abgdxyz
+  end subroutine z_base_upd_xyz
 
   subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
     use psi_serial_mod
diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90
index 21e0c546..48f2e947 100644
--- a/base/modules/serial/psb_z_vect_mod.F90
+++ b/base/modules/serial/psb_z_vect_mod.F90
@@ -102,7 +102,7 @@ module psb_z_vect_mod
     procedure, pass(z) :: axpby_v2  => z_vect_axpby_v2
     procedure, pass(z) :: axpby_a2  => z_vect_axpby_a2
     generic, public    :: axpby    => axpby_v, axpby_a, axpby_v2, axpby_a2
-    procedure, pass(z) :: abgdxyz  => z_vect_abgdxyz
+    procedure, pass(z) :: upd_xyz  => z_vect_upd_xyz
     procedure, pass(z) :: xyzw     => z_vect_xyzw
     
     procedure, pass(y) :: mlt_v    => z_vect_mlt_v
@@ -774,7 +774,7 @@ contains
 
   end subroutine z_vect_axpby_a2
 
-  subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
+  subroutine z_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
     use psi_serial_mod
     implicit none
     integer(psb_ipk_), intent(in)            :: m
@@ -785,9 +785,9 @@ contains
     integer(psb_ipk_), intent(out)   :: info
 
     if (allocated(z%v)) &
-         call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
+         call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
     
-  end subroutine z_vect_abgdxyz
+  end subroutine z_vect_upd_xyz
 
   subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
     use psi_serial_mod
diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90
index 7c22bb06..5d80ef00 100644
--- a/base/psblas/psb_caxpby.f90
+++ b/base/psblas/psb_caxpby.f90
@@ -743,9 +743,9 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info)
 end subroutine psb_caddconst_vect
 
 
-subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
      & desc_a, info)
-  use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect
+  use psb_base_mod, psb_protect_name => psb_c_upd_xyz_vect
   implicit none 
   type(psb_c_vect_type), intent (inout) :: x
   type(psb_c_vect_type), intent (inout) :: y
@@ -812,7 +812,7 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
   end if
 
   if(desc_a%get_local_rows() > 0) then
-    call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
+    call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
   end if
 
   call psb_erractionrestore(err_act)
@@ -822,5 +822,5 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
 
   return
 
-end subroutine psb_cabgdxyz_vect
+end subroutine psb_c_upd_xyz_vect
 
diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90
index 1de77647..38ebe465 100644
--- a/base/psblas/psb_daxpby.f90
+++ b/base/psblas/psb_daxpby.f90
@@ -743,9 +743,9 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info)
 end subroutine psb_daddconst_vect
 
 
-subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
      & desc_a, info)
-  use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect
+  use psb_base_mod, psb_protect_name => psb_d_upd_xyz_vect
   implicit none 
   type(psb_d_vect_type), intent (inout) :: x
   type(psb_d_vect_type), intent (inout) :: y
@@ -812,7 +812,7 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
   end if
 
   if(desc_a%get_local_rows() > 0) then
-    call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
+    call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
   end if
 
   call psb_erractionrestore(err_act)
@@ -822,5 +822,5 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
 
   return
 
-end subroutine psb_dabgdxyz_vect
+end subroutine psb_d_upd_xyz_vect
 
diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90
index 1b1f24e6..0055fdbe 100644
--- a/base/psblas/psb_saxpby.f90
+++ b/base/psblas/psb_saxpby.f90
@@ -743,9 +743,9 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info)
 end subroutine psb_saddconst_vect
 
 
-subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
      & desc_a, info)
-  use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect
+  use psb_base_mod, psb_protect_name => psb_s_upd_xyz_vect
   implicit none 
   type(psb_s_vect_type), intent (inout) :: x
   type(psb_s_vect_type), intent (inout) :: y
@@ -812,7 +812,7 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
   end if
 
   if(desc_a%get_local_rows() > 0) then
-    call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
+    call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
   end if
 
   call psb_erractionrestore(err_act)
@@ -822,5 +822,5 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
 
   return
 
-end subroutine psb_sabgdxyz_vect
+end subroutine psb_s_upd_xyz_vect
 
diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90
index 0f37a1f4..e93488e3 100644
--- a/base/psblas/psb_zaxpby.f90
+++ b/base/psblas/psb_zaxpby.f90
@@ -743,9 +743,9 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info)
 end subroutine psb_zaddconst_vect
 
 
-subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
      & desc_a, info)
-  use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect
+  use psb_base_mod, psb_protect_name => psb_z_upd_xyz_vect
   implicit none 
   type(psb_z_vect_type), intent (inout) :: x
   type(psb_z_vect_type), intent (inout) :: y
@@ -812,7 +812,7 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
   end if
 
   if(desc_a%get_local_rows() > 0) then
-    call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
+    call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
   end if
 
   call psb_erractionrestore(err_act)
@@ -822,5 +822,5 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
 
   return
 
-end subroutine psb_zabgdxyz_vect
+end subroutine psb_z_upd_xyz_vect
 
diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90
index e230a1e0..e3f1d9a3 100644
--- a/base/serial/psi_c_serial_impl.F90
+++ b/base/serial/psi_c_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine  caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
 end subroutine caxpbyv2
 
-subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
   use psb_const_mod
   use psb_error_mod
   implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
   integer(psb_ipk_) :: i
   integer(psb_ipk_) :: int_err(5)
   character  name*20
-  name='cabgdxyz'
+  name='c_upd_xyz'
 
   info = psb_success_
   if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
   call fcpsb_serror()
   return
 
-end subroutine psi_cabgdxyz
+end subroutine psi_c_upd_xyz
 
 subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
   use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
   integer(psb_ipk_) :: i
   integer(psb_ipk_) :: int_err(5)
   character  name*20
-  name='cabgdxyz'
+  name='c_xyzw'
 
   info = psb_success_
   if (m.lt.0) then
diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90
index bf1b2917..d6a9a31d 100644
--- a/base/serial/psi_d_serial_impl.F90
+++ b/base/serial/psi_d_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine  daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
 end subroutine daxpbyv2
 
-subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
   use psb_const_mod
   use psb_error_mod
   implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
   integer(psb_ipk_) :: i
   integer(psb_ipk_) :: int_err(5)
   character  name*20
-  name='dabgdxyz'
+  name='d_upd_xyz'
 
   info = psb_success_
   if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
   call fcpsb_serror()
   return
 
-end subroutine psi_dabgdxyz
+end subroutine psi_d_upd_xyz
 
 subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
   use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
   integer(psb_ipk_) :: i
   integer(psb_ipk_) :: int_err(5)
   character  name*20
-  name='dabgdxyz'
+  name='d_xyzw'
 
   info = psb_success_
   if (m.lt.0) then
diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90
index 911ab4ec..37b11a94 100644
--- a/base/serial/psi_e_serial_impl.F90
+++ b/base/serial/psi_e_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine  eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
 end subroutine eaxpbyv2
 
-subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
   use psb_const_mod
   use psb_error_mod
   implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
   integer(psb_ipk_) :: i
   integer(psb_ipk_) :: int_err(5)
   character  name*20
-  name='eabgdxyz'
+  name='e_upd_xyz'
 
   info = psb_success_
   if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
   call fcpsb_serror()
   return
 
-end subroutine psi_eabgdxyz
+end subroutine psi_e_upd_xyz
 
 subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
   use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
   integer(psb_ipk_) :: i
   integer(psb_ipk_) :: int_err(5)
   character  name*20
-  name='eabgdxyz'
+  name='e_xyzw'
 
   info = psb_success_
   if (m.lt.0) then
diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90
index fb42dfcd..c20cd60b 100644
--- a/base/serial/psi_i2_serial_impl.F90
+++ b/base/serial/psi_i2_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine  i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
 end subroutine i2axpbyv2
 
-subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
   use psb_const_mod
   use psb_error_mod
   implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
   integer(psb_ipk_) :: i
   integer(psb_ipk_) :: int_err(5)
   character  name*20
-  name='i2abgdxyz'
+  name='i2_upd_xyz'
 
   info = psb_success_
   if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
   call fcpsb_serror()
   return
 
-end subroutine psi_i2abgdxyz
+end subroutine psi_i2_upd_xyz
 
 subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
   use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
   integer(psb_ipk_) :: i
   integer(psb_ipk_) :: int_err(5)
   character  name*20
-  name='i2abgdxyz'
+  name='i2_xyzw'
 
   info = psb_success_
   if (m.lt.0) then
diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90
index 346fd897..55913a16 100644
--- a/base/serial/psi_m_serial_impl.F90
+++ b/base/serial/psi_m_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine  maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
 end subroutine maxpbyv2
 
-subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
   use psb_const_mod
   use psb_error_mod
   implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
   integer(psb_ipk_) :: i
   integer(psb_ipk_) :: int_err(5)
   character  name*20
-  name='mabgdxyz'
+  name='m_upd_xyz'
 
   info = psb_success_
   if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
   call fcpsb_serror()
   return
 
-end subroutine psi_mabgdxyz
+end subroutine psi_m_upd_xyz
 
 subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
   use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
   integer(psb_ipk_) :: i
   integer(psb_ipk_) :: int_err(5)
   character  name*20
-  name='mabgdxyz'
+  name='m_xyzw'
 
   info = psb_success_
   if (m.lt.0) then
diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90
index 52f86bcd..c3846c8e 100644
--- a/base/serial/psi_s_serial_impl.F90
+++ b/base/serial/psi_s_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine  saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
 end subroutine saxpbyv2
 
-subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
   use psb_const_mod
   use psb_error_mod
   implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
   integer(psb_ipk_) :: i
   integer(psb_ipk_) :: int_err(5)
   character  name*20
-  name='sabgdxyz'
+  name='s_upd_xyz'
 
   info = psb_success_
   if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
   call fcpsb_serror()
   return
 
-end subroutine psi_sabgdxyz
+end subroutine psi_s_upd_xyz
 
 subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
   use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
   integer(psb_ipk_) :: i
   integer(psb_ipk_) :: int_err(5)
   character  name*20
-  name='sabgdxyz'
+  name='s_xyzw'
 
   info = psb_success_
   if (m.lt.0) then
diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90
index 7e680273..763eae22 100644
--- a/base/serial/psi_z_serial_impl.F90
+++ b/base/serial/psi_z_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine  zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
 
 end subroutine zaxpbyv2
 
-subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
   use psb_const_mod
   use psb_error_mod
   implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
   integer(psb_ipk_) :: i
   integer(psb_ipk_) :: int_err(5)
   character  name*20
-  name='zabgdxyz'
+  name='z_upd_xyz'
 
   info = psb_success_
   if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
   call fcpsb_serror()
   return
 
-end subroutine psi_zabgdxyz
+end subroutine psi_z_upd_xyz
 
 subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
   use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
   integer(psb_ipk_) :: i
   integer(psb_ipk_) :: int_err(5)
   character  name*20
-  name='zabgdxyz'
+  name='z_xyzw'
 
   info = psb_success_
   if (m.lt.0) then
diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c
index cdfda481..65d41893 100644
--- a/cuda/cvectordev.c
+++ b/cuda/cvectordev.c
@@ -255,7 +255,7 @@ int axpbyMultiVecDeviceFloatComplex(int n,cuFloatComplex alpha, void* devMultiVe
   return(i);
 }
 
-int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex  alpha,cuFloatComplex  beta,
+int upd_xyzMultiVecDeviceFloatComplex(int n,cuFloatComplex  alpha,cuFloatComplex  beta,
 				      cuFloatComplex  gamma, cuFloatComplex  delta, 
 				      void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ)
 { int j=0, i=0;
@@ -268,7 +268,7 @@ int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex  alpha,cuFloatComplex
   if ((n > devVecY->size_) || (n>devVecX->size_ )) 
     return SPGPU_UNSUPPORTED;
 
-  spgpuCabgdxyz(handle,n, alpha,beta,gamma,delta, 
+  spgpuCupd_xyz(handle,n, alpha,beta,gamma,delta, 
 		(cuFloatComplex *)devVecX->v_,(cuFloatComplex *) devVecY->v_,(cuFloatComplex *) devVecZ->v_);
   return(i);
 }
diff --git a/cuda/cvectordev.h b/cuda/cvectordev.h
index 62693e27..8c40b95d 100644
--- a/cuda/cvectordev.h
+++ b/cuda/cvectordev.h
@@ -69,7 +69,7 @@ int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
 int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB);
 
 int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY);
-int abgdxyzMultiVecDeviceFloatComplex(int n,cuFloatComplex  alpha,cuFloatComplex  beta,
+int upd_xyzMultiVecDeviceFloatComplex(int n,cuFloatComplex  alpha,cuFloatComplex  beta,
 				      cuFloatComplex  gamma, cuFloatComplex  delta, 
 				      void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
 int xyzwMultiVecDeviceFloatComplex(int n,cuFloatComplex  a,cuFloatComplex  b,
diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c
index 723f48d8..a69e1b71 100644
--- a/cuda/dvectordev.c
+++ b/cuda/dvectordev.c
@@ -241,7 +241,7 @@ int axpbyMultiVecDeviceDouble(int n,double alpha, void* devMultiVecX,
   return(i);
 }
  
-int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, 
+int upd_xyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, 
 				void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ)
 { int j=0, i=0;
   int pitch = 0;
@@ -253,7 +253,7 @@ int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, do
   if ((n > devVecY->size_) || (n>devVecX->size_ )) 
     return SPGPU_UNSUPPORTED;
 
-  spgpuDabgdxyz(handle,n, alpha,beta,gamma,delta, 
+  spgpuDupd_xyz(handle,n, alpha,beta,gamma,delta, 
 	      (double*)devVecX->v_,(double*) devVecY->v_,(double*) devVecZ->v_);
   return(i);
 }
diff --git a/cuda/dvectordev.h b/cuda/dvectordev.h
index c2bfa1b5..3834c0d3 100644
--- a/cuda/dvectordev.h
+++ b/cuda/dvectordev.h
@@ -67,7 +67,7 @@ int asumMultiVecDeviceDouble(double* y_res, int n, void* devVecA);
 int dotMultiVecDeviceDouble(double* y_res, int n, void* devVecA, void* devVecB);
 
 int axpbyMultiVecDeviceDouble(int n, double alpha, void* devVecX, double beta, void* devVecY);
-int abgdxyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, 
+int upd_xyzMultiVecDeviceDouble(int n,double alpha,double beta, double gamma, double delta, 
 				void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
 int xyzwMultiVecDeviceDouble(int n,double a, double b, double c, double d, double e, double f, 
 			     void* devMultiVecX, void* devMultiVecY,
diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90
index 2c2a4f61..45fafe0a 100644
--- a/cuda/psb_c_cuda_vect_mod.F90
+++ b/cuda/psb_c_cuda_vect_mod.F90
@@ -90,7 +90,7 @@ module psb_c_cuda_vect_mod
     procedure, pass(x) :: dot_a    => c_cuda_dot_a
     procedure, pass(y) :: axpby_v  => c_cuda_axpby_v
     procedure, pass(y) :: axpby_a  => c_cuda_axpby_a
-    procedure, pass(z) :: abgdxyz  => c_cuda_abgdxyz
+    procedure, pass(z) :: upd_xyz  => c_cuda_upd_xyz
     procedure, pass(y) :: mlt_v    => c_cuda_mlt_v
     procedure, pass(y) :: mlt_a    => c_cuda_mlt_a
     procedure, pass(z) :: mlt_a_2  => c_cuda_mlt_a_2
@@ -912,7 +912,7 @@ contains
 
   end subroutine c_cuda_axpby_v
 
-  subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  subroutine c_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
     use psi_serial_mod
     implicit none
     integer(psb_ipk_), intent(in)               :: m
@@ -946,7 +946,7 @@ contains
             if ((nx<m).or.(ny<m).or.(nz<m)) then
               info = psb_err_internal_error_
             else
-              info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
+              info = upd_xyzMultiVecDevice(m,alpha,beta,gamma,delta,&
                    & xx%deviceVect,yy%deviceVect,zz%deviceVect)
             end if
             call yy%set_dev()
@@ -972,7 +972,7 @@ contains
       call z%axpby(m,gamma,y,delta,info)
     end if
 
-  end subroutine c_cuda_abgdxyz
+  end subroutine c_cuda_upd_xyz
 
   subroutine c_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
     use psi_serial_mod
diff --git a/cuda/psb_c_vectordev_mod.F90 b/cuda/psb_c_vectordev_mod.F90
index a9dd1ee0..56531c4d 100644
--- a/cuda/psb_c_vectordev_mod.F90
+++ b/cuda/psb_c_vectordev_mod.F90
@@ -313,16 +313,16 @@ module psb_c_vectordev_mod
     end function axpbyMultiVecDeviceFloatComplex
   end interface
 
-  interface abgdxyzMultiVecDevice
-    function abgdxyzMultiVecDeviceFloatComplex(n,alpha,beta,gamma,delta,deviceVecX,&
+  interface upd_xyzMultiVecDevice
+    function upd_xyzMultiVecDeviceFloatComplex(n,alpha,beta,gamma,delta,deviceVecX,&
          & deviceVecY,deviceVecZ) &
-         & result(res) bind(c,name='abgdxyzMultiVecDeviceFloatComplex')
+         & result(res) bind(c,name='upd_xyzMultiVecDeviceFloatComplex')
       use iso_c_binding
       integer(c_int)      :: res
       integer(c_int), value :: n
       complex(c_float_complex), value :: alpha, beta,gamma,delta
       type(c_ptr), value  :: deviceVecX, deviceVecY, deviceVecZ
-    end function abgdxyzMultiVecDeviceFloatComplex
+    end function upd_xyzMultiVecDeviceFloatComplex
   end interface
 
   interface xyzwMultiVecDevice
diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90
index dbc68111..e7e563ff 100644
--- a/cuda/psb_d_cuda_vect_mod.F90
+++ b/cuda/psb_d_cuda_vect_mod.F90
@@ -90,7 +90,7 @@ module psb_d_cuda_vect_mod
     procedure, pass(x) :: dot_a    => d_cuda_dot_a
     procedure, pass(y) :: axpby_v  => d_cuda_axpby_v
     procedure, pass(y) :: axpby_a  => d_cuda_axpby_a
-    procedure, pass(z) :: abgdxyz  => d_cuda_abgdxyz
+    procedure, pass(z) :: upd_xyz  => d_cuda_upd_xyz
     procedure, pass(y) :: mlt_v    => d_cuda_mlt_v
     procedure, pass(y) :: mlt_a    => d_cuda_mlt_a
     procedure, pass(z) :: mlt_a_2  => d_cuda_mlt_a_2
@@ -912,7 +912,7 @@ contains
 
   end subroutine d_cuda_axpby_v
 
-  subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  subroutine d_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
     use psi_serial_mod
     implicit none
     integer(psb_ipk_), intent(in)               :: m
@@ -946,7 +946,7 @@ contains
             if ((nx<m).or.(ny<m).or.(nz<m)) then
               info = psb_err_internal_error_
             else
-              info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
+              info = upd_xyzMultiVecDevice(m,alpha,beta,gamma,delta,&
                    & xx%deviceVect,yy%deviceVect,zz%deviceVect)
             end if
             call yy%set_dev()
@@ -972,7 +972,7 @@ contains
       call z%axpby(m,gamma,y,delta,info)
     end if
 
-  end subroutine d_cuda_abgdxyz
+  end subroutine d_cuda_upd_xyz
 
   subroutine d_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
     use psi_serial_mod
diff --git a/cuda/psb_d_vectordev_mod.F90 b/cuda/psb_d_vectordev_mod.F90
index b6c851dd..f5ec640e 100644
--- a/cuda/psb_d_vectordev_mod.F90
+++ b/cuda/psb_d_vectordev_mod.F90
@@ -313,16 +313,16 @@ module psb_d_vectordev_mod
     end function axpbyMultiVecDeviceDouble
   end interface
 
-  interface abgdxyzMultiVecDevice
-    function abgdxyzMultiVecDeviceDouble(n,alpha,beta,gamma,delta,deviceVecX,&
+  interface upd_xyzMultiVecDevice
+    function upd_xyzMultiVecDeviceDouble(n,alpha,beta,gamma,delta,deviceVecX,&
          & deviceVecY,deviceVecZ) &
-         & result(res) bind(c,name='abgdxyzMultiVecDeviceDouble')
+         & result(res) bind(c,name='upd_xyzMultiVecDeviceDouble')
       use iso_c_binding
       integer(c_int)      :: res
       integer(c_int), value :: n
       real(c_double), value :: alpha, beta,gamma,delta
       type(c_ptr), value  :: deviceVecX, deviceVecY, deviceVecZ
-    end function abgdxyzMultiVecDeviceDouble
+    end function upd_xyzMultiVecDeviceDouble
   end interface
 
   interface xyzwMultiVecDevice
diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90
index 2fab51ec..a2c69934 100644
--- a/cuda/psb_s_cuda_vect_mod.F90
+++ b/cuda/psb_s_cuda_vect_mod.F90
@@ -90,7 +90,7 @@ module psb_s_cuda_vect_mod
     procedure, pass(x) :: dot_a    => s_cuda_dot_a
     procedure, pass(y) :: axpby_v  => s_cuda_axpby_v
     procedure, pass(y) :: axpby_a  => s_cuda_axpby_a
-    procedure, pass(z) :: abgdxyz  => s_cuda_abgdxyz
+    procedure, pass(z) :: upd_xyz  => s_cuda_upd_xyz
     procedure, pass(y) :: mlt_v    => s_cuda_mlt_v
     procedure, pass(y) :: mlt_a    => s_cuda_mlt_a
     procedure, pass(z) :: mlt_a_2  => s_cuda_mlt_a_2
@@ -912,7 +912,7 @@ contains
 
   end subroutine s_cuda_axpby_v
 
-  subroutine s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  subroutine s_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
     use psi_serial_mod
     implicit none
     integer(psb_ipk_), intent(in)               :: m
@@ -946,7 +946,7 @@ contains
             if ((nx<m).or.(ny<m).or.(nz<m)) then
               info = psb_err_internal_error_
             else
-              info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
+              info = upd_xyzMultiVecDevice(m,alpha,beta,gamma,delta,&
                    & xx%deviceVect,yy%deviceVect,zz%deviceVect)
             end if
             call yy%set_dev()
@@ -972,7 +972,7 @@ contains
       call z%axpby(m,gamma,y,delta,info)
     end if
 
-  end subroutine s_cuda_abgdxyz
+  end subroutine s_cuda_upd_xyz
 
   subroutine s_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
     use psi_serial_mod
diff --git a/cuda/psb_s_vectordev_mod.F90 b/cuda/psb_s_vectordev_mod.F90
index a43fe0b6..38052ca5 100644
--- a/cuda/psb_s_vectordev_mod.F90
+++ b/cuda/psb_s_vectordev_mod.F90
@@ -313,16 +313,16 @@ module psb_s_vectordev_mod
     end function axpbyMultiVecDeviceFloat
   end interface
 
-  interface abgdxyzMultiVecDevice
-    function abgdxyzMultiVecDeviceFloat(n,alpha,beta,gamma,delta,deviceVecX,&
+  interface upd_xyzMultiVecDevice
+    function upd_xyzMultiVecDeviceFloat(n,alpha,beta,gamma,delta,deviceVecX,&
          & deviceVecY,deviceVecZ) &
-         & result(res) bind(c,name='abgdxyzMultiVecDeviceFloat')
+         & result(res) bind(c,name='upd_xyzMultiVecDeviceFloat')
       use iso_c_binding
       integer(c_int)      :: res
       integer(c_int), value :: n
       real(c_float), value :: alpha, beta,gamma,delta
       type(c_ptr), value  :: deviceVecX, deviceVecY, deviceVecZ
-    end function abgdxyzMultiVecDeviceFloat
+    end function upd_xyzMultiVecDeviceFloat
   end interface
 
   interface xyzwMultiVecDevice
diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90
index 769f9ea9..dfeafa6e 100644
--- a/cuda/psb_z_cuda_vect_mod.F90
+++ b/cuda/psb_z_cuda_vect_mod.F90
@@ -90,7 +90,7 @@ module psb_z_cuda_vect_mod
     procedure, pass(x) :: dot_a    => z_cuda_dot_a
     procedure, pass(y) :: axpby_v  => z_cuda_axpby_v
     procedure, pass(y) :: axpby_a  => z_cuda_axpby_a
-    procedure, pass(z) :: abgdxyz  => z_cuda_abgdxyz
+    procedure, pass(z) :: upd_xyz  => z_cuda_upd_xyz
     procedure, pass(y) :: mlt_v    => z_cuda_mlt_v
     procedure, pass(y) :: mlt_a    => z_cuda_mlt_a
     procedure, pass(z) :: mlt_a_2  => z_cuda_mlt_a_2
@@ -912,7 +912,7 @@ contains
 
   end subroutine z_cuda_axpby_v
 
-  subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+  subroutine z_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
     use psi_serial_mod
     implicit none
     integer(psb_ipk_), intent(in)               :: m
@@ -946,7 +946,7 @@ contains
             if ((nx<m).or.(ny<m).or.(nz<m)) then
               info = psb_err_internal_error_
             else
-              info = abgdxyzMultiVecDevice(m,alpha,beta,gamma,delta,&
+              info = upd_xyzMultiVecDevice(m,alpha,beta,gamma,delta,&
                    & xx%deviceVect,yy%deviceVect,zz%deviceVect)
             end if
             call yy%set_dev()
@@ -972,7 +972,7 @@ contains
       call z%axpby(m,gamma,y,delta,info)
     end if
 
-  end subroutine z_cuda_abgdxyz
+  end subroutine z_cuda_upd_xyz
 
   subroutine z_cuda_xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
     use psi_serial_mod
diff --git a/cuda/psb_z_vectordev_mod.F90 b/cuda/psb_z_vectordev_mod.F90
index 320f362a..b893fbdc 100644
--- a/cuda/psb_z_vectordev_mod.F90
+++ b/cuda/psb_z_vectordev_mod.F90
@@ -313,16 +313,16 @@ module psb_z_vectordev_mod
     end function axpbyMultiVecDeviceDoubleComplex
   end interface
 
-  interface abgdxyzMultiVecDevice
-    function abgdxyzMultiVecDeviceDoubleComplex(n,alpha,beta,gamma,delta,deviceVecX,&
+  interface upd_xyzMultiVecDevice
+    function upd_xyzMultiVecDeviceDoubleComplex(n,alpha,beta,gamma,delta,deviceVecX,&
          & deviceVecY,deviceVecZ) &
-         & result(res) bind(c,name='abgdxyzMultiVecDeviceDoubleComplex')
+         & result(res) bind(c,name='upd_xyzMultiVecDeviceDoubleComplex')
       use iso_c_binding
       integer(c_int)      :: res
       integer(c_int), value :: n
       complex(c_double_complex), value :: alpha, beta,gamma,delta
       type(c_ptr), value  :: deviceVecX, deviceVecY, deviceVecZ
-    end function abgdxyzMultiVecDeviceDoubleComplex
+    end function upd_xyzMultiVecDeviceDoubleComplex
   end interface
 
   interface xyzwMultiVecDevice
diff --git a/cuda/spgpu/kernels/Makefile b/cuda/spgpu/kernels/Makefile
index c4ae38a0..748b66d0 100644
--- a/cuda/spgpu/kernels/Makefile
+++ b/cuda/spgpu/kernels/Makefile
@@ -11,15 +11,15 @@ LIBNAME=$(UP)/libspgpu.a
 CINCLUDES=-I$(INCDIR)
 
 OBJS=cabs.o  camax.o  casum.o  caxpby.o  caxy.o  cdot.o  cgath.o \
-  cnrm2.o cscal.o cscat.o csetscal.o cabgdxyz.o\
-  dabs.o damax.o dasum.o daxpby.o daxy.o ddot.o dgath.o dabgdxyz.o\
+  cnrm2.o cscal.o cscat.o csetscal.o cupd_xyz.o\
+  dabs.o damax.o dasum.o daxpby.o daxy.o ddot.o dgath.o dupd_xyz.o\
   dia_cspmv.o dia_dspmv.o dia_sspmv.o dia_zspmv.o dnrm2.o \
   dscal.o dscat.o dsetscal.o ell_ccsput.o ell_cspmv.o \
   ell_dcsput.o ell_dspmv.o ell_scsput.o ell_sspmv.o ell_zcsput.o ell_zspmv.o \
   hdia_cspmv.o hdia_dspmv.o hdia_sspmv.o hdia_zspmv.o hell_cspmv.o hell_dspmv.o \
   hell_sspmv.o hell_zspmv.o igath.o iscat.o isetscal.o sabs.o samax.o sasum.o \
-  saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o sabgdxyz.o\
-  zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o zabgdxyz.o \
+  saxpby.o saxy.o sdot.o sgath.o snrm2.o sscal.o sscat.o ssetscal.o zabs.o zamax.o supd_xyz.o\
+  zasum.o zaxpby.o zaxy.o zdot.o zgath.o znrm2.o zscal.o zscat.o zsetscal.o zupd_xyz.o \
   sxyzw.o cxyzw.o dxyzw.o zxyzw.o
 
 objs: $(OBJS)
diff --git a/cuda/spgpu/kernels/cabgdxyz.cu b/cuda/spgpu/kernels/cupd_xyz.cu
similarity index 88%
rename from cuda/spgpu/kernels/cabgdxyz.cu
rename to cuda/spgpu/kernels/cupd_xyz.cu
index a85b3873..34d637c6 100644
--- a/cuda/spgpu/kernels/cabgdxyz.cu
+++ b/cuda/spgpu/kernels/cupd_xyz.cu
@@ -31,7 +31,7 @@ extern "C"
 
 #define BLOCK_SIZE 512
 
-__global__ void spgpuCabgdxyz_krn(int n, cuFloatComplex  alpha, cuFloatComplex  beta,
+__global__ void spgpuCupd_xyz_krn(int n, cuFloatComplex  alpha, cuFloatComplex  beta,
 				  cuFloatComplex  gamma, cuFloatComplex  delta,
 				  cuFloatComplex * x, cuFloatComplex  *y, cuFloatComplex  *z)
 {
@@ -55,7 +55,7 @@ __global__ void spgpuCabgdxyz_krn(int n, cuFloatComplex  alpha, cuFloatComplex
 }
 
 
-void spgpuCabgdxyz(spgpuHandle_t handle,
+void spgpuCupd_xyz(spgpuHandle_t handle,
 		   int n,
 		   cuFloatComplex  alpha,
 		   cuFloatComplex  beta,
@@ -74,7 +74,7 @@ void spgpuCabgdxyz(spgpuHandle_t handle,
 	num_blocks     = num_blocks_mp*num_mp;
 	dim3 grid(num_blocks);
 
-	spgpuCabgdxyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
+	spgpuCupd_xyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
 								   x, y, z);
 }
 
diff --git a/cuda/spgpu/kernels/dabgdxyz.cu b/cuda/spgpu/kernels/dupd_xyz.cu
similarity index 88%
rename from cuda/spgpu/kernels/dabgdxyz.cu
rename to cuda/spgpu/kernels/dupd_xyz.cu
index f2b18e02..38957fe1 100644
--- a/cuda/spgpu/kernels/dabgdxyz.cu
+++ b/cuda/spgpu/kernels/dupd_xyz.cu
@@ -31,7 +31,7 @@ extern "C"
 
 #define BLOCK_SIZE 512
 
-__global__ void spgpuDabgdxyz_krn(int n, double alpha, double beta, double gamma, double delta,
+__global__ void spgpuDupd_xyz_krn(int n, double alpha, double beta, double gamma, double delta,
 				  double* x, double *y, double *z)
 {
 	int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
@@ -54,7 +54,7 @@ __global__ void spgpuDabgdxyz_krn(int n, double alpha, double beta, double gamma
 }
 
 
-void spgpuDabgdxyz(spgpuHandle_t handle,
+void spgpuDupd_xyz(spgpuHandle_t handle,
 		   int n,
 		   double alpha,
 		   double beta,
@@ -73,7 +73,7 @@ void spgpuDabgdxyz(spgpuHandle_t handle,
 	num_blocks     = num_blocks_mp*num_mp;
 	dim3 grid(num_blocks);
 
-	spgpuDabgdxyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
+	spgpuDupd_xyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
 								   x, y, z);
 }
 
diff --git a/cuda/spgpu/kernels/sabgdxyz.cu b/cuda/spgpu/kernels/supd_xyz.cu
similarity index 88%
rename from cuda/spgpu/kernels/sabgdxyz.cu
rename to cuda/spgpu/kernels/supd_xyz.cu
index 8c137ed3..d4ad1d0e 100644
--- a/cuda/spgpu/kernels/sabgdxyz.cu
+++ b/cuda/spgpu/kernels/supd_xyz.cu
@@ -31,7 +31,7 @@ extern "C"
 
 #define BLOCK_SIZE 512
 
-__global__ void spgpuSabgdxyz_krn(int n, float alpha, float beta, float gamma, float delta,
+__global__ void spgpuSupd_xyz_krn(int n, float alpha, float beta, float gamma, float delta,
 				  float* x, float *y, float *z)
 {
 	int id = threadIdx.x + BLOCK_SIZE*blockIdx.x;
@@ -54,7 +54,7 @@ __global__ void spgpuSabgdxyz_krn(int n, float alpha, float beta, float gamma, f
 }
 
 
-void spgpuSabgdxyz(spgpuHandle_t handle,
+void spgpuSupd_xyz(spgpuHandle_t handle,
 		   int n,
 		   float alpha,
 		   float beta,
@@ -73,7 +73,7 @@ void spgpuSabgdxyz(spgpuHandle_t handle,
 	num_blocks     = num_blocks_mp*num_mp;
 	dim3 grid(num_blocks);
 
-	spgpuSabgdxyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
+	spgpuSupd_xyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
 								   x, y, z);
 }
 
diff --git a/cuda/spgpu/kernels/zabgdxyz.cu b/cuda/spgpu/kernels/zupd_xyz.cu
similarity index 88%
rename from cuda/spgpu/kernels/zabgdxyz.cu
rename to cuda/spgpu/kernels/zupd_xyz.cu
index 48def937..a3936ee9 100644
--- a/cuda/spgpu/kernels/zabgdxyz.cu
+++ b/cuda/spgpu/kernels/zupd_xyz.cu
@@ -31,7 +31,7 @@ extern "C"
 
 #define BLOCK_SIZE 512
 
-__global__ void spgpuZabgdxyz_krn(int n, cuDoubleComplex  alpha, cuDoubleComplex  beta,
+__global__ void spgpuZupd_xyz_krn(int n, cuDoubleComplex  alpha, cuDoubleComplex  beta,
 				  cuDoubleComplex  gamma, cuDoubleComplex  delta,
 				  cuDoubleComplex * x, cuDoubleComplex  *y, cuDoubleComplex  *z)
 {
@@ -55,7 +55,7 @@ __global__ void spgpuZabgdxyz_krn(int n, cuDoubleComplex  alpha, cuDoubleComplex
 }
 
 
-void spgpuZabgdxyz(spgpuHandle_t handle,
+void spgpuZupd_xyz(spgpuHandle_t handle,
 		   int n,
 		   cuDoubleComplex  alpha,
 		   cuDoubleComplex  beta,
@@ -74,7 +74,7 @@ void spgpuZabgdxyz(spgpuHandle_t handle,
 	num_blocks     = num_blocks_mp*num_mp;
 	dim3 grid(num_blocks);
 
-	spgpuZabgdxyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
+	spgpuZupd_xyz_krn<<<grid, block, 0, handle->currentStream>>>(n, alpha, beta, gamma, delta,
 								   x, y, z);
 }
 
diff --git a/cuda/spgpu/vector.h b/cuda/spgpu/vector.h
index d08e6edd..26c3443d 100644
--- a/cuda/spgpu/vector.h
+++ b/cuda/spgpu/vector.h
@@ -182,7 +182,7 @@ void spgpuSaxpby(spgpuHandle_t handle,
 	__device float* x);
 
 
-void spgpuSabgdxyz(spgpuHandle_t handle,
+void spgpuSupd_xyz(spgpuHandle_t handle,
 		   int n,
 		   float alpha,
 		   float beta,
@@ -487,7 +487,7 @@ void spgpuDaxpby(spgpuHandle_t handle,
 	__device double* x);
 	
 
-void spgpuDabgdxyz(spgpuHandle_t handle,
+void spgpuDupd_xyz(spgpuHandle_t handle,
 		   int n,
 		   double alpha,
 		   double beta,
@@ -789,7 +789,7 @@ void spgpuCaxpby(spgpuHandle_t handle,
 	__device cuFloatComplex* x);
 
 
-void spgpuCabgdxyz(spgpuHandle_t handle,
+void spgpuCupd_xyz(spgpuHandle_t handle,
 		   int n,
 		   cuFloatComplex alpha,
 		   cuFloatComplex beta,
@@ -1092,7 +1092,7 @@ void spgpuZaxpby(spgpuHandle_t handle,
 	__device cuDoubleComplex* x);
 	
 
-void spgpuZabgdxyz(spgpuHandle_t handle,
+void spgpuZupd_xyz(spgpuHandle_t handle,
 		   int n,
 		   cuDoubleComplex alpha,
 		   cuDoubleComplex beta,
diff --git a/cuda/svectordev.c b/cuda/svectordev.c
index bf7545bb..cfaef5ce 100644
--- a/cuda/svectordev.c
+++ b/cuda/svectordev.c
@@ -241,7 +241,7 @@ int axpbyMultiVecDeviceFloat(int n,float alpha, void* devMultiVecX,
   return(i);
 }
 
-int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, 
+int upd_xyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, 
 				void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ)
 { int j=0, i=0;
   int pitch = 0;
@@ -253,7 +253,7 @@ int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float
   if ((n > devVecY->size_) || (n>devVecX->size_ )) 
     return SPGPU_UNSUPPORTED;
 
-  spgpuSabgdxyz(handle,n, alpha,beta,gamma,delta, 
+  spgpuSupd_xyz(handle,n, alpha,beta,gamma,delta, 
 	      (float*)devVecX->v_,(float*) devVecY->v_,(float*) devVecZ->v_);
   return(i);
 }
diff --git a/cuda/svectordev.h b/cuda/svectordev.h
index 363c0108..d5c85f78 100644
--- a/cuda/svectordev.h
+++ b/cuda/svectordev.h
@@ -67,7 +67,7 @@ int asumMultiVecDeviceFloat(float* y_res, int n, void* devVecA);
 int dotMultiVecDeviceFloat(float* y_res, int n, void* devVecA, void* devVecB);
 
 int axpbyMultiVecDeviceFloat(int n, float alpha, void* devVecX, float beta, void* devVecY);
-int abgdxyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, 
+int upd_xyzMultiVecDeviceFloat(int n,float alpha,float beta, float gamma, float delta, 
 			       void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
 int xyzwMultiVecDeviceFloat(int n,float a,float b, float c, float d, float e, float f, 
 			    void* devMultiVecX, void* devMultiVecY,
diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c
index e9f0cec7..d7d88f1b 100644
--- a/cuda/zvectordev.c
+++ b/cuda/zvectordev.c
@@ -234,7 +234,7 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMulti
   return(i);
 }
 
-int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex  alpha,
+int upd_xyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex  alpha,
 				       cuDoubleComplex  beta, cuDoubleComplex  gamma, cuDoubleComplex  delta, 
 				       void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ)
 { int j=0, i=0;
@@ -247,7 +247,7 @@ int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex  alpha,
   if ((n > devVecY->size_) || (n>devVecX->size_ )) 
     return SPGPU_UNSUPPORTED;
 
-  spgpuZabgdxyz(handle,n, alpha,beta,gamma,delta, 
+  spgpuZupd_xyz(handle,n, alpha,beta,gamma,delta, 
 	      (cuDoubleComplex *)devVecX->v_,(cuDoubleComplex *) devVecY->v_,(cuDoubleComplex *) devVecZ->v_);
   return(i);
 }
diff --git a/cuda/zvectordev.h b/cuda/zvectordev.h
index ae623bdb..e15802f0 100644
--- a/cuda/zvectordev.h
+++ b/cuda/zvectordev.h
@@ -77,7 +77,7 @@ int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n,
 
 int axpbyMultiVecDeviceDoubleComplex(int n, cuDoubleComplex alpha, void* devVecX, 
 				     cuDoubleComplex beta, void* devVecY);
-int abgdxyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex  alpha,
+int upd_xyzMultiVecDeviceDoubleComplex(int n,cuDoubleComplex  alpha,
 				       cuDoubleComplex  beta, cuDoubleComplex  gamma, cuDoubleComplex  delta, 
 				       void* devMultiVecX, void* devMultiVecY, void* devMultiVecZ);
 int xyzwMultiVecDeviceDoubleComplex(int n,cuDoubleComplex  a, cuDoubleComplex  b,
diff --git a/krylov/psb_dcg.F90 b/krylov/psb_dcg.F90
index caebb712..669573be 100644
--- a/krylov/psb_dcg.F90
+++ b/krylov/psb_dcg.F90
@@ -129,8 +129,6 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
   logical                     :: do_cond
   character(len=20)           :: name
   character(len=*), parameter :: methdname='CG'
-  logical, parameter  :: do_timings=.true.
-  integer(psb_ipk_), save  :: cg_vect=-1, cg_mv=-1, cg_prec=-1
 
   info = psb_success_
   name = 'psb_dcg'
@@ -151,12 +149,6 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
     call psb_errpush(info,name)
     goto 9999
   endif
-  if ((do_timings).and.(cg_vect==-1))       &
-       & cg_vect = psb_get_timer_idx("CG: vector ops ")
-  if ((do_timings).and.(cg_mv==-1))       &
-       & cg_mv = psb_get_timer_idx("CG: MV product")
-  if ((do_timings).and.(cg_prec==-1))       &
-       & cg_prec = psb_get_timer_idx("CG: preconditioner")
 
 
   mglob = desc_a%get_global_rows()
@@ -227,21 +219,17 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
 ! =   
 ! =    r0 = b-Ax0
 ! =   
-    if (do_timings) call psb_tic(cg_vect)
     if (itx>= itmax_) exit restart 
+
     it = 0
     call psb_geaxpby(done,b,dzero,r,desc_a,info)
-    if (do_timings) call psb_toc(cg_vect)
-    if (do_timings) call psb_tic(cg_mv)
     if (info == psb_success_) call psb_spmm(-done,a,x,done,r,desc_a,info,work=aux)
     if (info /= psb_success_) then 
       info=psb_err_from_subroutine_non_
       call psb_errpush(info,name)
       goto 9999
     end if
-    if (do_timings) call psb_toc(cg_mv)
-    
-    if (do_timings) call psb_tic(cg_vect)
+
     rho = dzero
     
     call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info)
@@ -249,18 +237,13 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
       call psb_errpush(psb_err_from_subroutine_non_,name)
       goto 9999
     End If
-    if (do_timings) call psb_toc(cg_vect)
-        
+
     iteration:  do 
 
       it   = it + 1
       itx = itx + 1
-      if (do_timings) call psb_tic(cg_prec)
-    
-      call prec%apply(r,z,desc_a,info,work=aux)
-      if (do_timings) call psb_toc(cg_prec)
-      if (do_timings) call psb_tic(cg_vect)
 
+      call prec%apply(r,z,desc_a,info,work=aux)
       rho_old = rho
       rho     = psb_gedot(r,z,desc_a,info)
 
@@ -271,18 +254,13 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
           if (debug_level >= psb_debug_ext_)&
                & write(debug_unit,*) me,' ',trim(name),&
                & ': CG Iteration breakdown rho'
-          if (do_timings) call psb_toc(cg_vect)
           exit iteration
         endif
         beta = rho/rho_old
         call psb_geaxpby(done,z,beta,p,desc_a,info)
       end if
-      if (do_timings) call psb_toc(cg_vect)
-      if (do_timings) call psb_tic(cg_mv)
 
       call psb_spmm(done,a,p,dzero,q,desc_a,info,work=aux)
-      if (do_timings) call psb_toc(cg_mv)
-      if (do_timings) call psb_tic(cg_vect)
       sigma = psb_gedot(p,q,desc_a,info)
       if (sigma == dzero) then
           if (debug_level >= psb_debug_ext_)&
@@ -315,7 +293,6 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
 
     end do iteration
   end do restart
-  if (do_timings) call psb_toc(cg_vect)
   if (do_cond) then 
     if (me == psb_root_) then 
 #if defined(HAVE_LAPACK) 
diff --git a/test/cudakern/Makefile b/test/cudakern/Makefile
index 5d938973..41cef197 100755
--- a/test/cudakern/Makefile
+++ b/test/cudakern/Makefile
@@ -16,48 +16,26 @@ LDLIBS=$(PSBGPULDLIBS)
 
 FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR)  $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES)
 
-ZTOBJS=z_file_spmv.o data_input.o
-CTOBJS=c_file_spmv.o data_input.o
-DTOBJS=d_file_spmv.o data_input.o
-STOBJS=s_file_spmv.o data_input.o
-DPGOBJS=dpdegenmv.o
-SPGOBJS=spdegenmv.o
+DPGOBJS=dpdegenmv.o data_input.o
+SPGOBJS=spdegenmv.o data_input.o
 EXEDIR=./runs
 
-all: dir pgen file
+all: dir pgen 
 pgen: dpdegenmv spdegenmv 
-file: s_file_spmv c_file_spmv d_file_spmv z_file_spmv
-dpdegenmv spdegenmv s_file_spmv c_file_spmv d_file_spmv z_file_spmv: dir
+dpdegenmv spdegenmv: dir 
 dir:
 	(if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi)
-
-dpdegenmv: $(DPGOBJS)
+dpdegenmv.o spdegenmv.o: data_input.o
+dpdegenmv: $(DPGOBJS) 
 	$(FLINK) $(LOPT) $(DPGOBJS) -fopenmp -o dpdegenmv $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) 
 	/bin/mv dpdegenmv $(EXEDIR) 
 spdegenmv: $(SPGOBJS)
 	$(FLINK) $(LOPT) $(SPGOBJS) -o spdegenmv $(PSBLAS_LIB) $(LDLIBS) 
 	/bin/mv spdegenmv $(EXEDIR) 
 
-z_file_spmv: $(ZTOBJS)
-	$(FLINK) $(LOPT) $(ZTOBJS) -o z_file_spmv $(PSBLAS_LIB) $(LDLIBS) 
-	/bin/mv z_file_spmv $(EXEDIR) 
-c_file_spmv: $(CTOBJS)
-	$(FLINK) $(LOPT) $(CTOBJS) -o c_file_spmv $(PSBLAS_LIB) $(LDLIBS) 
-	/bin/mv c_file_spmv $(EXEDIR) 
-d_file_spmv: $(DTOBJS)
-	$(FLINK) $(LOPT) $(DTOBJS) -o d_file_spmv $(PSBLAS_LIB) $(LDLIBS) 
-	/bin/mv d_file_spmv $(EXEDIR) 
-s_file_spmv: $(STOBJS)
-	$(FLINK) $(LOPT) $(STOBJS) -o s_file_spmv $(PSBLAS_LIB) $(LDLIBS) 
-	/bin/cp s_file_spmv $(EXEDIR) 
-
-d_file_spmv.o s_file_spmv.o z_file_spmv.o c_file_spmv.o: data_input.o
-
 clean: 
 	/bin/rm -f $(DTOBJS) $(STOBJS) $(DPGOBJS) $(SPGOBJS) $(ZTOBJS) $(CTOBJS)  \
-	$(EXEDIR)/dpdegenmv $(EXEDIR)/spdegenmv \
-	$(EXEDIR)/d_file_spmv $(EXEDIR)/s_file_spmv \
-	$(EXEDIR)/z_file_spmv $(EXEDIR)/c_file_spmv 
+	$(EXEDIR)/dpdegenmv $(EXEDIR)/spdegenmv *mod
 
 lib:
 	(cd ../../; make library)
diff --git a/test/cudakern/c_file_spmv.F90 b/test/cudakern/c_file_spmv.F90
deleted file mode 100644
index 2f9840ec..00000000
--- a/test/cudakern/c_file_spmv.F90
+++ /dev/null
@@ -1,491 +0,0 @@
-!    
-!                Parallel Sparse BLAS  GPU plugin
-!      (C) Copyright 2013
-!                         Salvatore Filippone
-!                         Alessandro Fanfarillo
-!   
-!    Redistribution and use in source and binary forms, with or without
-!    modification, are permitted provided that the following conditions
-!    are met:
-!      1. Redistributions of source code must retain the above copyright
-!         notice, this list of conditions and the following disclaimer.
-!      2. Redistributions in binary form must reproduce the above copyright
-!         notice, this list of conditions, and the following disclaimer in the
-!         documentation and/or other materials provided with the distribution.
-!      3. The name of the PSBLAS group or the names of its contributors may
-!         not be used to endorse or promote products derived from this
-!         software without specific written permission.
-!   
-!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
-!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-!    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
-program c_file_spmv
-  use psb_base_mod
-  use psb_util_mod
-  use psb_ext_mod
-#ifdef HAVE_GPU
-  use psb_gpu_mod
-#endif
-  use data_input
-  implicit none
-
-  ! input parameters
-  character(len=200) :: mtrx_file
-
-  ! sparse matrices
-  type(psb_cspmat_type) :: a, aux_a, agpu
-
-  ! dense matrices
-  complex(psb_spk_), allocatable, target :: aux_b(:,:), d(:)
-  complex(psb_spk_), allocatable , save  :: x_col_glob(:), r_col_glob(:)
-  complex(psb_spk_), pointer  :: b_col_glob(:)
-  type(psb_c_vect_type)    :: b_col, x_col, r_col
-  type(psb_c_vect_type) :: xg, bg, xv, bv
-#ifdef HAVE_GPU
-  type(psb_c_vect_gpu)  :: vmold
-#endif
-  complex(psb_spk_), allocatable :: xc1(:),xc2(:)
-  ! communications data structure
-  type(psb_desc_type):: desc_a
-
-  type(psb_ctxt_type) :: ctxt
-  integer            :: iam, np
-  integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes
-  real(psb_spk_)    :: damatsize, dgmatsize
-  complex(psb_spk_) :: err, eps
-
-  character(len=5)   :: acfmt, agfmt
-  character(len=20)  :: name
-  character(len=2)   :: filefmt
-  integer, parameter :: iunit=12
-  integer, parameter :: times=2000 
-  integer, parameter :: ntests=200, ngpu=50, ncnv=20 
-
-  type(psb_c_coo_sparse_mat), target   :: acoo
-  type(psb_c_csr_sparse_mat), target   :: acsr
-  type(psb_c_ell_sparse_mat), target   :: aell
-  type(psb_c_hll_sparse_mat), target   :: ahll
-#ifdef HAVE_GPU
-  type(psb_c_elg_sparse_mat), target   :: aelg
-  type(psb_c_csrg_sparse_mat), target  :: acsrg
-  type(psb_c_hybg_sparse_mat), target  :: ahybg
-  type(psb_c_hlg_sparse_mat), target   :: ahlg
-#endif
-  class(psb_c_base_sparse_mat), pointer :: acmold, agmold
-  ! other variables
-  integer            :: i,info,j,nrt, ns, nr, ipart, ig, nrg
-  integer            :: internal, m,ii,nnzero
-  real(psb_dpk_) :: t0,t1, t2, tprec, flops
-  real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,&
-       & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1
-  integer :: nrhs, nrow, n_row, dim, nv, ne
-  integer, allocatable :: ivg(:), ipv(:)
-
-
-  call psb_init(ctxt)
-  call psb_info(ctxt,iam,np)
-#ifdef HAVE_GPU
-  call psb_gpu_init(ctxt)
-#endif
-  if (iam < 0) then 
-    ! This should not happen, but just in case
-    call psb_exit(ctxt)
-    stop
-  endif
-
-
-  name='file_spmv'
-  if(psb_get_errstatus() /= 0) goto 9999
-  info=psb_success_  
-  call psb_set_errverbosity(2)
-  if (iam == psb_root_) then 
-    write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
-    write(*,*) 'This is the ',trim(name),' sample program'
-  end if
-#ifdef HAVE_GPU
-  write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount()
-  write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName())  
-#endif
-
-  if (iam == 0) then 
-    write(*,*) 'Matrix? '
-    call read_data(mtrx_file,psb_inp_unit)
-    write(*,*) 'file format'
-    call read_data(filefmt,psb_inp_unit)
-    write(*,*) 'CPU format'
-    call read_data(acfmt,psb_inp_unit)
-    write(*,*) 'GPU format'
-    call read_data(agfmt,psb_inp_unit)
-    write(*,*) 'distribution '
-    call read_data(ipart,psb_inp_unit)
-    write(*,*) 'Read all data, going on'
-  end if
-  call psb_bcast(ctxt,mtrx_file)
-  call psb_bcast(ctxt,filefmt)
-  call psb_bcast(ctxt,acfmt)
-  call psb_bcast(ctxt,agfmt)
-  call psb_bcast(ctxt,ipart)
-  call psb_barrier(ctxt)
-  t0 = psb_wtime()  
-  ! read the input matrix to be processed and (possibly) the rhs 
-  nrhs = 1
-
-  if (iam==psb_root_) then
-    select case(psb_toupper(filefmt)) 
-    case('MM') 
-      ! For Matrix Market we have an input file for the matrix
-      ! and an (optional) second file for the RHS. 
-      call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file)
-
-    case ('HB')
-      ! For Harwell-Boeing we have a single file which may or may not
-      ! contain an RHS.
-      call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file)
-
-    case default
-      info = -1 
-      write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt
-    end select
-    if (info /= 0) then
-      write(psb_err_unit,*) 'Error while reading input matrix '
-      call psb_abort(ctxt)
-    end if
-
-    !
-    ! Always get nnz from original matrix.
-    ! Some formats add fill-in and do not keep track
-    ! of how many were added. So if the original matrix 
-    ! contained some extra zeros, the count of entries
-    ! is not recoverable exactly.
-    !    
-    nrt  = aux_a%get_nrows()
-    annz = aux_a%get_nzeros()
-    call psb_bcast(ctxt,annz)
-    call psb_bcast(ctxt,nrt)
-
-    write(psb_out_unit,'("Generating an rhs...")')
-    write(psb_out_unit,'(" ")')
-    call psb_realloc(nrt,1,aux_b,info)
-    if (info /= 0) then
-      call psb_errpush(4000,name)
-      goto 9999
-    endif
-
-    b_col_glob => aux_b(:,1)
-    do i=1, nrt
-      b_col_glob(i) = 1.d0
-    enddo
-
-  else
-
-    call psb_bcast(ctxt,annz)
-    call psb_bcast(ctxt,nrt)
-
-  end if
-
-
-  select case(psb_toupper(acfmt))
-  case('COO')
-    acmold => acoo
-  case('CSR')
-    acmold => acsr
-  case('ELL')
-    acmold => aell
-  case('HLL')
-    acmold => ahll
-  case default
-    write(*,*) 'Unknown format defaulting to CSR'
-    acmold => acsr
-  end select
-
-#ifdef HAVE_GPU
-  select case(psb_toupper(agfmt))
-  case('ELG')
-    agmold => aelg
-  case('HLG')
-    agmold => ahlg
-  case('CSRG')
-    agmold => acsrg
-  case('HYBG')
-    agmold => ahybg
-  case default
-    write(*,*) 'Unknown format defaulting to HLG'
-    agmold => ahlg
-  end select
-#endif
-
-
-  ! switch over different partition types
-  if (ipart == 0) then 
-    call psb_barrier(ctxt)
-    if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")')
-    allocate(ivg(nrt),ipv(np))
-    do i=1,nrt
-      call part_block(i,nrt,np,ipv,nv)
-      ivg(i) = ipv(1)
-    enddo
-    call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg)
-  else if (ipart == 2) then 
-    if (iam==psb_root_) then 
-      write(psb_out_unit,'("Partition type: graph")')
-      write(psb_out_unit,'(" ")')
-      !      write(psb_err_unit,'("Build type: graph")')
-      call build_mtpart(aux_a,np)
-    endif
-    call psb_barrier(ctxt)
-    call distr_mtpart(psb_root_,ctxt)
-    call getv_mtpart(ivg)
-    call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg)
-  else 
-    if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")')
-    call psb_matdist(aux_a, a,  ctxt,desc_a,info,parts=part_block)
-  end if
-
-  call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_)
-
-  t2 = psb_wtime() - t0
-
-  call psb_amx(ctxt, t2)
-
-  if (iam==psb_root_) then
-    write(psb_out_unit,'(" ")')
-    write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2
-    write(psb_out_unit,'(" ")')
-  end if
-  call a%cscnv(aux_a,info,mold=acoo)
-  tcnvcsr = 0
-  tcnvgpu = 0
-  nr       = desc_a%get_local_rows()
-  nrg      = desc_a%get_global_rows() 
-  call psb_geall(x_col,desc_a,info)
-  do i=1, nr
-    call desc_a%l2g(i,ig,info)
-    call psb_geins(ione,(/ig/),(/(cone + (cone*ig)/nrg)/),x_col,desc_a,info)
-  end do
-  call psb_geasb(x_col,desc_a,info)
-  do j=1, ncnv
-    call aux_a%cscnv(a,info,mold=acoo)
-    call psb_barrier(ctxt)
-    t1 = psb_wtime()
-    call a%cscnv(info,mold=acmold)
-    t2 = psb_Wtime() -t1
-    call psb_amx(ctxt,t2)
-    tcnvcsr = tcnvcsr + t2
-    if (j==1) tcnvc1 = t2
-    xc1 = x_col%get_vect()
-    call xv%bld(xc1)
-    call psb_geasb(bv,desc_a,info,scratch=.true.)
-    
-#ifdef HAVE_GPU
-    
-    call aux_a%cscnv(agpu,info,mold=acoo)
-    call xg%bld(xc1,mold=vmold)
-    call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold)
-    call psb_barrier(ctxt)
-    t1 = psb_wtime()
-    call agpu%cscnv(info,mold=agmold)
-    call psb_gpu_DeviceSync()
-    t2 = psb_Wtime() -t1
-    call psb_amx(ctxt,t2)
-    if (j==1) tcnvg1 = t2
-    tcnvgpu = tcnvgpu + t2
-#endif
-  end do
-
-  call psb_barrier(ctxt)
-  t1 = psb_wtime()
-  do i=1,ntests 
-    call psb_spmm(cone,a,xv,czero,bv,desc_a,info)
-  end do
-  call psb_barrier(ctxt)
-  t2 = psb_wtime() - t1
-  call psb_amx(ctxt,t2)
-
-#ifdef HAVE_GPU
-  ! FIXME: cache flush needed here
-  call psb_barrier(ctxt)
-  tt1 = psb_wtime()
-  do i=1,ntests 
-    call psb_spmm(cone,agpu,xv,czero,bg,desc_a,info)
-    if ((info /= 0).or.(psb_get_errstatus()/=0)) then 
-      write(0,*) 'From 1 spmm',info,i,ntests
-      call psb_error()
-      stop
-    end if
-
-  end do
-  call psb_gpu_DeviceSync()
-  call psb_barrier(ctxt)
-  tt2 = psb_wtime() - tt1
-  call psb_amx(ctxt,tt2)
-  xc1 = bv%get_vect()
-  xc2 = bg%get_vect()
-  nr       = desc_a%get_local_rows() 
-  eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
-  call psb_amx(ctxt,eps)
-  if (iam==0) write(*,*) 'Max diff on xGPU',eps
-
-  call xg%sync()
-  ! FIXME: cache flush needed here
-
-  call psb_barrier(ctxt)
-  gt1 = psb_wtime()
-  do i=1,ntests*ngpu
-    call psb_spmm(cone,agpu,xg,czero,bg,desc_a,info)
-    if ((info /= 0).or.(psb_get_errstatus()/=0)) then 
-      write(0,*) 'From 2 spmm',info,i,ntests
-      call psb_error()
-      stop
-    end if
-
-  end do
-  ! For timing purposes we need to make sure all threads
-  ! in the device are done. 
-  call psb_gpu_DeviceSync()
-  call psb_barrier(ctxt)
-  gt2 = psb_wtime() - gt1
-  call psb_amx(ctxt,gt2)
-  call bg%sync()
-  xc1 = bv%get_vect()
-  xc2 = bg%get_vect()
-  call psb_geaxpby(-cone,bg,+cone,bv,desc_a,info)
-  eps = psb_geamax(bv,desc_a,info)
-
-  call psb_amx(ctxt,t2)
-  nr       = desc_a%get_local_rows() 
-  eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
-  call psb_amx(ctxt,eps)
-  if (iam==0) write(*,*) 'Max diff on GPU',eps
-#endif
-
-
-  amatsize = a%sizeof()
-  agmatsize = agpu%sizeof()
-  damatsize = amatsize
-  damatsize = damatsize/(1024*1024)
-  dgmatsize = agmatsize
-  dgmatsize = dgmatsize/(1024*1024)
-  descsize = psb_sizeof(desc_a)
-  call psb_sum(ctxt,damatsize)
-  call psb_sum(ctxt,dgmatsize)
-  call psb_sum(ctxt,descsize)
-
-  if (iam == psb_root_) then
-    write(psb_out_unit,'("Matrix: ",a)') mtrx_file
-    write(psb_out_unit,&
-         &'("Test on                          : ",i20," processors")') np
-    write(psb_out_unit,&
-         &'("Size of matrix                   : ",i20,"           ")') nrt
-    write(psb_out_unit,&
-         &'("Number of nonzeros               : ",i20,"           ")') annz
-    write(psb_out_unit,&
-         &'("Memory occupation CPU  (MBytes)  : ",f20.2,"           ")') damatsize
-    write(psb_out_unit,&
-         &'("Memory occupation GPU  (MBytes)  : ",f20.2,"           ")') dgmatsize
-    write(psb_out_unit,&
-         &'("Memory occupation CPU  (Bytes)   : ",i24,"           ")') amatsize
-    write(psb_out_unit,&
-         &'("Memory occupation GPU  (Bytes)   : ",i24,"           ")') agmatsize
-    flops  = ntests*(2.d0*annz)
-    tflops = flops
-    gflops = flops * ngpu
-    write(psb_out_unit,'("Storage type for    A: ",a)') a%get_fmt()
-#ifdef HAVE_GPU
-    write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt()
-    write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')&
-         & tcnvc1
-    write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')&
-         & tcnvcsr
-    write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')&
-         & tcnvcsr/ncnv
-    write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')&
-         & tcnvg1
-    write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')&
-         & tcnvgpu
-    write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')&
-         & tcnvgpu/ncnv
-
-#endif
-    write(psb_out_unit,&
-         & '("Number of flops (",i0," prod)        : ",F20.0,"           ")') &
-         &  ntests,flops
-
-    flops  = flops / (t2)
-    tflops = tflops / (tt2)
-    gflops = gflops / (gt2)
-    write(psb_out_unit,'("Time for ",i6," products (s) (CPU)   : ",F20.3)')&
-         &  ntests,t2
-    write(psb_out_unit,'("Time per product    (ms)     (CPU)   : ",F20.3)')&
-         & t2*1.d3/(1.d0*ntests)
-    write(psb_out_unit,'("MFLOPS                       (CPU)   : ",F20.3)')&
-         & flops/1.d6
-#ifdef HAVE_GPU
-
-    write(psb_out_unit,'("Time for ",i6," products (s) (xGPU)  : ",F20.3)')&
-         & ntests, tt2
-    write(psb_out_unit,'("Time per product    (ms)     (xGPU)  : ",F20.3)')&
-         & tt2*1.d3/(1.d0*ntests)
-    write(psb_out_unit,'("MFLOPS                       (xGPU)  : ",F20.3)')&
-         & tflops/1.d6
-
-    write(psb_out_unit,'("Time for ",i6," products (s) (GPU)   : ",F20.3)')&
-         & ngpu*ntests,gt2
-    write(psb_out_unit,'("Time per product    (ms)     (GPU)   : ",F20.3)')&
-         & gt2*1.d3/(1.d0*ntests*ngpu)
-    write(psb_out_unit,'("MFLOPS                       (GPU)   : ",F20.3)')&
-         & gflops/1.d6
-#endif
-    !
-    ! This computation assumes the data movement associated with CSR:
-    ! it is minimal in terms of coefficients. Other formats may either move
-    ! more data (padding etc.) or less data (if they can save on the indices). 
-    !
-    nbytes = nr*(2*2*psb_sizeof_dp + psb_sizeof_ip)+&
-         & annz*(2*psb_sizeof_dp + psb_sizeof_ip)
-    bdwdth = ntests*nbytes/(t2*1.d6)
-    write(psb_out_unit,*)
-    write(psb_out_unit,'("MBYTES/S                  (CPU)  : ",F20.3)') bdwdth
-#ifdef HAVE_GPU
-    bdwdth = ngpu*ntests*nbytes/(gt2*1.d6)
-    write(psb_out_unit,'("MBYTES/S                  (GPU)  : ",F20.3)') bdwdth
-#endif
-    write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt()
-    write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
-
-  end if
-
-  call psb_gefree(b_col, desc_a,info)
-  call psb_gefree(x_col, desc_a,info)
-  call psb_gefree(xv, desc_a,info)
-  call psb_gefree(bv, desc_a,info)
-  call psb_spfree(a, desc_a,info)
-#ifdef HAVE_GPU
-  call psb_gefree(xg, desc_a,info)
-  call psb_gefree(bg, desc_a,info)
-  call psb_spfree(agpu,desc_a,info)
-  call psb_gpu_exit()
-#endif
-  call psb_cdfree(desc_a,info)
-
-  call psb_exit(ctxt)
-  stop
-
-9999 continue
-  call psb_error(ctxt)
-
-end program c_file_spmv
-  
-
-
-
-
diff --git a/test/cudakern/d_file_spmv.F90 b/test/cudakern/d_file_spmv.F90
deleted file mode 100644
index 2bbc0bc4..00000000
--- a/test/cudakern/d_file_spmv.F90
+++ /dev/null
@@ -1,496 +0,0 @@
-!    
-!                Parallel Sparse BLAS  GPU plugin
-!      (C) Copyright 2013
-!                         Salvatore Filippone
-!                         Alessandro Fanfarillo
-!   
-!    Redistribution and use in source and binary forms, with or without
-!    modification, are permitted provided that the following conditions
-!    are met:
-!      1. Redistributions of source code must retain the above copyright
-!         notice, this list of conditions and the following disclaimer.
-!      2. Redistributions in binary form must reproduce the above copyright
-!         notice, this list of conditions, and the following disclaimer in the
-!         documentation and/or other materials provided with the distribution.
-!      3. The name of the PSBLAS group or the names of its contributors may
-!         not be used to endorse or promote products derived from this
-!         software without specific written permission.
-!   
-!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
-!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-!    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
-program d_file_spmv
-  use psb_base_mod
-  use psb_util_mod
-  use psb_ext_mod
-#ifdef HAVE_GPU
-  use psb_gpu_mod
-#endif
-  use data_input
-  implicit none
-
-  ! input parameters
-  character(len=200) :: mtrx_file
-
-  ! sparse matrices
-  type(psb_dspmat_type) :: a, aux_a, agpu
-
-  ! dense matrices
-  real(psb_dpk_), allocatable, target :: aux_b(:,:), d(:)
-  real(psb_dpk_), allocatable , save  :: x_col_glob(:), r_col_glob(:)
-  real(psb_dpk_), pointer  :: b_col_glob(:)
-  type(psb_d_vect_type)    :: b_col, x_col, r_col
-  type(psb_d_vect_type) :: xg, bg, xv, bv
-#ifdef HAVE_GPU
-  type(psb_d_vect_gpu)  :: vmold
-#endif
-  real(psb_dpk_), allocatable :: xc1(:),xc2(:)
-  ! communications data structure
-  type(psb_desc_type):: desc_a
-
-  type(psb_ctxt_type) :: ctxt
-  integer(psb_ipk_) :: iam, np
-  integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes
-  real(psb_dpk_)    :: err, eps, damatsize, dgmatsize
-
-  character(len=5)   :: acfmt, agfmt
-  character(len=20)  :: name
-  character(len=2)   :: filefmt
-  integer, parameter :: iunit=12
-  integer, parameter :: times=2000 
-  integer, parameter :: ntests=200, ngpu=50, ncnv=20 
-
-  type(psb_d_coo_sparse_mat), target   :: acoo
-  type(psb_d_csr_sparse_mat), target   :: acsr
-  type(psb_d_ell_sparse_mat), target   :: aell
-  type(psb_d_hll_sparse_mat), target   :: ahll
-  type(psb_d_hdia_sparse_mat), target   :: ahdia
-#ifdef HAVE_GPU
-  type(psb_d_elg_sparse_mat), target   :: aelg
-  type(psb_d_csrg_sparse_mat), target  :: acsrg
-  type(psb_d_hybg_sparse_mat), target  :: ahybg
-  type(psb_d_hlg_sparse_mat), target   :: ahlg
-  type(psb_d_hdiag_sparse_mat), target  :: ahdiag
-#endif
-  class(psb_d_base_sparse_mat), pointer :: acmold, agmold
-  ! other variables
-  integer(psb_lpk_)  :: i,j,nrt, ns, nr, ig, nrg
-  integer(psb_ipk_)  :: internal, m,ii,nnzero,info, ipart
-  real(psb_dpk_) :: t0,t1, t2, tprec, flops
-  real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,&
-       & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1
-  integer :: nrhs, nrow, n_row, dim, nv, ne
-  integer, allocatable :: ivg(:), ipv(:)
-
-
-  call psb_init(ctxt)
-  call psb_info(ctxt,iam,np)
-#ifdef HAVE_GPU
-  call psb_gpu_init(ctxt)
-#endif
-  if (iam < 0) then 
-    ! This should not happen, but just in case
-    call psb_exit(ctxt)
-    stop
-  endif
-
-
-  name='file_spmv'
-  if(psb_get_errstatus() /= 0) goto 9999
-  info=psb_success_  
-  call psb_set_errverbosity(2)
-  if (iam == psb_root_) then 
-    write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
-    write(*,*) 'This is the ',trim(name),' sample program'
-  end if
-#ifdef HAVE_GPU
-  write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount()
-  write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName())  
-#endif
-
-  if (iam == 0) then 
-    write(*,*) 'Matrix? '
-    call read_data(mtrx_file,psb_inp_unit)
-    write(*,*) 'file format'
-    call read_data(filefmt,psb_inp_unit)
-    write(*,*) 'CPU format'
-    call read_data(acfmt,psb_inp_unit)
-    write(*,*) 'GPU format'
-    call read_data(agfmt,psb_inp_unit)
-    write(*,*) 'distribution '
-    call read_data(ipart,psb_inp_unit)
-    write(*,*) 'Read all data, going on'
-  end if
-  call psb_bcast(ctxt,mtrx_file)
-  call psb_bcast(ctxt,filefmt)
-  call psb_bcast(ctxt,acfmt)
-  call psb_bcast(ctxt,agfmt)
-  call psb_bcast(ctxt,ipart)
-  call psb_barrier(ctxt)
-  t0 = psb_wtime()  
-  ! read the input matrix to be processed and (possibly) the rhs 
-  nrhs = 1
-
-  if (iam==psb_root_) then
-    select case(psb_toupper(filefmt)) 
-    case('MM') 
-      ! For Matrix Market we have an input file for the matrix
-      ! and an (optional) second file for the RHS. 
-      call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file)
-
-    case ('HB')
-      ! For Harwell-Boeing we have a single file which may or may not
-      ! contain an RHS.
-      call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file)
-
-    case default
-      info = -1 
-      write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt
-    end select
-    if (info /= 0) then
-      write(psb_err_unit,*) 'Error while reading input matrix '
-      call psb_abort(ctxt)
-    end if
-
-    !
-    ! Always get nnz from original matrix.
-    ! Some formats add fill-in and do not keep track
-    ! of how many were added. So if the original matrix 
-    ! contained some extra zeros, the count of entries
-    ! is not recoverable exactly.
-    !    
-    nrt  = aux_a%get_nrows()
-    annz = aux_a%get_nzeros()
-    call psb_bcast(ctxt,annz)
-    call psb_bcast(ctxt,nrt)
-
-    write(psb_out_unit,'("Generating an rhs...")')
-    write(psb_out_unit,'(" ")')
-    call psb_realloc(nrt,1,aux_b,info)
-    if (info /= 0) then
-      call psb_errpush(4000,name)
-      goto 9999
-    endif
-
-    b_col_glob => aux_b(:,1)
-    do i=1, nrt
-      b_col_glob(i) = 1.d0
-    enddo
-
-  else
-
-    call psb_bcast(ctxt,annz)
-    call psb_bcast(ctxt,nrt)
-
-  end if
-
-
-  select case(psb_toupper(acfmt))
-  case('COO')
-    acmold => acoo
-  case('CSR')
-    acmold => acsr
-  case('ELL')
-    acmold => aell
-  case('HLL')
-    acmold => ahll
-  case('HDIA')
-    acmold => ahdia
-  case default
-    write(*,*) 'Unknown format defaulting to CSR'
-    acmold => acsr
-  end select
-
-#ifdef HAVE_GPU
-  select case(psb_toupper(agfmt))
-  case('ELG')
-    agmold => aelg
-  case('HLG')
-    agmold => ahlg
-  case('CSRG')
-    agmold => acsrg
-  case('HYBG')
-    agmold => ahybg
-  case('HDIAG')
-    agmold => ahdiag
-  case default
-    write(*,*) 'Unknown format defaulting to HLG'
-    agmold => ahlg
-  end select
-#endif
-
-
-  ! switch over different partition types
-  if (ipart == 0) then 
-    call psb_barrier(ctxt)
-    if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")')
-    allocate(ivg(nrt),ipv(np))
-    do i=1,nrt
-      call part_block(i,nrt,np,ipv,nv)
-      ivg(i) = ipv(1)
-    enddo
-    call psb_matdist(aux_a, a, ctxt, desc_a,info,vg=ivg)
-  else if (ipart == 2) then 
-    if (iam==psb_root_) then 
-      write(psb_out_unit,'("Partition type: graph")')
-      write(psb_out_unit,'(" ")')
-      !      write(psb_err_unit,'("Build type: graph")')
-      call build_mtpart(aux_a,np)
-    endif
-    call psb_barrier(ctxt)
-    call distr_mtpart(psb_root_,ctxt)
-    call getv_mtpart(ivg)
-    call psb_matdist(aux_a, a, ctxt, desc_a,info,vg=ivg)
-  else 
-    if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")')
-    call psb_matdist(aux_a, a,  ctxt,desc_a,info,parts=part_block)
-  end if
-
-  call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_)
-
-  t2 = psb_wtime() - t0
-
-  call psb_amx(ctxt, t2)
-
-  if (iam==psb_root_) then
-    write(psb_out_unit,'(" ")')
-    write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2
-    write(psb_out_unit,'(" ")')
-  end if
-  call a%cscnv(aux_a,info,mold=acoo)
-  tcnvcsr = 0
-  tcnvgpu = 0
-  nr       = desc_a%get_local_rows()
-  nrg      = desc_a%get_global_rows() 
-  call psb_geall(x_col,desc_a,info)
-  do i=1, nr
-    call desc_a%l2g(i,ig,info)
-    call psb_geins(ione,(/ig/),(/(done + (done*ig)/nrg)/),x_col,desc_a,info)
-  end do
-  call psb_geasb(x_col,desc_a,info)
-  do j=1, ncnv
-    call aux_a%cscnv(a,info,mold=acoo)
-    call psb_barrier(ctxt)
-    t1 = psb_wtime()
-    call a%cscnv(info,mold=acmold)
-    t2 = psb_Wtime() -t1
-    call psb_amx(ctxt,t2)
-    tcnvcsr = tcnvcsr + t2
-    if (j==1) tcnvc1 = t2
-    xc1 = x_col%get_vect()
-    call xv%bld(xc1)
-    call psb_geasb(bv,desc_a,info,scratch=.true.)
-    
-#ifdef HAVE_GPU
-    
-    call aux_a%cscnv(agpu,info,mold=acoo)
-    call xg%bld(xc1,mold=vmold)
-    call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold)
-    call psb_barrier(ctxt)
-    t1 = psb_wtime()
-    call agpu%cscnv(info,mold=agmold)
-    call psb_gpu_DeviceSync()
-    t2 = psb_Wtime() -t1
-    call psb_amx(ctxt,t2)
-    if (j==1) tcnvg1 = t2
-    tcnvgpu = tcnvgpu + t2
-#endif
-  end do
-
-  call psb_barrier(ctxt)
-  t1 = psb_wtime()
-  do i=1,ntests 
-    call psb_spmm(done,a,xv,dzero,bv,desc_a,info)
-  end do
-  call psb_barrier(ctxt)
-  t2 = psb_wtime() - t1
-  call psb_amx(ctxt,t2)
-
-#ifdef HAVE_GPU
-  ! FIXME: cache flush needed here
-  call psb_barrier(ctxt)
-  tt1 = psb_wtime()
-  do i=1,ntests 
-    call psb_spmm(done,agpu,xv,dzero,bg,desc_a,info)
-    if ((info /= 0).or.(psb_get_errstatus()/=0)) then 
-      write(0,*) 'From 1 spmm',info,i,ntests
-      call psb_error()
-      stop
-    end if
-
-  end do
-  call psb_gpu_DeviceSync()
-  call psb_barrier(ctxt)
-  tt2 = psb_wtime() - tt1
-  call psb_amx(ctxt,tt2)
-  xc1 = bv%get_vect()
-  xc2 = bg%get_vect()
-  nr       = desc_a%get_local_rows() 
-  eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
-  call psb_amx(ctxt,eps)
-  if (iam==0) write(*,*) 'Max diff on xGPU',eps
-
-  call xg%sync()
-  ! FIXME: cache flush needed here
-
-  call psb_barrier(ctxt)
-  gt1 = psb_wtime()
-  do i=1,ntests*ngpu
-    call psb_spmm(done,agpu,xg,dzero,bg,desc_a,info)
-    if ((info /= 0).or.(psb_get_errstatus()/=0)) then 
-      write(0,*) 'From 2 spmm',info,i,ntests
-      call psb_error()
-      stop
-    end if
-
-  end do
-  ! For timing purposes we need to make sure all threads
-  ! in the device are done. 
-  call psb_gpu_DeviceSync()
-  call psb_barrier(ctxt)
-  gt2 = psb_wtime() - gt1
-  call psb_amx(ctxt,gt2)
-  call bg%sync()
-  xc1 = bv%get_vect()
-  xc2 = bg%get_vect()
-  call psb_geaxpby(-done,bg,+done,bv,desc_a,info)
-  eps = psb_geamax(bv,desc_a,info)
-
-  call psb_amx(ctxt,t2)
-  nr       = desc_a%get_local_rows() 
-  eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
-  call psb_amx(ctxt,eps)
-  if (iam==0) write(*,*) 'Max diff on GPU',eps
-#endif
-
-
-  amatsize = a%sizeof()
-  agmatsize = agpu%sizeof()
-  damatsize = amatsize
-  damatsize = damatsize/(1024*1024)
-  dgmatsize = agmatsize
-  dgmatsize = dgmatsize/(1024*1024)
-  descsize = psb_sizeof(desc_a)
-  call psb_sum(ctxt,damatsize)
-  call psb_sum(ctxt,dgmatsize)
-  call psb_sum(ctxt,descsize)
-
-  if (iam == psb_root_) then
-    write(psb_out_unit,'("Matrix: ",a)') mtrx_file
-    write(psb_out_unit,&
-         &'("Test on                          : ",i20," processors")') np
-    write(psb_out_unit,&
-         &'("Size of matrix                   : ",i20,"           ")') nrt
-    write(psb_out_unit,&
-         &'("Number of nonzeros               : ",i20,"           ")') annz
-    write(psb_out_unit,&
-         &'("Memory occupation CPU  (MBytes)  : ",f20.2,"           ")') damatsize
-    write(psb_out_unit,&
-         &'("Memory occupation GPU  (MBytes)  : ",f20.2,"           ")') dgmatsize
-    write(psb_out_unit,&
-         &'("Memory occupation CPU  (Bytes)   : ",i24,"           ")') amatsize
-    write(psb_out_unit,&
-         &'("Memory occupation GPU  (Bytes)   : ",i24,"           ")') agmatsize
-    flops  = ntests*(2.d0*annz)
-    tflops = flops
-    gflops = flops * ngpu
-    write(psb_out_unit,'("Storage type for    A: ",a)') a%get_fmt()
-#ifdef HAVE_GPU
-    write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt()
-    write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')&
-         & tcnvc1
-    write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')&
-         & tcnvcsr
-    write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')&
-         & tcnvcsr/ncnv
-    write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')&
-         & tcnvg1
-    write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')&
-         & tcnvgpu
-    write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')&
-         & tcnvgpu/ncnv
-
-#endif
-    write(psb_out_unit,&
-         & '("Number of flops (",i0," prod)        : ",F20.0,"           ")') &
-         &  ntests,flops
-
-    flops  = flops / (t2)
-    tflops = tflops / (tt2)
-    gflops = gflops / (gt2)
-    write(psb_out_unit,'("Time for ",i6," products (s) (CPU)   : ",F20.3)')&
-         &  ntests,t2
-    write(psb_out_unit,'("Time per product    (ms)     (CPU)   : ",F20.3)')&
-         & t2*1.d3/(1.d0*ntests)
-    write(psb_out_unit,'("MFLOPS                       (CPU)   : ",F20.3)')&
-         & flops/1.d6
-#ifdef HAVE_GPU
-
-    write(psb_out_unit,'("Time for ",i6," products (s) (xGPU)  : ",F20.3)')&
-         & ntests, tt2
-    write(psb_out_unit,'("Time per product    (ms)     (xGPU)  : ",F20.3)')&
-         & tt2*1.d3/(1.d0*ntests)
-    write(psb_out_unit,'("MFLOPS                       (xGPU)  : ",F20.3)')&
-         & tflops/1.d6
-
-    write(psb_out_unit,'("Time for ",i6," products (s) (GPU)   : ",F20.3)')&
-         & ngpu*ntests,gt2
-    write(psb_out_unit,'("Time per product    (ms)     (GPU)   : ",F20.3)')&
-         & gt2*1.d3/(1.d0*ntests*ngpu)
-    write(psb_out_unit,'("MFLOPS                       (GPU)   : ",F20.3)')&
-         & gflops/1.d6
-#endif
-    !
-    ! This computation assumes the data movement associated with CSR:
-    ! it is minimal in terms of coefficients. Other formats may either move
-    ! more data (padding etc.) or less data (if they can save on the indices). 
-    !
-    nbytes = nr*(2*psb_sizeof_dp + psb_sizeof_ip)+&
-         & annz*(psb_sizeof_dp + psb_sizeof_ip)
-    bdwdth = ntests*nbytes/(t2*1.d6)
-    write(psb_out_unit,*)
-    write(psb_out_unit,'("MBYTES/S                  (CPU)  : ",F20.3)') bdwdth
-#ifdef HAVE_GPU
-    bdwdth = ngpu*ntests*nbytes/(gt2*1.d6)
-    write(psb_out_unit,'("MBYTES/S                  (GPU)  : ",F20.3)') bdwdth
-#endif
-    write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt()
-    write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
-
-  end if
-
-  call psb_gefree(b_col, desc_a,info)
-  call psb_gefree(x_col, desc_a,info)
-  call psb_gefree(xv, desc_a,info)
-  call psb_gefree(bv, desc_a,info)
-  call psb_spfree(a, desc_a,info)
-#ifdef HAVE_GPU
-  call psb_gefree(xg, desc_a,info)
-  call psb_gefree(bg, desc_a,info)
-  call psb_spfree(agpu,desc_a,info)
-  call psb_gpu_exit()
-#endif
-  call psb_cdfree(desc_a,info)
-
-  call psb_exit(ctxt)
-  stop
-
-9999 continue
-  call psb_error(ctxt)  
-
-end program d_file_spmv
-  
-
-
-
-
diff --git a/test/cudakern/s_file_spmv.F90 b/test/cudakern/s_file_spmv.F90
deleted file mode 100644
index 37a52717..00000000
--- a/test/cudakern/s_file_spmv.F90
+++ /dev/null
@@ -1,496 +0,0 @@
-!    
-!                Parallel Sparse BLAS  GPU plugin
-!      (C) Copyright 2013
-!                         Salvatore Filippone
-!                         Alessandro Fanfarillo
-!   
-!    Redistribution and use in source and binary forms, with or without
-!    modification, are permitted provided that the following conditions
-!    are met:
-!      1. Redistributions of source code must retain the above copyright
-!         notice, this list of conditions and the following disclaimer.
-!      2. Redistributions in binary form must reproduce the above copyright
-!         notice, this list of conditions, and the following disclaimer in the
-!         documentation and/or other materials provided with the distribution.
-!      3. The name of the PSBLAS group or the names of its contributors may
-!         not be used to endorse or promote products derived from this
-!         software without specific written permission.
-!   
-!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
-!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-!    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
-program s_file_spmv
-  use psb_base_mod
-  use psb_util_mod
-  use psb_ext_mod
-#ifdef HAVE_GPU
-  use psb_gpu_mod
-#endif
-  use data_input
-  implicit none
-
-  ! input parameters
-  character(len=200) :: mtrx_file
-
-  ! sparse matrices
-  type(psb_sspmat_type) :: a, aux_a, agpu
-
-  ! dense matrices
-  real(psb_spk_), allocatable, target ::  aux_b(:,:), d(:)
-  real(psb_spk_), allocatable , save  :: x_col_glob(:), r_col_glob(:)
-  real(psb_spk_), pointer  :: b_col_glob(:)
-  type(psb_s_vect_type)    :: b_col, x_col, r_col
-  type(psb_s_vect_type) :: xg, bg, xv, bv
-#ifdef HAVE_GPU
-  type(psb_s_vect_gpu)  :: vmold
-#endif
-  real(psb_spk_), allocatable :: xc1(:),xc2(:)
-  ! communications data structure
-  type(psb_desc_type):: desc_a
-
-  type(psb_ctxt_type) :: ctxt
-  integer            :: iam, np
-  integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes
-  real(psb_spk_)   :: err, eps, samatsize, sgmatsize
-
-  character(len=5)   :: acfmt, agfmt
-  character(len=20)  :: name
-  character(len=2)   :: filefmt
-  integer, parameter :: iunit=12
-  integer, parameter :: times=2000 
-  integer, parameter :: ntests=200, ngpu=50, ncnv=20 
-
-  type(psb_s_coo_sparse_mat), target   :: acoo
-  type(psb_s_csr_sparse_mat), target   :: acsr
-  type(psb_s_ell_sparse_mat), target   :: aell
-  type(psb_s_hll_sparse_mat), target   :: ahll
-  type(psb_s_hdia_sparse_mat), target   :: ahdia
-#ifdef HAVE_GPU
-  type(psb_s_elg_sparse_mat), target   :: aelg
-  type(psb_s_csrg_sparse_mat), target  :: acsrg
-  type(psb_s_hybg_sparse_mat), target  :: ahybg
-  type(psb_s_hlg_sparse_mat), target   :: ahlg
-  type(psb_s_hdiag_sparse_mat), target  :: ahdiag
-#endif
-  class(psb_s_base_sparse_mat), pointer :: acmold, agmold
-  ! other variables
-  integer            :: i,info,j,nrt, ns, nr, ipart, ig, nrg
-  integer            :: internal, m,ii,nnzero
-  real(psb_dpk_) :: t0,t1, t2, tprec, flops
-  real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,&
-       & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1
-  integer :: nrhs, nrow, n_row, dim, nv, ne
-  integer, allocatable :: ivg(:), ipv(:)
-
-
-  call psb_init(ctxt)
-  call psb_info(ctxt,iam,np)
-#ifdef HAVE_GPU
-  call psb_gpu_init(ctxt)
-#endif
-  if (iam < 0) then 
-    ! This should not happen, but just in case
-    call psb_exit(ctxt)
-    stop
-  endif
-
-
-  name='file_spmv'
-  if(psb_get_errstatus() /= 0) goto 9999
-  info=psb_success_  
-  call psb_set_errverbosity(2)
-  if (iam == psb_root_) then 
-    write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
-    write(*,*) 'This is the ',trim(name),' sample program'
-  end if
-#ifdef HAVE_GPU
-  write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount()
-  write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName())  
-#endif
-
-  if (iam == 0) then 
-    write(*,*) 'Matrix? '
-    call read_data(mtrx_file,psb_inp_unit)
-    write(*,*) 'file format'
-    call read_data(filefmt,psb_inp_unit)
-    write(*,*) 'CPU format'
-    call read_data(acfmt,psb_inp_unit)
-    write(*,*) 'GPU format'
-    call read_data(agfmt,psb_inp_unit)
-    write(*,*) 'distribution '
-    call read_data(ipart,psb_inp_unit)
-    write(*,*) 'Read all data, going on'
-  end if
-  call psb_bcast(ctxt,mtrx_file)
-  call psb_bcast(ctxt,filefmt)
-  call psb_bcast(ctxt,acfmt)
-  call psb_bcast(ctxt,agfmt)
-  call psb_bcast(ctxt,ipart)
-  call psb_barrier(ctxt)
-  t0 = psb_wtime()  
-  ! read the input matrix to be processed and (possibly) the rhs 
-  nrhs = 1
-
-  if (iam==psb_root_) then
-    select case(psb_toupper(filefmt)) 
-    case('MM') 
-      ! For Matrix Market we have an input file for the matrix
-      ! and an (optional) second file for the RHS. 
-      call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file)
-
-    case ('HB')
-      ! For Harwell-Boeing we have a single file which may or may not
-      ! contain an RHS.
-      call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file)
-
-    case default
-      info = -1 
-      write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt
-    end select
-    if (info /= 0) then
-      write(psb_err_unit,*) 'Error while reading input matrix '
-      call psb_abort(ctxt)
-    end if
-
-    !
-    ! Always get nnz from original matrix.
-    ! Some formats add fill-in and do not keep track
-    ! of how many were added. So if the original matrix 
-    ! contained some extra zeros, the count of entries
-    ! is not recoverable exactly.
-    !    
-    nrt  = aux_a%get_nrows()
-    annz = aux_a%get_nzeros()
-    call psb_bcast(ctxt,annz)
-    call psb_bcast(ctxt,nrt)
-
-    write(psb_out_unit,'("Generating an rhs...")')
-    write(psb_out_unit,'(" ")')
-    call psb_realloc(nrt,1,aux_b,info)
-    if (info /= 0) then
-      call psb_errpush(4000,name)
-      goto 9999
-    endif
-
-    b_col_glob => aux_b(:,1)
-    do i=1, nrt
-      b_col_glob(i) = 1.d0
-    enddo
-
-  else
-
-    call psb_bcast(ctxt,annz)
-    call psb_bcast(ctxt,nrt)
-
-  end if
-
-
-  select case(psb_toupper(acfmt))
-  case('COO')
-    acmold => acoo
-  case('CSR')
-    acmold => acsr
-  case('ELL')
-    acmold => aell
-  case('HLL')
-    acmold => ahll
-  case('HDIA')
-    acmold => ahdia
-  case default
-    write(*,*) 'Unknown format defaulting to CSR'
-    acmold => acsr
-  end select
-
-#ifdef HAVE_GPU
-  select case(psb_toupper(agfmt))
-  case('ELG')
-    agmold => aelg
-  case('HLG')
-    agmold => ahlg
-  case('CSRG')
-    agmold => acsrg
-  case('HYBG')
-    agmold => ahybg
-  case('HDIAG')
-    agmold => ahdiag
-  case default
-    write(*,*) 'Unknown format defaulting to HLG'
-    agmold => ahlg
-  end select
-#endif
-
-
-  ! switch over different partition types
-  if (ipart == 0) then 
-    call psb_barrier(ctxt)
-    if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")')
-    allocate(ivg(nrt),ipv(np))
-    do i=1,nrt
-      call part_block(i,nrt,np,ipv,nv)
-      ivg(i) = ipv(1)
-    enddo
-    call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg)
-  else if (ipart == 2) then 
-    if (iam==psb_root_) then 
-      write(psb_out_unit,'("Partition type: graph")')
-      write(psb_out_unit,'(" ")')
-      !      write(psb_err_unit,'("Build type: graph")')
-      call build_mtpart(aux_a,np)
-    endif
-    call psb_barrier(ctxt)
-    call distr_mtpart(psb_root_,ctxt)
-    call getv_mtpart(ivg)
-    call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg)
-  else 
-    if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")')
-    call psb_matdist(aux_a, a,  ctxt,desc_a,info,parts=part_block)
-  end if
-
-  call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_)
-
-  t2 = psb_wtime() - t0
-
-  call psb_amx(ctxt, t2)
-
-  if (iam==psb_root_) then
-    write(psb_out_unit,'(" ")')
-    write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2
-    write(psb_out_unit,'(" ")')
-  end if
-  call a%cscnv(aux_a,info,mold=acoo)
-  tcnvcsr = 0
-  tcnvgpu = 0
-  nr       = desc_a%get_local_rows()
-  nrg      = desc_a%get_global_rows() 
-  call psb_geall(x_col,desc_a,info)
-  do i=1, nr
-    call desc_a%l2g(i,ig,info)
-    call psb_geins(ione,(/ig/),(/(sone + (sone*ig)/nrg)/),x_col,desc_a,info)
-  end do
-  call psb_geasb(x_col,desc_a,info)
-  do j=1, ncnv
-    call aux_a%cscnv(a,info,mold=acoo)
-    call psb_barrier(ctxt)
-    t1 = psb_wtime()
-    call a%cscnv(info,mold=acmold)
-    t2 = psb_Wtime() -t1
-    call psb_amx(ctxt,t2)
-    tcnvcsr = tcnvcsr + t2
-    if (j==1) tcnvc1 = t2
-    xc1 = x_col%get_vect()
-    call xv%bld(xc1)
-    call psb_geasb(bv,desc_a,info,scratch=.true.)
-    
-#ifdef HAVE_GPU
-    
-    call aux_a%cscnv(agpu,info,mold=acoo)
-    call xg%bld(xc1,mold=vmold)
-    call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold)
-    call psb_barrier(ctxt)
-    t1 = psb_wtime()
-    call agpu%cscnv(info,mold=agmold)
-    call psb_gpu_DeviceSync()
-    t2 = psb_Wtime() -t1
-    call psb_amx(ctxt,t2)
-    if (j==1) tcnvg1 = t2
-    tcnvgpu = tcnvgpu + t2
-#endif
-  end do
-
-  call psb_barrier(ctxt)
-  t1 = psb_wtime()
-  do i=1,ntests 
-    call psb_spmm(sone,a,xv,szero,bv,desc_a,info)
-  end do
-  call psb_barrier(ctxt)
-  t2 = psb_wtime() - t1
-  call psb_amx(ctxt,t2)
-
-#ifdef HAVE_GPU
-  ! FIXME: cache flush needed here
-  call psb_barrier(ctxt)
-  tt1 = psb_wtime()
-  do i=1,ntests 
-    call psb_spmm(sone,agpu,xv,szero,bg,desc_a,info)
-    if ((info /= 0).or.(psb_get_errstatus()/=0)) then 
-      write(0,*) 'From 1 spmm',info,i,ntests
-      call psb_error()
-      stop
-    end if
-
-  end do
-  call psb_gpu_DeviceSync()
-  call psb_barrier(ctxt)
-  tt2 = psb_wtime() - tt1
-  call psb_amx(ctxt,tt2)
-  xc1 = bv%get_vect()
-  xc2 = bg%get_vect()
-  nr       = desc_a%get_local_rows() 
-  eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
-  call psb_amx(ctxt,eps)
-  if (iam==0) write(*,*) 'Max diff on xGPU',eps
-
-  call xg%sync()
-  ! FIXME: cache flush needed here
-
-  call psb_barrier(ctxt)
-  gt1 = psb_wtime()
-  do i=1,ntests*ngpu
-    call psb_spmm(sone,agpu,xg,szero,bg,desc_a,info)
-    if ((info /= 0).or.(psb_get_errstatus()/=0)) then 
-      write(0,*) 'From 2 spmm',info,i,ntests
-      call psb_error()
-      stop
-    end if
-
-  end do
-  ! For timing purposes we need to make sure all threads
-  ! in the device are done. 
-  call psb_gpu_DeviceSync()
-  call psb_barrier(ctxt)
-  gt2 = psb_wtime() - gt1
-  call psb_amx(ctxt,gt2)
-  call bg%sync()
-  xc1 = bv%get_vect()
-  xc2 = bg%get_vect()
-  call psb_geaxpby(-sone,bg,+sone,bv,desc_a,info)
-  eps = psb_geamax(bv,desc_a,info)
-
-  call psb_amx(ctxt,t2)
-  nr       = desc_a%get_local_rows() 
-  eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
-  call psb_amx(ctxt,eps)
-  if (iam==0) write(*,*) 'Max diff on GPU',eps
-#endif
-
-
-  amatsize = a%sizeof()
-  agmatsize = agpu%sizeof()
-  samatsize = amatsize
-  samatsize = samatsize/(1024*1024)
-  sgmatsize = agmatsize
-  sgmatsize = sgmatsize/(1024*1024)
-  descsize = psb_sizeof(desc_a)
-  call psb_sum(ctxt,samatsize)
-  call psb_sum(ctxt,sgmatsize)
-  call psb_sum(ctxt,descsize)
-
-  if (iam == psb_root_) then
-    write(psb_out_unit,'("Matrix: ",a)') mtrx_file
-    write(psb_out_unit,&
-         &'("Test on                          : ",i20," processors")') np
-    write(psb_out_unit,&
-         &'("Size of matrix                   : ",i20,"           ")') nrt
-    write(psb_out_unit,&
-         &'("Number of nonzeros               : ",i20,"           ")') annz
-    write(psb_out_unit,&
-         &'("Memory occupation CPU  (MBytes)  : ",f20.2,"           ")') samatsize
-    write(psb_out_unit,&
-         &'("Memory occupation GPU  (MBytes)  : ",f20.2,"           ")') sgmatsize
-    write(psb_out_unit,&
-         &'("Memory occupation CPU  (Bytes)   : ",i24,"           ")') amatsize
-    write(psb_out_unit,&
-         &'("Memory occupation GPU  (Bytes)   : ",i24,"           ")') agmatsize
-    flops  = ntests*(2.d0*annz)
-    tflops = flops
-    gflops = flops * ngpu
-    write(psb_out_unit,'("Storage type for    A: ",a)') a%get_fmt()
-#ifdef HAVE_GPU
-    write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt()
-    write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')&
-         & tcnvc1
-    write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')&
-         & tcnvcsr
-    write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')&
-         & tcnvcsr/ncnv
-    write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')&
-         & tcnvg1
-    write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')&
-         & tcnvgpu
-    write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')&
-         & tcnvgpu/ncnv
-
-#endif
-    write(psb_out_unit,&
-         & '("Number of flops (",i0," prod)        : ",F20.0,"           ")') &
-         &  ntests,flops
-
-    flops  = flops / (t2)
-    tflops = tflops / (tt2)
-    gflops = gflops / (gt2)
-    write(psb_out_unit,'("Time for ",i6," products (s) (CPU)   : ",F20.3)')&
-         &  ntests,t2
-    write(psb_out_unit,'("Time per product    (ms)     (CPU)   : ",F20.3)')&
-         & t2*1.d3/(1.d0*ntests)
-    write(psb_out_unit,'("MFLOPS                       (CPU)   : ",F20.3)')&
-         & flops/1.d6
-#ifdef HAVE_GPU
-
-    write(psb_out_unit,'("Time for ",i6," products (s) (xGPU)  : ",F20.3)')&
-         & ntests, tt2
-    write(psb_out_unit,'("Time per product    (ms)     (xGPU)  : ",F20.3)')&
-         & tt2*1.d3/(1.d0*ntests)
-    write(psb_out_unit,'("MFLOPS                       (xGPU)  : ",F20.3)')&
-         & tflops/1.d6
-
-    write(psb_out_unit,'("Time for ",i6," products (s) (GPU)   : ",F20.3)')&
-         & ngpu*ntests,gt2
-    write(psb_out_unit,'("Time per product    (ms)     (GPU)   : ",F20.3)')&
-         & gt2*1.d3/(1.d0*ntests*ngpu)
-    write(psb_out_unit,'("MFLOPS                       (GPU)   : ",F20.3)')&
-         & gflops/1.d6
-#endif
-    !
-    ! This computation assumes the data movement associated with CSR:
-    ! it is minimal in terms of coefficients. Other formats may either move
-    ! more data (padding etc.) or less data (if they can save on the indices). 
-    !
-    nbytes = nr*(2*psb_sizeof_sp + psb_sizeof_ip)+&
-         & annz*(psb_sizeof_sp + psb_sizeof_ip)
-    bdwdth = ntests*nbytes/(t2*1.d6)
-    write(psb_out_unit,*)
-    write(psb_out_unit,'("MBYTES/S                  (CPU)  : ",F20.3)') bdwdth
-#ifdef HAVE_GPU
-    bdwdth = ngpu*ntests*nbytes/(gt2*1.d6)
-    write(psb_out_unit,'("MBYTES/S                  (GPU)  : ",F20.3)') bdwdth
-#endif
-    write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt()
-    write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
-
-  end if
-
-  call psb_gefree(b_col, desc_a,info)
-  call psb_gefree(x_col, desc_a,info)
-  call psb_gefree(xv, desc_a,info)
-  call psb_gefree(bv, desc_a,info)
-  call psb_spfree(a, desc_a,info)
-#ifdef HAVE_GPU
-  call psb_gefree(xg, desc_a,info)
-  call psb_gefree(bg, desc_a,info)
-  call psb_spfree(agpu,desc_a,info)
-  call psb_gpu_exit()
-#endif
-  call psb_cdfree(desc_a,info)
-
-  call psb_exit(ctxt)
-  stop
-
-9999 continue
-  call psb_error(ctxt)  
-
-end program s_file_spmv
-  
-
-
-
-
diff --git a/test/cudakern/z_file_spmv.F90 b/test/cudakern/z_file_spmv.F90
deleted file mode 100644
index 153dd5e1..00000000
--- a/test/cudakern/z_file_spmv.F90
+++ /dev/null
@@ -1,491 +0,0 @@
-!    
-!                Parallel Sparse BLAS  GPU plugin
-!      (C) Copyright 2013
-!                         Salvatore Filippone
-!                         Alessandro Fanfarillo
-!   
-!    Redistribution and use in source and binary forms, with or without
-!    modification, are permitted provided that the following conditions
-!    are met:
-!      1. Redistributions of source code must retain the above copyright
-!         notice, this list of conditions and the following disclaimer.
-!      2. Redistributions in binary form must reproduce the above copyright
-!         notice, this list of conditions, and the following disclaimer in the
-!         documentation and/or other materials provided with the distribution.
-!      3. The name of the PSBLAS group or the names of its contributors may
-!         not be used to endorse or promote products derived from this
-!         software without specific written permission.
-!   
-!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
-!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-!    POSSIBILITY OF SUCH DAMAGE.
-!   
-!    
-program z_file_spmv
-  use psb_base_mod
-  use psb_util_mod
-  use psb_ext_mod
-#ifdef HAVE_GPU
-  use psb_gpu_mod
-#endif
-  use data_input
-  implicit none
-
-  ! input parameters
-  character(len=200) :: mtrx_file
-
-  ! sparse matrices
-  type(psb_zspmat_type) :: a, aux_a, agpu
-
-  ! dense matrices
-  complex(psb_dpk_), allocatable, target :: aux_b(:,:), d(:)
-  complex(psb_dpk_), allocatable , save  :: x_col_glob(:), r_col_glob(:)
-  complex(psb_dpk_), pointer  :: b_col_glob(:)
-  type(psb_z_vect_type)    :: b_col, x_col, r_col
-  type(psb_z_vect_type) :: xg, bg, xv, bv
-#ifdef HAVE_GPU
-  type(psb_z_vect_gpu)  :: vmold
-#endif
-  complex(psb_dpk_), allocatable :: xc1(:),xc2(:)
-  ! communications data structure
-  type(psb_desc_type):: desc_a
-
-  type(psb_ctxt_type) :: ctxt
-  integer            :: iam, np
-  integer(psb_epk_) :: amatsize, agmatsize, precsize, descsize, annz, nbytes
-  real(psb_dpk_)    :: damatsize, dgmatsize
-  complex(psb_dpk_) :: err, eps
-
-  character(len=5)   :: acfmt, agfmt
-  character(len=20)  :: name
-  character(len=2)   :: filefmt
-  integer, parameter :: iunit=12
-  integer, parameter :: times=2000 
-  integer, parameter :: ntests=200, ngpu=50, ncnv=20 
-
-  type(psb_z_coo_sparse_mat), target   :: acoo
-  type(psb_z_csr_sparse_mat), target   :: acsr
-  type(psb_z_ell_sparse_mat), target   :: aell
-  type(psb_z_hll_sparse_mat), target   :: ahll
-#ifdef HAVE_GPU
-  type(psb_z_elg_sparse_mat), target   :: aelg
-  type(psb_z_csrg_sparse_mat), target  :: acsrg
-  type(psb_z_hybg_sparse_mat), target  :: ahybg
-  type(psb_z_hlg_sparse_mat), target   :: ahlg
-#endif
-  class(psb_z_base_sparse_mat), pointer :: acmold, agmold
-  ! other variables
-  integer            :: i,info,j,nrt, ns, nr, ipart, ig, nrg
-  integer            :: internal, m,ii,nnzero
-  real(psb_dpk_) :: t0,t1, t2, tprec, flops
-  real(psb_dpk_) :: tt1, tt2, tflops, gt1, gt2,gflops, gtint, bdwdth,&
-       & tcnvcsr, tcnvc1, tcnvgpu, tcnvg1
-  integer :: nrhs, nrow, n_row, dim, nv, ne
-  integer, allocatable :: ivg(:), ipv(:)
-
-
-  call psb_init(ctxt)
-  call psb_info(ctxt,iam,np)
-#ifdef HAVE_GPU
-  call psb_gpu_init(ctxt)
-#endif
-  if (iam < 0) then 
-    ! This should not happen, but just in case
-    call psb_exit(ctxt)
-    stop
-  endif
-
-
-  name='file_spmv'
-  if(psb_get_errstatus() /= 0) goto 9999
-  info=psb_success_  
-  call psb_set_errverbosity(2)
-  if (iam == psb_root_) then 
-    write(*,*) 'Welcome to PSBLAS version: ',psb_version_string_
-    write(*,*) 'This is the ',trim(name),' sample program'
-  end if
-#ifdef HAVE_GPU
-  write(*,*) 'Process ',iam,' running on device: ', psb_cuda_getDevice(),' out of', psb_cuda_getDeviceCount()
-  write(*,*) 'Process ',iam,' device ', psb_cuda_getDevice(),' is a: ', trim(psb_gpu_DeviceName())  
-#endif
-
-  if (iam == 0) then 
-    write(*,*) 'Matrix? '
-    call read_data(mtrx_file,psb_inp_unit)
-    write(*,*) 'file format'
-    call read_data(filefmt,psb_inp_unit)
-    write(*,*) 'CPU format'
-    call read_data(acfmt,psb_inp_unit)
-    write(*,*) 'GPU format'
-    call read_data(agfmt,psb_inp_unit)
-    write(*,*) 'distribution '
-    call read_data(ipart,psb_inp_unit)
-    write(*,*) 'Read all data, going on'
-  end if
-  call psb_bcast(ctxt,mtrx_file)
-  call psb_bcast(ctxt,filefmt)
-  call psb_bcast(ctxt,acfmt)
-  call psb_bcast(ctxt,agfmt)
-  call psb_bcast(ctxt,ipart)
-  call psb_barrier(ctxt)
-  t0 = psb_wtime()  
-  ! read the input matrix to be processed and (possibly) the rhs 
-  nrhs = 1
-
-  if (iam==psb_root_) then
-    select case(psb_toupper(filefmt)) 
-    case('MM') 
-      ! For Matrix Market we have an input file for the matrix
-      ! and an (optional) second file for the RHS. 
-      call mm_mat_read(aux_a,info,iunit=iunit,filename=mtrx_file)
-
-    case ('HB')
-      ! For Harwell-Boeing we have a single file which may or may not
-      ! contain an RHS.
-      call hb_read(aux_a,info,iunit=iunit,filename=mtrx_file)
-
-    case default
-      info = -1 
-      write(psb_err_unit,*) 'Wrong choice for fileformat ', filefmt
-    end select
-    if (info /= 0) then
-      write(psb_err_unit,*) 'Error while reading input matrix '
-      call psb_abort(ctxt)
-    end if
-
-    !
-    ! Always get nnz from original matrix.
-    ! Some formats add fill-in and do not keep track
-    ! of how many were added. So if the original matrix 
-    ! contained some extra zeros, the count of entries
-    ! is not recoverable exactly.
-    !    
-    nrt  = aux_a%get_nrows()
-    annz = aux_a%get_nzeros()
-    call psb_bcast(ctxt,annz)
-    call psb_bcast(ctxt,nrt)
-
-    write(psb_out_unit,'("Generating an rhs...")')
-    write(psb_out_unit,'(" ")')
-    call psb_realloc(nrt,1,aux_b,info)
-    if (info /= 0) then
-      call psb_errpush(4000,name)
-      goto 9999
-    endif
-
-    b_col_glob => aux_b(:,1)
-    do i=1, nrt
-      b_col_glob(i) = 1.d0
-    enddo
-
-  else
-
-    call psb_bcast(ctxt,annz)
-    call psb_bcast(ctxt,nrt)
-
-  end if
-
-
-  select case(psb_toupper(acfmt))
-  case('COO')
-    acmold => acoo
-  case('CSR')
-    acmold => acsr
-  case('ELL')
-    acmold => aell
-  case('HLL')
-    acmold => ahll
-  case default
-    write(*,*) 'Unknown format defaulting to CSR'
-    acmold => acsr
-  end select
-
-#ifdef HAVE_GPU
-  select case(psb_toupper(agfmt))
-  case('ELG')
-    agmold => aelg
-  case('HLG')
-    agmold => ahlg
-  case('CSRG')
-    agmold => acsrg
-  case('HYBG')
-    agmold => ahybg
-  case default
-    write(*,*) 'Unknown format defaulting to HLG'
-    agmold => ahlg
-  end select
-#endif
-
-
-  ! switch over different partition types
-  if (ipart == 0) then 
-    call psb_barrier(ctxt)
-    if (iam==psb_root_) write(psb_out_unit,'("Partition type: block")')
-    allocate(ivg(nrt),ipv(np))
-    do i=1,nrt
-      call part_block(i,nrt,np,ipv,nv)
-      ivg(i) = ipv(1)
-    enddo
-    call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg)
-  else if (ipart == 2) then 
-    if (iam==psb_root_) then 
-      write(psb_out_unit,'("Partition type: graph")')
-      write(psb_out_unit,'(" ")')
-      !      write(psb_err_unit,'("Build type: graph")')
-      call build_mtpart(aux_a,np)
-    endif
-    call psb_barrier(ctxt)
-    call distr_mtpart(psb_root_,ctxt)
-    call getv_mtpart(ivg)
-    call psb_matdist(aux_a, a, ctxt, desc_a,info,v=ivg)
-  else 
-    if (iam==psb_root_) write(psb_out_unit,'("Partition type default: block")')
-    call psb_matdist(aux_a, a,  ctxt,desc_a,info,parts=part_block)
-  end if
-
-  call psb_scatter(b_col_glob,bv,desc_a,info,root=psb_root_)
-
-  t2 = psb_wtime() - t0
-
-  call psb_amx(ctxt, t2)
-
-  if (iam==psb_root_) then
-    write(psb_out_unit,'(" ")')
-    write(psb_out_unit,'("Time to read and partition matrix : ",es12.5)')t2
-    write(psb_out_unit,'(" ")')
-  end if
-  call a%cscnv(aux_a,info,mold=acoo)
-  tcnvcsr = 0
-  tcnvgpu = 0
-  nr       = desc_a%get_local_rows()
-  nrg      = desc_a%get_global_rows() 
-  call psb_geall(x_col,desc_a,info)
-  do i=1, nr
-    call desc_a%l2g(i,ig,info)
-    call psb_geins(ione,(/ig/),(/(zone + (zone*ig)/nrg)/),x_col,desc_a,info)
-  end do
-  call psb_geasb(x_col,desc_a,info)
-  do j=1, ncnv
-    call aux_a%cscnv(a,info,mold=acoo)
-    call psb_barrier(ctxt)
-    t1 = psb_wtime()
-    call a%cscnv(info,mold=acmold)
-    t2 = psb_Wtime() -t1
-    call psb_amx(ctxt,t2)
-    tcnvcsr = tcnvcsr + t2
-    if (j==1) tcnvc1 = t2
-    xc1 = x_col%get_vect()
-    call xv%bld(xc1)
-    call psb_geasb(bv,desc_a,info,scratch=.true.)
-    
-#ifdef HAVE_GPU
-    
-    call aux_a%cscnv(agpu,info,mold=acoo)
-    call xg%bld(xc1,mold=vmold)
-    call psb_geasb(bg,desc_a,info,scratch=.true.,mold=vmold)
-    call psb_barrier(ctxt)
-    t1 = psb_wtime()
-    call agpu%cscnv(info,mold=agmold)
-    call psb_gpu_DeviceSync()
-    t2 = psb_Wtime() -t1
-    call psb_amx(ctxt,t2)
-    if (j==1) tcnvg1 = t2
-    tcnvgpu = tcnvgpu + t2
-#endif
-  end do
-
-  call psb_barrier(ctxt)
-  t1 = psb_wtime()
-  do i=1,ntests 
-    call psb_spmm(zone,a,xv,zzero,bv,desc_a,info)
-  end do
-  call psb_barrier(ctxt)
-  t2 = psb_wtime() - t1
-  call psb_amx(ctxt,t2)
-
-#ifdef HAVE_GPU
-  ! FIXME: cache flush needed here
-  call psb_barrier(ctxt)
-  tt1 = psb_wtime()
-  do i=1,ntests 
-    call psb_spmm(zone,agpu,xv,zzero,bg,desc_a,info)
-    if ((info /= 0).or.(psb_get_errstatus()/=0)) then 
-      write(0,*) 'From 1 spmm',info,i,ntests
-      call psb_error()
-      stop
-    end if
-
-  end do
-  call psb_gpu_DeviceSync()
-  call psb_barrier(ctxt)
-  tt2 = psb_wtime() - tt1
-  call psb_amx(ctxt,tt2)
-  xc1 = bv%get_vect()
-  xc2 = bg%get_vect()
-  nr       = desc_a%get_local_rows() 
-  eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
-  call psb_amx(ctxt,eps)
-  if (iam==0) write(*,*) 'Max diff on xGPU',eps
-
-  call xg%sync()
-  ! FIXME: cache flush needed here
-
-  call psb_barrier(ctxt)
-  gt1 = psb_wtime()
-  do i=1,ntests*ngpu
-    call psb_spmm(zone,agpu,xg,zzero,bg,desc_a,info)
-    if ((info /= 0).or.(psb_get_errstatus()/=0)) then 
-      write(0,*) 'From 2 spmm',info,i,ntests
-      call psb_error()
-      stop
-    end if
-
-  end do
-  ! For timing purposes we need to make sure all threads
-  ! in the device are done. 
-  call psb_gpu_DeviceSync()
-  call psb_barrier(ctxt)
-  gt2 = psb_wtime() - gt1
-  call psb_amx(ctxt,gt2)
-  call bg%sync()
-  xc1 = bv%get_vect()
-  xc2 = bg%get_vect()
-  call psb_geaxpby(-zone,bg,+zone,bv,desc_a,info)
-  eps = psb_geamax(bv,desc_a,info)
-
-  call psb_amx(ctxt,t2)
-  nr       = desc_a%get_local_rows() 
-  eps = maxval(abs(xc1(1:nr)-xc2(1:nr)))
-  call psb_amx(ctxt,eps)
-  if (iam==0) write(*,*) 'Max diff on GPU',eps
-#endif
-
-
-  amatsize = a%sizeof()
-  agmatsize = agpu%sizeof()
-  damatsize = amatsize
-  damatsize = damatsize/(1024*1024)
-  dgmatsize = agmatsize
-  dgmatsize = dgmatsize/(1024*1024)
-  descsize = psb_sizeof(desc_a)
-  call psb_sum(ctxt,damatsize)
-  call psb_sum(ctxt,dgmatsize)
-  call psb_sum(ctxt,descsize)
-
-  if (iam == psb_root_) then
-    write(psb_out_unit,'("Matrix: ",a)') mtrx_file
-    write(psb_out_unit,&
-         &'("Test on                          : ",i20," processors")') np
-    write(psb_out_unit,&
-         &'("Size of matrix                   : ",i20,"           ")') nrt
-    write(psb_out_unit,&
-         &'("Number of nonzeros               : ",i20,"           ")') annz
-    write(psb_out_unit,&
-         &'("Memory occupation CPU  (MBytes)  : ",f20.2,"           ")') damatsize
-    write(psb_out_unit,&
-         &'("Memory occupation GPU  (MBytes)  : ",f20.2,"           ")') dgmatsize
-    write(psb_out_unit,&
-         &'("Memory occupation CPU  (Bytes)   : ",i24,"           ")') amatsize
-    write(psb_out_unit,&
-         &'("Memory occupation GPU  (Bytes)   : ",i24,"           ")') agmatsize
-    flops  = ntests*(2.d0*annz)
-    tflops = flops
-    gflops = flops * ngpu
-    write(psb_out_unit,'("Storage type for    A: ",a)') a%get_fmt()
-#ifdef HAVE_GPU
-    write(psb_out_unit,'("Storage type for AGPU: ",a)') agpu%get_fmt()
-    write(psb_out_unit,'("Time to convert A from COO to CPU (1): ",F20.9)')&
-         & tcnvc1
-    write(psb_out_unit,'("Time to convert A from COO to CPU (t): ",F20.9)')&
-         & tcnvcsr
-    write(psb_out_unit,'("Time to convert A from COO to CPU (a): ",F20.9)')&
-         & tcnvcsr/ncnv
-    write(psb_out_unit,'("Time to convert A from COO to GPU (1): ",F20.9)')&
-         & tcnvg1
-    write(psb_out_unit,'("Time to convert A from COO to GPU (t): ",F20.9)')&
-         & tcnvgpu
-    write(psb_out_unit,'("Time to convert A from COO to GPU (a): ",F20.9)')&
-         & tcnvgpu/ncnv
-
-#endif
-    write(psb_out_unit,&
-         & '("Number of flops (",i0," prod)        : ",F20.0,"           ")') &
-         &  ntests,flops
-
-    flops  = flops / (t2)
-    tflops = tflops / (tt2)
-    gflops = gflops / (gt2)
-    write(psb_out_unit,'("Time for ",i6," products (s) (CPU)   : ",F20.3)')&
-         &  ntests,t2
-    write(psb_out_unit,'("Time per product    (ms)     (CPU)   : ",F20.3)')&
-         & t2*1.d3/(1.d0*ntests)
-    write(psb_out_unit,'("MFLOPS                       (CPU)   : ",F20.3)')&
-         & flops/1.d6
-#ifdef HAVE_GPU
-
-    write(psb_out_unit,'("Time for ",i6," products (s) (xGPU)  : ",F20.3)')&
-         & ntests, tt2
-    write(psb_out_unit,'("Time per product    (ms)     (xGPU)  : ",F20.3)')&
-         & tt2*1.d3/(1.d0*ntests)
-    write(psb_out_unit,'("MFLOPS                       (xGPU)  : ",F20.3)')&
-         & tflops/1.d6
-
-    write(psb_out_unit,'("Time for ",i6," products (s) (GPU)   : ",F20.3)')&
-         & ngpu*ntests,gt2
-    write(psb_out_unit,'("Time per product    (ms)     (GPU)   : ",F20.3)')&
-         & gt2*1.d3/(1.d0*ntests*ngpu)
-    write(psb_out_unit,'("MFLOPS                       (GPU)   : ",F20.3)')&
-         & gflops/1.d6
-#endif
-    !
-    ! This computation assumes the data movement associated with CSR:
-    ! it is minimal in terms of coefficients. Other formats may either move
-    ! more data (padding etc.) or less data (if they can save on the indices). 
-    !
-    nbytes = nr*(2*2*psb_sizeof_dp + psb_sizeof_ip)+&
-         & annz*(2*psb_sizeof_dp + psb_sizeof_ip)
-    bdwdth = ntests*nbytes/(t2*1.d6)
-    write(psb_out_unit,*)
-    write(psb_out_unit,'("MBYTES/S                  (CPU)  : ",F20.3)') bdwdth
-#ifdef HAVE_GPU
-    bdwdth = ngpu*ntests*nbytes/(gt2*1.d6)
-    write(psb_out_unit,'("MBYTES/S                  (GPU)  : ",F20.3)') bdwdth
-#endif
-    write(psb_out_unit,'("Storage type for DESC_A: ",a)') desc_a%indxmap%get_fmt()
-    write(psb_out_unit,'("Total memory occupation for DESC_A: ",i12)')descsize
-
-  end if
-
-  call psb_gefree(b_col, desc_a,info)
-  call psb_gefree(x_col, desc_a,info)
-  call psb_gefree(xv, desc_a,info)
-  call psb_gefree(bv, desc_a,info)
-  call psb_spfree(a, desc_a,info)
-#ifdef HAVE_GPU
-  call psb_gefree(xg, desc_a,info)
-  call psb_gefree(bg, desc_a,info)
-  call psb_spfree(agpu,desc_a,info)
-  call psb_gpu_exit()
-#endif
-  call psb_cdfree(desc_a,info)
-
-  call psb_exit(ctxt)
-  stop
-
-9999 continue
-  call psb_error(ctxt)
-
-end program z_file_spmv
-  
-
-
-
-