From aaaf4c9f090dcd45feaf6dd568ef5d81ca768460 Mon Sep 17 00:00:00 2001
From: Salvatore Filippone <salvatore.filippone@cranfield.ac.uk>
Date: Sun, 18 Feb 2018 09:42:35 +0000
Subject: [PATCH] New psb_partidx_mod.F90. Merged into psb_util. Updated all
 test/pargen progs.

---
 base/modules/psb_const_mod.F90 |   2 +
 test/pargen/Makefile           |   6 +-
 test/pargen/psb_d_pde2d.f90    | 142 ++++++++++++++----
 test/pargen/psb_d_pde3d.f90    | 215 ++++++++++++++-------------
 test/pargen/psb_s_pde2d.f90    | 167 ++++++++++++++++-----
 test/pargen/psb_s_pde3d.f90    | 184 +++++++++++++++++------
 test/pargen/tryidxijk.f90      | 107 +-------------
 util/Makefile                  |   2 +-
 util/psb_partidx_mod.F90       | 259 +++++++++++++++++++++++++++++++++
 util/psb_util_mod.f90          |   3 +-
 10 files changed, 769 insertions(+), 318 deletions(-)
 create mode 100644 util/psb_partidx_mod.F90

diff --git a/base/modules/psb_const_mod.F90 b/base/modules/psb_const_mod.F90
index 80b88ef4..3b3f07f4 100644
--- a/base/modules/psb_const_mod.F90
+++ b/base/modules/psb_const_mod.F90
@@ -41,6 +41,7 @@ module psb_const_mod
 #endif
   ! This is always an 8-byte  integer.
   integer, parameter  :: psb_long_int_k_ = int64
+  integer, parameter  :: psb_lpk_        = psb_long_int_k_
   ! This is always a 4-byte integer, for MPI-related stuff
   integer, parameter  :: psb_mpik_ = int32
   !
@@ -60,6 +61,7 @@ module psb_const_mod
   ! This is always an 8-byte  integer.
   integer, parameter  :: longndig=12
   integer, parameter  :: psb_long_int_k_ = selected_int_kind(longndig)
+  integer, parameter  :: psb_lpk_        = psb_long_int_k_
   ! This is always a 4-byte integer, for MPI-related stuff
   integer, parameter  :: psb_mpik_ = kind(1)
   !
diff --git a/test/pargen/Makefile b/test/pargen/Makefile
index 2690aa09..f79a8551 100644
--- a/test/pargen/Makefile
+++ b/test/pargen/Makefile
@@ -18,8 +18,8 @@ EXEDIR=./runs
 
 all: psb_d_pde3d psb_s_pde3d psb_d_pde2d psb_s_pde2d
 
-tryidxijk: tryidxijk.o
-	$(FLINK)  tryidxijk.o -o tryidxijk $(PSBLAS_LIB) $(LDLIBS)
+tryidxijk: tryidxijk.o 
+	$(FLINK)  tryidxijk.o  -o  tryidxijk $(PSBLAS_LIB) $(LDLIBS)
 	/bin/mv tryidxijk $(EXEDIR)
 
 psb_d_pde3d: psb_d_pde3d.o 
@@ -42,7 +42,7 @@ psb_s_pde2d: psb_s_pde2d.o
 
 
 clean: 
-	/bin/rm -f psb_d_pde3d.o psb_s_pde3d.o  psb_d_pde2d.o psb_s_pde2d.o *$(.mod) \
+	/bin/rm -f psb_d_pde3d.o psb_s_pde3d.o  psb_d_pde2d.o psb_s_pde2d.o partidx.o *$(.mod) \
 	$(EXEDIR)/psb_d_pde3d $(EXEDIR)/psb_s_pde3d $(EXEDIR)/psb_d_pde2d $(EXEDIR)/psb_s_pde2d 
 verycleanlib: 
 	(cd ../..; make veryclean)
diff --git a/test/pargen/psb_d_pde2d.f90 b/test/pargen/psb_d_pde2d.f90
index 3921780e..175ea8b6 100644
--- a/test/pargen/psb_d_pde2d.f90
+++ b/test/pargen/psb_d_pde2d.f90
@@ -50,10 +50,12 @@
 !
 ! Note that if b1=b2=c=0., the PDE is the  Laplace equation.
 !
-! In this sample program the index space of the discretized
-! computational domain is first numbered sequentially in a standard way, 
-! then the corresponding vector is distributed according to a BLOCK
-! data distribution.
+! There are three choices available for data distribution:
+! 1. A simple BLOCK distribution
+! 2. A ditribution based on arbitrary assignment of indices to processes,
+!    typically from a graph partitioner
+! 3. A 2D distribution in which the unit square is partitioned
+!    into rectangles, each one assigned to a process.
 !
 module psb_d_pde2d_mod
 
@@ -91,8 +93,9 @@ contains
   !  the rhs. 
   !
   subroutine psb_d_gen_pde2d(ictxt,idim,a,bv,xv,desc_a,afmt,&
-       & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,nrl,iv)
+       & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,partition,nrl,iv)
     use psb_base_mod
+    use psb_util_mod
     !
     !   Discretizes the partial differential equation
     ! 
@@ -120,7 +123,7 @@ contains
     class(psb_d_base_sparse_mat), optional :: amold
     class(psb_d_base_vect_type), optional :: vmold
     class(psb_i_base_vect_type), optional :: imold
-    integer(psb_ipk_), optional :: nrl, iv(:)
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)
 
     ! Local variables.
 
@@ -129,9 +132,13 @@ contains
     type(psb_d_coo_sparse_mat)  :: acoo
     type(psb_d_csr_sparse_mat)  :: acsr
     real(psb_dpk_)           :: zt(nb),x,y,z
-    integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k
+    integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_
     integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
-    integer(psb_ipk_) :: np, iam, nr, nt
+    ! For 2D partition
+    integer(psb_ipk_) :: npx,npy,npdims(2),iamx,iamy,mynx,myny
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
     integer(psb_ipk_) :: icoeff
     integer(psb_ipk_), allocatable     :: irow(:),icol(:),myidx(:)
     real(psb_dpk_), allocatable :: val(:)
@@ -161,6 +168,17 @@ contains
     sqdeltah = deltah*deltah
     deltah2  = 2.d0* deltah
 
+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+    
     ! initialize array descriptor and sparse matrix storage. provide an
     ! estimate of the number of non zeroes 
 
@@ -169,7 +187,9 @@ contains
     nnz = ((n*7)/(np))
     if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
 
-    if (.not.present(iv)) then 
+    select case(partition_)
+    case(1)
+      ! A BLOCK partition 
       if (present(nrl)) then 
         nr = nrl
       else
@@ -189,27 +209,99 @@ contains
         call psb_abort(ictxt)
         return    
       end if
-    else
-      if (size(iv) /= m) then
-        write(psb_err_unit,*) iam, 'Initialization error IV',size(iv),m
+
+      !
+      ! First example  of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      ! 
+      call psb_cdall(ictxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A  partition  defined by the user through IV
+      
+      if (present(iv)) then 
+        if (size(iv) /= m) then
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+          info = -1
+          call psb_barrier(ictxt)
+          call psb_abort(ictxt)
+          return    
+        end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
         info = -1
         call psb_barrier(ictxt)
         call psb_abort(ictxt)
         return    
       end if
 
-    end if
-    call psb_barrier(ictxt)
-    t0 = psb_wtime()
-    if (present(iv)) then 
+      !
+      ! Second example  of use of CDALL: specify for each row the
+      ! process that owns it 
+      ! 
       call psb_cdall(ictxt,desc_a,info,vg=iv)
-    else
-      call psb_cdall(ictxt,desc_a,info,nl=nr)
-    end if
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 2-dimensional partition
+
+      ! A nifty MPI function will split the process list
+      npdims = 0
+      call mpi_dims_create(np,2,npdims,info)
+      npx = npdims(1)
+      npy = npdims(2)
+
+      allocate(bndx(0:npx),bndy(0:npy))
+      ! We can reuse idx2ijk for process indices as well. 
+      call idx2ijk(iamx,iamy,iam,npx,npy,base=0)
+      ! Now let's split the 2D square in rectangles
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+
+      ! How many indices do I own? 
+      nlr = mynx*myny
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndx(iamy+1)-1
+          nr = nr + 1
+          call ijk2idx(myidx(nr),i,j,idim,idim)
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+      end if
+
+      !
+      ! Third example  of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vl=myidx)
+      
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
+      info = -1
+      call psb_barrier(ictxt)
+      call psb_abort(ictxt)
+      return
+    end select
+
+    
     if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
     ! define  rhs from boundary conditions; also build initial guess 
     if (info == psb_success_) call psb_geall(xv,desc_a,info)
     if (info == psb_success_) call psb_geall(bv,desc_a,info)
+
     call psb_barrier(ictxt)
     talc = psb_wtime()-t0
 
@@ -233,9 +325,6 @@ contains
     endif
 
 
-    myidx = desc_a%get_global_indices()
-    nlr = size(myidx)
-
     ! loop over rows belonging to current process in a block
     ! distribution.
 
@@ -249,13 +338,8 @@ contains
         ! local matrix pointer 
         glob_row=myidx(i)
         ! compute gridpoint coordinates
-        if (mod(glob_row,(idim)) == 0) then
-          ix = glob_row/(idim)
-        else
-          ix = glob_row/(idim)+1
-        endif
-        iy = (glob_row-(ix-1)*idim)
-        ! x, y
+        call idx2ijk(ix,iy,glob_row,idim,idim)
+        ! x, y coordinates
         x = (ix-1)*deltah
         y = (iy-1)*deltah
 
diff --git a/test/pargen/psb_d_pde3d.f90 b/test/pargen/psb_d_pde3d.f90
index 616aac89..99df31ed 100644
--- a/test/pargen/psb_d_pde3d.f90
+++ b/test/pargen/psb_d_pde3d.f90
@@ -50,13 +50,14 @@
 !
 ! Note that if b1=b2=b3=c=0., the PDE is the  Laplace equation.
 !
-! There are two choices available for data distribution:
+! There are three choices available for data distribution:
 ! 1. A simple BLOCK distribution
-! 2. A 3D distribution in which the unit cube is partitioned
+! 2. A ditribution based on arbitrary assignment of indices to processes,
+!    typically from a graph partitioner
+! 3. A 3D distribution in which the unit cube is partitioned
 !    into subcubes, each one assigned to a process.
 !
 !
-!
 module psb_d_pde3d_mod
 
  
@@ -76,9 +77,6 @@ module psb_d_pde3d_mod
     module procedure  psb_d_gen_pde3d
   end interface psb_gen_pde3d
   
-  integer, private              :: dims(3),coords(3)
-  integer, private, allocatable :: rk2coo(:,:), coo2rk(:,:,:)
-  
 contains
 
   function d_null_func_3d(x,y,z) result(val)
@@ -90,78 +88,15 @@ contains
 
   end function d_null_func_3d
 
-  !
-  ! Given  a global index IDX and the domain size (NX,NY,NZ)
-  ! compute the point coordinates (I,J,K) 
-  ! Optional argument: base 0 or 1, default 1
-  !
-  ! This mapping is equivalent to a loop nesting:
-  !  idx = base
-  !  do i=1,nx
-  !    do j=1,ny
-  !      do k=1,nz
-  !         ijk2idx(i,j,k) = idx
-  !         idx = idx + 1
-  subroutine  idx2ijk(i,j,k,idx,nx,ny,nz,base)
-    integer(psb_ipk_), intent(out) :: i,j,k
-    integer(psb_ipk_), intent(in)  :: idx,nx,ny,nz
-    integer(psb_ipk_), intent(in), optional :: base
-    
-    integer(psb_ipk) :: base_, idx_
-    if (present(base)) then
-      base_  = base
-    else
-      base_ = 1
-    end if
-
-    idx_ = idx - base_
-    
-    i = idx/(nx*ny)
-    j = (idx - i*nx*ny)/ny
-    k = (idx - i*nx*ny -j*ny)/nz
-
-    i = i + base_
-    j = j + base_
-    k = k + base_
-  end subroutine idx2ijk
-  
-  !
-  ! Given  a triple (I,J,K) and  the domain size (NX,NY,NZ)
-  ! compute the global index IDX 
-  ! Optional argument: base 0 or 1, default 1
-  !
-  ! This mapping is equivalent to a loop nesting:
-  !  idx = base
-  !  do i=1,nx
-  !    do j=1,ny
-  !      do k=1,nz
-  !         ijk2idx(i,j,k) = idx
-  !         idx = idx + 1
-  subroutine  ijk2idx(idx,i,j,k,nx,ny,nz,base)
-    integer(psb_ipk_), intent(out) :: idx,
-    integer(psb_ipk_), intent(in)  :: i,j,k,nx,ny,nz
-    integer(psb_ipk_), intent(in), optional :: base
-    
-    integer(psb_ipk) :: base_
-    if (present(base)) then
-      base_  = base
-    else
-      base_ = 1
-    end if
-
-
-    idx = ((i-base_)*nx*ny + (j-base_)*nx + k -base_) + base_
-    
-  end subroutine ijk2idx
-  
   
   !
   !  subroutine to allocate and fill in the coefficient matrix and
   !  the rhs. 
   !
   subroutine psb_d_gen_pde3d(ictxt,idim,a,bv,xv,desc_a,afmt,&
-       & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,nrl,iv)
+       & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,partition,nrl,iv)
     use psb_base_mod
+    use psb_util_mod
     !
     !   Discretizes the partial differential equation
     ! 
@@ -189,7 +124,7 @@ contains
     class(psb_d_base_sparse_mat), optional :: amold
     class(psb_d_base_vect_type), optional :: vmold 
     class(psb_i_base_vect_type), optional :: imold
-    integer(psb_ipk_), optional :: nrl,iv(:)
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)
 
     ! Local variables.
 
@@ -198,9 +133,13 @@ contains
     type(psb_d_coo_sparse_mat)  :: acoo
     type(psb_d_csr_sparse_mat)  :: acsr
     real(psb_dpk_)           :: zt(nb),x,y,z
-    integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k
+    integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_
     integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
-    integer(psb_ipk_) :: np, iam, nr, nt
+    ! For 3D partition
+    integer(psb_ipk_) :: npx,npy,npz, npdims(3),iamx,iamy,iamz,mynx,myny,mynz
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
     integer(psb_ipk_) :: icoeff
     integer(psb_ipk_), allocatable     :: irow(:),icol(:),myidx(:)
     real(psb_dpk_), allocatable :: val(:)
@@ -230,15 +169,28 @@ contains
     sqdeltah = deltah*deltah
     deltah2  = 2.d0* deltah
 
+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+    
     ! initialize array descriptor and sparse matrix storage. provide an
     ! estimate of the number of non zeroes 
-
+    
     m   = idim*idim*idim
     n   = m
     nnz = ((n*9)/(np))
     if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
 
-    if (.not.present(iv)) then 
+    select case(partition_)
+    case(1)
+      ! A BLOCK partition 
       if (present(nrl)) then 
         nr = nrl
       else
@@ -258,24 +210,99 @@ contains
         call psb_abort(ictxt)
         return    
       end if
-    else
-      if (size(iv) /= m) then
-        write(psb_err_unit,*) iam, 'Initialization error IV',size(iv),m
+
+      !
+      ! First example  of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      ! 
+      call psb_cdall(ictxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A  partition  defined by the user through IV
+      
+      if (present(iv)) then 
+        if (size(iv) /= m) then
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+          info = -1
+          call psb_barrier(ictxt)
+          call psb_abort(ictxt)
+          return    
+        end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
         info = -1
         call psb_barrier(ictxt)
         call psb_abort(ictxt)
         return    
       end if
 
-    end if
-    call psb_barrier(ictxt)
-    t0 = psb_wtime()
-    if (present(iv)) then 
+      !
+      ! Second example  of use of CDALL: specify for each row the
+      ! process that owns it 
+      ! 
       call psb_cdall(ictxt,desc_a,info,vg=iv)
-    else
-      call psb_cdall(ictxt,desc_a,info,nl=nr)
-    end if
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 3-dimensional partition
+
+      ! A nifty MPI function will split the process list
+      npdims = 0
+      call mpi_dims_create(np,3,npdims,info)
+      npx = npdims(1)
+      npy = npdims(2)
+      npz = npdims(3)
+
+      allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
+      ! We can reuse idx2ijk for process indices as well. 
+      call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
+      ! Now let's split the 3D cube in hexahedra
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+      call dist1Didx(bndz,idim,npz)
+      mynz = bndz(iamz+1)-bndz(iamz)
+
+      ! How many indices do I own? 
+      nlr = mynx*myny*mynz
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndx(iamy+1)-1
+          do k=bndz(iamz),bndz(iamz+1)-1
+            nr = nr + 1
+            call ijk2idx(myidx(nr),i,j,k,idim,idim,idim)
+          end do
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny,mynz
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+      end if
 
+      !
+      ! Third example  of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vl=myidx)
+      
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
+      info = -1
+      call psb_barrier(ictxt)
+      call psb_abort(ictxt)
+      return
+    end select
+
+    
     if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
     ! define  rhs from boundary conditions; also build initial guess 
     if (info == psb_success_) call psb_geall(xv,desc_a,info)
@@ -303,8 +330,6 @@ contains
       goto 9999
     endif
 
-    myidx = desc_a%get_global_indices()
-    nlr = size(myidx)
 
     ! loop over rows belonging to current process in a block
     ! distribution.
@@ -319,18 +344,8 @@ contains
         ! local matrix pointer 
         glob_row=myidx(i)
         ! compute gridpoint coordinates
-        if (mod(glob_row,(idim*idim)) == 0) then
-          ix = glob_row/(idim*idim)
-        else
-          ix = glob_row/(idim*idim)+1
-        endif
-        if (mod((glob_row-(ix-1)*idim*idim),idim) == 0) then
-          iy = (glob_row-(ix-1)*idim*idim)/idim
-        else
-          iy = (glob_row-(ix-1)*idim*idim)/idim+1
-        endif
-        iz = glob_row-(ix-1)*idim*idim-(iy-1)*idim
-        ! x, y, x coordinates
+        call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
+        ! x, y, z coordinates
         x = (ix-1)*deltah
         y = (iy-1)*deltah
         z = (iz-1)*deltah
diff --git a/test/pargen/psb_s_pde2d.f90 b/test/pargen/psb_s_pde2d.f90
index ae670c51..48b42799 100644
--- a/test/pargen/psb_s_pde2d.f90
+++ b/test/pargen/psb_s_pde2d.f90
@@ -50,10 +50,12 @@
 !
 ! Note that if b1=b2=c=0., the PDE is the  Laplace equation.
 !
-! In this sample program the index space of the discretized
-! computational domain is first numbered sequentially in a standard way, 
-! then the corresponding vector is distributed according to a BLOCK
-! data distribution.
+! There are three choices available for data distribution:
+! 1. A simple BLOCK distribution
+! 2. A ditribution based on arbitrary assignment of indices to processes,
+!    typically from a graph partitioner
+! 3. A 2D distribution in which the unit square is partitioned
+!    into rectangles, each one assigned to a process.
 !
 module psb_s_pde2d_mod
 
@@ -91,8 +93,9 @@ contains
   !  the rhs. 
   !
   subroutine psb_s_gen_pde2d(ictxt,idim,a,bv,xv,desc_a,afmt,&
-       & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,nrl)
+       & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,partition,nrl,iv)
     use psb_base_mod
+    use psb_util_mod
     !
     !   Discretizes the partial differential equation
     ! 
@@ -120,7 +123,7 @@ contains
     class(psb_s_base_sparse_mat), optional :: amold
     class(psb_s_base_vect_type), optional :: vmold
     class(psb_i_base_vect_type), optional :: imold
-    integer(psb_ipk_), optional :: nrl
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)
 
     ! Local variables.
 
@@ -129,9 +132,13 @@ contains
     type(psb_s_coo_sparse_mat)  :: acoo
     type(psb_s_csr_sparse_mat)  :: acsr
     real(psb_spk_)           :: zt(nb),x,y,z
-    integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k
+    integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_
     integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
-    integer(psb_ipk_) :: np, iam, nr, nt
+    ! For 2D partition
+    integer(psb_ipk_) :: npx,npy,npdims(2),iamx,iamy,mynx,myny
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
     integer(psb_ipk_) :: icoeff
     integer(psb_ipk_), allocatable     :: irow(:),icol(:),myidx(:)
     real(psb_spk_), allocatable :: val(:)
@@ -161,6 +168,17 @@ contains
     sqdeltah = deltah*deltah
     deltah2  = 2.e0* deltah
 
+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+    
     ! initialize array descriptor and sparse matrix storage. provide an
     ! estimate of the number of non zeroes 
 
@@ -169,32 +187,121 @@ contains
     nnz = ((n*7)/(np))
     if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
 
-    if (present(nrl)) then 
-      nr = nrl
-    else
+    select case(partition_)
+    case(1)
+      ! A BLOCK partition 
+      if (present(nrl)) then 
+        nr = nrl
+      else
+        !
+        ! Using a simple BLOCK distribution.
+        !
+        nt = (m+np-1)/np
+        nr = max(0,min(nt,m-(iam*nt)))
+      end if
+
+      nt = nr
+      call psb_sum(ictxt,nt) 
+      if (nt /= m) then 
+        write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+        return    
+      end if
+
       !
-      ! Using a simple BLOCK distribution.
+      ! First example  of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      ! 
+      call psb_cdall(ictxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A  partition  defined by the user through IV
+      
+      if (present(iv)) then 
+        if (size(iv) /= m) then
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+          info = -1
+          call psb_barrier(ictxt)
+          call psb_abort(ictxt)
+          return    
+        end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+        return    
+      end if
+
       !
-      nt = (m+np-1)/np
-      nr = max(0,min(nt,m-(iam*nt)))
-    end if
+      ! Second example  of use of CDALL: specify for each row the
+      ! process that owns it 
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vg=iv)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 2-dimensional partition
+
+      ! A nifty MPI function will split the process list
+      npdims = 0
+      call mpi_dims_create(np,2,npdims,info)
+      npx = npdims(1)
+      npy = npdims(2)
+
+      allocate(bndx(0:npx),bndy(0:npy))
+      ! We can reuse idx2ijk for process indices as well. 
+      call idx2ijk(iamx,iamy,iam,npx,npy,base=0)
+      ! Now let's split the 2D square in rectangles
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+
+      ! How many indices do I own? 
+      nlr = mynx*myny
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndx(iamy+1)-1
+          nr = nr + 1
+          call ijk2idx(myidx(nr),i,j,idim,idim)
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+      end if
 
-    nt = nr
-    call psb_sum(ictxt,nt) 
-    if (nt /= m) then 
-      write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
+      !
+      ! Third example  of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vl=myidx)
+      
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
       info = -1
       call psb_barrier(ictxt)
       call psb_abort(ictxt)
-      return    
-    end if
-    call psb_barrier(ictxt)
-    t0 = psb_wtime()
-    call psb_cdall(ictxt,desc_a,info,nl=nr)
+      return
+    end select
+
+    
     if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
     ! define  rhs from boundary conditions; also build initial guess 
     if (info == psb_success_) call psb_geall(xv,desc_a,info)
     if (info == psb_success_) call psb_geall(bv,desc_a,info)
+
     call psb_barrier(ictxt)
     talc = psb_wtime()-t0
 
@@ -218,9 +325,6 @@ contains
     endif
 
 
-    myidx = desc_a%get_global_indices()
-    nlr = size(myidx)
-
     ! loop over rows belonging to current process in a block
     ! distribution.
 
@@ -234,13 +338,8 @@ contains
         ! local matrix pointer 
         glob_row=myidx(i)
         ! compute gridpoint coordinates
-        if (mod(glob_row,(idim)) == 0) then
-          ix = glob_row/(idim)
-        else
-          ix = glob_row/(idim)+1
-        endif
-        iy = (glob_row-(ix-1)*idim)
-        ! x, y
+        call idx2ijk(ix,iy,glob_row,idim,idim)
+        ! x, y coordinates
         x = (ix-1)*deltah
         y = (iy-1)*deltah
 
diff --git a/test/pargen/psb_s_pde3d.f90 b/test/pargen/psb_s_pde3d.f90
index e8055d7c..68e05806 100644
--- a/test/pargen/psb_s_pde3d.f90
+++ b/test/pargen/psb_s_pde3d.f90
@@ -50,10 +50,12 @@
 !
 ! Note that if b1=b2=b3=c=0., the PDE is the  Laplace equation.
 !
-! In this sample program the index space of the discretized
-! computational domain is first numbered sequentially in a standard way, 
-! then the corresponding vector is distributed according to a BLOCK
-! data distribution.
+! There are three choices available for data distribution:
+! 1. A simple BLOCK distribution
+! 2. A ditribution based on arbitrary assignment of indices to processes,
+!    typically from a graph partitioner
+! 3. A 3D distribution in which the unit cube is partitioned
+!    into subcubes, each one assigned to a process.
 !
 !
 module psb_s_pde3d_mod
@@ -85,14 +87,15 @@ contains
 
   end function s_null_func_3d
 
-!    
-!
-!  subroutine to allocate and fill in the coefficient matrix and
-!  the rhs. 
-!
+  
+  !
+  !  subroutine to allocate and fill in the coefficient matrix and
+  !  the rhs. 
+  !
   subroutine psb_s_gen_pde3d(ictxt,idim,a,bv,xv,desc_a,afmt,&
-       & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,nrl)
+       & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,partition,nrl,iv)
     use psb_base_mod
+    use psb_util_mod
     !
     !   Discretizes the partial differential equation
     ! 
@@ -120,7 +123,7 @@ contains
     class(psb_s_base_sparse_mat), optional :: amold
     class(psb_s_base_vect_type), optional :: vmold
     class(psb_i_base_vect_type), optional :: imold
-    integer(psb_ipk_), optional :: nrl
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)
 
     ! Local variables.
 
@@ -129,9 +132,13 @@ contains
     type(psb_s_coo_sparse_mat)  :: acoo
     type(psb_s_csr_sparse_mat)  :: acsr
     real(psb_spk_)           :: zt(nb),x,y,z
-    integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k
+    integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_
     integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
-    integer(psb_ipk_) :: np, iam, nr, nt
+    ! For 3D partition
+    integer(psb_ipk_) :: npx,npy,npz, npdims(3),iamx,iamy,iamz,mynx,myny,mynz
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
     integer(psb_ipk_) :: icoeff
     integer(psb_ipk_), allocatable     :: irow(:),icol(:),myidx(:)
     real(psb_spk_), allocatable :: val(:)
@@ -161,6 +168,17 @@ contains
     sqdeltah = deltah*deltah
     deltah2  = 2.e0* deltah
 
+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+    
     ! initialize array descriptor and sparse matrix storage. provide an
     ! estimate of the number of non zeroes 
 
@@ -169,29 +187,121 @@ contains
     nnz = ((n*9)/(np))
     if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
 
-    if (present(nrl)) then 
-      nr = nrl
-    else
+    select case(partition_)
+    case(1)
+      ! A BLOCK partition 
+      if (present(nrl)) then 
+        nr = nrl
+      else
+        !
+        ! Using a simple BLOCK distribution.
+        !
+        nt = (m+np-1)/np
+        nr = max(0,min(nt,m-(iam*nt)))
+      end if
+
+      nt = nr
+      call psb_sum(ictxt,nt) 
+      if (nt /= m) then 
+        write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+        return    
+      end if
+
       !
-      ! Using a simple BLOCK distribution.
+      ! First example  of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      ! 
+      call psb_cdall(ictxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A  partition  defined by the user through IV
+      
+      if (present(iv)) then 
+        if (size(iv) /= m) then
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+          info = -1
+          call psb_barrier(ictxt)
+          call psb_abort(ictxt)
+          return    
+        end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+        return    
+      end if
+
       !
-      nt = (m+np-1)/np
-      nr = max(0,min(nt,m-(iam*nt)))
-    end if
+      ! Second example  of use of CDALL: specify for each row the
+      ! process that owns it 
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vg=iv)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 3-dimensional partition
+
+      ! A nifty MPI function will split the process list
+      npdims = 0
+      call mpi_dims_create(np,3,npdims,info)
+      npx = npdims(1)
+      npy = npdims(2)
+      npz = npdims(3)
+
+      allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
+      ! We can reuse idx2ijk for process indices as well. 
+      call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
+      ! Now let's split the 3D cube in hexahedra
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+      call dist1Didx(bndz,idim,npz)
+      mynz = bndz(iamz+1)-bndz(iamz)
+
+      ! How many indices do I own? 
+      nlr = mynx*myny*mynz
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndx(iamy+1)-1
+          do k=bndz(iamz),bndz(iamz+1)-1
+            nr = nr + 1
+            call ijk2idx(myidx(nr),i,j,k,idim,idim,idim)
+          end do
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny,mynz
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+      end if
 
-    nt = nr
-    call psb_sum(ictxt,nt) 
-    if (nt /= m) then 
-      write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
+      !
+      ! Third example  of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vl=myidx)
+      
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
       info = -1
       call psb_barrier(ictxt)
       call psb_abort(ictxt)
-      return    
-    end if
+      return
+    end select
 
-    call psb_barrier(ictxt)
-    t0 = psb_wtime()
-    call psb_cdall(ictxt,desc_a,info,nl=nr)
+    
     if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
     ! define  rhs from boundary conditions; also build initial guess 
     if (info == psb_success_) call psb_geall(xv,desc_a,info)
@@ -219,8 +329,6 @@ contains
       goto 9999
     endif
 
-    myidx = desc_a%get_global_indices()
-    nlr = size(myidx)
 
     ! loop over rows belonging to current process in a block
     ! distribution.
@@ -235,18 +343,8 @@ contains
         ! local matrix pointer 
         glob_row=myidx(i)
         ! compute gridpoint coordinates
-        if (mod(glob_row,(idim*idim)) == 0) then
-          ix = glob_row/(idim*idim)
-        else
-          ix = glob_row/(idim*idim)+1
-        endif
-        if (mod((glob_row-(ix-1)*idim*idim),idim) == 0) then
-          iy = (glob_row-(ix-1)*idim*idim)/idim
-        else
-          iy = (glob_row-(ix-1)*idim*idim)/idim+1
-        endif
-        iz = glob_row-(ix-1)*idim*idim-(iy-1)*idim
-        ! x, y, x coordinates
+        call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
+        ! x, y, z coordinates
         x = (ix-1)*deltah
         y = (iy-1)*deltah
         z = (iz-1)*deltah
diff --git a/test/pargen/tryidxijk.f90 b/test/pargen/tryidxijk.f90
index f7c09465..68404ac4 100644
--- a/test/pargen/tryidxijk.f90
+++ b/test/pargen/tryidxijk.f90
@@ -1,5 +1,5 @@
 program tryyidxijk
-  use psb_base_mod, only : psb_ipk_
+  use psb_util_mod
   implicit none 
 
   integer(psb_ipk_) :: nx,ny,nz, base, i, j, k, idx,npx,npy,npz
@@ -41,109 +41,4 @@ program tryyidxijk
   write(*,*) 'SZZ:',v(1:npz)-v(0:npz-1)
   
   
-contains
-  !
-  ! Given  a global index IDX and the domain size (NX,NY,NZ)
-  ! compute the point coordinates (I,J,K) 
-  ! Optional argument: base 0 or 1, default 1
-  !
-  ! This mapping is equivalent to a loop nesting:
-  !  idx = base
-  !  do i=1,nx
-  !    do j=1,ny
-  !      do k=1,nz
-  !         ijk2idx(i,j,k) = idx
-  !         idx = idx + 1
-  subroutine  idx2ijk(i,j,k,idx,nx,ny,nz,base)
-    use psb_base_mod, only : psb_ipk_
-    implicit none 
-    integer(psb_ipk_), intent(out) :: i,j,k
-    integer(psb_ipk_), intent(in)  :: idx,nx,ny,nz
-    integer(psb_ipk_), intent(in), optional :: base
-    
-    integer(psb_ipk_) :: base_, idx_
-    if (present(base)) then
-      base_  = base
-    else
-      base_ = 1
-    end if
-
-    idx_ = idx - base_
-    
-    k = mod(idx_,nz)         + base_
-    j = mod(idx_/nz,ny)      + base_
-    i = mod(idx_/(ny*nz),nx) + base_
-
-  end subroutine idx2ijk
-  
-  !
-  ! Given  a triple (I,J,K) and  the domain size (NX,NY,NZ)
-  ! compute the global index IDX 
-  ! Optional argument: base 0 or 1, default 1
-  !
-  ! This mapping is equivalent to a loop nesting:
-  !  idx = base
-  !  do i=1,nx
-  !    do j=1,ny
-  !      do k=1,nz
-  !         ijk2idx(i,j,k) = idx
-  !         idx = idx + 1
-  subroutine  ijk2idx(idx,i,j,k,nx,ny,nz,base)
-    use psb_base_mod, only : psb_ipk_
-    implicit none 
-    integer(psb_ipk_), intent(out) :: idx
-    integer(psb_ipk_), intent(in)  :: i,j,k,nx,ny,nz
-    integer(psb_ipk_), intent(in), optional :: base
-    
-    integer(psb_ipk_) :: base_
-    if (present(base)) then
-      base_  = base
-    else
-      base_ = 1
-    end if
-
-    idx = ((i-base_)*nz*ny + (j-base_)*nz + k - base_) + base_
-    
-  end subroutine ijk2idx
-
-  !
-  ! dist1Didx
-  !   Given an index space [base : N-(1-base)] and
-  !   a set of NP processes, split the index base as
-  !   evenly as possible, i.e. difference in size 
-  !   between any two processes is either 0 or 1,
-  !   then return the boundaries in a vector
-  !   such that
-  !     V(P)   : first index owned by process P
-  !     V(P+1) : first index owned by process P+1
-  !
-  subroutine dist1Didx(v,n,np,base)
-    use psb_base_mod, only : psb_ipk_
-    implicit none 
-    integer(psb_ipk_), intent(out) :: v(:)
-    integer(psb_ipk_), intent(in)  :: n, np
-    integer(psb_ipk_), intent(in), optional :: base
-    !
-    integer(psb_ipk_) :: base_, nb, i
-
-    if (present(base)) then
-      base_  = base
-    else
-      base_ = 1
-    end if
-    
-    nb = n/np
-    do i=1,mod(n,np)
-      v(i) = nb + 1
-    end do
-    do i=mod(n,np)+1,np
-      v(i) = nb 
-    end do
-    v(2:np+1)  = v(1:np)
-    v(1) = base_
-    do i=2,np+1
-      v(i) = v(i) + v(i-1)
-    end do
-  end subroutine dist1Didx
-  
 end program tryyidxijk
diff --git a/util/Makefile b/util/Makefile
index f66b79ab..1359a762 100644
--- a/util/Makefile
+++ b/util/Makefile
@@ -7,7 +7,7 @@ MODDIR=../modules
 
 HERE=.
 
-BASEOBJS= psb_blockpart_mod.o psb_metispart_mod.o \
+BASEOBJS= psb_blockpart_mod.o psb_metispart_mod.o psb_partidx_mod.o \
       psb_hbio_mod.o psb_mmio_mod.o psb_mat_dist_mod.o \
       psb_s_mat_dist_mod.o psb_d_mat_dist_mod.o psb_c_mat_dist_mod.o psb_z_mat_dist_mod.o \
       psb_renum_mod.o psb_gps_mod.o
diff --git a/util/psb_partidx_mod.F90 b/util/psb_partidx_mod.F90
new file mode 100644
index 00000000..9b294e8f
--- /dev/null
+++ b/util/psb_partidx_mod.F90
@@ -0,0 +1,259 @@
+!   
+!                Parallel Sparse BLAS  version 3.5
+!      (C) Copyright 2006-2018
+!        Salvatore Filippone    
+!        Alfredo Buttari      
+!   
+!    Redistribution and use in source and binary forms, with or without
+!    modification, are permitted provided that the following conditions
+!    are met:
+!      1. Redistributions of source code must retain the above copyright
+!         notice, this list of conditions and the following disclaimer.
+!      2. Redistributions in binary form must reproduce the above copyright
+!         notice, this list of conditions, and the following disclaimer in the
+!         documentation and/or other materials provided with the distribution.
+!      3. The name of the PSBLAS group or the names of its contributors may
+!         not be used to endorse or promote products derived from this
+!         software without specific written permission.
+!   
+!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+!    POSSIBILITY OF SUCH DAMAGE.
+!   
+!    
+!
+! Purpose: 
+!  Proide  functions to handle a distribution of a general
+!  rectangular 2/3/n-dimensional domain onto
+!  a rectangular 2/3/n-dimensional grid of processes
+!
+!  See test/pargen/psb_X_pdeNd for examples of usage
+!
+module psb_partidx_mod
+  use psb_base_mod, only : psb_ipk_
+
+  interface idx2ijk
+    module procedure idx2ijk3d, idx2ijkv, idx2ijk2d
+  end interface idx2ijk
+
+  interface ijk2idx
+    module procedure ijk2idx3d, ijk2idxv, ijk2idx2d
+  end interface ijk2idx
+
+
+contains
+  !
+  ! Given  a global index IDX and the domain size (NX,NY,NZ)
+  ! compute the point coordinates (I,J,K) 
+  ! Optional argument: base 0 or 1, default 1
+  !
+  ! This mapping is equivalent to a loop nesting:
+  !  idx = base
+  !  do i=1,nx
+  !    do j=1,ny
+  !      do k=1,nz
+  !         ijk2idx(i,j,k) = idx
+  !         idx = idx + 1
+  subroutine  idx2ijk3d(i,j,k,idx,nx,ny,nz,base)
+    use psb_base_mod, only : psb_ipk_
+    implicit none 
+    integer(psb_ipk_), intent(out) :: i,j,k
+    integer(psb_ipk_), intent(in)  :: idx,nx,ny,nz
+    integer(psb_ipk_), intent(in), optional :: base
+    
+    integer(psb_ipk_) :: coords(3)
+
+    call idx2ijk(coords,idx,[nx,ny,nz],base)
+    
+    k = coords(3)
+    j = coords(2)
+    i = coords(1)
+
+  end subroutine idx2ijk3d
+  
+  subroutine  idx2ijk2d(i,j,idx,nx,ny,base)
+    use psb_base_mod, only : psb_ipk_
+    implicit none 
+    integer(psb_ipk_), intent(out) :: i,j
+    integer(psb_ipk_), intent(in)  :: idx,nx,ny
+    integer(psb_ipk_), intent(in), optional :: base
+    
+    integer(psb_ipk_) :: coords(2)
+
+    call idx2ijk(coords,idx,[nx,ny],base)
+    
+    j = coords(2)
+    i = coords(1)
+    
+  end subroutine idx2ijk2d
+  
+  !
+  ! Given  a global index IDX and the domain size (NX,NY,NZ)
+  ! compute the point coordinates (I,J,K) 
+  ! Optional argument: base 0 or 1, default 1
+  !
+  ! This mapping is equivalent to a loop nesting:
+  !  idx = base
+  !  do i=1,nx
+  !    do j=1,ny
+  !      do k=1,nz
+  !         ijk2idx(i,j,k) = idx
+  !         idx = idx + 1
+  subroutine  idx2ijkv(coords,idx,dims,base)
+    use psb_base_mod, only : psb_ipk_
+    implicit none 
+    integer(psb_ipk_), intent(out) :: coords(:)
+    integer(psb_ipk_), intent(in)  :: idx,dims(:)
+    integer(psb_ipk_), intent(in), optional :: base
+
+    integer(psb_ipk_) :: base_, idx_, i, sz
+    if (present(base)) then
+      base_  = base
+    else
+      base_ = 1
+    end if
+
+    idx_ = idx - base_
+
+    if (size(coords) < size(dims)) then
+      write(0,*) 'Error: size mismatch ',size(coords),size(dims)
+      coords = 0
+      return
+    end if
+
+    !
+    ! This code is equivalent to (3D case)
+    ! k = mod(idx_,nz) + base_
+    ! j = mod(idx_/nz,ny) + base_
+    ! i = mod(idx_/(nx*ny),nx) + base_
+    !
+    do i=size(dims),1,-1
+      coords(i) = mod(idx_,dims(i)) + base_
+      idx_ = idx_ / dims(i)
+    end do
+
+  end subroutine idx2ijkv
+  
+  !
+  ! Given  a triple (I,J,K) and  the domain size (NX,NY,NZ)
+  ! compute the global index IDX 
+  ! Optional argument: base 0 or 1, default 1
+  !
+  ! This mapping is equivalent to a loop nesting:
+  !  idx = base
+  !  do i=1,nx
+  !    do j=1,ny
+  !      do k=1,nz
+  !         ijk2idx(i,j,k) = idx
+  !         idx = idx + 1
+  subroutine  ijk2idxv(idx,coords,dims,base)
+    use psb_base_mod, only : psb_ipk_
+    implicit none 
+    integer(psb_ipk_), intent(in)  :: coords(:),dims(:)
+    integer(psb_ipk_), intent(out) :: idx
+    integer(psb_ipk_), intent(in), optional :: base
+    
+    integer(psb_ipk_) :: base_, i, sz
+    if (present(base)) then
+      base_  = base
+    else
+      base_ = 1
+    end if
+    sz = size(coords)
+    if (sz /= size(dims)) then
+      write(0,*) 'Error: size mismatch ',size(coords),size(dims)
+      idx = 0
+      return
+    end if
+
+    idx = coords(1) - base_
+    do i=2, sz
+      idx = (idx * dims(i)) + coords(i) - base_
+    end do
+    idx = idx + base_
+    
+  end subroutine ijk2idxv
+  !
+  ! Given  a triple (I,J,K) and  the domain size (NX,NY,NZ)
+  ! compute the global index IDX 
+  ! Optional argument: base 0 or 1, default 1
+  !
+  ! This mapping is equivalent to a loop nesting:
+  !  idx = base
+  !  do i=1,nx
+  !    do j=1,ny
+  !      do k=1,nz
+  !         ijk2idx(i,j,k) = idx
+  !         idx = idx + 1
+  subroutine  ijk2idx3d(idx,i,j,k,nx,ny,nz,base)
+    use psb_base_mod, only : psb_ipk_
+    implicit none 
+    integer(psb_ipk_), intent(out) :: idx
+    integer(psb_ipk_), intent(in)  :: i,j,k,nx,ny,nz
+    integer(psb_ipk_), intent(in), optional :: base
+    
+    !    idx = ((i-base_)*nz*ny + (j-base_)*nz + k - base_) + base_
+    call ijk2idx(idx,[i,j,k],[nx,ny,nz],base)
+  end subroutine ijk2idx3d
+
+  subroutine  ijk2idx2d(idx,i,j,nx,ny,base)
+    use psb_base_mod, only : psb_ipk_
+    implicit none 
+    integer(psb_ipk_), intent(out) :: idx
+    integer(psb_ipk_), intent(in)  :: i,j,nx,ny
+    integer(psb_ipk_), intent(in), optional :: base
+    
+    !    idx = ((i-base_)*ny + (j-base_) + base_
+    call ijk2idx(idx,[i,j],[nx,ny],base)
+  end subroutine ijk2idx2d
+
+  !
+  ! dist1Didx
+  !   Given an index space [base : N-(1-base)] and
+  !   a set of NP processes, split the index base as
+  !   evenly as possible, i.e. difference in size 
+  !   between any two processes is either 0 or 1,
+  !   then return the boundaries in a vector
+  !   such that
+  !     V(P)   : first index owned by process P
+  !     V(P+1) : first index owned by process P+1
+  !
+  subroutine dist1Didx(v,n,np,base)
+    use psb_base_mod, only : psb_ipk_
+    implicit none 
+    integer(psb_ipk_), intent(out) :: v(:)
+    integer(psb_ipk_), intent(in)  :: n, np
+    integer(psb_ipk_), intent(in), optional :: base
+    !
+    integer(psb_ipk_) :: base_, nb, i
+
+    if (present(base)) then
+      base_  = base
+    else
+      base_ = 1
+    end if
+    
+    nb = n/np
+    do i=1,mod(n,np)
+      v(i) = nb + 1
+    end do
+    do i=mod(n,np)+1,np
+      v(i) = nb 
+    end do
+    v(2:np+1)  = v(1:np)
+    v(1) = base_
+    do i=2,np+1
+      v(i) = v(i) + v(i-1)
+    end do
+  end subroutine dist1Didx
+  
+end module psb_partidx_mod
+
diff --git a/util/psb_util_mod.f90 b/util/psb_util_mod.f90
index 42f536ee..6743a2d4 100644
--- a/util/psb_util_mod.f90
+++ b/util/psb_util_mod.f90
@@ -34,11 +34,10 @@
 module psb_util_mod
   use psb_blockpart_mod
   use psb_metispart_mod
+  use psb_partidx_mod
   use psb_hbio_mod
   use psb_mmio_mod
   use psb_mat_dist_mod
   use psb_renum_mod
-!!$  use psb_d_genpde_mod
-!!$  use psb_s_genpde_mod
 end module psb_util_mod