From b76fb7cd69c2a1ed7758b7c3631700edfee35d68 Mon Sep 17 00:00:00 2001
From: Salvatore Filippone <salvatore.filippone@cranfield.ac.uk>
Date: Sun, 18 Feb 2018 12:28:08 +0000
Subject: [PATCH] Put in new cartesian data partition from PSBLAS. Minor typo
 in Makefile.

---
 Makefile                     |   3 +-
 tests/pdegen/mld_d_pde2d.f90 | 167 ++++++++++++++++++++++++++++-------
 tests/pdegen/mld_d_pde3d.f90 | 151 ++++++++++++++++++++++++-------
 tests/pdegen/mld_s_pde2d.f90 | 167 ++++++++++++++++++++++++++++-------
 tests/pdegen/mld_s_pde3d.f90 | 151 ++++++++++++++++++++++++-------
 5 files changed, 502 insertions(+), 137 deletions(-)

diff --git a/Makefile b/Makefile
index eb496629..f72d29d0 100644
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,8 @@ install: all
 	   /bin/cp -fr docs/*pdf docs/html $(INSTALL_DOCSDIR)
 	$(SHELL) ./mkdir.sh  $(INSTALL_DOCSDIR) && \
 	   $(INSTALL_DATA) README LICENSE $(INSTALL_DOCSDIR)
-	$(SHELL) ./mkdir.sh  $(INSTALL_SAMPLESDIR) && ./mkdir.sh  $(INSTALL_SAMPLESDIR)/simple &&\
+	$(SHELL) ./mkdir.sh  $(INSTALL_SAMPLESDIR) && \
+		 ./mkdir.sh  $(INSTALL_SAMPLESDIR)/simple &&\
 	 	 ./mkdir.sh  $(INSTALL_SAMPLESDIR)/advanced && \
 		(cd examples; /bin/cp -fr pdegen fileread $(INSTALL_SAMPLESDIR)/simple ) && \
 		(cd tests; /bin/cp -fr pdegen fileread $(INSTALL_SAMPLESDIR)/advanced )
diff --git a/tests/pdegen/mld_d_pde2d.f90 b/tests/pdegen/mld_d_pde2d.f90
index 67be3884..15ccd34a 100644
--- a/tests/pdegen/mld_d_pde2d.f90
+++ b/tests/pdegen/mld_d_pde2d.f90
@@ -56,10 +56,12 @@
 !
 ! Note that if b1=b2=c=0., the PDE is the  Laplace equation.
 !
-! In this sample program the index space of the discretized
-! computational domain is first numbered sequentially in a standard way, 
-! then the corresponding vector is distributed according to a BLOCK
-! data distribution.
+! There are three choices available for data distribution:
+! 1. A simple BLOCK distribution
+! 2. A ditribution based on arbitrary assignment of indices to processes,
+!    typically from a graph partitioner
+! 3. A 2D distribution in which the unit square is partitioned
+!    into rectangles, each one assigned to a process.
 !
 module mld_d_pde2d_mod
   use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_desc_type,&
@@ -94,8 +96,9 @@ contains
   !  the rhs. 
   !
   subroutine mld_d_gen_pde2d(ictxt,idim,a,bv,xv,desc_a,afmt,&
-       & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,nrl)
+       & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,partition,nrl,iv)
     use psb_base_mod
+    use psb_util_mod    
     !
     !   Discretizes the partial differential equation
     ! 
@@ -123,7 +126,7 @@ contains
     class(psb_d_base_sparse_mat), optional :: amold
     class(psb_d_base_vect_type), optional :: vmold
     class(psb_i_base_vect_type), optional :: imold
-    integer(psb_ipk_), optional :: nrl
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)
 
     ! Local variables.
 
@@ -132,9 +135,13 @@ contains
     type(psb_d_coo_sparse_mat)  :: acoo
     type(psb_d_csr_sparse_mat)  :: acsr
     real(psb_dpk_)           :: zt(nb),x,y,z
-    integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k
+    integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_
     integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
-    integer(psb_ipk_) :: np, iam, nr, nt
+    ! For 2D partition
+    integer(psb_ipk_) :: npx,npy,npdims(2),iamx,iamy,mynx,myny
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
     integer(psb_ipk_) :: icoeff
     integer(psb_ipk_), allocatable     :: irow(:),icol(:),myidx(:)
     real(psb_dpk_), allocatable :: val(:)
@@ -164,6 +171,17 @@ contains
     sqdeltah = deltah*deltah
     deltah2  = 2.e0* deltah
 
+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+    
     ! initialize array descriptor and sparse matrix storage. provide an
     ! estimate of the number of non zeroes 
 
@@ -172,32 +190,121 @@ contains
     nnz = ((n*7)/(np))
     if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
 
-    if (present(nrl)) then 
-      nr = nrl
-    else
+    select case(partition_)
+    case(1)
+      ! A BLOCK partition 
+      if (present(nrl)) then 
+        nr = nrl
+      else
+        !
+        ! Using a simple BLOCK distribution.
+        !
+        nt = (m+np-1)/np
+        nr = max(0,min(nt,m-(iam*nt)))
+      end if
+
+      nt = nr
+      call psb_sum(ictxt,nt) 
+      if (nt /= m) then 
+        write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+        return    
+      end if
+
       !
-      ! Using a simple BLOCK distribution.
+      ! First example  of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      ! 
+      call psb_cdall(ictxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A  partition  defined by the user through IV
+      
+      if (present(iv)) then 
+        if (size(iv) /= m) then
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+          info = -1
+          call psb_barrier(ictxt)
+          call psb_abort(ictxt)
+          return    
+        end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+        return    
+      end if
+
       !
-      nt = (m+np-1)/np
-      nr = max(0,min(nt,m-(iam*nt)))
-    end if
+      ! Second example  of use of CDALL: specify for each row the
+      ! process that owns it 
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vg=iv)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 2-dimensional partition
+
+      ! A nifty MPI function will split the process list
+      npdims = 0
+      call mpi_dims_create(np,2,npdims,info)
+      npx = npdims(1)
+      npy = npdims(2)
+
+      allocate(bndx(0:npx),bndy(0:npy))
+      ! We can reuse idx2ijk for process indices as well. 
+      call idx2ijk(iamx,iamy,iam,npx,npy,base=0)
+      ! Now let's split the 2D square in rectangles
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+
+      ! How many indices do I own? 
+      nlr = mynx*myny
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndx(iamy+1)-1
+          nr = nr + 1
+          call ijk2idx(myidx(nr),i,j,idim,idim)
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+      end if
 
-    nt = nr
-    call psb_sum(ictxt,nt) 
-    if (nt /= m) then 
-      write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
+      !
+      ! Third example  of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vl=myidx)
+      
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
       info = -1
       call psb_barrier(ictxt)
       call psb_abort(ictxt)
-      return    
-    end if
-    call psb_barrier(ictxt)
-    t0 = psb_wtime()
-    call psb_cdall(ictxt,desc_a,info,nl=nr)
+      return
+    end select
+
+    
     if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
     ! define  rhs from boundary conditions; also build initial guess 
     if (info == psb_success_) call psb_geall(xv,desc_a,info)
     if (info == psb_success_) call psb_geall(bv,desc_a,info)
+
     call psb_barrier(ictxt)
     talc = psb_wtime()-t0
 
@@ -221,9 +328,6 @@ contains
     endif
 
 
-    myidx = desc_a%get_global_indices()
-    nlr = size(myidx)
-
     ! loop over rows belonging to current process in a block
     ! distribution.
 
@@ -237,13 +341,8 @@ contains
         ! local matrix pointer 
         glob_row=myidx(i)
         ! compute gridpoint coordinates
-        if (mod(glob_row,(idim)) == 0) then
-          ix = glob_row/(idim)
-        else
-          ix = glob_row/(idim)+1
-        endif
-        iy = (glob_row-(ix-1)*idim)
-        ! x, y
+        call idx2ijk(ix,iy,glob_row,idim,idim)
+        ! x, y coordinates
         x = (ix-1)*deltah
         y = (iy-1)*deltah
 
diff --git a/tests/pdegen/mld_d_pde3d.f90 b/tests/pdegen/mld_d_pde3d.f90
index 63cb0ac3..7f89d06c 100644
--- a/tests/pdegen/mld_d_pde3d.f90
+++ b/tests/pdegen/mld_d_pde3d.f90
@@ -57,10 +57,12 @@
 !
 ! Note that if b1=b2=b3=c=0., the PDE is the  Laplace equation.
 !
-! In this sample program the index space of the discretized
-! computational domain is first numbered sequentially in a standard way, 
-! then the corresponding vector is distributed according to a BLOCK
-! data distribution.
+! There are three choices available for data distribution:
+! 1. A simple BLOCK distribution
+! 2. A ditribution based on arbitrary assignment of indices to processes,
+!    typically from a graph partitioner
+! 3. A 3D distribution in which the unit cube is partitioned
+!    into subcubes, each one assigned to a process.
 !
 module mld_d_pde3d_mod
   use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_desc_type,&
@@ -96,8 +98,9 @@ contains
   !  the rhs. 
   !
   subroutine mld_d_gen_pde3d(ictxt,idim,a,bv,xv,desc_a,afmt,&
-       & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,nrl,iv)
+       & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,partition,nrl,iv)
     use psb_base_mod
+    use psb_util_mod
     !
     !   Discretizes the partial differential equation
     ! 
@@ -125,7 +128,7 @@ contains
     class(psb_d_base_sparse_mat), optional :: amold
     class(psb_d_base_vect_type), optional :: vmold 
     class(psb_i_base_vect_type), optional :: imold
-    integer(psb_ipk_), optional :: nrl,iv(:)
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)
 
     ! Local variables.
 
@@ -134,9 +137,13 @@ contains
     type(psb_d_coo_sparse_mat)  :: acoo
     type(psb_d_csr_sparse_mat)  :: acsr
     real(psb_dpk_)           :: zt(nb),x,y,z
-    integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k
+    integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_
     integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
-    integer(psb_ipk_) :: np, iam, nr, nt
+    ! For 3D partition
+    integer(psb_ipk_) :: npx,npy,npz, npdims(3),iamx,iamy,iamz,mynx,myny,mynz
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
     integer(psb_ipk_) :: icoeff
     integer(psb_ipk_), allocatable     :: irow(:),icol(:),myidx(:)
     real(psb_dpk_), allocatable :: val(:)
@@ -166,15 +173,28 @@ contains
     sqdeltah = deltah*deltah
     deltah2  = 2.d0* deltah
 
+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+    
     ! initialize array descriptor and sparse matrix storage. provide an
     ! estimate of the number of non zeroes 
-
+    
     m   = idim*idim*idim
     n   = m
     nnz = ((n*9)/(np))
     if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
 
-    if (.not.present(iv)) then 
+    select case(partition_)
+    case(1)
+      ! A BLOCK partition 
       if (present(nrl)) then 
         nr = nrl
       else
@@ -194,24 +214,99 @@ contains
         call psb_abort(ictxt)
         return    
       end if
-    else
-      if (size(iv) /= m) then
-        write(psb_err_unit,*) iam, 'Initialization error IV',size(iv),m
+
+      !
+      ! First example  of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      ! 
+      call psb_cdall(ictxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A  partition  defined by the user through IV
+      
+      if (present(iv)) then 
+        if (size(iv) /= m) then
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+          info = -1
+          call psb_barrier(ictxt)
+          call psb_abort(ictxt)
+          return    
+        end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
         info = -1
         call psb_barrier(ictxt)
         call psb_abort(ictxt)
         return    
       end if
 
-    end if
-    call psb_barrier(ictxt)
-    t0 = psb_wtime()
-    if (present(iv)) then 
+      !
+      ! Second example  of use of CDALL: specify for each row the
+      ! process that owns it 
+      ! 
       call psb_cdall(ictxt,desc_a,info,vg=iv)
-    else
-      call psb_cdall(ictxt,desc_a,info,nl=nr)
-    end if
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 3-dimensional partition
+
+      ! A nifty MPI function will split the process list
+      npdims = 0
+      call mpi_dims_create(np,3,npdims,info)
+      npx = npdims(1)
+      npy = npdims(2)
+      npz = npdims(3)
+
+      allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
+      ! We can reuse idx2ijk for process indices as well. 
+      call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
+      ! Now let's split the 3D cube in hexahedra
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+      call dist1Didx(bndz,idim,npz)
+      mynz = bndz(iamz+1)-bndz(iamz)
+
+      ! How many indices do I own? 
+      nlr = mynx*myny*mynz
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndx(iamy+1)-1
+          do k=bndz(iamz),bndz(iamz+1)-1
+            nr = nr + 1
+            call ijk2idx(myidx(nr),i,j,k,idim,idim,idim)
+          end do
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny,mynz
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+      end if
+
+      !
+      ! Third example  of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vl=myidx)
+      
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
+      info = -1
+      call psb_barrier(ictxt)
+      call psb_abort(ictxt)
+      return
+    end select
 
+    
     if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
     ! define  rhs from boundary conditions; also build initial guess 
     if (info == psb_success_) call psb_geall(xv,desc_a,info)
@@ -239,8 +334,6 @@ contains
       goto 9999
     endif
 
-    myidx = desc_a%get_global_indices()
-    nlr = size(myidx)
 
     ! loop over rows belonging to current process in a block
     ! distribution.
@@ -255,18 +348,8 @@ contains
         ! local matrix pointer 
         glob_row=myidx(i)
         ! compute gridpoint coordinates
-        if (mod(glob_row,(idim*idim)) == 0) then
-          ix = glob_row/(idim*idim)
-        else
-          ix = glob_row/(idim*idim)+1
-        endif
-        if (mod((glob_row-(ix-1)*idim*idim),idim) == 0) then
-          iy = (glob_row-(ix-1)*idim*idim)/idim
-        else
-          iy = (glob_row-(ix-1)*idim*idim)/idim+1
-        endif
-        iz = glob_row-(ix-1)*idim*idim-(iy-1)*idim
-        ! x, y, x coordinates
+        call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
+        ! x, y, z coordinates
         x = (ix-1)*deltah
         y = (iy-1)*deltah
         z = (iz-1)*deltah
diff --git a/tests/pdegen/mld_s_pde2d.f90 b/tests/pdegen/mld_s_pde2d.f90
index 01bd11d7..63d82cd3 100644
--- a/tests/pdegen/mld_s_pde2d.f90
+++ b/tests/pdegen/mld_s_pde2d.f90
@@ -56,10 +56,12 @@
 !
 ! Note that if b1=b2=c=0., the PDE is the  Laplace equation.
 !
-! In this sample program the index space of the discretized
-! computational domain is first numbered sequentially in a standard way, 
-! then the corresponding vector is distributed according to a BLOCK
-! data distribution.
+! There are three choices available for data distribution:
+! 1. A simple BLOCK distribution
+! 2. A ditribution based on arbitrary assignment of indices to processes,
+!    typically from a graph partitioner
+! 3. A 2D distribution in which the unit square is partitioned
+!    into rectangles, each one assigned to a process.
 !
 module mld_s_pde2d_mod
   use psb_base_mod, only : psb_spk_, psb_ipk_, psb_desc_type,&
@@ -94,8 +96,9 @@ contains
   !  the rhs. 
   !
   subroutine mld_s_gen_pde2d(ictxt,idim,a,bv,xv,desc_a,afmt,&
-       & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,nrl)
+       & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,partition,nrl,iv)
     use psb_base_mod
+    use psb_util_mod    
     !
     !   Discretizes the partial differential equation
     ! 
@@ -123,7 +126,7 @@ contains
     class(psb_s_base_sparse_mat), optional :: amold
     class(psb_s_base_vect_type), optional :: vmold
     class(psb_i_base_vect_type), optional :: imold
-    integer(psb_ipk_), optional :: nrl
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)
 
     ! Local variables.
 
@@ -132,9 +135,13 @@ contains
     type(psb_s_coo_sparse_mat)  :: acoo
     type(psb_s_csr_sparse_mat)  :: acsr
     real(psb_spk_)           :: zt(nb),x,y,z
-    integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k
+    integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_
     integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
-    integer(psb_ipk_) :: np, iam, nr, nt
+    ! For 2D partition
+    integer(psb_ipk_) :: npx,npy,npdims(2),iamx,iamy,mynx,myny
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
     integer(psb_ipk_) :: icoeff
     integer(psb_ipk_), allocatable     :: irow(:),icol(:),myidx(:)
     real(psb_spk_), allocatable :: val(:)
@@ -164,6 +171,17 @@ contains
     sqdeltah = deltah*deltah
     deltah2  = 2.e0* deltah
 
+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+    
     ! initialize array descriptor and sparse matrix storage. provide an
     ! estimate of the number of non zeroes 
 
@@ -172,32 +190,121 @@ contains
     nnz = ((n*7)/(np))
     if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
 
-    if (present(nrl)) then 
-      nr = nrl
-    else
+    select case(partition_)
+    case(1)
+      ! A BLOCK partition 
+      if (present(nrl)) then 
+        nr = nrl
+      else
+        !
+        ! Using a simple BLOCK distribution.
+        !
+        nt = (m+np-1)/np
+        nr = max(0,min(nt,m-(iam*nt)))
+      end if
+
+      nt = nr
+      call psb_sum(ictxt,nt) 
+      if (nt /= m) then 
+        write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+        return    
+      end if
+
       !
-      ! Using a simple BLOCK distribution.
+      ! First example  of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      ! 
+      call psb_cdall(ictxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A  partition  defined by the user through IV
+      
+      if (present(iv)) then 
+        if (size(iv) /= m) then
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+          info = -1
+          call psb_barrier(ictxt)
+          call psb_abort(ictxt)
+          return    
+        end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+        return    
+      end if
+
       !
-      nt = (m+np-1)/np
-      nr = max(0,min(nt,m-(iam*nt)))
-    end if
+      ! Second example  of use of CDALL: specify for each row the
+      ! process that owns it 
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vg=iv)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 2-dimensional partition
+
+      ! A nifty MPI function will split the process list
+      npdims = 0
+      call mpi_dims_create(np,2,npdims,info)
+      npx = npdims(1)
+      npy = npdims(2)
+
+      allocate(bndx(0:npx),bndy(0:npy))
+      ! We can reuse idx2ijk for process indices as well. 
+      call idx2ijk(iamx,iamy,iam,npx,npy,base=0)
+      ! Now let's split the 2D square in rectangles
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+
+      ! How many indices do I own? 
+      nlr = mynx*myny
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndx(iamy+1)-1
+          nr = nr + 1
+          call ijk2idx(myidx(nr),i,j,idim,idim)
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+      end if
 
-    nt = nr
-    call psb_sum(ictxt,nt) 
-    if (nt /= m) then 
-      write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
+      !
+      ! Third example  of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vl=myidx)
+      
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
       info = -1
       call psb_barrier(ictxt)
       call psb_abort(ictxt)
-      return    
-    end if
-    call psb_barrier(ictxt)
-    t0 = psb_wtime()
-    call psb_cdall(ictxt,desc_a,info,nl=nr)
+      return
+    end select
+
+    
     if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
     ! define  rhs from boundary conditions; also build initial guess 
     if (info == psb_success_) call psb_geall(xv,desc_a,info)
     if (info == psb_success_) call psb_geall(bv,desc_a,info)
+
     call psb_barrier(ictxt)
     talc = psb_wtime()-t0
 
@@ -221,9 +328,6 @@ contains
     endif
 
 
-    myidx = desc_a%get_global_indices()
-    nlr = size(myidx)
-
     ! loop over rows belonging to current process in a block
     ! distribution.
 
@@ -237,13 +341,8 @@ contains
         ! local matrix pointer 
         glob_row=myidx(i)
         ! compute gridpoint coordinates
-        if (mod(glob_row,(idim)) == 0) then
-          ix = glob_row/(idim)
-        else
-          ix = glob_row/(idim)+1
-        endif
-        iy = (glob_row-(ix-1)*idim)
-        ! x, y
+        call idx2ijk(ix,iy,glob_row,idim,idim)
+        ! x, y coordinates
         x = (ix-1)*deltah
         y = (iy-1)*deltah
 
diff --git a/tests/pdegen/mld_s_pde3d.f90 b/tests/pdegen/mld_s_pde3d.f90
index 6c7384d9..21d9d1d0 100644
--- a/tests/pdegen/mld_s_pde3d.f90
+++ b/tests/pdegen/mld_s_pde3d.f90
@@ -57,10 +57,12 @@
 !
 ! Note that if b1=b2=b3=c=0., the PDE is the  Laplace equation.
 !
-! In this sample program the index space of the discretized
-! computational domain is first numbered sequentially in a standard way, 
-! then the corresponding vector is distributed according to a BLOCK
-! data distribution.
+! There are three choices available for data distribution:
+! 1. A simple BLOCK distribution
+! 2. A ditribution based on arbitrary assignment of indices to processes,
+!    typically from a graph partitioner
+! 3. A 3D distribution in which the unit cube is partitioned
+!    into subcubes, each one assigned to a process.
 !
 module mld_s_pde3d_mod
   use psb_base_mod, only : psb_spk_, psb_ipk_, psb_desc_type,&
@@ -96,8 +98,9 @@ contains
   !  the rhs. 
   !
   subroutine mld_s_gen_pde3d(ictxt,idim,a,bv,xv,desc_a,afmt,&
-       & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,nrl,iv)
+       & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,partition,nrl,iv)
     use psb_base_mod
+    use psb_util_mod
     !
     !   Discretizes the partial differential equation
     ! 
@@ -125,7 +128,7 @@ contains
     class(psb_s_base_sparse_mat), optional :: amold
     class(psb_s_base_vect_type), optional :: vmold 
     class(psb_i_base_vect_type), optional :: imold
-    integer(psb_ipk_), optional :: nrl,iv(:)
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)
 
     ! Local variables.
 
@@ -134,9 +137,13 @@ contains
     type(psb_s_coo_sparse_mat)  :: acoo
     type(psb_s_csr_sparse_mat)  :: acsr
     real(psb_spk_)           :: zt(nb),x,y,z
-    integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k
+    integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_
     integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
-    integer(psb_ipk_) :: np, iam, nr, nt
+    ! For 3D partition
+    integer(psb_ipk_) :: npx,npy,npz, npdims(3),iamx,iamy,iamz,mynx,myny,mynz
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
     integer(psb_ipk_) :: icoeff
     integer(psb_ipk_), allocatable     :: irow(:),icol(:),myidx(:)
     real(psb_spk_), allocatable :: val(:)
@@ -166,15 +173,28 @@ contains
     sqdeltah = deltah*deltah
     deltah2  = 2.d0* deltah
 
+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+    
     ! initialize array descriptor and sparse matrix storage. provide an
     ! estimate of the number of non zeroes 
-
+    
     m   = idim*idim*idim
     n   = m
     nnz = ((n*9)/(np))
     if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
 
-    if (.not.present(iv)) then 
+    select case(partition_)
+    case(1)
+      ! A BLOCK partition 
       if (present(nrl)) then 
         nr = nrl
       else
@@ -194,24 +214,99 @@ contains
         call psb_abort(ictxt)
         return    
       end if
-    else
-      if (size(iv) /= m) then
-        write(psb_err_unit,*) iam, 'Initialization error IV',size(iv),m
+
+      !
+      ! First example  of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      ! 
+      call psb_cdall(ictxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A  partition  defined by the user through IV
+      
+      if (present(iv)) then 
+        if (size(iv) /= m) then
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+          info = -1
+          call psb_barrier(ictxt)
+          call psb_abort(ictxt)
+          return    
+        end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
         info = -1
         call psb_barrier(ictxt)
         call psb_abort(ictxt)
         return    
       end if
 
-    end if
-    call psb_barrier(ictxt)
-    t0 = psb_wtime()
-    if (present(iv)) then 
+      !
+      ! Second example  of use of CDALL: specify for each row the
+      ! process that owns it 
+      ! 
       call psb_cdall(ictxt,desc_a,info,vg=iv)
-    else
-      call psb_cdall(ictxt,desc_a,info,nl=nr)
-    end if
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 3-dimensional partition
+
+      ! A nifty MPI function will split the process list
+      npdims = 0
+      call mpi_dims_create(np,3,npdims,info)
+      npx = npdims(1)
+      npy = npdims(2)
+      npz = npdims(3)
+
+      allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
+      ! We can reuse idx2ijk for process indices as well. 
+      call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
+      ! Now let's split the 3D cube in hexahedra
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+      call dist1Didx(bndz,idim,npz)
+      mynz = bndz(iamz+1)-bndz(iamz)
+
+      ! How many indices do I own? 
+      nlr = mynx*myny*mynz
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndx(iamy+1)-1
+          do k=bndz(iamz),bndz(iamz+1)-1
+            nr = nr + 1
+            call ijk2idx(myidx(nr),i,j,k,idim,idim,idim)
+          end do
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny,mynz
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+      end if
+
+      !
+      ! Third example  of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vl=myidx)
+      
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
+      info = -1
+      call psb_barrier(ictxt)
+      call psb_abort(ictxt)
+      return
+    end select
 
+    
     if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
     ! define  rhs from boundary conditions; also build initial guess 
     if (info == psb_success_) call psb_geall(xv,desc_a,info)
@@ -239,8 +334,6 @@ contains
       goto 9999
     endif
 
-    myidx = desc_a%get_global_indices()
-    nlr = size(myidx)
 
     ! loop over rows belonging to current process in a block
     ! distribution.
@@ -255,18 +348,8 @@ contains
         ! local matrix pointer 
         glob_row=myidx(i)
         ! compute gridpoint coordinates
-        if (mod(glob_row,(idim*idim)) == 0) then
-          ix = glob_row/(idim*idim)
-        else
-          ix = glob_row/(idim*idim)+1
-        endif
-        if (mod((glob_row-(ix-1)*idim*idim),idim) == 0) then
-          iy = (glob_row-(ix-1)*idim*idim)/idim
-        else
-          iy = (glob_row-(ix-1)*idim*idim)/idim+1
-        endif
-        iz = glob_row-(ix-1)*idim*idim-(iy-1)*idim
-        ! x, y, x coordinates
+        call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
+        ! x, y, z coordinates
         x = (ix-1)*deltah
         y = (iy-1)*deltah
         z = (iz-1)*deltah