Put in new cartesian data partition from PSBLAS.

Minor typo in Makefile.
7 years ago · b76fb7cd69
parent fa1b5dcdea
commit b76fb7cd69
5 changed files with 502 additions and 137 deletions
--- a/3
+++ b/3
@ -30,7 +30,8 @@ install: all
 	   /bin/cp -fr docs/*pdf docs/html $(INSTALL_DOCSDIR)
 	$(SHELL) ./mkdir.sh  $(INSTALL_DOCSDIR) && \
 	   $(INSTALL_DATA) README LICENSE $(INSTALL_DOCSDIR)
-	$(SHELL) ./mkdir.sh  $(INSTALL_SAMPLESDIR) && ./mkdir.sh  $(INSTALL_SAMPLESDIR)/simple &&\
+	$(SHELL) ./mkdir.sh  $(INSTALL_SAMPLESDIR) && \
+		 ./mkdir.sh  $(INSTALL_SAMPLESDIR)/simple &&\
 	 	 ./mkdir.sh  $(INSTALL_SAMPLESDIR)/advanced && \
 		(cd examples; /bin/cp -fr pdegen fileread $(INSTALL_SAMPLESDIR)/simple ) && \
 		(cd tests; /bin/cp -fr pdegen fileread $(INSTALL_SAMPLESDIR)/advanced )
--- a/tests/pdegen/mld_d_pde2d.f90
+++ b/tests/pdegen/mld_d_pde2d.f90
@ -56,10 +56,12 @@
 !
 ! Note that if b1=b2=c=0., the PDE is the  Laplace equation.
 !
-! In this sample program the index space of the discretized
-! computational domain is first numbered sequentially in a standard way, 
-! then the corresponding vector is distributed according to a BLOCK
-! data distribution.
+! There are three choices available for data distribution:
+! 1. A simple BLOCK distribution
+! 2. A ditribution based on arbitrary assignment of indices to processes,
+!    typically from a graph partitioner
+! 3. A 2D distribution in which the unit square is partitioned
+!    into rectangles, each one assigned to a process.
 !
 module mld_d_pde2d_mod
  use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_desc_type,&
@ -94,8 +96,9 @@ contains
  !  the rhs. 
  !
  subroutine mld_d_gen_pde2d(ictxt,idim,a,bv,xv,desc_a,afmt,&
-       & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,nrl)
+       & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,partition,nrl,iv)
    use psb_base_mod
+    use psb_util_mod    
    !
    !   Discretizes the partial differential equation
    ! 
@ -123,7 +126,7 @@ contains
    class(psb_d_base_sparse_mat), optional :: amold
    class(psb_d_base_vect_type), optional :: vmold
    class(psb_i_base_vect_type), optional :: imold
-    integer(psb_ipk_), optional :: nrl
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)

    ! Local variables.

@ -132,9 +135,13 @@ contains
    type(psb_d_coo_sparse_mat)  :: acoo
    type(psb_d_csr_sparse_mat)  :: acsr
    real(psb_dpk_)           :: zt(nb),x,y,z
-    integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k
+    integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_
    integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
-    integer(psb_ipk_) :: np, iam, nr, nt
+    ! For 2D partition
+    integer(psb_ipk_) :: npx,npy,npdims(2),iamx,iamy,mynx,myny
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
    integer(psb_ipk_) :: icoeff
    integer(psb_ipk_), allocatable     :: irow(:),icol(:),myidx(:)
    real(psb_dpk_), allocatable :: val(:)
@ -164,6 +171,17 @@ contains
    sqdeltah = deltah*deltah
    deltah2  = 2.e0* deltah

+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+    
    ! initialize array descriptor and sparse matrix storage. provide an
    ! estimate of the number of non zeroes 

@ -172,6 +190,9 @@ contains
    nnz = ((n*7)/(np))
    if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n

+    select case(partition_)
+    case(1)
+      ! A BLOCK partition 
      if (present(nrl)) then 
        nr = nrl
      else
@ -191,13 +212,99 @@ contains
        call psb_abort(ictxt)
        return    
      end if
-    call psb_barrier(ictxt)
-    t0 = psb_wtime()
+
+      !
+      ! First example  of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      ! 
      call psb_cdall(ictxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A  partition  defined by the user through IV
+      
+      if (present(iv)) then 
+        if (size(iv) /= m) then
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+          info = -1
+          call psb_barrier(ictxt)
+          call psb_abort(ictxt)
+          return    
+        end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+        return    
+      end if
+
+      !
+      ! Second example  of use of CDALL: specify for each row the
+      ! process that owns it 
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vg=iv)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 2-dimensional partition
+
+      ! A nifty MPI function will split the process list
+      npdims = 0
+      call mpi_dims_create(np,2,npdims,info)
+      npx = npdims(1)
+      npy = npdims(2)
+
+      allocate(bndx(0:npx),bndy(0:npy))
+      ! We can reuse idx2ijk for process indices as well. 
+      call idx2ijk(iamx,iamy,iam,npx,npy,base=0)
+      ! Now let's split the 2D square in rectangles
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+
+      ! How many indices do I own? 
+      nlr = mynx*myny
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndx(iamy+1)-1
+          nr = nr + 1
+          call ijk2idx(myidx(nr),i,j,idim,idim)
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+      end if
+
+      !
+      ! Third example  of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vl=myidx)
+      
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
+      info = -1
+      call psb_barrier(ictxt)
+      call psb_abort(ictxt)
+      return
+    end select
+
+    
    if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
    ! define  rhs from boundary conditions; also build initial guess 
    if (info == psb_success_) call psb_geall(xv,desc_a,info)
    if (info == psb_success_) call psb_geall(bv,desc_a,info)
+
    call psb_barrier(ictxt)
    talc = psb_wtime()-t0

@ -221,9 +328,6 @@ contains
    endif


-    myidx = desc_a%get_global_indices()
-    nlr = size(myidx)
-
    ! loop over rows belonging to current process in a block
    ! distribution.

@ -237,13 +341,8 @@ contains
        ! local matrix pointer 
        glob_row=myidx(i)
        ! compute gridpoint coordinates
-        if (mod(glob_row,(idim)) == 0) then
-          ix = glob_row/(idim)
-        else
-          ix = glob_row/(idim)+1
-        endif
-        iy = (glob_row-(ix-1)*idim)
-        ! x, y
+        call idx2ijk(ix,iy,glob_row,idim,idim)
+        ! x, y coordinates
        x = (ix-1)*deltah
        y = (iy-1)*deltah

--- a/tests/pdegen/mld_d_pde3d.f90
+++ b/tests/pdegen/mld_d_pde3d.f90
@ -57,10 +57,12 @@
 !
 ! Note that if b1=b2=b3=c=0., the PDE is the  Laplace equation.
 !
-! In this sample program the index space of the discretized
-! computational domain is first numbered sequentially in a standard way, 
-! then the corresponding vector is distributed according to a BLOCK
-! data distribution.
+! There are three choices available for data distribution:
+! 1. A simple BLOCK distribution
+! 2. A ditribution based on arbitrary assignment of indices to processes,
+!    typically from a graph partitioner
+! 3. A 3D distribution in which the unit cube is partitioned
+!    into subcubes, each one assigned to a process.
 !
 module mld_d_pde3d_mod
  use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_desc_type,&
@ -96,8 +98,9 @@ contains
  !  the rhs. 
  !
  subroutine mld_d_gen_pde3d(ictxt,idim,a,bv,xv,desc_a,afmt,&
-       & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,nrl,iv)
+       & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,partition,nrl,iv)
    use psb_base_mod
+    use psb_util_mod
    !
    !   Discretizes the partial differential equation
    ! 
@ -125,7 +128,7 @@ contains
    class(psb_d_base_sparse_mat), optional :: amold
    class(psb_d_base_vect_type), optional :: vmold 
    class(psb_i_base_vect_type), optional :: imold
-    integer(psb_ipk_), optional :: nrl,iv(:)
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)

    ! Local variables.

@ -134,9 +137,13 @@ contains
    type(psb_d_coo_sparse_mat)  :: acoo
    type(psb_d_csr_sparse_mat)  :: acsr
    real(psb_dpk_)           :: zt(nb),x,y,z
-    integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k
+    integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_
    integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
-    integer(psb_ipk_) :: np, iam, nr, nt
+    ! For 3D partition
+    integer(psb_ipk_) :: npx,npy,npz, npdims(3),iamx,iamy,iamz,mynx,myny,mynz
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
    integer(psb_ipk_) :: icoeff
    integer(psb_ipk_), allocatable     :: irow(:),icol(:),myidx(:)
    real(psb_dpk_), allocatable :: val(:)
@ -166,6 +173,17 @@ contains
    sqdeltah = deltah*deltah
    deltah2  = 2.d0* deltah

+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+    
    ! initialize array descriptor and sparse matrix storage. provide an
    ! estimate of the number of non zeroes 
    
@ -174,7 +192,9 @@ contains
    nnz = ((n*9)/(np))
    if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n

-    if (.not.present(iv)) then 
+    select case(partition_)
+    case(1)
+      ! A BLOCK partition 
      if (present(nrl)) then 
        nr = nrl
      else
@ -194,24 +214,99 @@ contains
        call psb_abort(ictxt)
        return    
      end if
-    else
+
+      !
+      ! First example  of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      ! 
+      call psb_cdall(ictxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A  partition  defined by the user through IV
+      
+      if (present(iv)) then 
        if (size(iv) /= m) then
-        write(psb_err_unit,*) iam, 'Initialization error IV',size(iv),m
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
          info = -1
          call psb_barrier(ictxt)
          call psb_abort(ictxt)
          return    
        end if
-
-    end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
+        info = -1
        call psb_barrier(ictxt)
-    t0 = psb_wtime()
-    if (present(iv)) then 
+        call psb_abort(ictxt)
+        return    
+      end if
+
+      !
+      ! Second example  of use of CDALL: specify for each row the
+      ! process that owns it 
+      ! 
      call psb_cdall(ictxt,desc_a,info,vg=iv)
-    else
-      call psb_cdall(ictxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 3-dimensional partition
+
+      ! A nifty MPI function will split the process list
+      npdims = 0
+      call mpi_dims_create(np,3,npdims,info)
+      npx = npdims(1)
+      npy = npdims(2)
+      npz = npdims(3)
+
+      allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
+      ! We can reuse idx2ijk for process indices as well. 
+      call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
+      ! Now let's split the 3D cube in hexahedra
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+      call dist1Didx(bndz,idim,npz)
+      mynz = bndz(iamz+1)-bndz(iamz)
+
+      ! How many indices do I own? 
+      nlr = mynx*myny*mynz
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndx(iamy+1)-1
+          do k=bndz(iamz),bndz(iamz+1)-1
+            nr = nr + 1
+            call ijk2idx(myidx(nr),i,j,k,idim,idim,idim)
+          end do
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny,mynz
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
      end if

+      !
+      ! Third example  of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vl=myidx)
+      
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
+      info = -1
+      call psb_barrier(ictxt)
+      call psb_abort(ictxt)
+      return
+    end select
+
+    
    if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
    ! define  rhs from boundary conditions; also build initial guess 
    if (info == psb_success_) call psb_geall(xv,desc_a,info)
@ -239,8 +334,6 @@ contains
      goto 9999
    endif

-    myidx = desc_a%get_global_indices()
-    nlr = size(myidx)

    ! loop over rows belonging to current process in a block
    ! distribution.
@ -255,18 +348,8 @@ contains
        ! local matrix pointer 
        glob_row=myidx(i)
        ! compute gridpoint coordinates
-        if (mod(glob_row,(idim*idim)) == 0) then
-          ix = glob_row/(idim*idim)
-        else
-          ix = glob_row/(idim*idim)+1
-        endif
-        if (mod((glob_row-(ix-1)*idim*idim),idim) == 0) then
-          iy = (glob_row-(ix-1)*idim*idim)/idim
-        else
-          iy = (glob_row-(ix-1)*idim*idim)/idim+1
-        endif
-        iz = glob_row-(ix-1)*idim*idim-(iy-1)*idim
-        ! x, y, x coordinates
+        call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
+        ! x, y, z coordinates
        x = (ix-1)*deltah
        y = (iy-1)*deltah
        z = (iz-1)*deltah
--- a/tests/pdegen/mld_s_pde2d.f90
+++ b/tests/pdegen/mld_s_pde2d.f90
@ -56,10 +56,12 @@
 !
 ! Note that if b1=b2=c=0., the PDE is the  Laplace equation.
 !
-! In this sample program the index space of the discretized
-! computational domain is first numbered sequentially in a standard way, 
-! then the corresponding vector is distributed according to a BLOCK
-! data distribution.
+! There are three choices available for data distribution:
+! 1. A simple BLOCK distribution
+! 2. A ditribution based on arbitrary assignment of indices to processes,
+!    typically from a graph partitioner
+! 3. A 2D distribution in which the unit square is partitioned
+!    into rectangles, each one assigned to a process.
 !
 module mld_s_pde2d_mod
  use psb_base_mod, only : psb_spk_, psb_ipk_, psb_desc_type,&
@ -94,8 +96,9 @@ contains
  !  the rhs. 
  !
  subroutine mld_s_gen_pde2d(ictxt,idim,a,bv,xv,desc_a,afmt,&
-       & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,nrl)
+       & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,partition,nrl,iv)
    use psb_base_mod
+    use psb_util_mod    
    !
    !   Discretizes the partial differential equation
    ! 
@ -123,7 +126,7 @@ contains
    class(psb_s_base_sparse_mat), optional :: amold
    class(psb_s_base_vect_type), optional :: vmold
    class(psb_i_base_vect_type), optional :: imold
-    integer(psb_ipk_), optional :: nrl
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)

    ! Local variables.

@ -132,9 +135,13 @@ contains
    type(psb_s_coo_sparse_mat)  :: acoo
    type(psb_s_csr_sparse_mat)  :: acsr
    real(psb_spk_)           :: zt(nb),x,y,z
-    integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k
+    integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_
    integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
-    integer(psb_ipk_) :: np, iam, nr, nt
+    ! For 2D partition
+    integer(psb_ipk_) :: npx,npy,npdims(2),iamx,iamy,mynx,myny
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
    integer(psb_ipk_) :: icoeff
    integer(psb_ipk_), allocatable     :: irow(:),icol(:),myidx(:)
    real(psb_spk_), allocatable :: val(:)
@ -164,6 +171,17 @@ contains
    sqdeltah = deltah*deltah
    deltah2  = 2.e0* deltah

+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+    
    ! initialize array descriptor and sparse matrix storage. provide an
    ! estimate of the number of non zeroes 

@ -172,6 +190,9 @@ contains
    nnz = ((n*7)/(np))
    if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n

+    select case(partition_)
+    case(1)
+      ! A BLOCK partition 
      if (present(nrl)) then 
        nr = nrl
      else
@ -191,13 +212,99 @@ contains
        call psb_abort(ictxt)
        return    
      end if
-    call psb_barrier(ictxt)
-    t0 = psb_wtime()
+
+      !
+      ! First example  of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      ! 
      call psb_cdall(ictxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A  partition  defined by the user through IV
+      
+      if (present(iv)) then 
+        if (size(iv) /= m) then
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+          info = -1
+          call psb_barrier(ictxt)
+          call psb_abort(ictxt)
+          return    
+        end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+        return    
+      end if
+
+      !
+      ! Second example  of use of CDALL: specify for each row the
+      ! process that owns it 
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vg=iv)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 2-dimensional partition
+
+      ! A nifty MPI function will split the process list
+      npdims = 0
+      call mpi_dims_create(np,2,npdims,info)
+      npx = npdims(1)
+      npy = npdims(2)
+
+      allocate(bndx(0:npx),bndy(0:npy))
+      ! We can reuse idx2ijk for process indices as well. 
+      call idx2ijk(iamx,iamy,iam,npx,npy,base=0)
+      ! Now let's split the 2D square in rectangles
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+
+      ! How many indices do I own? 
+      nlr = mynx*myny
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndx(iamy+1)-1
+          nr = nr + 1
+          call ijk2idx(myidx(nr),i,j,idim,idim)
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
+      end if
+
+      !
+      ! Third example  of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vl=myidx)
+      
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
+      info = -1
+      call psb_barrier(ictxt)
+      call psb_abort(ictxt)
+      return
+    end select
+
+    
    if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
    ! define  rhs from boundary conditions; also build initial guess 
    if (info == psb_success_) call psb_geall(xv,desc_a,info)
    if (info == psb_success_) call psb_geall(bv,desc_a,info)
+
    call psb_barrier(ictxt)
    talc = psb_wtime()-t0

@ -221,9 +328,6 @@ contains
    endif


-    myidx = desc_a%get_global_indices()
-    nlr = size(myidx)
-
    ! loop over rows belonging to current process in a block
    ! distribution.

@ -237,13 +341,8 @@ contains
        ! local matrix pointer 
        glob_row=myidx(i)
        ! compute gridpoint coordinates
-        if (mod(glob_row,(idim)) == 0) then
-          ix = glob_row/(idim)
-        else
-          ix = glob_row/(idim)+1
-        endif
-        iy = (glob_row-(ix-1)*idim)
-        ! x, y
+        call idx2ijk(ix,iy,glob_row,idim,idim)
+        ! x, y coordinates
        x = (ix-1)*deltah
        y = (iy-1)*deltah

--- a/tests/pdegen/mld_s_pde3d.f90
+++ b/tests/pdegen/mld_s_pde3d.f90
@ -57,10 +57,12 @@
 !
 ! Note that if b1=b2=b3=c=0., the PDE is the  Laplace equation.
 !
-! In this sample program the index space of the discretized
-! computational domain is first numbered sequentially in a standard way, 
-! then the corresponding vector is distributed according to a BLOCK
-! data distribution.
+! There are three choices available for data distribution:
+! 1. A simple BLOCK distribution
+! 2. A ditribution based on arbitrary assignment of indices to processes,
+!    typically from a graph partitioner
+! 3. A 3D distribution in which the unit cube is partitioned
+!    into subcubes, each one assigned to a process.
 !
 module mld_s_pde3d_mod
  use psb_base_mod, only : psb_spk_, psb_ipk_, psb_desc_type,&
@ -96,8 +98,9 @@ contains
  !  the rhs. 
  !
  subroutine mld_s_gen_pde3d(ictxt,idim,a,bv,xv,desc_a,afmt,&
-       & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,nrl,iv)
+       & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,partition,nrl,iv)
    use psb_base_mod
+    use psb_util_mod
    !
    !   Discretizes the partial differential equation
    ! 
@ -125,7 +128,7 @@ contains
    class(psb_s_base_sparse_mat), optional :: amold
    class(psb_s_base_vect_type), optional :: vmold 
    class(psb_i_base_vect_type), optional :: imold
-    integer(psb_ipk_), optional :: nrl,iv(:)
+    integer(psb_ipk_), optional :: partition, nrl,iv(:)

    ! Local variables.

@ -134,9 +137,13 @@ contains
    type(psb_s_coo_sparse_mat)  :: acoo
    type(psb_s_csr_sparse_mat)  :: acsr
    real(psb_spk_)           :: zt(nb),x,y,z
-    integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k
+    integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_
    integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
-    integer(psb_ipk_) :: np, iam, nr, nt
+    ! For 3D partition
+    integer(psb_ipk_) :: npx,npy,npz, npdims(3),iamx,iamy,iamz,mynx,myny,mynz
+    integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:)
+    ! Process grid
+    integer(psb_ipk_) :: np, iam
    integer(psb_ipk_) :: icoeff
    integer(psb_ipk_), allocatable     :: irow(:),icol(:),myidx(:)
    real(psb_spk_), allocatable :: val(:)
@ -166,6 +173,17 @@ contains
    sqdeltah = deltah*deltah
    deltah2  = 2.d0* deltah

+    if (present(partition)) then
+      if ((1<= partition).and.(partition <= 3)) then
+        partition_ = partition
+      else
+        write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+        partition_ = 3
+      end if
+    else
+      partition_ = 3
+    end if
+    
    ! initialize array descriptor and sparse matrix storage. provide an
    ! estimate of the number of non zeroes 
    
@ -174,7 +192,9 @@ contains
    nnz = ((n*9)/(np))
    if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n

-    if (.not.present(iv)) then 
+    select case(partition_)
+    case(1)
+      ! A BLOCK partition 
      if (present(nrl)) then 
        nr = nrl
      else
@ -194,24 +214,99 @@ contains
        call psb_abort(ictxt)
        return    
      end if
-    else
+
+      !
+      ! First example  of use of CDALL: specify for each process a number of
+      ! contiguous rows
+      ! 
+      call psb_cdall(ictxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(2)
+      ! A  partition  defined by the user through IV
+      
+      if (present(iv)) then 
        if (size(iv) /= m) then
-        write(psb_err_unit,*) iam, 'Initialization error IV',size(iv),m
+          write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
          info = -1
          call psb_barrier(ictxt)
          call psb_abort(ictxt)
          return    
        end if
-
-    end if
+      else
+        write(psb_err_unit,*) iam, 'Initialization error: IV not present'
+        info = -1
        call psb_barrier(ictxt)
-    t0 = psb_wtime()
-    if (present(iv)) then 
+        call psb_abort(ictxt)
+        return    
+      end if
+
+      !
+      ! Second example  of use of CDALL: specify for each row the
+      ! process that owns it 
+      ! 
      call psb_cdall(ictxt,desc_a,info,vg=iv)
-    else
-      call psb_cdall(ictxt,desc_a,info,nl=nr)
+      myidx = desc_a%get_global_indices()
+      nlr = size(myidx)
+
+    case(3)
+      ! A 3-dimensional partition
+
+      ! A nifty MPI function will split the process list
+      npdims = 0
+      call mpi_dims_create(np,3,npdims,info)
+      npx = npdims(1)
+      npy = npdims(2)
+      npz = npdims(3)
+
+      allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
+      ! We can reuse idx2ijk for process indices as well. 
+      call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
+      ! Now let's split the 3D cube in hexahedra
+      call dist1Didx(bndx,idim,npx)
+      mynx = bndx(iamx+1)-bndx(iamx)
+      call dist1Didx(bndy,idim,npy)
+      myny = bndy(iamy+1)-bndy(iamy)
+      call dist1Didx(bndz,idim,npz)
+      mynz = bndz(iamz+1)-bndz(iamz)
+
+      ! How many indices do I own? 
+      nlr = mynx*myny*mynz
+      allocate(myidx(nlr))
+      ! Now, let's generate the list of indices I own
+      nr = 0
+      do i=bndx(iamx),bndx(iamx+1)-1
+        do j=bndy(iamy),bndx(iamy+1)-1
+          do k=bndz(iamz),bndz(iamz+1)-1
+            nr = nr + 1
+            call ijk2idx(myidx(nr),i,j,k,idim,idim,idim)
+          end do
+        end do
+      end do
+      if (nr /= nlr) then
+        write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',&
+             & nr,nlr,mynx,myny,mynz
+        info = -1
+        call psb_barrier(ictxt)
+        call psb_abort(ictxt)
      end if

+      !
+      ! Third example  of use of CDALL: specify for each process
+      ! the set of global indices it owns.
+      ! 
+      call psb_cdall(ictxt,desc_a,info,vl=myidx)
+      
+    case default
+      write(psb_err_unit,*) iam, 'Initialization error: should not get here'
+      info = -1
+      call psb_barrier(ictxt)
+      call psb_abort(ictxt)
+      return
+    end select
+
+    
    if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz)
    ! define  rhs from boundary conditions; also build initial guess 
    if (info == psb_success_) call psb_geall(xv,desc_a,info)
@ -239,8 +334,6 @@ contains
      goto 9999
    endif

-    myidx = desc_a%get_global_indices()
-    nlr = size(myidx)

    ! loop over rows belonging to current process in a block
    ! distribution.
@ -255,18 +348,8 @@ contains
        ! local matrix pointer 
        glob_row=myidx(i)
        ! compute gridpoint coordinates
-        if (mod(glob_row,(idim*idim)) == 0) then
-          ix = glob_row/(idim*idim)
-        else
-          ix = glob_row/(idim*idim)+1
-        endif
-        if (mod((glob_row-(ix-1)*idim*idim),idim) == 0) then
-          iy = (glob_row-(ix-1)*idim*idim)/idim
-        else
-          iy = (glob_row-(ix-1)*idim*idim)/idim+1
-        endif
-        iz = glob_row-(ix-1)*idim*idim-(iy-1)*idim
-        ! x, y, x coordinates
+        call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
+        ! x, y, z coordinates
        x = (ix-1)*deltah
        y = (iy-1)*deltah
        z = (iz-1)*deltah