From 04a57098d4f86ae57c19cbe765bb8f5ec88b00bb Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Mon, 5 Mar 2018 14:26:26 +0000 Subject: [PATCH] 3D data distribution. --- tests/pdegen/mld_d_pde2d.f90 | 167 ++++++++++++++++++++++++++++------- tests/pdegen/mld_d_pde3d.f90 | 151 ++++++++++++++++++++++++------- tests/pdegen/mld_s_pde2d.f90 | 167 ++++++++++++++++++++++++++++------- tests/pdegen/mld_s_pde3d.f90 | 151 ++++++++++++++++++++++++------- 4 files changed, 500 insertions(+), 136 deletions(-) diff --git a/tests/pdegen/mld_d_pde2d.f90 b/tests/pdegen/mld_d_pde2d.f90 index b48a02e6..0244a922 100644 --- a/tests/pdegen/mld_d_pde2d.f90 +++ b/tests/pdegen/mld_d_pde2d.f90 @@ -56,10 +56,12 @@ ! ! Note that if b1=b2=c=0., the PDE is the Laplace equation. ! -! In this sample program the index space of the discretized -! computational domain is first numbered sequentially in a standard way, -! then the corresponding vector is distributed according to a BLOCK -! data distribution. +! There are three choices available for data distribution: +! 1. A simple BLOCK distribution +! 2. A ditribution based on arbitrary assignment of indices to processes, +! typically from a graph partitioner +! 3. A 2D distribution in which the unit square is partitioned +! into rectangles, each one assigned to a process. ! module mld_d_pde2d_mod use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_desc_type,& @@ -94,8 +96,9 @@ contains ! the rhs. ! subroutine mld_d_gen_pde2d(ictxt,idim,a,bv,xv,desc_a,afmt,& - & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,nrl) + & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,partition,nrl,iv) use psb_base_mod + use psb_util_mod ! ! Discretizes the partial differential equation ! @@ -123,7 +126,7 @@ contains class(psb_d_base_sparse_mat), optional :: amold class(psb_d_base_vect_type), optional :: vmold class(psb_i_base_vect_type), optional :: imold - integer(psb_ipk_), optional :: nrl + integer(psb_ipk_), optional :: partition, nrl,iv(:) ! Local variables. @@ -132,9 +135,13 @@ contains type(psb_d_coo_sparse_mat) :: acoo type(psb_d_csr_sparse_mat) :: acsr real(psb_dpk_) :: zt(nb),x,y,z - integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k + integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_ integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner - integer(psb_ipk_) :: np, iam, nr, nt + ! For 2D partition + integer(psb_ipk_) :: npx,npy,npdims(2),iamx,iamy,mynx,myny + integer(psb_ipk_), allocatable :: bndx(:),bndy(:) + ! Process grid + integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: icoeff integer(psb_ipk_), allocatable :: irow(:),icol(:),myidx(:) real(psb_dpk_), allocatable :: val(:) @@ -164,6 +171,17 @@ contains sqdeltah = deltah*deltah deltah2 = 2.e0* deltah + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + ! initialize array descriptor and sparse matrix storage. provide an ! estimate of the number of non zeroes @@ -172,32 +190,121 @@ contains nnz = ((n*7)/(np)) if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n - if (present(nrl)) then - nr = nrl - else + select case(partition_) + case(1) + ! A BLOCK partition + if (present(nrl)) then + nr = nrl + else + ! + ! Using a simple BLOCK distribution. + ! + nt = (m+np-1)/np + nr = max(0,min(nt,m-(iam*nt))) + end if + + nt = nr + call psb_sum(ictxt,nt) + if (nt /= m) then + write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + return + end if + ! - ! Using a simple BLOCK distribution. + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ictxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + return + end if + ! - nt = (m+np-1)/np - nr = max(0,min(nt,m-(iam*nt))) - end if + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! + call psb_cdall(ictxt,desc_a,info,vg=iv) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 2-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 + call mpi_dims_create(np,2,npdims,info) + npx = npdims(1) + npy = npdims(2) + + allocate(bndx(0:npx),bndy(0:npy)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iam,npx,npy,base=0) + ! Now let's split the 2D square in rectangles + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + + ! How many indices do I own? + nlr = mynx*myny + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,idim,idim) + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + end if - nt = nr - call psb_sum(ictxt,nt) - if (nt /= m) then - write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! + call psb_cdall(ictxt,desc_a,info,vl=myidx) + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' info = -1 call psb_barrier(ictxt) call psb_abort(ictxt) - return - end if - call psb_barrier(ictxt) - t0 = psb_wtime() - call psb_cdall(ictxt,desc_a,info,nl=nr) + return + end select + + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz) ! define rhs from boundary conditions; also build initial guess if (info == psb_success_) call psb_geall(xv,desc_a,info) if (info == psb_success_) call psb_geall(bv,desc_a,info) + call psb_barrier(ictxt) talc = psb_wtime()-t0 @@ -221,9 +328,6 @@ contains endif - myidx = desc_a%get_global_indices() - nlr = size(myidx) - ! loop over rows belonging to current process in a block ! distribution. @@ -237,13 +341,8 @@ contains ! local matrix pointer glob_row=myidx(i) ! compute gridpoint coordinates - if (mod(glob_row,(idim)) == 0) then - ix = glob_row/(idim) - else - ix = glob_row/(idim)+1 - endif - iy = (glob_row-(ix-1)*idim) - ! x, y + call idx2ijk(ix,iy,glob_row,idim,idim) + ! x, y coordinates x = (ix-1)*deltah y = (iy-1)*deltah diff --git a/tests/pdegen/mld_d_pde3d.f90 b/tests/pdegen/mld_d_pde3d.f90 index 6ae56c19..e179a860 100644 --- a/tests/pdegen/mld_d_pde3d.f90 +++ b/tests/pdegen/mld_d_pde3d.f90 @@ -57,10 +57,12 @@ ! ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. ! -! In this sample program the index space of the discretized -! computational domain is first numbered sequentially in a standard way, -! then the corresponding vector is distributed according to a BLOCK -! data distribution. +! There are three choices available for data distribution: +! 1. A simple BLOCK distribution +! 2. A ditribution based on arbitrary assignment of indices to processes, +! typically from a graph partitioner +! 3. A 3D distribution in which the unit cube is partitioned +! into subcubes, each one assigned to a process. ! module mld_d_pde3d_mod use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_desc_type,& @@ -96,8 +98,9 @@ contains ! the rhs. ! subroutine mld_d_gen_pde3d(ictxt,idim,a,bv,xv,desc_a,afmt,& - & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,nrl,iv) + & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,partition,nrl,iv) use psb_base_mod + use psb_util_mod ! ! Discretizes the partial differential equation ! @@ -125,7 +128,7 @@ contains class(psb_d_base_sparse_mat), optional :: amold class(psb_d_base_vect_type), optional :: vmold class(psb_i_base_vect_type), optional :: imold - integer(psb_ipk_), optional :: nrl,iv(:) + integer(psb_ipk_), optional :: partition, nrl,iv(:) ! Local variables. @@ -134,9 +137,13 @@ contains type(psb_d_coo_sparse_mat) :: acoo type(psb_d_csr_sparse_mat) :: acsr real(psb_dpk_) :: zt(nb),x,y,z - integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k + integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_ integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner - integer(psb_ipk_) :: np, iam, nr, nt + ! For 3D partition + integer(psb_ipk_) :: npx,npy,npz, npdims(3),iamx,iamy,iamz,mynx,myny,mynz + integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) + ! Process grid + integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: icoeff integer(psb_ipk_), allocatable :: irow(:),icol(:),myidx(:) real(psb_dpk_), allocatable :: val(:) @@ -166,15 +173,28 @@ contains sqdeltah = deltah*deltah deltah2 = 2.d0* deltah + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + ! initialize array descriptor and sparse matrix storage. provide an ! estimate of the number of non zeroes - + m = idim*idim*idim n = m nnz = ((n*9)/(np)) if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n - if (.not.present(iv)) then + select case(partition_) + case(1) + ! A BLOCK partition if (present(nrl)) then nr = nrl else @@ -194,24 +214,99 @@ contains call psb_abort(ictxt) return end if - else - if (size(iv) /= m) then - write(psb_err_unit,*) iam, 'Initialization error IV',size(iv),m + + ! + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ictxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' info = -1 call psb_barrier(ictxt) call psb_abort(ictxt) return end if - end if - call psb_barrier(ictxt) - t0 = psb_wtime() - if (present(iv)) then + ! + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! call psb_cdall(ictxt,desc_a,info,vg=iv) - else - call psb_cdall(ictxt,desc_a,info,nl=nr) - end if + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 3-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 + call mpi_dims_create(np,3,npdims,info) + npx = npdims(1) + npy = npdims(2) + npz = npdims(3) + + allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + ! Now let's split the 3D cube in hexahedra + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + call dist1Didx(bndz,idim,npz) + mynz = bndz(iamz+1)-bndz(iamz) + + ! How many indices do I own? + nlr = mynx*myny*mynz + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + do k=bndz(iamz),bndz(iamz+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,k,idim,idim,idim) + end do + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny,mynz + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + end if + + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! + call psb_cdall(ictxt,desc_a,info,vl=myidx) + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + return + end select + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz) ! define rhs from boundary conditions; also build initial guess if (info == psb_success_) call psb_geall(xv,desc_a,info) @@ -239,8 +334,6 @@ contains goto 9999 endif - myidx = desc_a%get_global_indices() - nlr = size(myidx) ! loop over rows belonging to current process in a block ! distribution. @@ -255,18 +348,8 @@ contains ! local matrix pointer glob_row=myidx(i) ! compute gridpoint coordinates - if (mod(glob_row,(idim*idim)) == 0) then - ix = glob_row/(idim*idim) - else - ix = glob_row/(idim*idim)+1 - endif - if (mod((glob_row-(ix-1)*idim*idim),idim) == 0) then - iy = (glob_row-(ix-1)*idim*idim)/idim - else - iy = (glob_row-(ix-1)*idim*idim)/idim+1 - endif - iz = glob_row-(ix-1)*idim*idim-(iy-1)*idim - ! x, y, x coordinates + call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) + ! x, y, z coordinates x = (ix-1)*deltah y = (iy-1)*deltah z = (iz-1)*deltah diff --git a/tests/pdegen/mld_s_pde2d.f90 b/tests/pdegen/mld_s_pde2d.f90 index 604802cd..98f36e3b 100644 --- a/tests/pdegen/mld_s_pde2d.f90 +++ b/tests/pdegen/mld_s_pde2d.f90 @@ -56,10 +56,12 @@ ! ! Note that if b1=b2=c=0., the PDE is the Laplace equation. ! -! In this sample program the index space of the discretized -! computational domain is first numbered sequentially in a standard way, -! then the corresponding vector is distributed according to a BLOCK -! data distribution. +! There are three choices available for data distribution: +! 1. A simple BLOCK distribution +! 2. A ditribution based on arbitrary assignment of indices to processes, +! typically from a graph partitioner +! 3. A 2D distribution in which the unit square is partitioned +! into rectangles, each one assigned to a process. ! module mld_s_pde2d_mod use psb_base_mod, only : psb_spk_, psb_ipk_, psb_desc_type,& @@ -94,8 +96,9 @@ contains ! the rhs. ! subroutine mld_s_gen_pde2d(ictxt,idim,a,bv,xv,desc_a,afmt,& - & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,nrl) + & a1,a2,b1,b2,c,g,info,f,amold,vmold,imold,partition,nrl,iv) use psb_base_mod + use psb_util_mod ! ! Discretizes the partial differential equation ! @@ -123,7 +126,7 @@ contains class(psb_s_base_sparse_mat), optional :: amold class(psb_s_base_vect_type), optional :: vmold class(psb_i_base_vect_type), optional :: imold - integer(psb_ipk_), optional :: nrl + integer(psb_ipk_), optional :: partition, nrl,iv(:) ! Local variables. @@ -132,9 +135,13 @@ contains type(psb_s_coo_sparse_mat) :: acoo type(psb_s_csr_sparse_mat) :: acsr real(psb_spk_) :: zt(nb),x,y,z - integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k + integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_ integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner - integer(psb_ipk_) :: np, iam, nr, nt + ! For 2D partition + integer(psb_ipk_) :: npx,npy,npdims(2),iamx,iamy,mynx,myny + integer(psb_ipk_), allocatable :: bndx(:),bndy(:) + ! Process grid + integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: icoeff integer(psb_ipk_), allocatable :: irow(:),icol(:),myidx(:) real(psb_spk_), allocatable :: val(:) @@ -164,6 +171,17 @@ contains sqdeltah = deltah*deltah deltah2 = 2.e0* deltah + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + ! initialize array descriptor and sparse matrix storage. provide an ! estimate of the number of non zeroes @@ -172,32 +190,121 @@ contains nnz = ((n*7)/(np)) if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n - if (present(nrl)) then - nr = nrl - else + select case(partition_) + case(1) + ! A BLOCK partition + if (present(nrl)) then + nr = nrl + else + ! + ! Using a simple BLOCK distribution. + ! + nt = (m+np-1)/np + nr = max(0,min(nt,m-(iam*nt))) + end if + + nt = nr + call psb_sum(ictxt,nt) + if (nt /= m) then + write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + return + end if + ! - ! Using a simple BLOCK distribution. + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ictxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + return + end if + ! - nt = (m+np-1)/np - nr = max(0,min(nt,m-(iam*nt))) - end if + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! + call psb_cdall(ictxt,desc_a,info,vg=iv) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 2-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 + call mpi_dims_create(np,2,npdims,info) + npx = npdims(1) + npy = npdims(2) + + allocate(bndx(0:npx),bndy(0:npy)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iam,npx,npy,base=0) + ! Now let's split the 2D square in rectangles + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + + ! How many indices do I own? + nlr = mynx*myny + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,idim,idim) + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + end if - nt = nr - call psb_sum(ictxt,nt) - if (nt /= m) then - write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! + call psb_cdall(ictxt,desc_a,info,vl=myidx) + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' info = -1 call psb_barrier(ictxt) call psb_abort(ictxt) - return - end if - call psb_barrier(ictxt) - t0 = psb_wtime() - call psb_cdall(ictxt,desc_a,info,nl=nr) + return + end select + + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz) ! define rhs from boundary conditions; also build initial guess if (info == psb_success_) call psb_geall(xv,desc_a,info) if (info == psb_success_) call psb_geall(bv,desc_a,info) + call psb_barrier(ictxt) talc = psb_wtime()-t0 @@ -221,9 +328,6 @@ contains endif - myidx = desc_a%get_global_indices() - nlr = size(myidx) - ! loop over rows belonging to current process in a block ! distribution. @@ -237,13 +341,8 @@ contains ! local matrix pointer glob_row=myidx(i) ! compute gridpoint coordinates - if (mod(glob_row,(idim)) == 0) then - ix = glob_row/(idim) - else - ix = glob_row/(idim)+1 - endif - iy = (glob_row-(ix-1)*idim) - ! x, y + call idx2ijk(ix,iy,glob_row,idim,idim) + ! x, y coordinates x = (ix-1)*deltah y = (iy-1)*deltah diff --git a/tests/pdegen/mld_s_pde3d.f90 b/tests/pdegen/mld_s_pde3d.f90 index 210a6ce3..a8865ddd 100644 --- a/tests/pdegen/mld_s_pde3d.f90 +++ b/tests/pdegen/mld_s_pde3d.f90 @@ -57,10 +57,12 @@ ! ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. ! -! In this sample program the index space of the discretized -! computational domain is first numbered sequentially in a standard way, -! then the corresponding vector is distributed according to a BLOCK -! data distribution. +! There are three choices available for data distribution: +! 1. A simple BLOCK distribution +! 2. A ditribution based on arbitrary assignment of indices to processes, +! typically from a graph partitioner +! 3. A 3D distribution in which the unit cube is partitioned +! into subcubes, each one assigned to a process. ! module mld_s_pde3d_mod use psb_base_mod, only : psb_spk_, psb_ipk_, psb_desc_type,& @@ -96,8 +98,9 @@ contains ! the rhs. ! subroutine mld_s_gen_pde3d(ictxt,idim,a,bv,xv,desc_a,afmt,& - & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,nrl,iv) + & a1,a2,a3,b1,b2,b3,c,g,info,f,amold,vmold,imold,partition,nrl,iv) use psb_base_mod + use psb_util_mod ! ! Discretizes the partial differential equation ! @@ -125,7 +128,7 @@ contains class(psb_s_base_sparse_mat), optional :: amold class(psb_s_base_vect_type), optional :: vmold class(psb_i_base_vect_type), optional :: imold - integer(psb_ipk_), optional :: nrl,iv(:) + integer(psb_ipk_), optional :: partition, nrl,iv(:) ! Local variables. @@ -134,9 +137,13 @@ contains type(psb_s_coo_sparse_mat) :: acoo type(psb_s_csr_sparse_mat) :: acsr real(psb_spk_) :: zt(nb),x,y,z - integer(psb_ipk_) :: m,n,nnz,glob_row,nlr,i,ii,ib,k + integer(psb_ipk_) :: m,n,nnz,nr,nt,glob_row,nlr,i,j,ii,ib,k, partition_ integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner - integer(psb_ipk_) :: np, iam, nr, nt + ! For 3D partition + integer(psb_ipk_) :: npx,npy,npz, npdims(3),iamx,iamy,iamz,mynx,myny,mynz + integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) + ! Process grid + integer(psb_ipk_) :: np, iam integer(psb_ipk_) :: icoeff integer(psb_ipk_), allocatable :: irow(:),icol(:),myidx(:) real(psb_spk_), allocatable :: val(:) @@ -166,15 +173,28 @@ contains sqdeltah = deltah*deltah deltah2 = 2.d0* deltah + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + ! initialize array descriptor and sparse matrix storage. provide an ! estimate of the number of non zeroes - + m = idim*idim*idim n = m nnz = ((n*9)/(np)) if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n - if (.not.present(iv)) then + select case(partition_) + case(1) + ! A BLOCK partition if (present(nrl)) then nr = nrl else @@ -194,24 +214,99 @@ contains call psb_abort(ictxt) return end if - else - if (size(iv) /= m) then - write(psb_err_unit,*) iam, 'Initialization error IV',size(iv),m + + ! + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ictxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' info = -1 call psb_barrier(ictxt) call psb_abort(ictxt) return end if - end if - call psb_barrier(ictxt) - t0 = psb_wtime() - if (present(iv)) then + ! + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! call psb_cdall(ictxt,desc_a,info,vg=iv) - else - call psb_cdall(ictxt,desc_a,info,nl=nr) - end if + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 3-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 + call mpi_dims_create(np,3,npdims,info) + npx = npdims(1) + npy = npdims(2) + npz = npdims(3) + + allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + ! Now let's split the 3D cube in hexahedra + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + call dist1Didx(bndz,idim,npz) + mynz = bndz(iamz+1)-bndz(iamz) + + ! How many indices do I own? + nlr = mynx*myny*mynz + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + do k=bndz(iamz),bndz(iamz+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,k,idim,idim,idim) + end do + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny,mynz + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + end if + + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! + call psb_cdall(ictxt,desc_a,info,vl=myidx) + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' + info = -1 + call psb_barrier(ictxt) + call psb_abort(ictxt) + return + end select + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz) ! define rhs from boundary conditions; also build initial guess if (info == psb_success_) call psb_geall(xv,desc_a,info) @@ -239,8 +334,6 @@ contains goto 9999 endif - myidx = desc_a%get_global_indices() - nlr = size(myidx) ! loop over rows belonging to current process in a block ! distribution. @@ -255,18 +348,8 @@ contains ! local matrix pointer glob_row=myidx(i) ! compute gridpoint coordinates - if (mod(glob_row,(idim*idim)) == 0) then - ix = glob_row/(idim*idim) - else - ix = glob_row/(idim*idim)+1 - endif - if (mod((glob_row-(ix-1)*idim*idim),idim) == 0) then - iy = (glob_row-(ix-1)*idim*idim)/idim - else - iy = (glob_row-(ix-1)*idim*idim)/idim+1 - endif - iz = glob_row-(ix-1)*idim*idim-(iy-1)*idim - ! x, y, x coordinates + call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) + ! x, y, z coordinates x = (ix-1)*deltah y = (iy-1)*deltah z = (iz-1)*deltah