!   
!                Parallel Sparse BLAS  version 3.5
!      (C) Copyright 2006-2018
!        Salvatore Filippone    
!        Alfredo Buttari      
!   
!    Redistribution and use in source and binary forms, with or without
!    modification, are permitted provided that the following conditions
!    are met:
!      1. Redistributions of source code must retain the above copyright
!         notice, this list of conditions and the following disclaimer.
!      2. Redistributions in binary form must reproduce the above copyright
!         notice, this list of conditions, and the following disclaimer in the
!         documentation and/or other materials provided with the distribution.
!      3. The name of the PSBLAS group or the names of its contributors may
!         not be used to endorse or promote products derived from this
!         software without specific written permission.
!   
!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
!    POSSIBILITY OF SUCH DAMAGE.
!   
!    
subroutine psb_cmatdist(a_glob, a, ictxt, desc_a,&
     & info, parts, vg, vsz, inroot,fmt,mold)
  !
  ! a utility subroutine to distribute a matrix among processes
  ! according to a user defined data distribution, using
  ! sparse matrix subroutines.
  !
  !  type(psb_cspmat_type)                 :: a_glob
  !     on entry: the global sparse matrix, significant only on the
  !        root process.
  !
  !  type(psb_cspmat_type)                            :: a
  !     on exit : this will contain the local sparse matrix.
  !
  !       interface parts
  !         !   .....user passed subroutine.....
  !         subroutine parts(global_indx,n,np,pv,nv)
  !           implicit none
  !           integer(psb_ipk_), intent(in)  :: global_indx, n, np
  !           integer(psb_ipk_), intent(out) :: nv
  !           integer(psb_ipk_), intent(out) :: pv(*)
  !         end subroutine parts
  !       end interface
  !     on entry: subroutine providing the user-defined data distribution.
  !        For each global_indx the subroutine must return in pv the
  !        list of all processes owning the row with that index; the
  !        list contains nv entries. Usually nv=1; if nv > 1 there is
  !        an overlap in the data distribution. A sample block-row
  !        implementation is sketched after this subroutine.
  !
  !  integer(psb_ipk_) :: ictxt
  !     on entry: the PSBLAS parallel environment context.
  !
  !  type(psb_desc_type)               :: desc_a
  !     on exit : the updated communication descriptor.
  !
  !  integer(psb_ipk_), optional    :: inroot
  !     on entry: the process holding a_glob. default: psb_root_ (0)
  !     on exit : unchanged.
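  !
  !  integer(psb_ipk_), optional    :: vg(:)
  !     on entry: process assignment vector: global row i is owned by
  !        process vg(i) (0-based). Exactly one of parts, vg, vsz
  !        must be present.
  !     on exit : unchanged.
  !
  !  integer(psb_ipk_), optional    :: vsz(:)
  !     on entry: block sizes: process p owns the vsz(p+1) consecutive
  !        rows following those of process p-1; sum(vsz(1:np)) must
  !        equal the global number of rows.
  !     on exit : unchanged.
  !
  !  character(len=*), optional     :: fmt
  !     on entry: requested storage format for the assembled local
  !        matrix, passed on to psb_spasb.
  !
  !  class(psb_c_base_sparse_mat), optional :: mold
  !     on entry: prototype object; the assembled local matrix is
  !        converted to its dynamic type.
  !
  !  integer(psb_ipk_), intent(out) :: info
  !     on exit : return code; psb_success_ (0) on success.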
  !
  use psb_base_mod
  use psb_mat_mod
  implicit none

  ! parameters
  type(psb_cspmat_type)      :: a_glob
  integer(psb_ipk_) :: ictxt
  type(psb_cspmat_type)      :: a
  type(psb_desc_type)        :: desc_a
  integer(psb_ipk_), intent(out)       :: info
  integer(psb_ipk_), optional       :: inroot
  character(len=*), optional :: fmt
  class(psb_c_base_sparse_mat), optional :: mold
  procedure(psb_parts), optional  :: parts
  integer(psb_ipk_), optional     :: vg(:)
  integer(psb_ipk_), optional     :: vsz(:)

  ! local variables
  logical           :: use_parts, use_vg, use_vsz
  integer(psb_ipk_) :: np, iam, np_sharing
  integer(psb_ipk_) :: k_count, root, liwork,  nnzero, nrhs,&
       & i, ll, nz, isize, iproc, nnr, err, err_act
  integer(psb_lpk_) :: i_count, j_count, nrow, ncol, ig, lastigp
  integer(psb_ipk_), allocatable  :: iwork(:), iwrk2(:)
  integer(psb_lpk_), allocatable  :: irow(:),icol(:)
  complex(psb_spk_), allocatable    :: val(:)
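  ! nb: maximum number of consecutive rows gathered and exchanged per chunk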
  integer(psb_ipk_), parameter    :: nb=30
  real(psb_dpk_)              :: t0, t1, t2, t3, t4, t5
  character(len=20)           :: name, ch_err

  info = psb_success_
  err  = 0
  name = 'psb_c_mat_dist'
  call psb_erractionsave(err_act)

  ! executable statements    
  if (present(inroot)) then
    root = inroot
  else
    root = psb_root_
  end if
  call psb_info(ictxt, iam, np)     

  use_parts = present(parts)
  use_vg    = present(vg)
  use_vsz   = present(vsz)
  if (count((/ use_parts, use_vg, use_vsz /)) /= 1) then 
    info=psb_err_no_optional_arg_
    call psb_errpush(info,name,a_err=" vg, vsz, parts")
    goto 9999 
  endif
  
  if (iam == root) then
    nrow = a_glob%get_nrows()
    ncol = a_glob%get_ncols()
    if (nrow /= ncol) then
      write(psb_err_unit,*) 'Input matrix is rectangular, expected square: ',nrow,ncol
      info=-1
      call psb_errpush(info,name)
      goto 9999
    endif
    nnzero = a_glob%get_nzeros()
    nrhs   = 1
    if (use_vsz) then 
      if (sum(vsz(1:np)) /= nrow) then
        write(psb_err_unit,*) 'Input size mismatch: sum(vsz) /= nrow: ',nrow,sum(vsz(1:np))
      end if
    end if
    
  endif
  ! broadcast information to the other processes
  call psb_bcast(ictxt,nrow, root)
  call psb_bcast(ictxt,ncol, root)
  call psb_bcast(ictxt,nnzero, root)
  call psb_bcast(ictxt,nrhs, root)
  liwork = max(np, nrow + ncol)
  allocate(iwork(liwork), iwrk2(np),stat = info)
  if (info /= psb_success_) then
    info=psb_err_alloc_request_
    call psb_errpush(info,name,i_err=(/liwork/),a_err='integer')
    goto 9999
  endif
  if (iam == root) then
    write(psb_out_unit,*) 'start matdist',root, size(iwork),&
         & nrow, ncol, nnzero,nrhs
  endif
  if (use_parts) then 
    call psb_cdall(ictxt,desc_a,info,mg=nrow,parts=parts)
  else if (use_vg) then 
    call psb_cdall(ictxt,desc_a,info,vg=vg)
  else if (use_vsz) then
    call psb_cdall(ictxt,desc_a,info,nl=vsz(iam+1))
  else
    info = -1
  end if
  if(info /= psb_success_) then
    info=psb_err_from_subroutine_
    ch_err='psb_cdall'
    call psb_errpush(info,name,a_err=ch_err)
    goto 9999
  end if
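  ! preallocate assuming an even split of the global nonzeros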
  call psb_spall(a,desc_a,info,nnz=((nnzero+np-1)/np))
  if(info /= psb_success_) then
    info=psb_err_from_subroutine_
    ch_err='psb_spall'
    call psb_errpush(info,name,a_err=ch_err)
    goto 9999
  end if

  isize = 3*nb*max(((nnzero+nrow)/nrow),nb)
  allocate(val(isize),irow(isize),icol(isize),stat=info)
  if(info /= psb_success_) then
    info=psb_err_from_subroutine_
    ch_err='Allocate'
    call psb_errpush(info,name,a_err=ch_err)
    goto 9999
  end if
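
  !
  ! Main distribution loop: rows are dealt out in chunks of up to nb
  ! consecutive rows sharing the same owner(s); the root extracts each
  ! chunk from a_glob and either inserts it into its own local matrix
  ! or ships it to the owning process(es).
  !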

  i_count   = 1
  if (use_vsz) then
    iproc = 0
    lastigp = vsz(iproc+1)
  end if
  do while (i_count <= nrow)

    if (use_parts) then 
      call parts(i_count,nrow,np,iwork, np_sharing)
      !
      ! np_sharing allows for overlap in the data distribution.
      ! If an index is overlapped, then we have to send its row
      ! to multiple processes. NOTE: we are assuming the output 
      ! from PARTS is coherent, otherwise a deadlock is the most
      ! likely outcome.
      ! 
      j_count = i_count 
      if (np_sharing == 1) then 
        iproc   = iwork(1) 
        do 
          j_count = j_count + 1 
          if (j_count-i_count >= nb) exit
          if (j_count > nrow) exit
          call parts(j_count,nrow,np,iwrk2, np_sharing)
          if (np_sharing /= 1 ) exit
          if (iwrk2(1) /= iproc ) exit
        end do
      end if
    else if (use_vg) then 
      np_sharing = 1
      j_count = i_count 
      iproc   = vg(i_count)
      iwork(1:np_sharing) = iproc
      do 
        j_count = j_count + 1 
        if (j_count-i_count >= nb) exit
        if (j_count > nrow) exit
        if (vg(j_count) /= iproc ) exit
      end do
    else if (use_vsz) then
      np_sharing = 1
      j_count = i_count 
      iwork(1:np_sharing) = iproc
      do 
        j_count = j_count + 1 
        if (j_count-i_count >= nb) exit
        if (j_count > nrow) exit
        if (j_count > lastigp) exit
      end do      
    end if

    ! now we should insert rows i_count..j_count-1
    nnr = j_count - i_count
    
    if (iam == root) then
      
      ll = 0
      do i= i_count, j_count-1
        call a_glob%csget(i,i,nz,&
             & irow,icol,val,info,nzin=ll,append=.true.)
        if (info /= psb_success_) then            
          if (nz > min(size(irow(ll+1:)),size(icol(ll+1:)),size(val(ll+1:)))) then 
            write(psb_err_unit,*) 'Allocation failure? This should not happen!'
          end if
          info=psb_err_from_subroutine_
          ch_err='csget'
          call psb_errpush(info,name,a_err=ch_err)
          goto 9999
        end if
        ll = ll + nz
      end do

      do k_count = 1, np_sharing
        iproc = iwork(k_count)
        
        if (iproc == iam) then
          call psb_spins(ll,irow,icol,val,a,desc_a,info)
          if(info /= psb_success_) then
            info=psb_err_from_subroutine_
            ch_err='psb_spins'
            call psb_errpush(info,name,a_err=ch_err)
            goto 9999
          end if
        else
          call psb_snd(ictxt,nnr,iproc)
          call psb_snd(ictxt,ll,iproc)
          call psb_snd(ictxt,irow(1:ll),iproc)
          call psb_snd(ictxt,icol(1:ll),iproc)
          call psb_snd(ictxt,val(1:ll),iproc)
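          ! handshake: wait for the receiver to echo ll back before
          ! moving on to the next chunk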
          call psb_rcv(ictxt,ll,iproc)
        endif
      end do
    else
      
      do k_count = 1, np_sharing
        iproc = iwork(k_count)
        if (iproc == iam) then
          call psb_rcv(ictxt,nnr,root)
          call psb_rcv(ictxt,ll,root)
          if (ll > size(irow)) then 
            write(psb_err_unit,*) iam,'need to reallocate ',ll
            deallocate(val,irow,icol)
            allocate(val(ll),irow(ll),icol(ll),stat=info)
            if(info /= psb_success_) then
              info=psb_err_from_subroutine_
              ch_err='Allocate'
              call psb_errpush(info,name,a_err=ch_err)
              goto 9999
            end if
            
          endif
          call psb_rcv(ictxt,irow(1:ll),root)
          call psb_rcv(ictxt,icol(1:ll),root)
          call psb_rcv(ictxt,val(1:ll),root)
          call psb_snd(ictxt,ll,root)
          call psb_spins(ll,irow,icol,val,a,desc_a,info)
          if(info /= psb_success_) then
            info=psb_err_from_subroutine_
            ch_err='psb_spins'
            call psb_errpush(info,name,a_err=ch_err)
            goto 9999
          end if
        endif
      end do
    endif
    i_count = j_count
    if ((use_vsz).and.(j_count <= nrow)) then 
      if (j_count > lastigp) then
        iproc = iproc + 1
        lastigp = lastigp + vsz(iproc+1)
      end if
    end if
  end do

  call psb_barrier(ictxt)
  t0 = psb_wtime()
  call psb_cdasb(desc_a,info)     
  t1 = psb_wtime()
  if(info /= psb_success_)then
    info=psb_err_from_subroutine_
    ch_err='psb_cdasb'
    call psb_errpush(info,name,a_err=ch_err)
    goto 9999
  end if

  call psb_barrier(ictxt)
  t2 = psb_wtime()
  call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=fmt,mold=mold)     
  t3 = psb_wtime()
  if(info /= psb_success_)then
    info=psb_err_from_subroutine_
    ch_err='psb_spasb'
    call psb_errpush(info,name,a_err=ch_err)
    goto 9999
  end if


  if (iam == root) then 
    write(psb_out_unit,*) 'descriptor assembly: ',t1-t0
    write(psb_out_unit,*) 'sparse matrix assembly: ',t3-t2
  end if


  
  deallocate(val,irow,icol,iwork,iwrk2,stat=info)
  if(info /= psb_success_)then
    info=psb_err_from_subroutine_
    ch_err='deallocate'
    call psb_errpush(info,name,a_err=ch_err)
    goto 9999
  end if

  if (iam == root) write(psb_out_unit,*) 'end matdist'

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(ictxt,err_act)

  return

end subroutine psb_cmatdist
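
!
! A minimal example (hypothetical, not part of the PSBLAS library) of a
! user-supplied PARTS routine matching the interface documented above:
! a block-row distribution that assigns ceiling(n/np) consecutive rows
! to each process, with no overlap (nv = 1).
!
subroutine example_block_parts(global_indx, n, np, pv, nv)
  use psb_base_mod, only : psb_ipk_
  implicit none
  integer(psb_ipk_), intent(in)  :: global_indx, n, np
  integer(psb_ipk_), intent(out) :: nv
  integer(psb_ipk_), intent(out) :: pv(*)
  integer(psb_ipk_) :: nb_rows

  ! rows per process, rounded up so that all n rows are covered
  nb_rows = (n + np - 1)/np
  ! exactly one owner per row: no overlap in the distribution
  nv    = 1
  pv(1) = (global_indx - 1)/nb_rows
end subroutine example_block_parts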



subroutine psb_lcmatdist(a_glob, a, ictxt, desc_a,&
     & info, parts, vg, vsz, inroot,fmt,mold)
  !
  ! a utility subroutine to distribute a matrix among processes
  ! according to a user defined data distribution, using
  ! sparse matrix subroutines.
  !
  !  type(psb_lcspmat_type)                :: a_glob
  !     on entry: the global sparse matrix, with long (psb_lpk_)
  !        indices, significant only on the root process.
  !
  !  type(psb_cspmat_type)                            :: a
  !     on exit : this will contain the local sparse matrix.
  !
  !       interface parts
  !         !   .....user passed subroutine.....
  !         subroutine parts(global_indx,n,np,pv,nv)
  !           implicit none
  !           integer(psb_ipk_), intent(in)  :: global_indx, n, np
  !           integer(psb_ipk_), intent(out) :: nv
  !           integer(psb_ipk_), intent(out) :: pv(*)
  !         end subroutine parts
  !       end interface
  !     on entry: subroutine providing the user-defined data distribution.
  !        For each global_indx the subroutine must return in pv the
  !        list of all processes owning the row with that index; the
  !        list contains nv entries. Usually nv=1; if nv > 1 there is
  !        an overlap in the data distribution.
  !
  !  integer(psb_ipk_) :: ictxt
  !     on entry: the PSBLAS parallel environment context.
  !
  !  type(psb_desc_type)               :: desc_a
  !     on exit : the updated communication descriptor.
  !
  !  integer(psb_ipk_), optional    :: inroot
  !     on entry: the process holding a_glob. default: psb_root_ (0)
  !     on exit : unchanged.
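  !
  !  integer(psb_ipk_), optional    :: vg(:)
  !     on entry: process assignment vector: global row i is owned by
  !        process vg(i) (0-based). Exactly one of parts, vg, vsz
  !        must be present.
  !     on exit : unchanged.
  !
  !  integer(psb_ipk_), optional    :: vsz(:)
  !     on entry: block sizes: process p owns the vsz(p+1) consecutive
  !        rows following those of process p-1; sum(vsz(1:np)) must
  !        equal the global number of rows.
  !     on exit : unchanged.
  !
  !  character(len=*), optional     :: fmt
  !     on entry: requested storage format for the assembled local
  !        matrix, passed on to psb_spasb.
  !
  !  class(psb_c_base_sparse_mat), optional :: mold
  !     on entry: prototype object; the assembled local matrix is
  !        converted to its dynamic type.
  !
  !  integer(psb_ipk_), intent(out) :: info
  !     on exit : return code; psb_success_ (0) on success.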
  !
  use psb_base_mod
  use psb_mat_mod
  implicit none

  ! parameters
  type(psb_lcspmat_type)      :: a_glob
  integer(psb_ipk_) :: ictxt
  type(psb_cspmat_type)      :: a
  type(psb_desc_type)        :: desc_a
  integer(psb_ipk_), intent(out)       :: info
  integer(psb_ipk_), optional       :: inroot
  character(len=*), optional :: fmt
  class(psb_c_base_sparse_mat), optional :: mold
  procedure(psb_parts), optional  :: parts
  integer(psb_ipk_), optional     :: vg(:) 
  integer(psb_ipk_), optional     :: vsz(:)
  
  ! local variables
  logical           :: use_parts, use_vg, use_vsz
  integer(psb_ipk_) :: np, iam, np_sharing, root, iproc
  integer(psb_ipk_) :: err_act, il, inz
  integer(psb_lpk_) :: k_count, liwork,  nnzero, nrhs,&
       & i, ll, nz, isize, nnr, err
  integer(psb_lpk_) :: i_count, j_count, nrow, ncol, ig, lastigp
  integer(psb_ipk_), allocatable  :: iwork(:), iwrk2(:)
  integer(psb_lpk_), allocatable  :: irow(:),icol(:)
  complex(psb_spk_), allocatable    :: val(:)
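  ! nb: maximum number of consecutive rows gathered and exchanged per chunk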
  integer(psb_ipk_), parameter    :: nb=30
  real(psb_dpk_)              :: t0, t1, t2, t3, t4, t5
  character(len=20)           :: name, ch_err

  info = psb_success_
  err  = 0
  name = 'psb_lc_mat_dist'
  call psb_erractionsave(err_act)

  ! executable statements    
  if (present(inroot)) then
    root = inroot
  else
    root = psb_root_
  end if
  call psb_info(ictxt, iam, np)     
  if (iam == root) then
    nrow = a_glob%get_nrows()
    ncol = a_glob%get_ncols()
    if (nrow /= ncol) then
      write(psb_err_unit,*) 'Input matrix is rectangular, expected square: ',nrow,ncol
      info=-1
      call psb_errpush(info,name)
      goto 9999
    endif
    nnzero = a_glob%get_nzeros()
    nrhs   = 1
    if (present(vsz)) then
      if (sum(vsz(1:np)) /= nrow) then
        write(psb_err_unit,*) 'Input size mismatch: sum(vsz) /= nrow: ',nrow,sum(vsz(1:np))
      end if
    end if
  endif

  use_parts = present(parts)
  use_vg    = present(vg)
  use_vsz   = present(vsz)
  if (count((/ use_parts, use_vg, use_vsz /)) /= 1) then 
    info=psb_err_no_optional_arg_
    call psb_errpush(info,name,a_err=" vg, vsz, parts")
    goto 9999 
  endif
  
  ! broadcast information to the other processes
  call psb_bcast(ictxt,nrow, root)
  call psb_bcast(ictxt,ncol, root)
  call psb_bcast(ictxt,nnzero, root)
  call psb_bcast(ictxt,nrhs, root)
  liwork = max(np, nrow + ncol)
  allocate(iwork(liwork), iwrk2(np),stat = info)
  if (info /= psb_success_) then
    info=psb_err_alloc_request_
    call psb_errpush(info,name,l_err=(/liwork/),a_err='integer')
    goto 9999
  endif
  if (iam == root) then
    write(psb_out_unit,*) 'start matdist',root, size(iwork),&
         & nrow, ncol, nnzero,nrhs
  endif
  if (use_parts) then 
    call psb_cdall(ictxt,desc_a,info,mg=nrow,parts=parts)
  else if (use_vg) then 
    call psb_cdall(ictxt,desc_a,info,vg=vg)
  else if (use_vsz) then 
    call psb_cdall(ictxt,desc_a,info,nl=vsz(iam+1))
  else
    info = -1
  end if
  if(info /= psb_success_) then
    info=psb_err_from_subroutine_
    ch_err='psb_cdall'
    call psb_errpush(info,name,a_err=ch_err)
    goto 9999
  end if
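  ! preallocate assuming an even split of the global nonzeros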
  inz = ((nnzero+np-1)/np)
  call psb_spall(a,desc_a,info,nnz=inz)
  if(info /= psb_success_) then
    info=psb_err_from_subroutine_
    ch_err='psb_spall'
    call psb_errpush(info,name,a_err=ch_err)
    goto 9999
  end if

  isize = 3*nb*max(((nnzero+nrow)/nrow),nb)
  allocate(val(isize),irow(isize),icol(isize),stat=info)
  if(info /= psb_success_) then
    info=psb_err_from_subroutine_
    ch_err='Allocate'
    call psb_errpush(info,name,a_err=ch_err)
    goto 9999
  end if
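
  !
  ! Main distribution loop: rows are dealt out in chunks of up to nb
  ! consecutive rows sharing the same owner(s); the root extracts each
  ! chunk from a_glob and either inserts it into its own local matrix
  ! or ships it to the owning process(es).
  !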

  i_count   = 1
  if (use_vsz) then
    iproc = 0
    lastigp = vsz(iproc+1)
  end if
  do while (i_count <= nrow)

    if (use_parts) then 
      call parts(i_count,nrow,np,iwork, np_sharing)
      !
      ! np_sharing allows for overlap in the data distribution.
      ! If an index is overlapped, then we have to send its row
      ! to multiple processes. NOTE: we are assuming the output 
      ! from PARTS is coherent, otherwise a deadlock is the most
      ! likely outcome.
      ! 
      j_count = i_count 
      if (np_sharing == 1) then 
        iproc   = iwork(1) 
        do 
          j_count = j_count + 1 
          if (j_count-i_count >= nb) exit
          if (j_count > nrow) exit
          call parts(j_count,nrow,np,iwrk2, np_sharing)
          if (np_sharing /= 1 ) exit
          if (iwrk2(1) /= iproc ) exit
        end do
      end if
    else if (use_vg) then 
      np_sharing = 1
      j_count = i_count 
      iproc   = vg(i_count)
      iwork(1:np_sharing) = iproc
      do 
        j_count = j_count + 1 
        if (j_count-i_count >= nb) exit
        if (j_count > nrow) exit
        if (vg(j_count) /= iproc ) exit
      end do
    else if (use_vsz) then
      np_sharing = 1
      j_count = i_count 
      iwork(1:np_sharing) = iproc
      do 
        j_count = j_count + 1 
        if (j_count-i_count >= nb) exit
        if (j_count > nrow) exit
        if (j_count > lastigp) exit
      end do      
    end if

    ! now we should insert rows i_count..j_count-1
    nnr = j_count - i_count
    
    if (iam == root) then
      
      ll = 0
      do i= i_count, j_count-1
        call a_glob%csget(i,i,nz,&
             & irow,icol,val,info,nzin=ll,append=.true.)
        if (info /= psb_success_) then            
          if (nz > min(size(irow(ll+1:)),size(icol(ll+1:)),size(val(ll+1:)))) then 
            write(psb_err_unit,*) 'Allocation failure? This should not happen!'
          end if
          info=psb_err_from_subroutine_
          ch_err='csget'
          call psb_errpush(info,name,a_err=ch_err)
          goto 9999
        end if
        ll = ll + nz
      end do

      do k_count = 1, np_sharing
        iproc = iwork(k_count)
        
        if (iproc == iam) then
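          ! copy the lpk counter into an ipk variable for psb_spins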
          il = ll
          call psb_spins(il,irow,icol,val,a,desc_a,info)
          if(info /= psb_success_) then
            info=psb_err_from_subroutine_
            ch_err='psb_spins'
            call psb_errpush(info,name,a_err=ch_err)
            goto 9999
          end if
        else
          call psb_snd(ictxt,nnr,iproc)
          call psb_snd(ictxt,ll,iproc)
          call psb_snd(ictxt,irow(1:ll),iproc)
          call psb_snd(ictxt,icol(1:ll),iproc)
          call psb_snd(ictxt,val(1:ll),iproc)
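          ! handshake: wait for the receiver to echo ll back before
          ! moving on to the next chunk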
          call psb_rcv(ictxt,ll,iproc)
        endif
      end do
    else
      
      do k_count = 1, np_sharing
        iproc = iwork(k_count)
        if (iproc == iam) then
          call psb_rcv(ictxt,nnr,root)
          call psb_rcv(ictxt,ll,root)
          if (ll > size(irow)) then 
            write(psb_err_unit,*) iam,'need to reallocate ',ll
            deallocate(val,irow,icol)
            allocate(val(ll),irow(ll),icol(ll),stat=info)
            if(info /= psb_success_) then
              info=psb_err_from_subroutine_
              ch_err='Allocate'
              call psb_errpush(info,name,a_err=ch_err)
              goto 9999
            end if
            
          endif
          call psb_rcv(ictxt,irow(1:ll),root)
          call psb_rcv(ictxt,icol(1:ll),root)
          call psb_rcv(ictxt,val(1:ll),root)
          call psb_snd(ictxt,ll,root)
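          ! as above: ipk copy of the lpk counter for psb_spins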
          il = ll 
          call psb_spins(il,irow,icol,val,a,desc_a,info)
          if(info /= psb_success_) then
            info=psb_err_from_subroutine_
            ch_err='psb_spins'
            call psb_errpush(info,name,a_err=ch_err)
            goto 9999
          end if
        endif
      end do
    endif
    i_count = j_count
    if ((use_vsz).and.(j_count <= nrow)) then 
      if (j_count > lastigp) then
        iproc = iproc + 1
        lastigp = lastigp + vsz(iproc+1)
      end if
    end if
  end do

  call psb_barrier(ictxt)
  t0 = psb_wtime()
  call psb_cdasb(desc_a,info)     
  t1 = psb_wtime()
  if(info /= psb_success_)then
    info=psb_err_from_subroutine_
    ch_err='psb_cdasb'
    call psb_errpush(info,name,a_err=ch_err)
    goto 9999
  end if

  call psb_barrier(ictxt)
  t2 = psb_wtime()
  call psb_spasb(a,desc_a,info,dupl=psb_dupl_err_,afmt=fmt,mold=mold)     
  t3 = psb_wtime()
  if(info /= psb_success_)then
    info=psb_err_from_subroutine_
    ch_err='psb_spasb'
    call psb_errpush(info,name,a_err=ch_err)
    goto 9999
  end if


  if (iam == root) then 
    write(psb_out_unit,*) 'descriptor assembly: ',t1-t0
    write(psb_out_unit,*) 'sparse matrix assembly: ',t3-t2
  end if


  
  deallocate(val,irow,icol,iwork,iwrk2,stat=info)
  if(info /= psb_success_)then
    info=psb_err_from_subroutine_
    ch_err='deallocate'
    call psb_errpush(info,name,a_err=ch_err)
    goto 9999
  end if

  if (iam == root) write(psb_out_unit,*) 'end matdist'

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(ictxt,err_act)

  return

end subroutine psb_lcmatdist