!!$ 
!!$ 
!!$                    MD2P4
!!$    Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS
!!$                      for 
!!$              Parallel Sparse BLAS  v2.0
!!$    (C) Copyright 2006 Salvatore Filippone    University of Rome Tor Vergata
!!$                       Alfredo Buttari        University of Rome Tor Vergata
!!$                       Daniela di Serafino    Second University of Naples
!!$                       Pasqua D'Ambra         ICAR-CNR                      
!!$ 
!!$  Redistribution and use in source and binary forms, with or without
!!$  modification, are permitted provided that the following conditions
!!$  are met:
!!$    1. Redistributions of source code must retain the above copyright
!!$       notice, this list of conditions and the following disclaimer.
!!$    2. Redistributions in binary form must reproduce the above copyright
!!$       notice, this list of conditions, and the following disclaimer in the
!!$       documentation and/or other materials provided with the distribution.
!!$    3. The name of the MD2P4 group or the names of its contributors may
!!$       not be used to endorse or promote products derived from this
!!$       software without specific written permission.
!!$ 
!!$  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
!!$  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
!!$  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
!!$  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS
!!$  BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
!!$  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
!!$  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
!!$  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
!!$  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
!!$  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
!!$  POSSIBILITY OF SUCH DAMAGE.
!!$ 
!!$  
subroutine psb_dilu_fct(a,l,u,d,info,blck)
  
  !
  ! This routine copies and factors "on the fly" from A and BLCK
  ! into L/D/U. 
  !
  !
  use psb_base_mod
  implicit none
  !     .. Scalar Arguments ..
  integer, intent(out)                ::     info
  !     .. Array Arguments ..
  type(psb_dspmat_type),intent(in)    :: a
  type(psb_dspmat_type),intent(inout) :: l,u
  type(psb_dspmat_type),intent(in), optional, target :: blck
  real(kind(1.d0)), intent(inout)     ::  d(:)
  !     .. Local Scalars ..
  real(kind(1.d0)) ::  dia, temp
  integer   ::  i, j, jj, k, kk, l1, l2, ll, low1, low2,m,ma,err_act
  
  type(psb_dspmat_type), pointer  :: blck_
  character(len=20)   :: name, ch_err
  logical, parameter :: debug=.false.

  name='psb_dilu_fct'
  info = 0
  call psb_erractionsave(err_act)
  !     .. Executable Statements ..
  !

  if (present(blck)) then 
    blck_ => blck
  else
    allocate(blck_,stat=info) 
    if (info /= 0) then 
      call psb_errpush(4010,name,a_err='Allocate')
      goto 9999      
    end if

    call psb_nullify_sp(blck_)  ! Why do we need this? Who knows.... 
    call psb_sp_all(0,0,blck_,1,info)
    if(info.ne.0) then
       info=4010
       ch_err='psb_sp_all'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
    end if

    blck_%m=0
  endif

!!$  write(0,*) 'ilu_fct: ',size(l%ia2),size(u%ia2),a%m,blck_%m
  call psb_dilu_fctint(m,a%m,a,blck_%m,blck_,&
       & d,l%aspk,l%ia1,l%ia2,u%aspk,u%ia1,u%ia2,l1,l2,info)
  if(info.ne.0) then
     info=4010
     ch_err='psb_dilu_fctint'
     call psb_errpush(info,name,a_err=ch_err)
     goto 9999
  end if
  
  l%infoa(1) = l1
  l%fida     = 'CSR'
  l%descra   = 'TLU'
  u%infoa(1) = l2
  u%fida     = 'CSR'
  u%descra   = 'TUU'
  l%m = m
  l%k = m
  u%m = m
  u%k = m
  if (present(blck)) then 
    blck_ => null() 
  else
    call psb_sp_free(blck_,info)
    if(info.ne.0) then
       info=4010
       ch_err='psb_sp_free'
       call psb_errpush(info,name,a_err=ch_err)
       goto 9999
    end if
    deallocate(blck_) 
  endif

  call psb_erractionrestore(err_act)
  return

9999 continue
  call psb_erractionrestore(err_act)
  if (err_act.eq.psb_act_abort_) then
     call psb_error()
     return
  end if
  return

contains
  subroutine psb_dilu_fctint(m,ma,a,mb,b,&
       & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info)
    implicit none 

    type(psb_dspmat_type)          :: a,b
    integer                        :: m,ma,mb,l1,l2,info
    integer, dimension(*)          :: lia1,lia2,uia1,uia2
    real(kind(1.d0)), dimension(*) :: laspk,uaspk,d

    integer :: i,j,k,l,low1,low2,kk,jj,ll, irb, ktrw,err_act
    real(kind(1.d0)) :: dia,temp
    integer, parameter :: nrb=16
    logical,parameter  :: debug=.false.
    type(psb_dspmat_type) :: trw
    integer             :: int_err(5) 
    character(len=20)   :: name, ch_err

    name='psb_dilu_fctint'
    if(psb_get_errstatus().ne.0) return 
    info=0
    call psb_erractionsave(err_act)
    call psb_nullify_sp(trw)
    trw%m=0
    trw%k=0
    if(debug) write(0,*)'LUINT Allocating TRW'
    call psb_sp_all(trw,1,info)
    if(info.ne.0) then
      info=4010
      ch_err='psb_sp_all'
      call psb_errpush(info,name,a_err=ch_err)
      goto 9999
    end if
    if(debug) write(0,*)'LUINT Done  Allocating TRW'
    lia2(1) = 1
    uia2(1) = 1
    l1=0
    l2=0
    m = ma+mb
    if(debug) write(0,*)'In DCSRLU Begin cycle',m,ma,mb

    do i = 1, ma
      if(debug) write(0,*)'LUINT: Loop index ',i,ma
      d(i) = 0.d0

      !
      ! Here we take a fast shortcut if possible, otherwise 
      ! use spgtblk, slower but able (in principle) to handle 
      ! anything. 
      !
      if (toupper(a%fida)=='CSR') then 
        do j = a%ia2(i), a%ia2(i+1) - 1
          k = a%ia1(j)
          !           write(0,*)'KKKKK',k
          if ((k < i).and.(k >= 1)) then
            l1 = l1 + 1
            laspk(l1) = a%aspk(j)
            lia1(l1) = k
          else if (k == i) then
            d(i) = a%aspk(j)
          else if ((k > i).and.(k <= m)) then
            l2 = l2 + 1
            uaspk(l2) = a%aspk(j)
            uia1(l2) = k
          end if
        enddo

      else

        if ((mod(i,nrb) == 1).or.(nrb==1)) then 
          irb = min(ma-i+1,nrb)
          call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1)
          if(info.ne.0) then
            info=4010
            ch_err='psb_sp_getblk'
            call psb_errpush(info,name,a_err=ch_err)
            goto 9999
          end if
          ktrw=1
        end if

        do 
          if (ktrw > trw%infoa(psb_nnz_)) exit
          if (trw%ia1(ktrw) > i) exit
          k = trw%ia2(ktrw)
          if ((k < i).and.(k >= 1)) then
            l1 = l1 + 1
            laspk(l1) = trw%aspk(ktrw)
            lia1(l1) = k
          else if (k == i) then
            d(i) = trw%aspk(ktrw)
          else if ((k > i).and.(k <= m)) then
            l2 = l2 + 1
            uaspk(l2) = trw%aspk(ktrw)
            uia1(l2) = k
          end if
          ktrw = ktrw + 1
        enddo

      end if

!!$

      lia2(i+1) = l1 + 1
      uia2(i+1) = l2 + 1

      dia = d(i)
      do kk = lia2(i), lia2(i+1) - 1
        !     
        !     compute element alo(i,k) of incomplete factorization
        !     
        temp = laspk(kk)
        k = lia1(kk)
        laspk(kk) = temp*d(k)
        !     update the rest of row i using alo(i,k)
        low1 = kk + 1
        low2 = uia2(i)
        updateloop: do  jj = uia2(k), uia2(k+1) - 1
          j = uia1(jj)
          !
          if (j < i) then
            !     search alo(i,*) for matching index J
            do  ll = low1, lia2(i+1) - 1
              l = lia1(ll)
              if (l > j) then
                low1 = ll
                exit
              else if (l == j) then
                laspk(ll) = laspk(ll) - temp*uaspk(jj)
                low1 = ll + 1
                cycle updateloop
              end if
            enddo
            !     
          else if (j == i) then
            !    j=i  update diagonal
            !    write(0,*)'aggiorno dia',dia,'temp',temp,'jj',jj,'u%aspk',uaspk(jj)
            dia = dia - temp*uaspk(jj)
            !    write(0,*)'dia',dia,'temp',temp,'jj',jj,'aspk',uaspk(jj)
            cycle updateloop
            !     
          else if (j > i) then
            !     search aup(i,*) for matching index j
            do ll = low2, uia2(i+1) - 1
              l = uia1(ll)
              if (l > j) then
                low2 = ll
                exit
              else if (l == j) then
                uaspk(ll) = uaspk(ll) - temp*uaspk(jj)
                low2 = ll + 1
                cycle updateloop
              end if
            enddo
          end if
          !     
          !     for milu al=1.;  for ilu al=0.
          !     al = 1.d0
          !     dia = dia - al*temp*aup(jj)
        enddo updateloop
      enddo
      !    
      !     
      !     Non singularity
      !     
      if (dabs(dia) < epstol) then
        !
        !     Pivot too small: unstable factorization
        !     
        info = 2
        int_err(1) = i
        write(ch_err,'(g20.10)') dia
        call psb_errpush(info,name,i_err=int_err,a_err=ch_err)
        goto 9999
      else
        dia = 1.d0/dia
      end if
      d(i) = dia
      ! write(6,*)'diag(',i,')=',d(i)
      !     Scale row i of upper triangle
      do  kk = uia2(i), uia2(i+1) - 1
        uaspk(kk) = uaspk(kk)*dia
      enddo
    enddo

    do i = ma+1, m
      d(i) = 0.d0


      if (toupper(b%fida)=='CSR') then 

        do j = b%ia2(i-ma), b%ia2(i-ma+1) - 1
          k = b%ia1(j)
          !           if (me.eq.2)  write(0,*)'ecco k=',k
          if ((k < i).and.(k >= 1)) then
            l1 = l1 + 1
            laspk(l1) = b%aspk(j)
            lia1(l1) = k
            !              if(me.eq.2) write(0,*)'scrivo l'
          else if (k == i) then
            d(i) = b%aspk(j)
          else if ((k > i).and.(k <= m)) then
            l2 = l2 + 1
            uaspk(l2) = b%aspk(j)
            !              write(0,*)'KKKKK',k
            uia1(l2) = k
          end if
        enddo

      else

        if ((mod((i-ma),nrb) == 1).or.(nrb==1)) then 
          irb = min(m-i+1,nrb)
          call psb_sp_getblk(i-ma,b,trw,info,lrw=i-ma+irb-1)
          if(info.ne.0) then
            info=4010
            ch_err='psb_sp_getblk'
            call psb_errpush(info,name,a_err=ch_err)
            goto 9999
          end if
          ktrw=1
        end if

        do 
          if (ktrw > trw%infoa(psb_nnz_)) exit
          if (trw%ia1(ktrw) > i) exit
          k = trw%ia2(ktrw)
          ! write(0,*)'KKKKK',k
          if ((k < i).and.(k >= 1)) then
            l1 = l1 + 1
            laspk(l1) = trw%aspk(ktrw)
            lia1(l1) = k
          else if (k == i) then
            d(i) = trw%aspk(ktrw)
          else if ((k > i).and.(k <= m)) then
            l2 = l2 + 1
            uaspk(l2) = trw%aspk(ktrw)
            uia1(l2) = k
          end if
          ktrw = ktrw + 1
        enddo

      endif


      lia2(i+1) = l1 + 1
      uia2(i+1) = l2 + 1

      dia = d(i)
      do kk = lia2(i), lia2(i+1) - 1
        !     
        !     compute element alo(i,k) of incomplete factorization
        !     
        temp = laspk(kk)
        k = lia1(kk)
        laspk(kk) = temp*d(k)
        !     update the rest of row i using alo(i,k)
        low1 = kk + 1
        low2 = uia2(i)
        updateloopb: do  jj = uia2(k), uia2(k+1) - 1
          j = uia1(jj)
          !
          if (j < i) then
            !     search alo(i,*) for matching index J
            do  ll = low1, lia2(i+1) - 1
              l = lia1(ll)
              if (l > j) then
                low1 = ll
                exit
              else if (l == j) then
                laspk(ll) = laspk(ll) - temp*uaspk(jj)
                low1 = ll + 1
                cycle updateloopb
              end if
            enddo
            !     
          else if (j == i) then
            !   j=i  update diagonal
            dia = dia - temp*uaspk(jj)
            cycle updateloopb
            !     
          else if (j > i) then
            !    search aup(i,*) for matching index j
            do ll = low2, uia2(i+1) - 1
              l = uia1(ll)
              if (l > j) then
                low2 = ll
                exit
              else if (l == j) then
                uaspk(ll) = uaspk(ll) - temp*uaspk(jj)
                low2 = ll + 1
                cycle updateloopb
              end if
            enddo
          end if
          !     
          !     for milu al=1.;  for ilu al=0.
          !     al = 1.d0
          !     dia = dia - al*temp*aup(jj)
        enddo updateloopb
      enddo
      !     
      !     
      !     Non singularity
      !     
      if (dabs(dia) < epstol) then
        !
        !     Pivot too small: unstable factorization
        !     
        int_err(1) = i
        write(ch_err,'(g20.10)') dia
        info = 2
        call psb_errpush(info,name,i_err=int_err,a_err=ch_err)
        goto 9999
      else
        dia = 1.d0/dia
      end if
      d(i) = dia
      !     Scale row i of upper triangle
      do  kk = uia2(i), uia2(i+1) - 1
        uaspk(kk) = uaspk(kk)*dia
      enddo
    enddo

    call psb_sp_free(trw,info)
    if(info.ne.0) then
      info=4010
      ch_err='psb_sp_free'
      call psb_errpush(info,name,a_err=ch_err)
      goto 9999
    end if
    if(debug) write(0,*)'Leaving ilu_fct'

    call psb_erractionrestore(err_act)
    return

9999 continue
    call psb_erractionrestore(err_act)
    if (err_act.eq.psb_act_abort_) then
      call psb_error()
      return
    end if
    return
  end subroutine psb_dilu_fctint
end subroutine psb_dilu_fct