!
!                Parallel Sparse BLAS  version 3.5
!      (C) Copyright 2006-2018
!        Salvatore Filippone
!        Alfredo Buttari
!
!    Redistribution and use in source and binary forms, with or without
!    modification, are permitted provided that the following conditions
!    are met:
!      1. Redistributions of source code must retain the above copyright
!         notice, this list of conditions and the following disclaimer.
!      2. Redistributions in binary form must reproduce the above copyright
!         notice, this list of conditions, and the following disclaimer in the
!         documentation and/or other materials provided with the distribution.
!      3. The name of the PSBLAS group or the names of its contributors may
!         not be used to endorse or promote products derived from this
!         software without specific written permission.
!
!    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
!    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
!    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
!    PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
!    BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
!    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
!    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
!    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
!    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
!    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
!    POSSIBILITY OF SUCH DAMAGE.
!
!

! == ===================================
!
!
!
! Computational routines
!
!
!
!
!
!
! == ===================================

subroutine psb_c_csr_csmv(alpha,a,x,beta,y,info,trans)
  use psb_error_mod
  use psb_string_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_csmv
  implicit none
  class(psb_c_csr_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)          :: alpha, beta, x(:)
  complex(psb_spk_), intent(inout)       :: y(:)
  integer(psb_ipk_), intent(out)                :: info
  character, optional, intent(in)     :: trans

  character :: trans_
  integer(psb_ipk_) :: m, n
  logical   :: tra, ctra
  integer(psb_ipk_) :: err_act
  integer(psb_ipk_) :: ierr(5)
  character(len=20)  :: name='c_csr_csmv'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_
  if (a%is_dev())   call a%sync()

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif


  tra  = (psb_toupper(trans_) == 'T')
  ctra = (psb_toupper(trans_) == 'C')

  if (tra.or.ctra) then
    m = a%get_ncols()
    n = a%get_nrows()
  else
    n = a%get_ncols()
    m = a%get_nrows()
  end if

  if (size(x,1)<n) then
    info = psb_err_input_asize_small_i_
    ierr(1) = 3; ierr(2) = size(x); ierr(3) = n;
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  end if

  if (size(y,1)<m) then
    info = psb_err_input_asize_small_i_
    ierr(1) = 5; ierr(2) = size(y); ierr(3) =m;
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  end if


  call psb_c_csr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
       & a%is_triangle(),a%is_unit(),&
       & x,beta,y,tra,ctra)

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

contains
  subroutine psb_c_csr_csmv_inner(m,n,alpha,irp,ja,val,is_triangle,is_unit,&
       & x,beta,y,tra,ctra)
    integer(psb_ipk_), intent(in)             :: m,n,irp(*),ja(*)
    complex(psb_spk_), intent(in)      :: alpha, beta, x(*),val(*)
    complex(psb_spk_), intent(inout)   :: y(*)
    logical, intent(in)             :: is_triangle,is_unit,tra, ctra


    integer(psb_ipk_) :: i,j,ir
    complex(psb_spk_) :: acc

    if (alpha == czero) then
      if (beta == czero) then
        !$omp parallel do private(i)
        do i = 1, m
          y(i) = czero
        enddo
      else
        !$omp parallel do private(i)
        do  i = 1, m
          y(i) = beta*y(i)
        end do
      endif
      return
    end if


    if ((.not.tra).and.(.not.ctra)) then

      if (beta == czero) then

        if (alpha == cone) then
          !$omp parallel do private(i,j, acc) schedule(static)
          do i=1,m
            acc  = czero
            !$omp  simd
            do j=irp(i), irp(i+1)-1
              acc  = acc + val(j) * x(ja(j))
            enddo
            y(i) = acc
          end do

        else if (alpha == -cone) then

          !$omp parallel do private(i,j, acc)
          do i=1,m
            acc  = czero
            !$omp  simd
            do j=irp(i), irp(i+1)-1
              acc  = acc + val(j) * x(ja(j))
            enddo
            y(i) = -acc
          end do

        else

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc  = czero
            !$omp  simd
            do j=irp(i), irp(i+1)-1
              acc  = acc + val(j) * x(ja(j))
            enddo
            y(i) = alpha*acc
          end do

        end if


      else if (beta == cone) then

        if (alpha == cone) then
          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc  = czero
            !$omp  simd
            do j=irp(i), irp(i+1)-1
              acc  = acc + val(j) * x(ja(j))
            enddo
            y(i) = y(i) + acc
          end do

        else if (alpha == -cone) then

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc  = czero
            !$omp  simd
            do j=irp(i), irp(i+1)-1
              acc  = acc + val(j) * x(ja(j))
            enddo
            y(i) = y(i) -acc
          end do

        else

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc  = czero
            !$omp  simd
            do j=irp(i), irp(i+1)-1
              acc  = acc + val(j) * x(ja(j))
            enddo
            y(i) = y(i) + alpha*acc
          end do

        end if

      else if (beta == -cone) then

        if (alpha == cone) then
          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc  = czero
            !$omp  simd
            do j=irp(i), irp(i+1)-1
              acc  = acc + val(j) * x(ja(j))
            enddo
            y(i) = -y(i) + acc
          end do

        else if (alpha == -cone) then

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc  = czero
            !$omp  simd
            do j=irp(i), irp(i+1)-1
              acc  = acc + val(j) * x(ja(j))
            enddo
            y(i) = -y(i) -acc
          end do

        else

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc  = czero
            !$omp  simd
            do j=irp(i), irp(i+1)-1
              acc  = acc + val(j) * x(ja(j))
            enddo
            y(i) = -y(i) + alpha*acc
          end do

        end if

      else

        if (alpha == cone) then
          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc  = czero
            !$omp  simd
            do j=irp(i), irp(i+1)-1
              acc  = acc + val(j) * x(ja(j))
            enddo
            y(i) = beta*y(i) + acc
          end do

        else if (alpha == -cone) then

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc  = czero
            !$omp  simd
            do j=irp(i), irp(i+1)-1
              acc  = acc + val(j) * x(ja(j))
            enddo
            y(i) = beta*y(i) - acc
          end do

        else

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc  = czero
            !$omp  simd
            do j=irp(i), irp(i+1)-1
              acc  = acc + val(j) * x(ja(j))
            enddo
            y(i) = beta*y(i) + alpha*acc
          end do

        end if

      end if

    else if (tra) then

      if (beta == czero) then
        !$omp parallel do private(i)
        do i=1, m
          y(i) = czero
        end do
      else if (beta == cone) then
        ! Do nothing
      else if (beta == -cone) then
        !$omp parallel do private(i)
        do i=1, m
          y(i) = -y(i)
        end do
      else
        !$omp parallel do private(i)
        do i=1, m
          y(i) = beta*y(i)
        end do
      end if

      if (alpha == cone) then

        do i=1,n
          do j=irp(i), irp(i+1)-1
            ir = ja(j)
            y(ir) = y(ir) +  val(j)*x(i)
          end do
        enddo

      else if (alpha == -cone) then

        do i=1,n
          do j=irp(i), irp(i+1)-1
            ir = ja(j)
            y(ir) = y(ir) -  val(j)*x(i)
          end do
        enddo

      else

        do i=1,n
          do j=irp(i), irp(i+1)-1
            ir = ja(j)
            y(ir) = y(ir) + alpha*val(j)*x(i)
          end do
        enddo

      end if

    else if (ctra) then

      if (beta == czero) then
        do i=1, m
          y(i) = czero
        end do
      else if (beta == cone) then
        ! Do nothing
      else if (beta == -cone) then
        do i=1, m
          y(i) = -y(i)
        end do
      else
        do i=1, m
          y(i) = beta*y(i)
        end do
      end if

      if (alpha == cone) then

        do i=1,n
          do j=irp(i), irp(i+1)-1
            ir = ja(j)
            y(ir) = y(ir) +  conjg(val(j))*x(i)
          end do
        enddo

      else if (alpha == -cone) then

        do i=1,n
          do j=irp(i), irp(i+1)-1
            ir = ja(j)
            y(ir) = y(ir) -  conjg(val(j))*x(i)
          end do
        enddo

      else

        do i=1,n
          do j=irp(i), irp(i+1)-1
            ir = ja(j)
            y(ir) = y(ir) + alpha*conjg(val(j))*x(i)
          end do
        enddo

      end if

    endif

    if (is_unit) then
      do i=1, min(m,n)
        y(i) = y(i) + alpha*x(i)
      end do
    end if


  end subroutine psb_c_csr_csmv_inner


end subroutine psb_c_csr_csmv

subroutine psb_c_csr_csmm(alpha,a,x,beta,y,info,trans)
  use psb_error_mod
  use psb_string_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_csmm
  implicit none
  class(psb_c_csr_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)          :: alpha, beta, x(:,:)
  complex(psb_spk_), intent(inout)       :: y(:,:)
  integer(psb_ipk_), intent(out)                :: info
  character, optional, intent(in)     :: trans

  character :: trans_
  integer(psb_ipk_) :: j,m,n, nc
  complex(psb_spk_), allocatable  :: acc(:)
  logical   :: tra, ctra
  integer(psb_ipk_) :: err_act
  integer(psb_ipk_) :: ierr(5)
  character(len=20)  :: name='c_csr_csmm'
  logical, parameter :: debug=.false.

  info = psb_success_
  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif
  tra  = (psb_toupper(trans_) == 'T')
  ctra = (psb_toupper(trans_) == 'C')

  if (tra.or.ctra) then
    m = a%get_ncols()
    n = a%get_nrows()
  else
    n = a%get_ncols()
    m = a%get_nrows()
  end if

  if (size(x,1)<n) then
    info = psb_err_input_asize_small_i_
    ierr(1) = 3; ierr(2) = size(x,1); ierr(3) = n;
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  end if

  if (size(y,1)<m) then
    info = psb_err_input_asize_small_i_
    ierr(1) = 5; ierr(2) = size(y,1); ierr(3) =m;
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  end if

  nc = min(size(x,2) , size(y,2) )

  allocate(acc(nc), stat=info)
  if(info /= psb_success_) then
    info=psb_err_from_subroutine_
    call psb_errpush(info,name,a_err='allocate')
    goto 9999
  end if

  call  psb_c_csr_csmm_inner(m,n,nc,alpha,a%irp,a%ja,a%val, &
       & a%is_triangle(),a%is_unit(),x,size(x,1,kind=psb_ipk_), &
       & beta,y,size(y,1,kind=psb_ipk_),tra,ctra,acc)


  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

contains
  subroutine psb_c_csr_csmm_inner(m,n,nc,alpha,irp,ja,val,&
       & is_triangle,is_unit,x,ldx,beta,y,ldy,tra,ctra,acc)
    integer(psb_ipk_), intent(in)             :: m,n,ldx,ldy,nc,irp(*),ja(*)
    complex(psb_spk_), intent(in)      :: alpha, beta, x(ldx,*),val(*)
    complex(psb_spk_), intent(inout)   :: y(ldy,*)
    logical, intent(in)             :: is_triangle,is_unit,tra,ctra

    complex(psb_spk_), intent(inout)   :: acc(:)
    integer(psb_ipk_) :: i,j, ir


    if (alpha == czero) then
      if (beta == czero) then
        !$omp parallel do private(i)
        do i = 1, m
          y(i,1:nc) = czero
        enddo
      else
        !$omp parallel do private(i)
        do  i = 1, m
          y(i,1:nc) = beta*y(i,1:nc)
        end do
      endif
      return
    end if

    if ((.not.tra).and.(.not.ctra)) then
      if (beta == czero) then

        if (alpha == cone) then
          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc(1:nc)  = czero
            do j=irp(i), irp(i+1)-1
              acc(1:nc)  = acc(1:nc) + val(j) * x(ja(j),1:nc)
            enddo
            y(i,1:nc) = acc(1:nc)
          end do

        else if (alpha == -cone) then

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc(1:nc)  = czero
            do j=irp(i), irp(i+1)-1
              acc(1:nc)  = acc(1:nc) + val(j) * x(ja(j),1:nc)
            enddo
            y(i,1:nc) = -acc(1:nc)
          end do

        else

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc(1:nc)  = czero
            do j=irp(i), irp(i+1)-1
              acc(1:nc)  = acc(1:nc) + val(j) * x(ja(j),1:nc)
            enddo
            y(i,1:nc) = alpha*acc(1:nc)
          end do

        end if


      else if (beta == cone) then

        if (alpha == cone) then
          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc(1:nc)  = czero
            do j=irp(i), irp(i+1)-1
              acc(1:nc)  = acc(1:nc) + val(j) * x(ja(j),1:nc)
            enddo
            y(i,1:nc) = y(i,1:nc) + acc(1:nc)
          end do

        else if (alpha == -cone) then

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc(1:nc)  = czero
            do j=irp(i), irp(i+1)-1
              acc(1:nc)  = acc(1:nc) + val(j) * x(ja(j),1:nc)
            enddo
            y(i,1:nc) = y(i,1:nc) -acc(1:nc)
          end do

        else

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc(1:nc)  = czero
            do j=irp(i), irp(i+1)-1
              acc(1:nc)  = acc(1:nc) + val(j) * x(ja(j),1:nc)
            enddo
            y(i,1:nc) = y(i,1:nc) + alpha*acc(1:nc)
          end do

        end if

      else if (beta == -cone) then

        if (alpha == cone) then
          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc(1:nc)  = czero
            do j=irp(i), irp(i+1)-1
              acc(1:nc)  = acc(1:nc) + val(j) * x(ja(j),1:nc)
            enddo
            y(i,1:nc) = -y(i,1:nc) + acc(1:nc)
          end do

        else if (alpha == -cone) then

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc(1:nc)  = czero
            do j=irp(i), irp(i+1)-1
              acc(1:nc)  = acc(1:nc) + val(j) * x(ja(j),1:nc)
            enddo
            y(i,1:nc) = -y(i,1:nc) -acc(1:nc)
          end do

        else

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc(1:nc)  = czero
            do j=irp(i), irp(i+1)-1
              acc(1:nc)  = acc(1:nc) + val(j) * x(ja(j),1:nc)
            enddo
            y(i,1:nc) = -y(i,1:nc) + alpha*acc(1:nc)
          end do

        end if

      else

        if (alpha == cone) then
          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc(1:nc)  = czero
            do j=irp(i), irp(i+1)-1
              acc(1:nc)  = acc(1:nc) + val(j) * x(ja(j),1:nc)
            enddo
            y(i,1:nc) = beta*y(i,1:nc) + acc(1:nc)
          end do

        else if (alpha == -cone) then

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc(1:nc)  = czero
            do j=irp(i), irp(i+1)-1
              acc(1:nc)  = acc(1:nc) + val(j) * x(ja(j),1:nc)
            enddo
            y(i,1:nc) = beta*y(i,1:nc) - acc(1:nc)
          end do

        else

          !$omp parallel do private(i,j,acc)
          do i=1,m
            acc(1:nc)  = czero
            do j=irp(i), irp(i+1)-1
              acc(1:nc)  = acc(1:nc) + val(j) * x(ja(j),1:nc)
            enddo
            y(i,1:nc) = beta*y(i,1:nc) + alpha*acc(1:nc)
          end do

        end if

      end if

    else if (tra) then

      if (beta == czero) then
        do i=1, m
          y(i,1:nc) = czero
        end do
      else if (beta == cone) then
        ! Do nothing
      else if (beta == -cone) then
        do i=1, m
          y(i,1:nc) = -y(i,1:nc)
        end do
      else
        do i=1, m
          y(i,1:nc) = beta*y(i,1:nc)
        end do
      end if

      if (alpha == cone) then

        do i=1,n
          do j=irp(i), irp(i+1)-1
            ir = ja(j)
            y(ir,1:nc) = y(ir,1:nc) +  val(j)*x(i,1:nc)
          end do
        enddo

      else if (alpha == -cone) then

        do i=1,n
          do j=irp(i), irp(i+1)-1
            ir = ja(j)
            y(ir,1:nc) = y(ir,1:nc) -  val(j)*x(i,1:nc)
          end do
        enddo

      else

        do i=1,n
          do j=irp(i), irp(i+1)-1
            ir = ja(j)
            y(ir,1:nc) = y(ir,1:nc) + alpha*val(j)*x(i,1:nc)
          end do
        enddo

      end if

    else if (ctra) then

      if (beta == czero) then
        do i=1, m
          y(i,1:nc) = czero
        end do
      else if (beta == cone) then
        ! Do nothing
      else if (beta == -cone) then
        do i=1, m
          y(i,1:nc) = -y(i,1:nc)
        end do
      else
        do i=1, m
          y(i,1:nc) = beta*y(i,1:nc)
        end do
      end if

      if (alpha == cone) then

        do i=1,n
          do j=irp(i), irp(i+1)-1
            ir = ja(j)
            y(ir,1:nc) = y(ir,1:nc) +  conjg(val(j))*x(i,1:nc)
          end do
        enddo

      else if (alpha == -cone) then

        do i=1,n
          do j=irp(i), irp(i+1)-1
            ir = ja(j)
            y(ir,1:nc) = y(ir,1:nc) -  conjg(val(j))*x(i,1:nc)
          end do
        enddo

      else

        do i=1,n
          do j=irp(i), irp(i+1)-1
            ir = ja(j)
            y(ir,1:nc) = y(ir,1:nc) + alpha*conjg(val(j))*x(i,1:nc)
          end do
        enddo

      end if

    endif

    if (is_unit) then
      do i=1, min(m,n)
        y(i,1:nc) = y(i,1:nc) + alpha*x(i,1:nc)
      end do
    end if

  end subroutine psb_c_csr_csmm_inner

end subroutine psb_c_csr_csmm


subroutine psb_c_csr_cssv(alpha,a,x,beta,y,info,trans)
  use psb_error_mod
  use psb_string_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_cssv
  implicit none
  class(psb_c_csr_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)          :: alpha, beta, x(:)
  complex(psb_spk_), intent(inout)       :: y(:)
  integer(psb_ipk_), intent(out)                :: info
  character, optional, intent(in)     :: trans

  character :: trans_
  integer(psb_ipk_) :: i,k,m
  complex(psb_spk_), allocatable :: tmp(:)
  logical   :: tra,ctra
  integer(psb_ipk_) :: err_act
  integer(psb_ipk_) :: ierr(5)
  character(len=20)  :: name='c_csr_cssv'
  logical, parameter :: debug=.false.

  info = psb_success_
  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()
  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if
  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif

  tra  = (psb_toupper(trans_) == 'T')
  ctra = (psb_toupper(trans_) == 'C')
  m = a%get_nrows()

  if (.not. (a%is_triangle())) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  end if

  if (size(x)<m) then
    info = psb_err_input_asize_small_i_
    ierr(1) = 3; ierr(2) = size(x,1); ierr(3) = m;
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  end if

  if (size(y)<m) then
    info = psb_err_input_asize_small_i_
    ierr(1) = 5; ierr(2) = size(y,1); ierr(3) =m;
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  end if

  if (alpha == czero) then
    if (beta == czero) then
      do i = 1, m
        y(i) = czero
      enddo
    else
      do  i = 1, m
        y(i) = beta*y(i)
      end do
    endif
    return
  end if

  if (beta == czero) then

    call inner_csrsv(tra,ctra,a%is_lower(),a%is_unit(),a%get_nrows(),&
         & a%irp,a%ja,a%val,x,y)
    if (alpha == cone) then
      ! do nothing
    else if (alpha == -cone) then
      do  i = 1, m
        y(i) = -y(i)
      end do
    else
      do  i = 1, m
        y(i) = alpha*y(i)
      end do
    end if
  else
    allocate(tmp(m), stat=info)
    if (info /= psb_success_) then
      return
    end if

    call inner_csrsv(tra,ctra,a%is_lower(),a%is_unit(),a%get_nrows(),&
         & a%irp,a%ja,a%val,x,tmp)

    call psb_geaxpby(m,alpha,tmp,beta,y,info)

  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

contains

  subroutine inner_csrsv(tra,ctra,lower,unit,n,irp,ja,val,x,y)
    implicit none
    logical, intent(in)            :: tra,ctra,lower,unit
    integer(psb_ipk_), intent(in)            :: irp(*), ja(*),n
    complex(psb_spk_), intent(in)  :: val(*)
    complex(psb_spk_), intent(in)  :: x(*)
    complex(psb_spk_), intent(out) :: y(*)

    integer(psb_ipk_) :: i,j, jc
    complex(psb_spk_) :: acc

    if ((.not.tra).and.(.not.ctra)) then

      if (lower) then
        if (unit) then
          do i=1, n
            acc = czero
            do j=irp(i), irp(i+1)-1
              acc = acc + val(j)*y(ja(j))
            end do
            y(i) = x(i) - acc
          end do
        else if (.not.unit) then
          do i=1, n
            acc = czero
            do j=irp(i), irp(i+1)-2
              acc = acc + val(j)*y(ja(j))
            end do
            y(i) = (x(i) - acc)/val(irp(i+1)-1)
          end do
        end if
      else if (.not.lower) then

        if (unit) then
          do i=n, 1, -1
            acc = czero
            do j=irp(i), irp(i+1)-1
              acc = acc + val(j)*y(ja(j))
            end do
            y(i) = x(i) - acc
          end do
        else if (.not.unit) then
          do i=n, 1, -1
            acc = czero
            do j=irp(i)+1, irp(i+1)-1
              acc = acc + val(j)*y(ja(j))
            end do
            y(i) = (x(i) - acc)/val(irp(i))
          end do
        end if

      end if

    else if (tra) then

      do i=1, n
        y(i) = x(i)
      end do

      if (lower) then
        if (unit) then
          do i=n, 1, -1
            acc = y(i)
            do j=irp(i), irp(i+1)-1
              jc    = ja(j)
              y(jc) = y(jc) - val(j)*acc
            end do
          end do
        else if (.not.unit) then
          do i=n, 1, -1
            y(i) = y(i)/val(irp(i+1)-1)
            acc  = y(i)
            do j=irp(i), irp(i+1)-2
              jc    = ja(j)
              y(jc) = y(jc) - val(j)*acc
            end do
          end do
        end if
      else if (.not.lower) then

        if (unit) then
          do i=1, n
            acc  = y(i)
            do j=irp(i), irp(i+1)-1
              jc    = ja(j)
              y(jc) = y(jc) - val(j)*acc
            end do
          end do
        else if (.not.unit) then
          do i=1, n
            y(i) = y(i)/val(irp(i))
            acc  = y(i)
            do j=irp(i)+1, irp(i+1)-1
              jc    = ja(j)
              y(jc) = y(jc) - val(j)*acc
            end do
          end do
        end if

      end if

    else if (ctra) then

      do i=1, n
        y(i) = x(i)
      end do

      if (lower) then
        if (unit) then
          do i=n, 1, -1
            acc = y(i)
            do j=irp(i), irp(i+1)-1
              jc    = ja(j)
              y(jc) = y(jc) - conjg(val(j))*acc
            end do
          end do
        else if (.not.unit) then
          do i=n, 1, -1
            y(i) = y(i)/conjg(val(irp(i+1)-1))
            acc  = y(i)
            do j=irp(i), irp(i+1)-2
              jc    = ja(j)
              y(jc) = y(jc) - conjg(val(j))*acc
            end do
          end do
        end if
      else if (.not.lower) then

        if (unit) then
          do i=1, n
            acc  = y(i)
            do j=irp(i), irp(i+1)-1
              jc    = ja(j)
              y(jc) = y(jc) - conjg(val(j))*acc
            end do
          end do
        else if (.not.unit) then
          do i=1, n
            y(i) = y(i)/conjg(val(irp(i)))
            acc  = y(i)
            do j=irp(i)+1, irp(i+1)-1
              jc    = ja(j)
              y(jc) = y(jc) - conjg(val(j))*acc
            end do
          end do
        end if

      end if
    end if
  end subroutine inner_csrsv

end subroutine psb_c_csr_cssv



subroutine psb_c_csr_cssm(alpha,a,x,beta,y,info,trans)
  use psb_error_mod
  use psb_string_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_cssm
  implicit none
  class(psb_c_csr_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)          :: alpha, beta, x(:,:)
  complex(psb_spk_), intent(inout)       :: y(:,:)
  integer(psb_ipk_), intent(out)                :: info
  character, optional, intent(in)     :: trans

  character :: trans_
  integer(psb_ipk_) :: i,k,m, nc
  complex(psb_spk_), allocatable :: tmp(:,:)
  logical   :: tra, ctra
  integer(psb_ipk_) :: err_act
  character(len=20)  :: name='c_csr_cssm'
  logical, parameter :: debug=.false.

  info = psb_success_
  call psb_erractionsave(err_act)

  if (a%is_dev())   call a%sync()
  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if
  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif


  tra  = (psb_toupper(trans_) == 'T')
  ctra = (psb_toupper(trans_) == 'C')

  m   = a%get_nrows()
  nc  = min(size(x,2) , size(y,2))

  if (.not. (a%is_triangle())) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  end if


  if (alpha == czero) then
    if (beta == czero) then
      do i = 1, m
        y(i,:) = czero
      enddo
    else
      do  i = 1, m
        y(i,:) = beta*y(i,:)
      end do
    endif
    return
  end if

  if (beta == czero) then
    call inner_csrsm(tra,ctra,a%is_lower(),a%is_unit(),a%get_nrows(),nc,&
         & a%irp,a%ja,a%val,x,size(x,1,kind=psb_ipk_),y,size(y,1,kind=psb_ipk_),info)
    do  i = 1, m
      y(i,1:nc) = alpha*y(i,1:nc)
    end do
  else
    allocate(tmp(m,nc), stat=info)
    if(info /= psb_success_) then
      info=psb_err_from_subroutine_
      call psb_errpush(info,name,a_err='allocate')
      goto 9999
    end if

    call inner_csrsm(tra,ctra,a%is_lower(),a%is_unit(),a%get_nrows(),nc,&
         & a%irp,a%ja,a%val,x,size(x,1,kind=psb_ipk_),tmp,size(tmp,1,kind=psb_ipk_),info)
    do  i = 1, m
      y(i,1:nc) = alpha*tmp(i,1:nc) + beta*y(i,1:nc)
    end do
  end if

  if(info /= psb_success_) then
    info=psb_err_from_subroutine_
    call psb_errpush(info,name,a_err='inner_csrsm')
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return


9999 call psb_error_handler(err_act)

  return


contains

  subroutine inner_csrsm(tra,ctra,lower,unit,nr,nc,&
       & irp,ja,val,x,ldx,y,ldy,info)
    implicit none
    logical, intent(in)              :: tra,ctra,lower,unit
    integer(psb_ipk_), intent(in)              :: nr,nc,ldx,ldy,irp(*),ja(*)
    complex(psb_spk_), intent(in)    :: val(*), x(ldx,*)
    complex(psb_spk_), intent(out)   :: y(ldy,*)
    integer(psb_ipk_), intent(out)             :: info
    integer(psb_ipk_) :: i,j, jc
    complex(psb_spk_), allocatable  :: acc(:)

    info = psb_success_
    allocate(acc(nc), stat=info)
    if(info /= psb_success_) then
      info=psb_err_from_subroutine_
      return
    end if


    if ((.not.tra).and.(.not.ctra)) then
      if (lower) then
        if (unit) then
          do i=1, nr
            acc = czero
            do j=irp(i), irp(i+1)-1
              acc = acc + val(j)*y(ja(j),1:nc)
            end do
            y(i,1:nc) = x(i,1:nc) - acc
          end do
        else if (.not.unit) then
          do i=1, nr
            acc = czero
            do j=irp(i), irp(i+1)-2
              acc = acc + val(j)*y(ja(j),1:nc)
            end do
            y(i,1:nc) = (x(i,1:nc) - acc)/val(irp(i+1)-1)
          end do
        end if
      else if (.not.lower) then

        if (unit) then
          do i=nr, 1, -1
            acc = czero
            do j=irp(i), irp(i+1)-1
              acc = acc + val(j)*y(ja(j),1:nc)
            end do
            y(i,1:nc) = x(i,1:nc) - acc
          end do
        else if (.not.unit) then
          do i=nr, 1, -1
            acc = czero
            do j=irp(i)+1, irp(i+1)-1
              acc = acc + val(j)*y(ja(j),1:nc)
            end do
            y(i,1:nc) = (x(i,1:nc) - acc)/val(irp(i))
          end do
        end if

      end if

    else if (tra) then

      do i=1, nr
        y(i,1:nc) = x(i,1:nc)
      end do

      if (lower) then
        if (unit) then
          do i=nr, 1, -1
            acc = y(i,1:nc)
            do j=irp(i), irp(i+1)-1
              jc    = ja(j)
              y(jc,1:nc) = y(jc,1:nc) - val(j)*acc
            end do
          end do
        else if (.not.unit) then
          do i=nr, 1, -1
            y(i,1:nc) = y(i,1:nc)/val(irp(i+1)-1)
            acc  = y(i,1:nc)
            do j=irp(i), irp(i+1)-2
              jc    = ja(j)
              y(jc,1:nc) = y(jc,1:nc) - val(j)*acc
            end do
          end do
        end if
      else if (.not.lower) then

        if (unit) then
          do i=1, nr
            acc  = y(i,1:nc)
            do j=irp(i), irp(i+1)-1
              jc    = ja(j)
              y(jc,1:nc) = y(jc,1:nc) - val(j)*acc
            end do
          end do
        else if (.not.unit) then
          do i=1, nr
            y(i,1:nc) = y(i,1:nc)/val(irp(i))
            acc    = y(i,1:nc)
            do j=irp(i)+1, irp(i+1)-1
              jc      = ja(j)
              y(jc,1:nc) = y(jc,1:nc) - val(j)*acc
            end do
          end do
        end if

      end if

    else if (ctra) then

      do i=1, nr
        y(i,1:nc) = x(i,1:nc)
      end do

      if (lower) then
        if (unit) then
          do i=nr, 1, -1
            acc = y(i,1:nc)
            do j=irp(i), irp(i+1)-1
              jc    = ja(j)
              y(jc,1:nc) = y(jc,1:nc) - conjg(val(j))*acc
            end do
          end do
        else if (.not.unit) then
          do i=nr, 1, -1
            y(i,1:nc) = y(i,1:nc)/conjg(val(irp(i+1)-1))
            acc  = y(i,1:nc)
            do j=irp(i), irp(i+1)-2
              jc    = ja(j)
              y(jc,1:nc) = y(jc,1:nc) - conjg(val(j))*acc
            end do
          end do
        end if
      else if (.not.lower) then

        if (unit) then
          do i=1, nr
            acc  = y(i,1:nc)
            do j=irp(i), irp(i+1)-1
              jc    = ja(j)
              y(jc,1:nc) = y(jc,1:nc) - conjg(val(j))*acc
            end do
          end do
        else if (.not.unit) then
          do i=1, nr
            y(i,1:nc) = y(i,1:nc)/conjg(val(irp(i)))
            acc    = y(i,1:nc)
            do j=irp(i)+1, irp(i+1)-1
              jc      = ja(j)
              y(jc,1:nc) = y(jc,1:nc) - conjg(val(j))*acc
            end do
          end do
        end if

      end if
    end if
  end subroutine inner_csrsm

end subroutine psb_c_csr_cssm

function psb_c_csr_maxval(a) result(res)
  use psb_error_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_maxval
  implicit none
  class(psb_c_csr_sparse_mat), intent(in) :: a
  real(psb_spk_)         :: res

  integer(psb_ipk_) :: nnz, nc
  integer(psb_ipk_) :: info
  character(len=20)  :: name='c_csr_maxval'
  logical, parameter :: debug=.false.

  if (a%is_dev())   call a%sync()

  res = szero
  nnz = a%get_nzeros()
  if (allocated(a%val)) then
    nnz = min(nnz,size(a%val))
    res = maxval(abs(a%val(1:nnz)))
  end if
end function psb_c_csr_maxval

function psb_c_csr_csnmi(a) result(res)
  use psb_error_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_csnmi
  implicit none
  class(psb_c_csr_sparse_mat), intent(in) :: a
  real(psb_spk_)         :: res

  integer(psb_ipk_) :: i,j
  real(psb_spk_) :: acc
  logical   :: tra
  character(len=20)  :: name='c_csnmi'
  logical, parameter :: debug=.false.


  res = szero
  if (a%is_dev())   call a%sync()

  !$omp parallel do private(i,j,acc)  reduction(max: res)
  do i = 1, a%get_nrows()
    acc = szero
    do j=a%irp(i),a%irp(i+1)-1
      acc = acc + abs(a%val(j))
    end do
    res = max(res,acc)
  end do

end function psb_c_csr_csnmi

subroutine psb_c_csr_rowsum(d,a)
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_rowsum
  class(psb_c_csr_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(out)             :: d(:)

  integer(psb_ipk_) :: i,j,k,m
  integer(psb_ipk_) :: err_act, info
  integer(psb_ipk_) :: ierr(5)
  character(len=20)  :: name='rowsum'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  m = a%get_nrows()
  if (size(d) < m) then
    info=psb_err_input_asize_small_i_
    ierr(1) = 1; ierr(2) = size(d); ierr(3) = m
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  end if

  do i = 1, a%get_nrows()
    d(i) = czero
    do j=a%irp(i),a%irp(i+1)-1
      d(i) = d(i) + (a%val(j))
    end do
  end do

  if (a%is_unit()) then
    do i=1, m
      d(i) = d(i) + cone
    end do
  end if

  return
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_csr_rowsum

subroutine psb_c_csr_arwsum(d,a)
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_arwsum
  class(psb_c_csr_sparse_mat), intent(in) :: a
  real(psb_spk_), intent(out)              :: d(:)

  integer(psb_ipk_) :: i,j,m
  logical   :: tra
  integer(psb_ipk_) :: err_act, info
  integer(psb_ipk_) :: ierr(5)
  character(len=20)  :: name='rowsum'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  m = a%get_nrows()
  if (size(d) < m) then
    info=psb_err_input_asize_small_i_
    ierr(1) = 1; ierr(2) = size(d); ierr(3) = m
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  end if


  do i = 1, a%get_nrows()
    d(i) = szero
    do j=a%irp(i),a%irp(i+1)-1
      d(i) = d(i) + abs(a%val(j))
    end do
  end do

  if (a%is_unit()) then
    do i=1, m
      d(i) = d(i) + sone
    end do
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_csr_arwsum

subroutine psb_c_csr_colsum(d,a)
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_colsum
  class(psb_c_csr_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(out)              :: d(:)

  integer(psb_ipk_) :: i,j,k,m,n
  integer(psb_ipk_) :: err_act, info
  integer(psb_ipk_) :: ierr(5)
  character(len=20)  :: name='colsum'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  m = a%get_nrows()
  n = a%get_ncols()
  if (size(d) < n) then
    info=psb_err_input_asize_small_i_
    ierr(1) = 1; ierr(2) = size(d); ierr(3) = n
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  end if

  d   = czero

  do i=1, m
    do j=a%irp(i),a%irp(i+1)-1
      k = a%ja(j)
      d(k) = d(k) + (a%val(j))
    end do
  end do

  if (a%is_unit()) then
    do i=1, n
      d(i) = d(i) + cone
    end do
  end if

  return
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_csr_colsum

subroutine psb_c_csr_aclsum(d,a)
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_aclsum
  class(psb_c_csr_sparse_mat), intent(in) :: a
  real(psb_spk_), intent(out)              :: d(:)

  integer(psb_ipk_) :: i,j,k,m,n, nnz
  integer(psb_ipk_) :: err_act, info
  integer(psb_ipk_) :: ierr(5)
  character(len=20)  :: name='aclsum'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  m = a%get_nrows()
  n = a%get_ncols()
  if (size(d) < n) then
    info=psb_err_input_asize_small_i_
    ierr(1) = 1; ierr(2) = size(d); ierr(3) = n
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  end if

  d   = szero

  do i=1, m
    do j=a%irp(i),a%irp(i+1)-1
      k = a%ja(j)
      d(k) = d(k) + abs(a%val(j))
    end do
  end do

  if (a%is_unit()) then
    do i=1, n
      d(i) = d(i) + sone
    end do
  end if

  return
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_csr_aclsum

subroutine psb_c_csr_get_diag(a,d,info)
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_get_diag
  implicit none
  class(psb_c_csr_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(out)     :: d(:)
  integer(psb_ipk_), intent(out)            :: info

  integer(psb_ipk_) :: err_act, mnm, i, j, k
  integer(psb_ipk_) :: ierr(5)
  character(len=20)  :: name='get_diag'
  logical, parameter :: debug=.false.

  info  = psb_success_
  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  mnm = min(a%get_nrows(),a%get_ncols())
  if (size(d) < mnm) then
    info=psb_err_input_asize_invalid_i_
    ierr(1) = 2; ierr(2) = size(d);
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  end if


  if (a%is_unit()) then
    !$omp parallel do private(i)
    do i=1, mnm
      d(i) = cone
    end do
  else
    !$omp parallel do private(i,j,k)
    do i=1, mnm
      d(i) = czero
      do k=a%irp(i),a%irp(i+1)-1
        j=a%ja(k)
        if ((j == i) .and.(j <= mnm )) then
          d(i) = a%val(k)
        endif
      enddo
    end do
  end if
  !$omp parallel do private(i)
  do i=mnm+1,size(d)
    d(i) = czero
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_c_csr_get_diag


subroutine psb_c_csr_scal(d,a,info,side)
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_scal
  use psb_string_mod
  implicit none
  class(psb_c_csr_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)      :: d(:)
  integer(psb_ipk_), intent(out)            :: info
  character, intent(in), optional :: side

  integer(psb_ipk_) :: err_act, i, j, m
  integer(psb_ipk_) :: ierr(5)
  character(len=20)  :: name='scal'
  character :: side_
  logical   :: left
  logical, parameter :: debug=.false.

  info  = psb_success_
  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  if (a%is_unit()) then
    call a%make_nonunit()
  end if

  side_ = 'L'
  if (present(side)) then
    side_ = psb_toupper(side)
  end if

  left = (side_ == 'L')

  if (left) then
    m = a%get_nrows()
    if (size(d) < m) then
      info=psb_err_input_asize_invalid_i_
      ierr(1) = 2; ierr(2) = size(d);
      call psb_errpush(info,name,i_err=ierr)
      goto 9999
    end if

    !$omp parallel do private(i,j)
    do i=1, m
      do j = a%irp(i), a%irp(i+1) -1
        a%val(j) = a%val(j) * d(i)
      end do
    enddo
  else
    m = a%get_ncols()
    if (size(d) < m) then
      info=psb_err_input_asize_invalid_i_
      ierr(1) = 2; ierr(2) = size(d);
      call psb_errpush(info,name,i_err=ierr)
      goto 9999
    end if

    !$omp parallel do private(i,j)
    do i=1,a%get_nzeros()
      j        = a%ja(i)
      a%val(i) = a%val(i) * d(j)
    enddo
  end if

  call a%set_host()

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_c_csr_scal


subroutine psb_c_csr_scals(d,a,info)
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_scals
  implicit none
  class(psb_c_csr_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)      :: d
  integer(psb_ipk_), intent(out)            :: info

  integer(psb_ipk_) :: err_act, i
  character(len=20)  :: name='scal'
  logical, parameter :: debug=.false.

  info  = psb_success_
  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  if (a%is_unit()) then
    call a%make_nonunit()
  end if

  !$omp parallel do private(i)
  do i=1,a%get_nzeros()
    a%val(i) = a%val(i) * d
  enddo
  call a%set_host()

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_c_csr_scals




! == ===================================
!
!
!
! Data management
!
!
!
!
!
! == ===================================


subroutine  psb_c_csr_reallocate_nz(nz,a)
  use psb_error_mod
  use psb_realloc_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_reallocate_nz
  implicit none
  integer(psb_ipk_), intent(in) :: nz
  class(psb_c_csr_sparse_mat), intent(inout) :: a
  integer(psb_ipk_) :: err_act, info
  character(len=20)  :: name='c_csr_reallocate_nz'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)

  call psb_realloc(max(nz,ione),a%ja,info)
  if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info)
  if (info == psb_success_) call psb_realloc(a%get_nrows()+1,a%irp,info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_alloc_dealloc_,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_csr_reallocate_nz

subroutine psb_c_csr_mold(a,b,info)
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_mold
  use psb_error_mod
  implicit none
  class(psb_c_csr_sparse_mat), intent(in)                  :: a
  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                    :: info
  integer(psb_ipk_) :: err_act
  character(len=20)  :: name='csr_mold'
  logical, parameter :: debug=.false.

  call psb_get_erraction(err_act)

  info = 0
  if (allocated(b)) then
    call b%free()
    deallocate(b,stat=info)
  end if
  if (info == 0) allocate(psb_c_csr_sparse_mat :: b, stat=info)

  if (info /= 0) then
    info = psb_err_alloc_dealloc_
    call psb_errpush(info, name)
    goto 9999
  end if
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_c_csr_mold

subroutine  psb_c_csr_allocate_mnnz(m,n,a,nz)
  use psb_error_mod
  use psb_realloc_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_allocate_mnnz
  implicit none
  integer(psb_ipk_), intent(in) :: m,n
  class(psb_c_csr_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(in), optional :: nz
  integer(psb_ipk_) :: err_act, info, nz_
  integer(psb_ipk_) :: ierr(5)
  character(len=20)  :: name='allocate_mnz'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_
  if (m < 0) then
    info = psb_err_iarg_neg_
    ierr(1) = ione; ierr(2) = izero;
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  endif
  if (n < 0) then
    info = psb_err_iarg_neg_
    ierr(1) = 2; ierr(2) = izero;
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  endif
  if (present(nz)) then
    nz_ = max(nz,ione)
  else
    nz_ = max(7*m,7*n,ione)
  end if
  if (nz_ < 0) then
    info = psb_err_iarg_neg_
    ierr(1) = 3; ierr(2) = izero;
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  endif

  if (info == psb_success_) call psb_realloc(m+1,a%irp,info)
  if (info == psb_success_) call psb_realloc(nz_,a%ja,info)
  if (info == psb_success_) call psb_realloc(nz_,a%val,info)
  if (info == psb_success_) then
    a%irp=0
    call a%set_nrows(m)
    call a%set_ncols(n)
    call a%set_bld()
    call a%set_triangle(.false.)
    call a%set_unit(.false.)
    call a%set_dupl(psb_dupl_def_)
    call a%set_host()
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_csr_allocate_mnnz


subroutine psb_c_csr_csgetptn(imin,imax,a,nz,ia,ja,info,&
     & jmin,jmax,iren,append,nzin,rscale,cscale)
  ! Output is always in  COO format
  use psb_error_mod
  use psb_const_mod
  use psb_error_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_csgetptn
  implicit none

  class(psb_c_csr_sparse_mat), intent(in) :: a
  integer(psb_ipk_), intent(in)                  :: imin,imax
  integer(psb_ipk_), intent(out)                 :: nz
  integer(psb_ipk_), allocatable, intent(inout)  :: ia(:), ja(:)
  integer(psb_ipk_),intent(out)                  :: info
  logical, intent(in), optional        :: append
  integer(psb_ipk_), intent(in), optional        :: iren(:)
  integer(psb_ipk_), intent(in), optional        :: jmin,jmax, nzin
  logical, intent(in), optional        :: rscale,cscale

  logical :: append_, rscale_, cscale_
  integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i
  character(len=20)  :: name='csget'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()
  info = psb_success_
  nz = 0

  if (present(jmin)) then
    jmin_ = jmin
  else
    jmin_ = 1
  endif
  if (present(jmax)) then
    jmax_ = jmax
  else
    jmax_ = a%get_ncols()
  endif

  if ((imax<imin).or.(jmax_<jmin_)) return

  if (present(append)) then
    append_=append
  else
    append_=.false.
  endif
  if ((append_).and.(present(nzin))) then
    nzin_ = nzin
  else
    nzin_ = 0
  endif
  if (present(rscale)) then
    rscale_ = rscale
  else
    rscale_ = .false.
  endif
  if (present(cscale)) then
    cscale_ = cscale
  else
    cscale_ = .false.
  endif
  if ((rscale_.or.cscale_).and.(present(iren))) then
    info = psb_err_many_optional_arg_
    call psb_errpush(info,name,a_err='iren (rscale.or.cscale)')
    goto 9999
  end if

  call csr_getptn(imin,imax,jmin_,jmax_,a,nz,ia,ja,nzin_,append_,info,iren)

  if (rscale_) then
    do i=nzin_+1, nzin_+nz
      ia(i) = ia(i) - imin + 1
    end do
  end if
  if (cscale_) then
    do i=nzin_+1, nzin_+nz
      ja(i) = ja(i) - jmin_ + 1
    end do
  end if

  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

contains

  subroutine csr_getptn(imin,imax,jmin,jmax,a,nz,ia,ja,nzin,append,info,&
       & iren)

    use psb_const_mod
    use psb_error_mod
    use psb_realloc_mod
    use psb_sort_mod
    implicit none

    class(psb_c_csr_sparse_mat), intent(in)    :: a
    integer(psb_ipk_) :: imin,imax,jmin,jmax
    integer(psb_ipk_), intent(inout)               :: nz
    integer(psb_ipk_), allocatable, intent(inout)  :: ia(:), ja(:)
    integer(psb_ipk_), intent(in)                  :: nzin
    logical, intent(in)                  :: append
    integer(psb_ipk_) :: info
    integer(psb_ipk_), optional                    :: iren(:)
    integer(psb_ipk_) :: nzin_, nza,i,j, nzt, irw, lrw, icl,lcl
    integer(psb_ipk_) :: debug_level, debug_unit
    character(len=20) :: name='csr_getptn'

    debug_unit  = psb_get_debug_unit()
    debug_level = psb_get_debug_level()

    nza = a%get_nzeros()
    irw = imin
    lrw = min(imax,a%get_nrows())
    icl = jmin
    lcl = min(jmax,a%get_ncols())
    if (irw<0) then
      info = psb_err_pivot_too_small_
      return
    end if

    if (append) then
      nzin_ = nzin
    else
      nzin_ = 0
    endif
    !
    ! This is a row-oriented routine, so the following is a
    ! good choice.
    !
    nzt = (a%irp(lrw+1)-a%irp(irw))
    nz = 0

    call psb_ensure_size(nzin_+nzt,ia,info)
    if (info == psb_success_) call psb_ensure_size(nzin_+nzt,ja,info)

    if (info /= psb_success_) return

    if (present(iren)) then
      do i=irw, lrw
        do j=a%irp(i), a%irp(i+1) - 1
          if ((jmin <= a%ja(j)).and.(a%ja(j)<=jmax)) then
            nzin_ = nzin_ + 1
            nz    = nz + 1
            ia(nzin_)  = iren(i)
            ja(nzin_)  = iren(a%ja(j))
          end if
        enddo
      end do
    else
      do i=irw, lrw
        do j=a%irp(i), a%irp(i+1) - 1
          if ((jmin <= a%ja(j)).and.(a%ja(j)<=jmax)) then
            nzin_ = nzin_ + 1
            nz    = nz + 1
            ia(nzin_)  = (i)
            ja(nzin_)  = (a%ja(j))
          end if
        enddo
      end do
    end if

  end subroutine csr_getptn

end subroutine psb_c_csr_csgetptn


subroutine psb_c_csr_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
     & jmin,jmax,iren,append,nzin,rscale,cscale,chksz)
  ! Output is always in  COO format
  use psb_error_mod
  use psb_const_mod
  use psb_error_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_csgetrow
  implicit none

  class(psb_c_csr_sparse_mat), intent(in) :: a
  integer(psb_ipk_), intent(in)                  :: imin,imax
  integer(psb_ipk_), intent(out)                 :: nz
  integer(psb_ipk_), allocatable, intent(inout)  :: ia(:), ja(:)
  complex(psb_spk_), allocatable,  intent(inout)    :: val(:)
  integer(psb_ipk_),intent(out)                  :: info
  logical, intent(in), optional        :: append
  integer(psb_ipk_), intent(in), optional        :: iren(:)
  integer(psb_ipk_), intent(in), optional        :: jmin,jmax, nzin
  logical, intent(in), optional        :: rscale,cscale,chksz

  logical :: append_, rscale_, cscale_, chksz_
  integer(psb_ipk_) :: nzin_, jmin_, jmax_, err_act, i
  character(len=20)  :: name='csget'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()
  info = psb_success_
  nz = 0

  if (present(jmin)) then
    jmin_ = jmin
  else
    jmin_ = 1
  endif
  if (present(jmax)) then
    jmax_ = jmax
  else
    jmax_ = a%get_ncols()
  endif

  if ((imax<imin).or.(jmax_<jmin_)) return

  if (present(append)) then
    append_=append
  else
    append_=.false.
  endif
  if ((append_).and.(present(nzin))) then
    nzin_ = nzin
  else
    nzin_ = 0
  endif
  if (present(rscale)) then
    rscale_ = rscale
  else
    rscale_ = .false.
  endif
  if (present(cscale)) then
    cscale_ = cscale
  else
    cscale_ = .false.
  endif
  if (present(chksz)) then
    chksz_ = chksz
  else
    chksz_ = .true.
  endif
  if ((rscale_.or.cscale_).and.(present(iren))) then
    info = psb_err_many_optional_arg_
    call psb_errpush(info,name,a_err='iren (rscale.or.cscale)')
    goto 9999
  end if

  call csr_getrow(imin,imax,jmin_,jmax_,a,nz,ia,ja,val,nzin_,append_,chksz_,info,&
       & iren)

  if (rscale_) then
    do i=nzin_+1, nzin_+nz
      ia(i) = ia(i) - imin + 1
    end do
  end if
  if (cscale_) then
    do i=nzin_+1, nzin_+nz
      ja(i) = ja(i) - jmin_ + 1
    end do
  end if

  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

contains

  subroutine csr_getrow(imin,imax,jmin,jmax,a,nz,ia,ja,val,nzin,append,chksz,info,&
       & iren)

    use psb_const_mod
    use psb_error_mod
    use psb_realloc_mod
    use psb_sort_mod
    implicit none

    class(psb_c_csr_sparse_mat), intent(in)    :: a
    integer(psb_ipk_) :: imin,imax,jmin,jmax
    integer(psb_ipk_), intent(inout)               :: nz
    integer(psb_ipk_), allocatable, intent(inout)  :: ia(:), ja(:)
    complex(psb_spk_), allocatable,  intent(inout)    :: val(:)
    integer(psb_ipk_), intent(in)                  :: nzin
    logical, intent(in)                  :: append, chksz
    integer(psb_ipk_) :: info
    integer(psb_ipk_), optional                    :: iren(:)
    integer(psb_ipk_) :: nzin_, nza,i,j, nzt, irw, lrw, icl,lcl
    integer(psb_ipk_) :: debug_level, debug_unit
    character(len=20) :: name='coo_getrow'

    debug_unit  = psb_get_debug_unit()
    debug_level = psb_get_debug_level()

    nza = a%get_nzeros()
    irw = imin
    lrw = min(imax,a%get_nrows())
    icl = jmin
    lcl = min(jmax,a%get_ncols())
    if (irw<0) then
      info = psb_err_pivot_too_small_
      return
    end if

    if (append) then
      nzin_ = nzin
    else
      nzin_ = 0
    endif

    !
    ! This is a row-oriented routine, so the following is a
    ! good choice.
    !
    nzt = (a%irp(lrw+1)-a%irp(irw))
    nz = 0

    if (chksz) then
      call psb_ensure_size(nzin_+nzt,ia,info)
      if (info == psb_success_) call psb_ensure_size(nzin_+nzt,ja,info)
      if (info == psb_success_) call psb_ensure_size(nzin_+nzt,val,info)

      if (info /= psb_success_) return
    end if

    if (present(iren)) then
      do i=irw, lrw
        do j=a%irp(i), a%irp(i+1) - 1
          if ((jmin <= a%ja(j)).and.(a%ja(j)<=jmax)) then
            nzin_ = nzin_ + 1
            nz    = nz + 1
            val(nzin_) = a%val(j)
            ia(nzin_)  = iren(i)
            ja(nzin_)  = iren(a%ja(j))
          end if
        enddo
      end do
    else
      do i=irw, lrw
        do j=a%irp(i), a%irp(i+1) - 1
          if ((jmin <= a%ja(j)).and.(a%ja(j)<=jmax)) then
            nzin_ = nzin_ + 1
            nz    = nz + 1
            val(nzin_) = a%val(j)
            ia(nzin_)  = (i)
            ja(nzin_)  = (a%ja(j))
          end if
        enddo
      end do
    end if

  end subroutine csr_getrow

end subroutine psb_c_csr_csgetrow


!
! CSR implementation of tril/triu
!
subroutine psb_c_csr_tril(a,l,info,&
     & diag,imin,imax,jmin,jmax,rscale,cscale,u)
  ! Output is always in  COO format
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_tril
  implicit none

  class(psb_c_csr_sparse_mat), intent(in) :: a
  class(psb_c_coo_sparse_mat), intent(out) :: l
  integer(psb_ipk_),intent(out)            :: info
  integer(psb_ipk_), intent(in), optional  :: diag,imin,imax,jmin,jmax
  logical, intent(in), optional            :: rscale,cscale
  class(psb_c_coo_sparse_mat), optional, intent(out) :: u

  integer(psb_ipk_) :: err_act, nzin, nzout, i, j, k
  integer(psb_ipk_) :: imin_, imax_, jmin_, jmax_, mb,nb, diag_, nzlin, nzuin, nz
  character(len=20)  :: name='tril'
  logical :: rscale_, cscale_
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_

  if (present(diag)) then
    diag_ = diag
  else
    diag_ = 0
  end if
  if (present(imin)) then
    imin_ = imin
  else
    imin_ = 1
  end if
  if (present(imax)) then
    imax_ = imax
  else
    imax_ = a%get_nrows()
  end if
  if (present(jmin)) then
    jmin_ = jmin
  else
    jmin_ = 1
  end if
  if (present(jmax)) then
    jmax_ = jmax
  else
    jmax_ = a%get_ncols()
  end if
  if (present(rscale)) then
    rscale_ = rscale
  else
    rscale_ = .true.
  end if
  if (present(cscale)) then
    cscale_ = cscale
  else
    cscale_ = .true.
  end if

  if (rscale_) then
    mb = imax_ - imin_ +1
  else
    mb = imax_
  endif
  if (cscale_) then
    nb = jmax_ - jmin_ +1
  else
    nb = jmax_
  endif

#if defined(OPENMP)
  block
    integer(psb_ipk_), allocatable :: lrws(:),urws(:)
    integer(psb_ipk_)   ::  lpnt, upnt, lnz, unz
    call psb_realloc(mb,lrws,info)
    !$omp workshare
    lrws(:) = 0
    !$omp end workshare
    nz = a%get_nzeros()
    call l%allocate(mb,nb,nz)
    !write(0,*) 'Invocation of COO%TRIL', present(u),nz
    if (present(u)) then
      nzlin = l%get_nzeros() ! At this point it should be 0
      call u%allocate(mb,nb,nz)
      nzuin = u%get_nzeros() ! At this point it should be 0
      if (info == 0) call psb_realloc(mb,urws,info)
      !$omp workshare
      urws(:) = 0
      !$omp end workshare
      !write(0,*) 'omp version of COO%TRIL/TRIU'
      lnz = 0
      unz = 0 
      !$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
      loop1: do i=imin_,imax_
        do k = a%irp(i),a%irp(i+1)-1
          j = a%ja(k)
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((j-i)<=diag_) then
              !$omp atomic update
              lrws(i-imin_+1) = lrws(i-imin_+1) +1
              !$omp end atomic
              lnz = lnz + 1
            else
              !$omp atomic update
              urws(i-imin_+1) = urws(i-imin_+1) +1
              !$omp end atomic
              unz = unz + 1
            end if
          end if
        end do
      end do loop1
      !$omp end parallel do

      call psi_exscan(mb,lrws,info)
      call psi_exscan(mb,urws,info) 
      !write(0,*) lrws(:), urws(:)
      !$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
      loop2: do i=imin_,imax_
        do k = a%irp(i),a%irp(i+1)-1
          j = a%ja(k)
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((j-i)<=diag_) then
              !$omp atomic capture
              lrws(i-imin_+1) = lrws(i-imin_+1) +1
              lpnt            = lrws(i-imin_+1)
              !$omp end atomic
              l%ia(lpnt)  = i
              l%ja(lpnt)  = a%ja(k)
              l%val(lpnt) = a%val(k)
            else
              !$omp atomic capture
              urws(i-imin_+1) = urws(i-imin_+1) +1
              upnt            = urws(i-imin_+1)
              !$omp end atomic
              u%ia(upnt)  = i
              u%ja(upnt)  = a%ja(k)
              u%val(upnt) = a%val(k)
            end if
          end if
        end do
      end do loop2
      !$omp end parallel do
      !write(0,*) 'End of copyout',lnz,unz
      call l%set_nzeros(lnz)      
      call l%fix(info)
      call u%set_nzeros(unz)
      call u%fix(info)
      nzout = u%get_nzeros()
      if (rscale_) then
        !$omp workshare
        u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
        !$omp end workshare
      end if
      if (cscale_) then
        !$omp workshare
        u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
        !$omp end workshare
      end if
      if ((diag_ >=-1).and.(imin_ == jmin_)) then
        call u%set_triangle(.true.)
        call u%set_lower(.false.)
      end if
    else
      lnz = 0
      !$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws) reduction(+: lnz)
      loop3: do i=imin_,imax_
        do k = a%irp(i),a%irp(i+1)-1
          j = a%ja(k)
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((j-i)<=diag_) then
              !$omp atomic update
              lrws(i-imin_+1) = lrws(i-imin_+1) +1
              !$omp end atomic
              lnz = lnz + 1
            end if
          end if
        end do
      end do loop3
      !$omp end parallel do
      call psi_exscan(mb,lrws,info)
      !$omp parallel do private(i,j,k,lpnt) shared(imin_,imax_,a)
      loop4: do i=imin_,imax_
        do k = a%irp(i),a%irp(i+1)-1
          j = a%ja(k)
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((j-i)<=diag_) then
              !$omp atomic capture
              lrws(i-imin_+1) = lrws(i-imin_+1) +1
              lpnt            = lrws(i-imin_+1)
              !$omp end atomic
              l%ia(lpnt)  = i
              l%ja(lpnt)  = a%ja(k)
              l%val(lpnt) = a%val(k)
            end if
          end if
        end do
      end do loop4
      !$omp end parallel do
      call l%set_nzeros(lnz)      
      call l%fix(info)
    end if
    nzout = l%get_nzeros()
    if (rscale_) then
      !$omp workshare
      l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
      !$omp end workshare
    end if
    if (cscale_) then
      !$omp workshare
      l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
      !$omp end workshare
    end if

    if ((diag_ <= 0).and.(imin_ == jmin_)) then
      call l%set_triangle(.true.)
      call l%set_lower(.true.)
    end if
  end block
#else
  nz = a%get_nzeros()
  call l%allocate(mb,nb,nz)

  if (present(u)) then
    nzlin = l%get_nzeros() ! At this point it should be 0
    call u%allocate(mb,nb,nz)
    nzuin = u%get_nzeros() ! At this point it should be 0
    associate(val =>a%val, ja => a%ja, irp=>a%irp)
      do i=imin_,imax_
        do k=irp(i),irp(i+1)-1
          j = ja(k)
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((ja(k)-i)<=diag_) then
              nzlin = nzlin + 1
              l%ia(nzlin)  = i
              l%ja(nzlin)  = ja(k)
              l%val(nzlin) = val(k)
            else
              nzuin = nzuin + 1
              u%ia(nzuin)  = i
              u%ja(nzuin)  = ja(k)
              u%val(nzuin) = val(k)
            end if
          end if
        end do
      end do
    end associate

    call l%set_nzeros(nzlin)
    call u%set_nzeros(nzuin)
    call u%fix(info)
    nzout = u%get_nzeros()
    if (rscale_) &
         & u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
    if (cscale_) &
         & u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
    if ((diag_ >=-1).and.(imin_ == jmin_)) then
      call u%set_triangle(.true.)
      call u%set_lower(.false.)
    end if
  else
    nzin = l%get_nzeros() ! At this point it should be 0
    associate(val =>a%val, ja => a%ja, irp=>a%irp)
      do i=imin_,imax_
        do k=irp(i),irp(i+1)-1
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((ja(k)-i)<=diag_) then
              nzin = nzin + 1
              l%ia(nzin)  = i
              l%ja(nzin)  = ja(k)
              l%val(nzin) = val(k)
            end if
          end if
        end do
      end do
    end associate
    call l%set_nzeros(nzin)
  end if
  call l%fix(info)
  nzout = l%get_nzeros()
  if (rscale_) &
       & l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
  if (cscale_) &
       & l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1

  if ((diag_ <= 0).and.(imin_ == jmin_)) then
    call l%set_triangle(.true.)
    call l%set_lower(.true.)
  end if
#endif
  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_csr_tril

subroutine psb_c_csr_triu(a,u,info,&
     & diag,imin,imax,jmin,jmax,rscale,cscale,l)
  ! Output is always in  COO format
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_triu
  implicit none

  class(psb_c_csr_sparse_mat), intent(in) :: a
  class(psb_c_coo_sparse_mat), intent(out) :: u
  integer(psb_ipk_),intent(out)            :: info
  integer(psb_ipk_), intent(in), optional  :: diag,imin,imax,jmin,jmax
  logical, intent(in), optional            :: rscale,cscale
  class(psb_c_coo_sparse_mat), optional, intent(out) :: l

  integer(psb_ipk_) :: err_act, nzin, nzout, i, j, k
  integer(psb_ipk_) :: imin_, imax_, jmin_, jmax_, mb,nb, diag_, nzlin, nzuin, nz
  character(len=20)  :: name='triu'
  logical :: rscale_, cscale_
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_

  if (present(diag)) then
    diag_ = diag
  else
    diag_ = 0
  end if
  if (present(imin)) then
    imin_ = imin
  else
    imin_ = 1
  end if
  if (present(imax)) then
    imax_ = imax
  else
    imax_ = a%get_nrows()
  end if
  if (present(jmin)) then
    jmin_ = jmin
  else
    jmin_ = 1
  end if
  if (present(jmax)) then
    jmax_ = jmax
  else
    jmax_ = a%get_ncols()
  end if
  if (present(rscale)) then
    rscale_ = rscale
  else
    rscale_ = .true.
  end if
  if (present(cscale)) then
    cscale_ = cscale
  else
    cscale_ = .true.
  end if

  if (rscale_) then
    mb = imax_ - imin_ +1
  else
    mb = imax_
  endif
  if (cscale_) then
    nb = jmax_ - jmin_ +1
  else
    nb = jmax_
  endif


#if defined(OPENMP)
  block
    integer(psb_ipk_), allocatable :: lrws(:),urws(:)
    integer(psb_ipk_)   ::  lpnt, upnt, lnz, unz
    call psb_realloc(mb,urws,info)
    !$omp workshare
    urws(:) = 0
    !$omp end workshare
    nz = a%get_nzeros()
    call u%allocate(mb,nb,nz)
    !write(0,*) 'Invocation of COO%TRIL', present(u),nz
    if (present(l)) then
      nzuin = u%get_nzeros() ! At this point it should be 0
      call l%allocate(mb,nb,nz)
      nzlin = l%get_nzeros() ! At this point it should be 0
      if (info == 0) call psb_realloc(mb,urws,info)
      !$omp workshare
      lrws(:) = 0
      !$omp end workshare
      !write(0,*) 'omp version of COO%TRIL/TRIU'
      lnz = 0
      unz = 0 
      !$omp parallel do private(i,j,k) shared(imin_,imax_,a,lrws,urws) reduction(+: lnz,unz)
      loop1: do i=imin_,imax_
        do k = a%irp(i),a%irp(i+1)-1
          j = a%ja(k)
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((j-i)<diag_) then
              !$omp atomic update
              lrws(i-imin_+1) = lrws(i-imin_+1) +1
              !$omp end atomic
              lnz = lnz + 1
            else
              !$omp atomic update
              urws(i-imin_+1) = urws(i-imin_+1) +1
              !$omp end atomic
              unz = unz + 1
            end if
          end if
        end do
      end do loop1
      !$omp end parallel do

      call psi_exscan(mb,lrws,info)
      call psi_exscan(mb,urws,info) 
      !write(0,*) lrws(:), urws(:)
      !$omp parallel do private(i,j,k,lpnt,upnt) shared(imin_,imax_,a)
      loop2: do i=imin_,imax_
        do k = a%irp(i),a%irp(i+1)-1
          j = a%ja(k)
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((j-i)<diag_) then
              !$omp atomic capture
              lrws(i-imin_+1) = lrws(i-imin_+1) +1
              lpnt            = lrws(i-imin_+1)
              !$omp end atomic
              l%ia(lpnt)  = i
              l%ja(lpnt)  = a%ja(k)
              l%val(lpnt) = a%val(k)
            else
              !$omp atomic capture
              urws(i-imin_+1) = urws(i-imin_+1) +1
              upnt            = urws(i-imin_+1)
              !$omp end atomic
              u%ia(upnt)  = i
              u%ja(upnt)  = a%ja(k)
              u%val(upnt) = a%val(k)
            end if
          end if
        end do
      end do loop2
      !$omp end parallel do
      !write(0,*) 'End of copyout',lnz,unz
      call l%set_nzeros(lnz)      
      call l%fix(info)
      call u%set_nzeros(unz)
      call u%fix(info)
      nzout = l%get_nzeros()
      if (rscale_) then
        !$omp workshare
        l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
        !$omp end workshare
      end if
      if (cscale_) then
        !$omp workshare
        l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
        !$omp end workshare
      end if
      if ((diag_ <=-1).and.(imin_ == jmin_)) then
        call l%set_triangle(.true.)
        call l%set_lower(.false.)
      end if
    else
      unz = 0
      !$omp parallel do private(i,j,k) shared(imin_,imax_,a,urws) reduction(+: unz)
      loop3: do i=imin_,imax_
        do k = a%irp(i),a%irp(i+1)-1
          j = a%ja(k)
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((j-i)>=diag_) then
              !$omp atomic update
              urws(i-imin_+1) = urws(i-imin_+1) +1
              !$omp end atomic
              unz = unz + 1
            end if
          end if
        end do
      end do loop3
      !$omp end parallel do
      call psi_exscan(mb,urws,info)
      !$omp parallel do private(i,j,k,upnt) shared(imin_,imax_,a)
      loop4: do i=imin_,imax_
        do k = a%irp(i),a%irp(i+1)-1
          j = a%ja(k)
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((j-i)>=diag_) then
              !$omp atomic capture
              urws(i-imin_+1) = urws(i-imin_+1) +1
              upnt            = urws(i-imin_+1)
              !$omp end atomic
              u%ia(upnt)  = i
              u%ja(upnt)  = a%ja(k)
              u%val(upnt) = a%val(k)
            end if
          end if
        end do
      end do loop4
      !$omp end parallel do
      call u%set_nzeros(unz)      
      call u%fix(info)
    end if
    nzout = u%get_nzeros()
    if (rscale_) then
      !$omp workshare
      u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
      !$omp end workshare
    end if
    if (cscale_) then
      !$omp workshare
      u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
      !$omp end workshare
    end if

    if ((diag_ >= 0).and.(imin_ == jmin_)) then
      call u%set_triangle(.true.)
      call u%set_upper(.true.)
    end if
  end block


#else

  nz = a%get_nzeros()
  call u%allocate(mb,nb,nz)

  if (present(l)) then
    nzuin = u%get_nzeros() ! At this point it should be 0
    call l%allocate(mb,nb,nz)
    nzlin = l%get_nzeros() ! At this point it should be 0
    associate(val =>a%val, ja => a%ja, irp=>a%irp)
      do i=imin_,imax_
        do k=irp(i),irp(i+1)-1
          j = ja(k)
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((ja(k)-i)<diag_) then
              nzlin = nzlin + 1
              l%ia(nzlin)  = i
              l%ja(nzlin)  = ja(k)
              l%val(nzlin) = val(k)
            else
              nzuin = nzuin + 1
              u%ia(nzuin)  = i
              u%ja(nzuin)  = ja(k)
              u%val(nzuin) = val(k)
            end if
          end if
        end do
      end do
    end associate
    call u%set_nzeros(nzuin)
    call l%set_nzeros(nzlin)
    call l%fix(info)
    nzout = l%get_nzeros()
    if (rscale_) &
         & l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
    if (cscale_) &
         & l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
    if ((diag_ <=1).and.(imin_ == jmin_)) then
      call l%set_triangle(.true.)
      call l%set_lower(.true.)
    end if
  else
    nzin = u%get_nzeros() ! At this point it should be 0
    associate(val =>a%val, ja => a%ja, irp=>a%irp)
      do i=imin_,imax_
        do k=irp(i),irp(i+1)-1
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((ja(k)-i)>=diag_) then
              nzin = nzin + 1
              u%ia(nzin)  = i
              u%ja(nzin)  = ja(k)
              u%val(nzin) = val(k)
            end if
          end if
        end do
      end do
    end associate
    call u%set_nzeros(nzin)
  end if
  call u%fix(info)
  nzout = u%get_nzeros()
  if (rscale_) &
       & u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
  if (cscale_) &
       & u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1

  if ((diag_ >= 0).and.(imin_ == jmin_)) then
    call u%set_triangle(.true.)
    call u%set_upper(.true.)
  end if
#endif
  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_csr_triu


subroutine psb_c_csr_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
  use psb_error_mod
  use psb_realloc_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_csput_a
  implicit none

  class(psb_c_csr_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)      :: val(:)
  integer(psb_ipk_), intent(in)             :: nz, ia(:), ja(:), imin,imax,jmin,jmax
  integer(psb_ipk_), intent(out)            :: info

  integer(psb_ipk_) :: err_act
  character(len=20)  :: name='c_csr_csput_a'
  logical, parameter :: debug=.false.
  integer(psb_ipk_) :: nza, i, debug_level, debug_unit


  call psb_erractionsave(err_act)
  info = psb_success_
  debug_unit  = psb_get_debug_unit()
  debug_level = psb_get_debug_level()

  if (nz <= 0) then
    info = psb_err_iarg_neg_; i=1
    call psb_errpush(info,name,i_err=(/i/))
    goto 9999
  end if
  if (size(ia) < nz) then
    info = psb_err_input_asize_invalid_i_; i=2
    call psb_errpush(info,name,i_err=(/i/))
    goto 9999
  end if

  if (size(ja) < nz) then
    info = psb_err_input_asize_invalid_i_; i=3
    call psb_errpush(info,name,i_err=(/i/))
    goto 9999
  end if
  if (size(val) < nz) then
    info = psb_err_input_asize_invalid_i_; i=4
    call psb_errpush(info,name,i_err=(/i/))
    goto 9999
  end if

  if (nz == 0) return
  if (a%is_dev())   call a%sync()

  nza  = a%get_nzeros()

  if (a%is_bld()) then
    ! Build phase should only ever be in COO
    info = psb_err_invalid_mat_state_

  else  if (a%is_upd()) then
    call  psb_c_csr_srch_upd(nz,ia,ja,val,a,&
         & imin,imax,jmin,jmax,info)

    if (info < 0) then
      info = psb_err_internal_error_
    else if (info > 0) then
      if (debug_level >= psb_debug_serial_) &
           & write(debug_unit,*) trim(name),&
           & ': Discarded entries not  belonging to us.'
      info = psb_success_
    end if
    call a%set_host()

  else
    ! State is wrong.
    info = psb_err_invalid_mat_state_
  end if
  if (info /= psb_success_) then
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return


contains

  subroutine psb_c_csr_srch_upd(nz,ia,ja,val,a,&
       & imin,imax,jmin,jmax,info)

    use psb_const_mod
    use psb_realloc_mod
    use psb_string_mod
    use psb_sort_mod
    implicit none

    class(psb_c_csr_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(in) :: nz, imin,imax,jmin,jmax
    integer(psb_ipk_), intent(in) :: ia(:),ja(:)
    complex(psb_spk_), intent(in) :: val(:)
    integer(psb_ipk_), intent(out) :: info
    integer(psb_ipk_) :: i,ir,ic, ilr, ilc, ip, &
         & i1,i2,nr,nc,nnz,dupl
    integer(psb_ipk_) :: debug_level, debug_unit
    character(len=20)    :: name='c_csr_srch_upd'

    info = psb_success_
    debug_unit  = psb_get_debug_unit()
    debug_level = psb_get_debug_level()

    dupl = a%get_dupl()

    if (.not.a%is_sorted()) then
      info = -4
      return
    end if

    ilr = -1
    ilc = -1
    nnz = a%get_nzeros()
    nr  = a%get_nrows()
    nc  = a%get_ncols()

    select case(dupl)
    case(psb_dupl_ovwrt_,psb_dupl_err_)
      ! Overwrite.
      ! Cannot test for error, should have been caught earlier.

      ilr = -1
      ilc = -1
      do i=1, nz
        ir = ia(i)
        ic = ja(i)

        if ((ir > 0).and.(ir <= nr)) then

          i1 = a%irp(ir)
          i2 = a%irp(ir+1)
          nc=i2-i1

          ip = psb_bsrch(ic,nc,a%ja(i1:i2-1))
          if (ip>0) then
            a%val(i1+ip-1) = val(i)
          else
            info = max(info,3)
          end if
        else
          info = max(info,2)
        end if
      end do

    case(psb_dupl_add_)
      ! Add
      ilr = -1
      ilc = -1
      do i=1, nz
        ir = ia(i)
        ic = ja(i)
        if ((ir > 0).and.(ir <= nr)) then
          i1 = a%irp(ir)
          i2 = a%irp(ir+1)
          nc = i2-i1
          ip = psb_bsrch(ic,nc,a%ja(i1:i2-1))
          if (ip>0) then
            a%val(i1+ip-1) = a%val(i1+ip-1) + val(i)
          else
            info = max(info,3)
          end if
        else
          info = max(info,2)
        end if
      end do

    case default
      info = -3
      if (debug_level >= psb_debug_serial_) &
           & write(debug_unit,*) trim(name),&
           & ': Duplicate handling: ',dupl
    end select

  end subroutine psb_c_csr_srch_upd

end subroutine psb_c_csr_csput_a


subroutine psb_c_csr_reinit(a,clear)
  use psb_error_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_reinit
  implicit none

  class(psb_c_csr_sparse_mat), intent(inout) :: a
  logical, intent(in), optional :: clear

  integer(psb_ipk_) :: err_act, info
  character(len=20)  :: name='reinit'
  logical  :: clear_
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_

  if (a%is_dev())   call a%sync()

  if (present(clear)) then
    clear_ = clear
  else
    clear_ = .true.
  end if

  if (a%is_bld() .or. a%is_upd()) then
    ! do nothing
  else if (a%is_asb()) then
    if (clear_) a%val(:) = czero
    call a%set_upd()
    call a%set_host()
  else
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_csr_reinit

subroutine  psb_c_csr_trim(a)
  use psb_realloc_mod
  use psb_error_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_trim
  implicit none
  class(psb_c_csr_sparse_mat), intent(inout) :: a
  integer(psb_ipk_) :: err_act, info, nz, m
  character(len=20)  :: name='trim'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_
  m   = max(1_psb_ipk_,a%get_nrows())
  nz  = max(1_psb_ipk_,a%get_nzeros())
  if (info == psb_success_) call psb_realloc(m+1,a%irp,info)

  if (info == psb_success_) call psb_realloc(nz,a%ja,info)
  if (info == psb_success_) call psb_realloc(nz,a%val,info)

  if (info /= psb_success_) goto 9999
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_c_csr_trim

subroutine psb_c_csr_print(iout,a,iv,head,ivr,ivc)
  use psb_string_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_print
  implicit none

  integer(psb_ipk_), intent(in)               :: iout
  class(psb_c_csr_sparse_mat), intent(in) :: a
  integer(psb_lpk_), intent(in), optional     :: iv(:)
  character(len=*), optional        :: head
  integer(psb_lpk_), intent(in), optional     :: ivr(:), ivc(:)

  integer(psb_ipk_) :: err_act
  character(len=20)  :: name='c_csr_print'
  logical, parameter :: debug=.false.
  character(len=80)                 :: frmt
  integer(psb_ipk_) :: i,j, nr, nc, nz


  write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general'
  if (present(head)) write(iout,'(a,a)') '% ',head
  write(iout,'(a)') '%'
  write(iout,'(a,a)') '% COO'

  if (a%is_dev())   call a%sync()

  nr = a%get_nrows()
  nc = a%get_ncols()
  nz = a%get_nzeros()
  frmt = psb_c_get_print_frmt(nr,nc,nz,iv,ivr,ivc)

  write(iout,*) nr, nc, nz
  if(present(iv)) then
    do i=1, nr
      do j=a%irp(i),a%irp(i+1)-1
        write(iout,frmt) iv(i),iv(a%ja(j)),a%val(j)
      end do
    enddo
  else
    if (present(ivr).and..not.present(ivc)) then
      do i=1, nr
        do j=a%irp(i),a%irp(i+1)-1
          write(iout,frmt) ivr(i),(a%ja(j)),a%val(j)
        end do
      enddo
    else if (present(ivr).and.present(ivc)) then
      do i=1, nr
        do j=a%irp(i),a%irp(i+1)-1
          write(iout,frmt) ivr(i),ivc(a%ja(j)),a%val(j)
        end do
      enddo
    else if (.not.present(ivr).and.present(ivc)) then
      do i=1, nr
        do j=a%irp(i),a%irp(i+1)-1
          write(iout,frmt) (i),ivc(a%ja(j)),a%val(j)
        end do
      enddo
    else if (.not.present(ivr).and..not.present(ivc)) then
      do i=1, nr
        do j=a%irp(i),a%irp(i+1)-1
          write(iout,frmt) (i),(a%ja(j)),a%val(j)
        end do
      enddo
    endif
  endif

end subroutine psb_c_csr_print


subroutine psb_c_cp_csr_from_coo(a,b,info)
  use psb_const_mod
  use psb_realloc_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_csr_from_coo
#if defined(OPENMP)
  use omp_lib 
#endif  
  implicit none

  class(psb_c_csr_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(in)    :: b
  integer(psb_ipk_), intent(out)               :: info

  type(psb_c_coo_sparse_mat)   :: tmp
  integer(psb_ipk_), allocatable :: itemp(:)
  !locals
  logical             :: rwshr_
  integer(psb_ipk_) :: nza, nr, nc, i,k,ip, ncl
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit
  character(len=20)   :: name='c_cp_csr_from_coo'
  logical :: use_openmp = .false.

  info = psb_success_
  debug_unit  = psb_get_debug_unit()
  debug_level = psb_get_debug_level()

  if (.not.b%is_by_rows()) then
    ! This is to have fix_coo called behind the scenes
    call tmp%cp_from_coo(b,info)
    if (info /= psb_success_) return

    nr  = tmp%get_nrows()
    nc  = tmp%get_ncols()
    nza = tmp%get_nzeros()

    a%psb_c_base_sparse_mat = tmp%psb_c_base_sparse_mat

    ! Dirty trick: call move_alloc to have the new data allocated just once.
    call move_alloc(tmp%ia,itemp)
    call move_alloc(tmp%ja,a%ja)
    call move_alloc(tmp%val,a%val)
    call psb_realloc(max(nr+1,nc+1),a%irp,info)
    call tmp%free()

  else

    if (info /= psb_success_) return
    if (b%is_dev())   call b%sync()

    nr  = b%get_nrows()
    nc  = b%get_ncols()
    nza = b%get_nzeros()

    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat

    ! Dirty trick: call move_alloc to have the new data allocated just once.
    call psb_safe_ab_cpy(b%ia,itemp,info)
    if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info)
    if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info)
    if (info == psb_success_) call psb_realloc(max(nr+1,nc+1),a%irp,info)

  endif


#if defined(OPENMP)

  !$OMP PARALLEL default(shared) reduction(max:info)

  !$OMP WORKSHARE
  a%irp(:) = 0
  !$OMP END WORKSHARE

  !$OMP DO schedule(STATIC) &
  !$OMP private(k,i)
  do k=1,nza
    i = itemp(k)
    !$OMP ATOMIC UPDATE 
    a%irp(i) = a%irp(i) + 1
    !$OMP END ATOMIC
  end do
  !$OMP END DO
  call psi_exscan(nr+1,a%irp,info,shift=ione) 
  !$OMP END PARALLEL
#else
  a%irp(:) = 0
  do k=1,nza
    i = itemp(k)
    a%irp(i) = a%irp(i) + 1
  end do
  call psi_exscan(nr+1,a%irp,info,shift=ione) 
#endif

  call a%set_host()

end subroutine psb_c_cp_csr_from_coo



subroutine psb_c_cp_csr_to_coo(a,b,info)
  use psb_const_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_csr_to_coo
  implicit none

  class(psb_c_csr_sparse_mat), intent(in)  :: a
  class(psb_c_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)                      :: info

  integer(psb_ipk_), allocatable :: itemp(:)
  !locals
  logical             :: rwshr_
  integer(psb_ipk_) :: nza, nr, nc,i,j,irw, err_act
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit
  character(len=20)   :: name

  info = psb_success_

  if (a%is_dev())   call a%sync()
  nr  = a%get_nrows()
  nc  = a%get_ncols()
  nza = a%get_nzeros()

  call b%allocate(nr,nc,nza)
  b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat

  do i=1, nr
    do j=a%irp(i),a%irp(i+1)-1
      b%ia(j)  = i
      b%ja(j)  = a%ja(j)
      b%val(j) = a%val(j)
    end do
  end do
  call b%set_nzeros(a%get_nzeros())
  call b%set_sort_status(psb_row_major_)
  call b%set_asb()
  call b%set_host()

end subroutine psb_c_cp_csr_to_coo


subroutine psb_c_mv_csr_to_coo(a,b,info)
  use psb_const_mod
  use psb_realloc_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_csr_to_coo
  implicit none

  class(psb_c_csr_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(inout)   :: b
  integer(psb_ipk_), intent(out)                        :: info

  integer(psb_ipk_), allocatable :: itemp(:)
  !locals
  logical             :: rwshr_
  integer(psb_ipk_) :: nza, nr, nc,i,j,k,irw, err_act
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit
  character(len=20)   :: name

  info = psb_success_

  if (a%is_dev())   call a%sync()
  nr  = a%get_nrows()
  nc  = a%get_ncols()
  nza = a%get_nzeros()

  b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
  call b%set_nzeros(a%get_nzeros())
  call move_alloc(a%ja,b%ja)
  call move_alloc(a%val,b%val)
  call psb_realloc(nza,b%ia,info)
  if (info /= psb_success_) return
  do i=1, nr
    do j=a%irp(i),a%irp(i+1)-1
      b%ia(j)  = i
    end do
  end do
  call a%free()
  call b%set_sort_status(psb_row_major_)
  call b%set_asb()
  call b%set_host()

end subroutine psb_c_mv_csr_to_coo



subroutine psb_c_mv_csr_from_coo(a,b,info)
  use psb_const_mod
  use psb_realloc_mod
  use psb_error_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_csr_from_coo
#if defined(OPENMP)
  use omp_lib 
#endif  
  implicit none

  class(psb_c_csr_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)                        :: info

  integer(psb_ipk_), allocatable :: itemp(:)
  !locals
  logical             :: rwshr_
  integer(psb_ipk_) :: nza, nr, nc, i,j,k, ip,irw, err_act, ncl
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit
  character(len=20)   :: name='mv_from_coo'

  info = psb_success_
  debug_unit  = psb_get_debug_unit()
  debug_level = psb_get_debug_level()

  if (b%is_dev())   call b%sync()

  if (.not.b%is_by_rows()) call b%fix(info)
  if (info /= psb_success_) return

  nr  = b%get_nrows()
  nc  = b%get_ncols()
  nza = b%get_nzeros()

  a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat

  ! Dirty trick: call move_alloc to have the new data allocated just once.
  call move_alloc(b%ia,itemp)
  call move_alloc(b%ja,a%ja)
  call move_alloc(b%val,a%val)
  call psb_realloc(max(nr+1,nc+1),a%irp,info)
  call b%free()

#if defined(OPENMP)

  !$OMP PARALLEL default(shared)  reduction(max:info)

  !$OMP WORKSHARE
  a%irp(:) = 0
  !$OMP END WORKSHARE

  !$OMP DO schedule(STATIC) &
  !$OMP private(k,i)
  do k=1,nza
    i = itemp(k)
    !$OMP ATOMIC UPDATE 
    a%irp(i) = a%irp(i) + 1
    !$OMP END ATOMIC
  end do
  !$OMP END DO
  call psi_exscan(nr+1,a%irp,info,shift=ione) 
  !$OMP END PARALLEL
#else
  a%irp(:) = 0
  do k=1,nza
    i = itemp(k)
    a%irp(i) = a%irp(i) + 1
  end do
  call psi_exscan(nr+1,a%irp,info,shift=ione) 
#endif

  call a%set_host()

end subroutine psb_c_mv_csr_from_coo


subroutine psb_c_mv_csr_to_fmt(a,b,info)
  use psb_const_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_csr_to_fmt
  implicit none

  class(psb_c_csr_sparse_mat), intent(inout) :: a
  class(psb_c_base_sparse_mat), intent(inout)  :: b
  integer(psb_ipk_), intent(out)                        :: info

  !locals
  type(psb_c_coo_sparse_mat) :: tmp
  logical             :: rwshr_
  integer(psb_ipk_) :: nza, nr, i,j,irw, err_act, nc
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit
  character(len=20)   :: name

  info = psb_success_

  select type (b)
  type is (psb_c_coo_sparse_mat)
    call a%mv_to_coo(b,info)
    ! Need to fix trivial copies!
  type is (psb_c_csr_sparse_mat)
    if (a%is_dev())   call a%sync()
    b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
    call move_alloc(a%irp, b%irp)
    call move_alloc(a%ja,  b%ja)
    call move_alloc(a%val, b%val)
    call a%free()
    call b%set_host()

  class default
    call a%mv_to_coo(tmp,info)
    if (info == psb_success_) call b%mv_from_coo(tmp,info)
  end select

end subroutine psb_c_mv_csr_to_fmt


subroutine psb_c_cp_csr_to_fmt(a,b,info)
  use psb_const_mod
  use psb_c_base_mat_mod
  use psb_realloc_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_csr_to_fmt
  implicit none

  class(psb_c_csr_sparse_mat), intent(in)   :: a
  class(psb_c_base_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)                       :: info

  !locals
  type(psb_c_coo_sparse_mat) :: tmp
  logical             :: rwshr_
  integer(psb_ipk_) :: nz, nr, i,j,irw, err_act, nc
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit
  character(len=20)   :: name

  info = psb_success_


  select type (b)
  type is (psb_c_coo_sparse_mat)
    call a%cp_to_coo(b,info)

  type is (psb_c_csr_sparse_mat)
    if (a%is_dev())   call a%sync()
    b%psb_c_base_sparse_mat = a%psb_c_base_sparse_mat
    nr = a%get_nrows()
    nz = a%get_nzeros()
    if (.false.) then 
      if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
      if (info == 0) call psb_safe_cpy( a%ja(1:nz),    b%ja  , info)
      if (info == 0) call psb_safe_cpy( a%val(1:nz),   b%val , info)
    else
      ! Despite the implementation in safe_cpy, it seems better this way
      call psb_realloc(nr+1,b%irp,info)
      call psb_realloc(nz,b%ja,info)
      call psb_realloc(nz,b%val,info)
      !$omp parallel do private(i) schedule(static)
      do i=1,nr+1
        b%irp(i)=a%irp(i)
      end do
      !$omp end parallel do
      !$omp parallel do private(j) schedule(static)
      do j=1,nz  
        b%ja(j)  = a%ja(j)
        b%val(j) = a%val(j)
      end do
      !$omp end parallel do
    end if

    call b%set_host()

  class default
    call a%cp_to_coo(tmp,info)
    if (info == psb_success_) call b%mv_from_coo(tmp,info)
  end select

end subroutine psb_c_cp_csr_to_fmt


subroutine psb_c_mv_csr_from_fmt(a,b,info)
  use psb_const_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_csr_from_fmt
  implicit none

  class(psb_c_csr_sparse_mat), intent(inout)  :: a
  class(psb_c_base_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)                         :: info

  !locals
  type(psb_c_coo_sparse_mat) :: tmp
  logical             :: rwshr_
  integer(psb_ipk_) :: nza, nr, i,j,irw, err_act, nc
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit
  character(len=20)   :: name

  info = psb_success_

  select type (b)
  type is (psb_c_coo_sparse_mat)
    call a%mv_from_coo(b,info)

  type is (psb_c_csr_sparse_mat)
    if (b%is_dev())   call b%sync()

    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
    call move_alloc(b%irp, a%irp)
    call move_alloc(b%ja,  a%ja)
    call move_alloc(b%val, a%val)
    call b%free()
    call a%set_host()

  class default
    call b%mv_to_coo(tmp,info)
    if (info == psb_success_) call a%mv_from_coo(tmp,info)
  end select

end subroutine psb_c_mv_csr_from_fmt



subroutine psb_c_cp_csr_from_fmt(a,b,info)
  use psb_const_mod
  use psb_c_base_mat_mod
  use psb_realloc_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_csr_from_fmt
  implicit none

  class(psb_c_csr_sparse_mat), intent(inout) :: a
  class(psb_c_base_sparse_mat), intent(in)   :: b
  integer(psb_ipk_), intent(out)                        :: info

  !locals
  type(psb_c_coo_sparse_mat) :: tmp
  logical             :: rwshr_
  integer(psb_ipk_) :: nz, nr, i,j,irw, err_act, nc
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit
  character(len=20)   :: name

  info = psb_success_

  select type (b)
  type is (psb_c_coo_sparse_mat)
    call a%cp_from_coo(b,info)

  type is (psb_c_csr_sparse_mat)
    if (b%is_dev())   call b%sync()
    a%psb_c_base_sparse_mat = b%psb_c_base_sparse_mat
    nr = b%get_nrows()
    nz = b%get_nzeros()
    if (.false.) then 
      if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
      if (info == 0) call psb_safe_cpy( b%ja(1:nz)   , a%ja  , info)
      if (info == 0) call psb_safe_cpy( b%val(1:nz)  , a%val , info)
    else
      ! Despite the implementation in safe_cpy, it seems better this way
      call psb_realloc(nr+1,a%irp,info)
      call psb_realloc(nz,a%ja,info)
      call psb_realloc(nz,a%val,info)
      !$omp parallel do private(i) schedule(static)
      do i=1,nr+1
        a%irp(i)=b%irp(i)
      end do
      !$omp end parallel do
      !$omp parallel do private(j) schedule(static)
      do j=1,nz  
        a%ja(j)=b%ja(j)
        a%val(j)=b%val(j)
      end do
      !$omp end parallel do
    end if
    call a%set_host()

  class default
    call b%cp_to_coo(tmp,info)
    if (info == psb_success_) call a%mv_from_coo(tmp,info)
  end select
end subroutine psb_c_cp_csr_from_fmt

subroutine  psb_c_csr_clean_zeros(a, info)
  use psb_error_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_csr_clean_zeros
  implicit none
  class(psb_c_csr_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(out) :: info
  !
  integer(psb_ipk_) :: i, j, k, nr
  integer(psb_ipk_), allocatable :: ilrp(:)

  info = 0
  call a%sync()
  nr   = a%get_nrows()
  ilrp = a%irp
  a%irp(1) = 1
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
      if (a%val(k) /= czero) then
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
      end if
    end do
    a%irp(i+1) = j
  end do
  call a%trim()
  call a%set_host()
end subroutine psb_c_csr_clean_zeros

#if defined(OPENMP)
subroutine psb_ccsrspspmm(a,b,c,info)
  use psb_c_mat_mod
  use psb_serial_mod, psb_protect_name => psb_ccsrspspmm

  implicit none

  class(psb_c_csr_sparse_mat), intent(in) :: a,b
  type(psb_c_csr_sparse_mat), intent(out)  :: c
  integer(psb_ipk_), intent(out)                     :: info
  integer(psb_ipk_) :: ma,na,mb,nb, nzc, nza, nzb
  character(len=20) :: name
  integer(psb_ipk_) :: err_act
  name='psb_csrspspmm'
  call psb_erractionsave(err_act)
  info = psb_success_

  if (a%is_dev())   call a%sync()
  if (b%is_dev())   call b%sync()

  ma = a%get_nrows()
  na = a%get_ncols()
  mb = b%get_nrows()
  nb = b%get_ncols()


  if ( mb /= na ) then
    write(psb_err_unit,*) 'Mismatch in SPSPMM: ',ma,na,mb,nb
    info = psb_err_invalid_matrix_sizes_
    call psb_errpush(info,name)
    goto 9999
  endif

  select case(spspmm_impl)
  case (spspmm_serial)
    ! Estimate number of nonzeros on output.
    nza = a%get_nzeros()
    nzb = b%get_nzeros()
    nzc = 2*(nza+nzb)
    call c%allocate(ma,nb,nzc)

    call csr_spspmm(a,b,c,info)
  case (spspmm_omp_gustavson)
    call spmm_omp_gustavson(a,b,c,info)
  case (spspmm_omp_gustavson_1d)
    call spmm_omp_gustavson_1d(a,b,c,info)
  case (spspmm_serial_rb_tree)
    call spmm_serial_rb_tree(a,b,c,info)
  case (spspmm_omp_rb_tree)
    call spmm_omp_rb_tree(a,b,c,info)
  case (spspmm_omp_two_pass)
    call spmm_omp_two_pass(a,b,c,info)
  case default
    write(psb_err_unit,*) 'Unknown spspmm implementation'
    ! push error
    goto 9999
  end select

  call c%set_asb()
  call c%set_host()

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

contains

  subroutine csr_spspmm(a,b,c,info)
    implicit none
    type(psb_c_csr_sparse_mat), intent(in)  :: a,b
    type(psb_c_csr_sparse_mat), intent(inout) :: c
    integer(psb_ipk_), intent(out)          :: info
    integer(psb_ipk_)              :: ma,na,mb,nb
    integer(psb_ipk_), allocatable :: irow(:), idxs(:)
    complex(psb_spk_), allocatable    :: row(:)
    integer(psb_ipk_)              :: i,j,k,irw,icl,icf, iret, &
         & nzc,nnzre, isz, ipb, irwsz, nrc, nze
    complex(psb_spk_)                 :: cfb


    info = psb_success_
    ma = a%get_nrows()
    na = a%get_ncols()
    mb = b%get_nrows()
    nb = b%get_ncols()

    nze = min(size(c%val),size(c%ja))
    isz = max(ma,na,mb,nb)
    call psb_realloc(isz,row,info)
    if (info == 0) call psb_realloc(isz,idxs,info)
    if (info == 0) call psb_realloc(isz,irow,info)
    if (info /= 0) return
    row  = dzero
    irow = 0
    nzc  = 1
    do j = 1,ma
      c%irp(j) = nzc
      nrc = 0
      do k = a%irp(j), a%irp(j+1)-1
        irw = a%ja(k)
        cfb = a%val(k)
        irwsz = b%irp(irw+1)-b%irp(irw)
        do i = b%irp(irw),b%irp(irw+1)-1
          icl = b%ja(i)
          if (irow(icl)<j) then
            nrc = nrc + 1
            idxs(nrc) = icl
            irow(icl) = j
          end if
          row(icl)  = row(icl)  + cfb*b%val(i)
        end do
      end do
      if (nrc > 0 ) then
        if ((nzc+nrc)>nze) then
          nze = max(ma*((nzc+j-1)/j),nzc+2*nrc)
          call psb_realloc(nze,c%val,info)
          if (info == 0) call psb_realloc(nze,c%ja,info)
          if (info /= 0) return
        end if

        call psb_qsort(idxs(1:nrc))
        do i=1, nrc
          irw        = idxs(i)
          c%ja(nzc)  = irw
          c%val(nzc) = row(irw)
          row(irw)   = dzero
          nzc        = nzc + 1
        end do
      end if
    end do

    c%irp(ma+1) = nzc
  end subroutine csr_spspmm

  ! gustavson's algorithm using perfect hashing 
  ! and OpenMP parallelisation
  subroutine spmm_omp_gustavson(a,b,c,info)
    use omp_lib

    implicit none
    type(psb_c_csr_sparse_mat), intent(in) :: a,b
    type(psb_c_csr_sparse_mat), intent(out):: c
    integer(psb_ipk_), intent(out) :: info

    complex(psb_spk_), allocatable :: vals(:), acc(:)
    integer(psb_ipk_)   :: ma, nb
    integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
    integer(psb_ipk_)   :: irw, jj, j, k, nnz, rwnz, thread_upperbound, start_idx, end_idx

    ma = a%get_nrows()
    nb = b%get_ncols()

    call c%allocate(ma, nb)
    c%irp(1) = 1

    ! dense accumulator
    ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
    call psb_realloc(nb, acc, info)

    allocate(offsets(omp_get_max_threads()))
    !$omp parallel private(vals,col_inds,nnz,rwnz,thread_upperbound,acc,start_idx,end_idx) &
    !$omp shared(a,b,c,offsets) 
    thread_upperbound = 0
    start_idx = 0
    !$omp do schedule(static) private(irw, jj, j)
    do irw = 1, ma
      if (start_idx == 0) then
        start_idx = irw
      end if
      end_idx = irw
      do jj = a%irp(irw), a%irp(irw + 1) - 1
        j = a%ja(jj)
        thread_upperbound = thread_upperbound + b%irp(j+1) - b%irp(j)
      end do
    end do
    !$omp end do

    call psb_realloc(thread_upperbound, vals, info)
    call psb_realloc(thread_upperbound, col_inds, info)

    ! possible bottleneck
    acc = 0

    nnz = 0
    !$omp do schedule(static) private(irw, jj, j, k)
    do irw = 1, ma
      rwnz = 0
      do jj = a%irp(irw), a%irp(irw + 1) - 1
        j = a%ja(jj)
        do k = b%irp(j), b%irp(j + 1) - 1
          if (acc(b%ja(k)) == 0) then
            nnz = nnz + 1
            rwnz = rwnz + 1
            col_inds(nnz) =  b%ja(k)
          end if
          acc(b%ja(k)) = acc(b%ja(k)) + a%val(jj) * b%val(k) 
        end do
      end do
      call psb_qsort(col_inds(nnz - rwnz + 1:nnz))

      do k = nnz - rwnz + 1, nnz
        vals(k) = acc(col_inds(k))
        acc(col_inds(k)) = 0
      end do
      c%irp(irw + 1) = rwnz
    end do
    !$omp end do

    offsets(omp_get_thread_num() + 1) = nnz
    !$omp barrier

    ! possible bottleneck
    !$omp single
    do k = 1, omp_get_num_threads() - 1
      offsets(k + 1) = offsets(k + 1) + offsets(k)
    end do
    !$omp end single

    !$omp barrier

    if (omp_get_thread_num() /= 0) then
      c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
    end if

    do irw = start_idx, end_idx - 1
      c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
    end do

    !$omp barrier

    !$omp single
    c%irp(ma + 1) = c%irp(ma + 1) + c%irp(ma)
    call psb_realloc(c%irp(ma + 1), c%val, info)        
    call psb_realloc(c%irp(ma + 1), c%ja, info)
    !$omp end single

    c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
    c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
    !$omp end parallel
  end subroutine spmm_omp_gustavson

  subroutine spmm_omp_gustavson_1d(a,b,c,info)
    use omp_lib

    implicit none
    type(psb_c_csr_sparse_mat), intent(in) :: a,b
    type(psb_c_csr_sparse_mat), intent(out):: c
    integer(psb_ipk_), intent(out) :: info

    complex(psb_spk_), allocatable :: vals(:), acc(:)
    integer(psb_ipk_)   :: ma, nb
    integer(psb_ipk_), allocatable :: col_inds(:), offsets(:)
    integer(psb_ipk_)   :: irw, jj, j, k, nnz, rwnz, thread_upperbound, &
         start_idx, end_idx , blk, blk_size, rwstart,&
         rwblk, rwblkrem, nblks

    ma = a%get_nrows()
    nb = b%get_ncols()

    call c%allocate(ma, nb)
    c%irp(1) = 1

    ! dense accumulator
    ! https://sc18.supercomputing.org/proceedings/workshops/workshop_files/ws_lasalss115s2-file1.pdf
    call psb_realloc(nb, acc, info)
    allocate(offsets(omp_get_max_threads()))

    nblks = 4 * omp_get_max_threads()
    rwblk = (ma / nblks)
    rwblkrem = modulo(ma, nblks)
    !$omp parallel private(vals,col_inds,nnz,thread_upperbound,acc,start_idx,end_idx) shared(a,b,c,offsets)
    thread_upperbound = 0
    start_idx = 0
    !$omp do schedule(static) private(irw, jj, j)
    do irw = 1, ma
      do jj = a%irp(irw), a%irp(irw + 1) - 1
        j = a%ja(jj)
        thread_upperbound = thread_upperbound + b%irp(j+1) - b%irp(j)
      end do
    end do
    !$omp end do

    call psb_realloc(thread_upperbound, vals, info)
    call psb_realloc(thread_upperbound, col_inds, info)

    ! possible bottleneck
    acc = 0

    nnz = 0
    !$omp do schedule(static) private(irw,jj,j,k,rwnz,blk,blk_size,rwstart)
    do blk = 0, nblks - 1
      if (blk < rwblkrem) then
        blk_size = rwblk + 1
        rwstart = blk * rwblk + blk + 1
      else
        blk_size = rwblk
        rwstart = blk * rwblk &
             + rwblkrem + 1
      end if
      do irw = rwstart, rwstart + blk_size - 1
        if (start_idx == 0) then
          start_idx = irw
        end if
        end_idx = irw
        rwnz = 0
        do jj = a%irp(irw), a%irp(irw + 1) - 1
          j = a%ja(jj)
          do k = b%irp(j), b%irp(j + 1) - 1
            if (acc(b%ja(k)) == 0) then
              nnz = nnz + 1
              rwnz = rwnz + 1
              col_inds(nnz) =  b%ja(k)
            end if
            acc(b%ja(k)) = acc(b%ja(k)) + a%val(jj) * b%val(k) 
          end do
        end do
        call psb_qsort(col_inds(nnz - rwnz + 1:nnz))

        do k = nnz - rwnz + 1, nnz
          vals(k) = acc(col_inds(k))
          acc(col_inds(k)) = 0
        end do
        c%irp(irw + 1) = rwnz
      end do
    end do
    !$omp end do

    offsets(omp_get_thread_num() + 1) = nnz
    !$omp barrier

    ! possible bottleneck
    !$omp single
    do k = 1, omp_get_num_threads() - 1
      offsets(k + 1) = offsets(k + 1) + offsets(k)
    end do
    !$omp end single

    !$omp barrier

    if (omp_get_thread_num() /= 0) then
      c%irp(start_idx) = offsets(omp_get_thread_num()) + 1
    end if

    do irw = start_idx, end_idx - 1
      c%irp(irw + 1) = c%irp(irw + 1) + c%irp(irw)
    end do

    !$omp barrier

    !$omp single
    c%irp(ma + 1) = c%irp(ma + 1) + c%irp(ma)
    call psb_realloc(c%irp(ma + 1), c%val, info)        
    call psb_realloc(c%irp(ma + 1), c%ja, info)
    !$omp end single

    c%val(c%irp(start_idx):c%irp(end_idx + 1) - 1) = vals(1:nnz)
    c%ja(c%irp(start_idx):c%irp(end_idx + 1) - 1) = col_inds(1:nnz)
    !$omp end parallel
  end subroutine spmm_omp_gustavson_1d

  subroutine spmm_serial_rb_tree(a,b,c,info)
    use psb_rb_idx_tree_mod
    implicit none
    type(psb_c_csr_sparse_mat), intent(in) :: a,b
    type(psb_c_csr_sparse_mat), intent(out):: c
    integer(psb_ipk_), intent(out) :: info

    integer(psb_ipk_)   :: a_m, b_n
    integer(psb_ipk_)   :: row, col
    type(psb_c_rb_idx_tree), allocatable :: row_accs(:)

    a_m = a%get_nrows()
    b_n = b%get_ncols()

    allocate(row_accs(a_m))
    call c%allocate(a_m, b_n)

    do row = 1, a_m
      row_accs(row)%nnz = 0
      nullify(row_accs(row)%root)
      do col = a%irp(row), a%irp(row + 1) - 1
        call psb_rb_idx_tree_scalar_sparse_row_mul(row_accs(row), a%val(col), b, a%ja(col))
      end do
    end do
    call psb_rb_idx_tree_merge(row_accs, c)

    deallocate(row_accs)

    info = 0
  end subroutine spmm_serial_rb_tree

  subroutine spmm_omp_rb_tree(a,b,c,info)
    use omp_lib
    use psb_rb_idx_tree_mod
    implicit none
    type(psb_c_csr_sparse_mat), intent(in) :: a,b
    type(psb_c_csr_sparse_mat), intent(out):: c
    integer(psb_ipk_), intent(out) :: info

    integer(psb_ipk_)   :: a_m, b_n
    integer(psb_ipk_)   :: row, col
    type(psb_c_rb_idx_tree), allocatable :: row_accs(:)
    real(8) :: tic, toc

    a_m = a%get_nrows()
    b_n = b%get_ncols()

    call c%allocate(a_m, b_n)

    allocate(row_accs(a_m))
    call c%allocate(a_m, b_n)

    !$omp parallel do schedule(static)
    do row = 1, a_m
      row_accs(row)%nnz = 0
      nullify(row_accs(row)%root)
      do col = a%irp(row), a%irp(row + 1) - 1
        call psb_rb_idx_tree_scalar_sparse_row_mul(row_accs(row), a%val(col), b, a%ja(col))
      end do
    end do
    !$omp end parallel do

    call psb_rb_idx_tree_merge(row_accs, c)

    deallocate(row_accs)
    info = 0
  end subroutine spmm_omp_rb_tree

  subroutine compute_indices(a, b, c, info)
    implicit none
    type(psb_c_csr_sparse_mat), intent(in) :: a,b
    type(psb_c_csr_sparse_mat), intent(out):: c
    integer(psb_ipk_), intent(out) :: info

    integer :: full_mat_bound
    integer :: row, col, i, j, k, nnz

    full_mat_bound = 0
    !omp parallel do schedule(static) reduction(+:full_mat_bound)
    do row = 1, a%get_nrows()
      do col = a%irp(row), a%irp(row + 1) - 1
        j = a%ja(col)
        full_mat_bound = full_mat_bound + b%irp(j+1) - b%irp(j)
      end do
    end do
    !omp end parallel do

    call psb_realloc(a%get_nrows() + 1, c%irp, info)
    call psb_realloc(full_mat_bound, c%ja, info)
    c%ja = 0
    c%irp(1) = 1

    nnz = 0

    do row = 1, a%get_nrows()
      do col = a%irp(row), a%irp(row + 1) - 1
        do i = b%irp(a%ja(col)), b%irp(a%ja(col) + 1) - 1
          k = 0
          do while(c%ja(c%irp(row) + k) /= 0 .and. c%ja(c%irp(row) + k) /= b%ja(i))
            k = k + 1
          end do
          if (c%ja(c%irp(row) + k) == 0) then
            c%ja(c%irp(row)+k) = b%ja(i)
            nnz = nnz + 1
          end if
        end do
      end do
      c%irp(row + 1) = nnz + 1
      call psb_qsort(c%ja(c%irp(row):c%irp(row + 1)-1))
    end do


    call psb_realloc(nnz, c%ja, info)
    call psb_realloc(nnz, c%val, info)

    c%val = 0
  end subroutine compute_indices

  subroutine direct_scalar_sparse_row_mul(out_mat, out_row_num, scalar, mat, trgt_row_num)
    type(psb_c_csr_sparse_mat), intent(inout) :: out_mat
    integer(psb_ipk_), intent(in) :: out_row_num
    complex(psb_spk_), intent(in) :: scalar
    type(psb_c_csr_sparse_mat), intent(in) :: mat
    integer(psb_ipk_), intent(in) :: trgt_row_num

    integer(psb_ipk_) :: i, k, row_start, row_end

    row_start = out_mat%irp(out_row_num)
    row_end = out_mat%irp(out_row_num + 1) - 1

    do i = mat%irp(trgt_row_num), mat%irp(trgt_row_num + 1) - 1
      do k = out_mat%irp(out_row_num), out_mat%irp(out_row_num + 1) - 1
        if (out_mat%ja(k) == mat%ja(i)) then
          out_mat%val(k) = out_mat%val(k) + scalar * mat%val(i)
          exit
        end if
      end do
    end do

  end subroutine direct_scalar_sparse_row_mul

  subroutine spmm_omp_two_pass(a,b,c,info)
    use omp_lib

    implicit none
    type(psb_c_csr_sparse_mat), intent(in) :: a,b
    type(psb_c_csr_sparse_mat), intent(out):: c
    integer(psb_ipk_), intent(out) :: info

    integer(psb_ipk_)   :: a_m, b_n, row, col

    a_m = a%get_nrows()
    b_n = b%get_ncols()

    call c%allocate(a_m, b_n)

    call compute_indices(a, b, c, info)

    !$omp parallel do schedule(static)
    do row = 1, a_m
      do col = a%irp(row), a%irp(row + 1) - 1
        call direct_scalar_sparse_row_mul(c, row, a%val(col), b, a%ja(col))
      end do
    end do
    !$omp end parallel do
  end subroutine spmm_omp_two_pass

end subroutine psb_ccsrspspmm

#else

subroutine psb_ccsrspspmm(a,b,c,info)
  use psb_c_mat_mod
  use psb_serial_mod, psb_protect_name => psb_ccsrspspmm

  implicit none

  class(psb_c_csr_sparse_mat), intent(in) :: a,b
  type(psb_c_csr_sparse_mat), intent(out)  :: c
  integer(psb_ipk_), intent(out)                     :: info
  integer(psb_ipk_) :: ma,na,mb,nb, nzc, nza, nzb
  character(len=20) :: name
  integer(psb_ipk_) :: err_act
  name='psb_csrspspmm'
  call psb_erractionsave(err_act)
  info = psb_success_

  if (a%is_dev())   call a%sync()
  if (b%is_dev())   call b%sync()

  ma = a%get_nrows()
  na = a%get_ncols()
  mb = b%get_nrows()
  nb = b%get_ncols()


  if ( mb /= na ) then
    write(psb_err_unit,*) 'Mismatch in SPSPMM: ',ma,na,mb,nb
    info = psb_err_invalid_matrix_sizes_
    call psb_errpush(info,name)
    goto 9999
  endif

  ! Estimate number of nonzeros on output.
  nza = a%get_nzeros()
  nzb = b%get_nzeros()
  nzc = 2*(nza+nzb)
  call c%allocate(ma,nb,nzc)

  call csr_spspmm(a,b,c,info)

  call c%set_asb()
  call c%set_host()

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

contains

  subroutine csr_spspmm(a,b,c,info)
    implicit none
    type(psb_c_csr_sparse_mat), intent(in)  :: a,b
    type(psb_c_csr_sparse_mat), intent(inout) :: c
    integer(psb_ipk_), intent(out)          :: info
    integer(psb_ipk_)              :: ma,na,mb,nb
    integer(psb_ipk_), allocatable :: irow(:), idxs(:)
    complex(psb_spk_), allocatable    :: row(:)
    integer(psb_ipk_)              :: i,j,k,irw,icl,icf, iret, &
         & nzc,nnzre, isz, ipb, irwsz, nrc, nze
    complex(psb_spk_)                 :: cfb


    info = psb_success_
    ma = a%get_nrows()
    na = a%get_ncols()
    mb = b%get_nrows()
    nb = b%get_ncols()

    nze = min(size(c%val),size(c%ja))
    isz = max(ma,na,mb,nb)
    call psb_realloc(isz,row,info)
    if (info == 0) call psb_realloc(isz,idxs,info)
    if (info == 0) call psb_realloc(isz,irow,info)
    if (info /= 0) return
    row  = dzero
    irow = 0
    nzc  = 1
    do j = 1,ma
      c%irp(j) = nzc
      nrc = 0
      do k = a%irp(j), a%irp(j+1)-1
        irw = a%ja(k)
        cfb = a%val(k)
        irwsz = b%irp(irw+1)-b%irp(irw)
        do i = b%irp(irw),b%irp(irw+1)-1
          icl = b%ja(i)
          if (irow(icl)<j) then
            nrc = nrc + 1
            idxs(nrc) = icl
            irow(icl) = j
          end if
          row(icl)  = row(icl)  + cfb*b%val(i)
        end do
      end do
      if (nrc > 0 ) then
        if ((nzc+nrc)>nze) then
          nze = max(ma*((nzc+j-1)/j),nzc+2*nrc)
          call psb_realloc(nze,c%val,info)
          if (info == 0) call psb_realloc(nze,c%ja,info)
          if (info /= 0) return
        end if

        call psb_qsort(idxs(1:nrc))
        do i=1, nrc
          irw        = idxs(i)
          c%ja(nzc)  = irw
          c%val(nzc) = row(irw)
          row(irw)   = dzero
          nzc        = nzc + 1
        end do
      end if
    end do

    c%irp(ma+1) = nzc


  end subroutine csr_spspmm

end subroutine psb_ccsrspspmm
#endif

subroutine psb_c_ecsr_mold(a,b,info)
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_mold
  use psb_error_mod
  implicit none
  class(psb_c_ecsr_sparse_mat), intent(in)                 :: a
  class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                    :: info
  integer(psb_ipk_) :: err_act
  character(len=20)  :: name='ecsr_mold'
  logical, parameter :: debug=.false.

  call psb_get_erraction(err_act)

  info = 0
  if (allocated(b)) then
    call b%free()
    deallocate(b,stat=info)
  end if
  if (info == 0) allocate(psb_c_ecsr_sparse_mat :: b, stat=info)

  if (info /= 0) then
    info = psb_err_alloc_dealloc_
    call psb_errpush(info, name)
    goto 9999
  end if
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_c_ecsr_mold

subroutine psb_c_ecsr_csmv(alpha,a,x,beta,y,info,trans)
  use psb_error_mod
  use psb_string_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_csmv
  implicit none
  class(psb_c_ecsr_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(in)          :: alpha, beta, x(:)
  complex(psb_spk_), intent(inout)       :: y(:)
  integer(psb_ipk_), intent(out)                :: info
  character, optional, intent(in)     :: trans

  character :: trans_
  integer(psb_ipk_) :: m, n
  logical   :: tra, ctra
  integer(psb_ipk_) :: err_act
  integer(psb_ipk_) :: ierr(5)
  character(len=20)  :: name='c_csr_csmv'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_
  if (a%is_dev())   call a%sync()

  if (present(trans)) then
    trans_ = trans
  else
    trans_ = 'N'
  end if

  if (.not.a%is_asb()) then
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  endif


  tra  = (psb_toupper(trans_) == 'T')
  ctra = (psb_toupper(trans_) == 'C')

  if (tra.or.ctra) then
    m = a%get_ncols()
    n = a%get_nrows()
  else
    n = a%get_ncols()
    m = a%get_nrows()
  end if

  if (size(x,1)<n) then
    info = psb_err_input_asize_small_i_
    ierr(1) = 3; ierr(2) = size(x); ierr(3) = n;
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  end if

  if (size(y,1)<m) then
    info = psb_err_input_asize_small_i_
    ierr(1) = 5; ierr(2) = size(y); ierr(3) =m;
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  end if

  if ((beta == cone).and.&
       & .not.(tra.or.ctra.or.(a%is_triangle()).or.(a%is_unit()))) then    
    call psb_c_ecsr_csmv_inner(m,n,alpha,a%irp,a%ja,a%val,&
         & a%nnerws,a%nerwp,x,y)
  else
    call a%psb_c_csr_sparse_mat%csmv(alpha,x,beta,y,info,trans)
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

contains
  subroutine psb_c_ecsr_csmv_inner(m,n,alpha,irp,ja,val,&
       & nnerws,nerwp,x,y)
    integer(psb_ipk_), intent(in)    :: m,n,nnerws,irp(*),ja(*),nerwp(*)
    complex(psb_spk_), intent(in)      :: alpha, x(*),val(*)
    complex(psb_spk_), intent(inout)   :: y(*)


    integer(psb_ipk_) :: i,j,ir
    complex(psb_spk_) :: acc

    if (alpha == czero) return

    if (alpha == cone) then
      !$omp parallel do private(ir,i,j,acc)
      do ir=1,nnerws
        i = nerwp(ir)
        acc  = czero
        do j=irp(i), irp(i+1)-1
          acc  = acc + val(j) * x(ja(j))
        enddo
        y(i) = y(i) + acc
      end do
      
    else if (alpha == -cone) then
      
      !$omp parallel do private(ir,i,j,acc)
      do ir=1,nnerws
        i = nerwp(ir)
        acc  = czero
        do j=irp(i), irp(i+1)-1
          acc  = acc + val(j) * x(ja(j))
        enddo
        y(i) = y(i) -acc
      end do
      
    else
      
      !$omp parallel do private(ir,i,j,acc)
      do ir=1,nnerws
        i = nerwp(ir)
        acc  = czero
        do j=irp(i), irp(i+1)-1
          acc  = acc + val(j) * x(ja(j))
        enddo
        y(i) = y(i) + alpha*acc
      end do
      
    end if

  end subroutine psb_c_ecsr_csmv_inner

end subroutine psb_c_ecsr_csmv


subroutine psb_c_ecsr_cmp_nerwp(a,info)
  use psb_const_mod
  use psb_realloc_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_ecsr_cmp_nerwp
  implicit none

  class(psb_c_ecsr_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(out)               :: info

  integer(psb_ipk_) :: nnerws, i, nr, nzr
  info = psb_success_
  nr = a%get_nrows()
  call psb_realloc(nr,a%nerwp,info)
  nnerws = 0 
  do i=1, nr
    nzr = a%irp(i+1)-a%irp(i)
    if (nzr>0) then 
      nnerws = nnerws + 1
      a%nerwp(nnerws) = i
    end if
  end do
  call psb_realloc(nnerws,a%nerwp,info)
  a%nnerws = nnerws
end subroutine psb_c_ecsr_cmp_nerwp

subroutine psb_c_cp_ecsr_from_coo(a,b,info)
  use psb_const_mod
  use psb_realloc_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_coo
  implicit none

  class(psb_c_ecsr_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(in)    :: b
  integer(psb_ipk_), intent(out)               :: info
  
  info = psb_success_
  call a%psb_c_csr_sparse_mat%cp_from_coo(b,info)
  if (info == psb_success_) call a%cmp_nerwp(info)

end subroutine psb_c_cp_ecsr_from_coo

subroutine psb_c_mv_ecsr_from_coo(a,b,info)
  use psb_const_mod
  use psb_realloc_mod
  use psb_error_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_coo
  implicit none

  class(psb_c_ecsr_sparse_mat), intent(inout) :: a
  class(psb_c_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)                        :: info

  
  info = psb_success_
  call a%psb_c_csr_sparse_mat%mv_from_coo(b,info)
  if (info == psb_success_) call a%cmp_nerwp(info)

end subroutine psb_c_mv_ecsr_from_coo

subroutine psb_c_mv_ecsr_from_fmt(a,b,info)
  use psb_const_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_mv_ecsr_from_fmt
  implicit none

  class(psb_c_ecsr_sparse_mat), intent(inout)  :: a
  class(psb_c_base_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)                         :: info

  
  info = psb_success_
  call a%psb_c_csr_sparse_mat%mv_from_fmt(b,info)
  if (info == psb_success_) call a%cmp_nerwp(info)

end subroutine psb_c_mv_ecsr_from_fmt

subroutine psb_c_cp_ecsr_from_fmt(a,b,info)
  use psb_const_mod
  use psb_c_base_mat_mod
  use psb_realloc_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_c_cp_ecsr_from_fmt
  implicit none

  class(psb_c_ecsr_sparse_mat), intent(inout) :: a
  class(psb_c_base_sparse_mat), intent(in)   :: b
  integer(psb_ipk_), intent(out)                        :: info

  
  info = psb_success_
  call a%psb_c_csr_sparse_mat%cp_from_fmt(b,info)
  if (info == psb_success_) call a%cmp_nerwp(info)

end subroutine psb_c_cp_ecsr_from_fmt

!
!
!  lc  version
!
!
subroutine psb_lc_csr_get_diag(a,d,info)
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_get_diag
  implicit none
  class(psb_lc_csr_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(out)     :: d(:)
  integer(psb_ipk_), intent(out)            :: info

  integer(psb_lpk_) :: mnm, i, j, k
  integer(psb_ipk_) :: err_act, ierr(5)
  character(len=20)  :: name='get_diag'
  logical, parameter :: debug=.false.

  info  = psb_success_
  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  mnm = min(a%get_nrows(),a%get_ncols())
  if (size(d) < mnm) then
    info=psb_err_input_asize_invalid_i_
    ierr(1) = 2; ierr(2) = size(d);
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  end if


  if (a%is_unit()) then
    d(1:mnm) = cone
  else
    do i=1, mnm
      d(i) = czero
      do k=a%irp(i),a%irp(i+1)-1
        j=a%ja(k)
        if ((j == i) .and.(j <= mnm )) then
          d(i) = a%val(k)
        endif
      enddo
    end do
  end if
  do i=mnm+1,size(d)
    d(i) = czero
  end do

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_lc_csr_get_diag


subroutine psb_lc_csr_scal(d,a,info,side)
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_scal
  use psb_string_mod
  implicit none
  class(psb_lc_csr_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)      :: d(:)
  integer(psb_ipk_), intent(out)            :: info
  character, intent(in), optional :: side

  integer(psb_lpk_) :: mnm, i, j, m
  integer(psb_ipk_) :: err_act, ierr(5)
  character(len=20)  :: name='scal'
  character :: side_
  logical   :: left
  logical, parameter :: debug=.false.

  info  = psb_success_
  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  if (a%is_unit()) then
    call a%make_nonunit()
  end if

  side_ = 'L'
  if (present(side)) then
    side_ = psb_toupper(side)
  end if

  left = (side_ == 'L')

  if (left) then
    m = a%get_nrows()
    if (size(d) < m) then
      info=psb_err_input_asize_invalid_i_
      ierr(1) = 2; ierr(2) = size(d);
      call psb_errpush(info,name,i_err=ierr)
      goto 9999
    end if

    do i=1, m
      do j = a%irp(i), a%irp(i+1) -1
        a%val(j) = a%val(j) * d(i)
      end do
    enddo
  else
    m = a%get_ncols()
    if (size(d) < m) then
      info=psb_err_input_asize_invalid_i_
      ierr(1) = 2; ierr(2) = size(d);
      call psb_errpush(info,name,i_err=ierr)
      goto 9999
    end if

    do i=1,a%get_nzeros()
      j        = a%ja(i)
      a%val(i) = a%val(i) * d(j)
    enddo
  end if

  call a%set_host()

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_lc_csr_scal


subroutine psb_lc_csr_scals(d,a,info)
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_scals
  implicit none
  class(psb_lc_csr_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)      :: d
  integer(psb_ipk_), intent(out)            :: info

  integer(psb_lpk_) :: mnm, i, j, m
  integer(psb_ipk_) :: err_act
  character(len=20)  :: name='scal'
  logical, parameter :: debug=.false.

  info  = psb_success_
  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  if (a%is_unit()) then
    call a%make_nonunit()
  end if

  do i=1,a%get_nzeros()
    a%val(i) = a%val(i) * d
  enddo
  call a%set_host()

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_lc_csr_scals


function psb_lc_csr_maxval(a) result(res)
  use psb_error_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_maxval
  implicit none
  class(psb_lc_csr_sparse_mat), intent(in) :: a
  real(psb_spk_)         :: res

  integer(psb_lpk_) :: nnz
  integer(psb_ipk_) :: info
  character(len=20)  :: name='lc_csr_maxval'
  logical, parameter :: debug=.false.

  if (a%is_dev())   call a%sync()

  res = szero
  nnz = a%get_nzeros()
  if (allocated(a%val)) then
    nnz = min(nnz,size(a%val))
    res = maxval(abs(a%val(1:nnz)))
  end if
end function psb_lc_csr_maxval

function psb_lc_csr_csnmi(a) result(res)
  use psb_error_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_csnmi
  implicit none
  class(psb_lc_csr_sparse_mat), intent(in) :: a
  real(psb_spk_)         :: res

  integer(psb_lpk_) :: i,j,k,m,n, nr, ir, jc, nc
  real(psb_spk_) :: acc
  logical   :: tra
  integer(psb_ipk_) :: err_act
  character(len=20)  :: name='lc_csnmi'
  logical, parameter :: debug=.false.


  res = szero
  if (a%is_dev())   call a%sync()

  do i = 1, a%get_nrows()
    acc = szero
    do j=a%irp(i),a%irp(i+1)-1
      acc = acc + abs(a%val(j))
    end do
    res = max(res,acc)
  end do

end function psb_lc_csr_csnmi

subroutine psb_lc_csr_rowsum(d,a)
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_rowsum
  class(psb_lc_csr_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(out)             :: d(:)

  integer(psb_lpk_) :: i,j,k,m,n, nnz, ir, jc, nc
  complex(psb_spk_) :: acc
  complex(psb_spk_), allocatable :: vt(:)
  logical   :: tra
  integer(psb_ipk_) :: err_act, info
  integer(psb_epk_) :: err(5)
  character(len=20)  :: name='rowsum'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  m = a%get_nrows()
  if (size(d) < m) then
    info=psb_err_input_asize_small_i_
    err(1) = 1; err(2) = size(d); err(3) = m
    call psb_errpush(info,name,e_err=err)
    goto 9999
  end if

  do i = 1, a%get_nrows()
    d(i) = czero
    do j=a%irp(i),a%irp(i+1)-1
      d(i) = d(i) + (a%val(j))
    end do
  end do

  if (a%is_unit()) then
    do i=1, m
      d(i) = d(i) + cone
    end do
  end if

  return
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_lc_csr_rowsum

subroutine psb_lc_csr_arwsum(d,a)
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_arwsum
  class(psb_lc_csr_sparse_mat), intent(in) :: a
  real(psb_spk_), intent(out)              :: d(:)

  integer(psb_lpk_) :: i,j,k,m,n, nnz, ir, jc, nc
  real(psb_spk_) :: acc
  real(psb_spk_), allocatable :: vt(:)
  logical   :: tra
  integer(psb_ipk_) :: err_act, info
  integer(psb_epk_) :: err(5)
  character(len=20)  :: name='rowsum'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  m = a%get_nrows()
  if (size(d) < m) then
    info=psb_err_input_asize_small_i_
    err(1) = 1; err(2) = size(d); err(3) = m
    call psb_errpush(info,name,e_err=err)
    goto 9999
  end if


  do i = 1, a%get_nrows()
    d(i) = szero
    do j=a%irp(i),a%irp(i+1)-1
      d(i) = d(i) + abs(a%val(j))
    end do
  end do

  if (a%is_unit()) then
    do i=1, m
      d(i) = d(i) + sone
    end do
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_lc_csr_arwsum

subroutine psb_lc_csr_colsum(d,a)
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_colsum
  class(psb_lc_csr_sparse_mat), intent(in) :: a
  complex(psb_spk_), intent(out)              :: d(:)

  integer(psb_lpk_) :: i,j,k,m,n, nnz, ir, jc, nc
  complex(psb_spk_) :: acc
  complex(psb_spk_), allocatable :: vt(:)
  logical   :: tra
  integer(psb_ipk_) :: err_act, info
  integer(psb_epk_) :: err(5)
  character(len=20)  :: name='colsum'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  m = a%get_nrows()
  n = a%get_ncols()
  if (size(d) < n) then
    info=psb_err_input_asize_small_i_
    err(1) = 1; err(2) = size(d); err(3) = n
    call psb_errpush(info,name,e_err=err)
    goto 9999
  end if

  d   = czero

  do i=1, m
    do j=a%irp(i),a%irp(i+1)-1
      k = a%ja(j)
      d(k) = d(k) + (a%val(j))
    end do
  end do

  if (a%is_unit()) then
    do i=1, n
      d(i) = d(i) + cone
    end do
  end if

  return
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_lc_csr_colsum

subroutine psb_lc_csr_aclsum(d,a)
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_aclsum
  class(psb_lc_csr_sparse_mat), intent(in) :: a
  real(psb_spk_), intent(out)              :: d(:)

  integer(psb_lpk_) :: i,j,k,m,n, nnz, ir, jc, nc
  real(psb_spk_) :: acc
  real(psb_spk_), allocatable :: vt(:)
  logical   :: tra
  integer(psb_ipk_) :: err_act, info
  integer(psb_epk_) :: err(5)
  character(len=20)  :: name='aclsum'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()

  m = a%get_nrows()
  n = a%get_ncols()
  if (size(d) < n) then
    info=psb_err_input_asize_small_i_
    err(1) = 1; err(2) = size(d); err(3) = n
    call psb_errpush(info,name,e_err=err)
    goto 9999
  end if

  d   = szero

  do i=1, m
    do j=a%irp(i),a%irp(i+1)-1
      k = a%ja(j)
      d(k) = d(k) + abs(a%val(j))
    end do
  end do

  if (a%is_unit()) then
    do i=1, n
      d(i) = d(i) + sone
    end do
  end if

  return
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_lc_csr_aclsum


! == ===================================
!
!
!
! Data management
!
!
!
!
!
! == ===================================


subroutine  psb_lc_csr_reallocate_nz(nz,a)
  use psb_error_mod
  use psb_realloc_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_reallocate_nz
  implicit none
  integer(psb_lpk_), intent(in) :: nz
  class(psb_lc_csr_sparse_mat), intent(inout) :: a
  integer(psb_ipk_) :: err_act, info
  character(len=20)  :: name='lc_csr_reallocate_nz'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)

  call psb_realloc(max(nz,ione),a%ja,info)
  if (info == psb_success_) call psb_realloc(max(nz,ione),a%val,info)
  if (info == psb_success_) call psb_realloc(a%get_nrows()+1,a%irp,info)
  if (info /= psb_success_) then
    call psb_errpush(psb_err_alloc_dealloc_,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_lc_csr_reallocate_nz

subroutine psb_lc_csr_mold(a,b,info)
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_mold
  use psb_error_mod
  implicit none
  class(psb_lc_csr_sparse_mat), intent(in)                  :: a
  class(psb_lc_base_sparse_mat), intent(inout), allocatable :: b
  integer(psb_ipk_), intent(out)                    :: info
  integer(psb_ipk_) :: err_act
  character(len=20)  :: name='csr_mold'
  logical, parameter :: debug=.false.

  call psb_get_erraction(err_act)

  info = 0
  if (allocated(b)) then
    call b%free()
    deallocate(b,stat=info)
  end if
  if (info == 0) allocate(psb_lc_csr_sparse_mat :: b, stat=info)

  if (info /= 0) then
    info = psb_err_alloc_dealloc_
    call psb_errpush(info, name)
    goto 9999
  end if
  return

9999 call psb_error_handler(err_act)
  return

end subroutine psb_lc_csr_mold

subroutine  psb_lc_csr_allocate_mnnz(m,n,a,nz)
  use psb_error_mod
  use psb_realloc_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_allocate_mnnz
  implicit none
  integer(psb_lpk_), intent(in) :: m,n
  class(psb_lc_csr_sparse_mat), intent(inout) :: a
  integer(psb_lpk_), intent(in), optional :: nz
  integer(psb_lpk_) :: nz_
  integer(psb_ipk_) :: err_act, info
  integer(psb_ipk_) :: ierr(5)
  character(len=20)  :: name='allocate_mnz'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_
  if (m < 0) then
    info = psb_err_iarg_neg_
    ierr(1) = ione; ierr(2) = izero;
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  endif
  if (n < 0) then
    info = psb_err_iarg_neg_
    ierr(1) = 2; ierr(2) = izero;
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  endif
  if (present(nz)) then
    nz_ = max(nz,ione)
  else
    nz_ = max(7*m,7*n,ione)
  end if
  if (nz_ < 0) then
    info = psb_err_iarg_neg_
    ierr(1) = 3; ierr(2) = izero;
    call psb_errpush(info,name,i_err=ierr)
    goto 9999
  endif

  if (info == psb_success_) call psb_realloc(m+1,a%irp,info)
  if (info == psb_success_) call psb_realloc(nz_,a%ja,info)
  if (info == psb_success_) call psb_realloc(nz_,a%val,info)
  if (info == psb_success_) then
    a%irp=0
    call a%set_nrows(m)
    call a%set_ncols(n)
    call a%set_bld()
    call a%set_triangle(.false.)
    call a%set_unit(.false.)
    call a%set_dupl(psb_dupl_def_)
    call a%set_host()
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_lc_csr_allocate_mnnz


subroutine psb_lc_csr_csgetptn(imin,imax,a,nz,ia,ja,info,&
     & jmin,jmax,iren,append,nzin,rscale,cscale)
  ! Output is always in  COO format
  use psb_error_mod
  use psb_const_mod
  use psb_error_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_csgetptn
  implicit none

  class(psb_lc_csr_sparse_mat), intent(in) :: a
  integer(psb_lpk_), intent(in)                  :: imin,imax
  integer(psb_lpk_), intent(out)                 :: nz
  integer(psb_lpk_), allocatable, intent(inout)  :: ia(:), ja(:)
  integer(psb_ipk_),intent(out)                  :: info
  logical, intent(in), optional        :: append
  integer(psb_lpk_), intent(in), optional        :: iren(:)
  integer(psb_lpk_), intent(in), optional        :: jmin,jmax, nzin
  logical, intent(in), optional        :: rscale,cscale

  logical :: append_, rscale_, cscale_
  integer(psb_lpk_) :: nzin_, jmin_, jmax_, i
  integer(psb_ipk_) :: err_act
  character(len=20)  :: name='csget'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()
  info = psb_success_
  nz = 0

  if (present(jmin)) then
    jmin_ = jmin
  else
    jmin_ = 1
  endif
  if (present(jmax)) then
    jmax_ = jmax
  else
    jmax_ = a%get_ncols()
  endif

  if ((imax<imin).or.(jmax_<jmin_)) return

  if (present(append)) then
    append_=append
  else
    append_=.false.
  endif
  if ((append_).and.(present(nzin))) then
    nzin_ = nzin
  else
    nzin_ = 0
  endif
  if (present(rscale)) then
    rscale_ = rscale
  else
    rscale_ = .false.
  endif
  if (present(cscale)) then
    cscale_ = cscale
  else
    cscale_ = .false.
  endif
  if ((rscale_.or.cscale_).and.(present(iren))) then
    info = psb_err_many_optional_arg_
    call psb_errpush(info,name,a_err='iren (rscale.or.cscale)')
    goto 9999
  end if

  call csr_getptn(imin,imax,jmin_,jmax_,a,nz,ia,ja,nzin_,append_,info,iren)

  if (rscale_) then
    do i=nzin_+1, nzin_+nz
      ia(i) = ia(i) - imin + 1
    end do
  end if
  if (cscale_) then
    do i=nzin_+1, nzin_+nz
      ja(i) = ja(i) - jmin_ + 1
    end do
  end if

  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

contains

  subroutine csr_getptn(imin,imax,jmin,jmax,a,nz,ia,ja,nzin,append,info,&
       & iren)

    use psb_const_mod
    use psb_error_mod
    use psb_realloc_mod
    use psb_sort_mod
    implicit none

    class(psb_lc_csr_sparse_mat), intent(in)    :: a
    integer(psb_lpk_) :: imin,imax,jmin,jmax
    integer(psb_lpk_), intent(inout)               :: nz
    integer(psb_lpk_), allocatable, intent(inout)  :: ia(:), ja(:)
    integer(psb_lpk_), intent(in)                  :: nzin
    logical, intent(in)                  :: append
    integer(psb_ipk_) :: info
    integer(psb_lpk_), optional                    :: iren(:)
    integer(psb_lpk_) :: nzin_, nza, idx,i,j,k, nzt, irw, lrw, icl,lcl,nrd,ncd
    integer(psb_ipk_) :: debug_level, debug_unit
    character(len=20) :: name='csr_getptn'

    debug_unit  = psb_get_debug_unit()
    debug_level = psb_get_debug_level()

    nza = a%get_nzeros()
    irw = imin
    lrw = min(imax,a%get_nrows())
    icl = jmin
    lcl = min(jmax,a%get_ncols())
    if (irw<0) then
      info = psb_err_pivot_too_small_
      return
    end if

    if (append) then
      nzin_ = nzin
    else
      nzin_ = 0
    endif
    !
    ! This is a row-oriented routine, so the following is a
    ! good choice.
    !
    nzt = (a%irp(lrw+1)-a%irp(irw))
    nz = 0

    call psb_ensure_size(nzin_+nzt,ia,info)
    if (info == psb_success_) call psb_ensure_size(nzin_+nzt,ja,info)

    if (info /= psb_success_) return

    if (present(iren)) then
      do i=irw, lrw
        do j=a%irp(i), a%irp(i+1) - 1
          if ((jmin <= a%ja(j)).and.(a%ja(j)<=jmax)) then
            nzin_ = nzin_ + 1
            nz    = nz + 1
            ia(nzin_)  = iren(i)
            ja(nzin_)  = iren(a%ja(j))
          end if
        enddo
      end do
    else
      do i=irw, lrw
        do j=a%irp(i), a%irp(i+1) - 1
          if ((jmin <= a%ja(j)).and.(a%ja(j)<=jmax)) then
            nzin_ = nzin_ + 1
            nz    = nz + 1
            ia(nzin_)  = (i)
            ja(nzin_)  = (a%ja(j))
          end if
        enddo
      end do
    end if

  end subroutine csr_getptn

end subroutine psb_lc_csr_csgetptn


subroutine psb_lc_csr_csgetrow(imin,imax,a,nz,ia,ja,val,info,&
     & jmin,jmax,iren,append,nzin,rscale,cscale)
  ! Output is always in  COO format
  use psb_error_mod
  use psb_const_mod
  use psb_error_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_csgetrow
  implicit none

  class(psb_lc_csr_sparse_mat), intent(in) :: a
  integer(psb_lpk_), intent(in)                  :: imin,imax
  integer(psb_lpk_), intent(out)                 :: nz
  integer(psb_lpk_), allocatable, intent(inout)  :: ia(:), ja(:)
  complex(psb_spk_), allocatable,  intent(inout)    :: val(:)
  integer(psb_ipk_),intent(out)                  :: info
  logical, intent(in), optional        :: append
  integer(psb_lpk_), intent(in), optional        :: iren(:)
  integer(psb_lpk_), intent(in), optional        :: jmin,jmax, nzin
  logical, intent(in), optional        :: rscale,cscale

  logical :: append_, rscale_, cscale_
  integer(psb_lpk_) :: nzin_, jmin_, jmax_, i
  integer(psb_ipk_) :: err_act
  character(len=20)  :: name='csget'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  if (a%is_dev())   call a%sync()
  info = psb_success_
  nz = 0

  if (present(jmin)) then
    jmin_ = jmin
  else
    jmin_ = 1
  endif
  if (present(jmax)) then
    jmax_ = jmax
  else
    jmax_ = a%get_ncols()
  endif

  if ((imax<imin).or.(jmax_<jmin_)) return

  if (present(append)) then
    append_=append
  else
    append_=.false.
  endif
  if ((append_).and.(present(nzin))) then
    nzin_ = nzin
  else
    nzin_ = 0
  endif
  if (present(rscale)) then
    rscale_ = rscale
  else
    rscale_ = .false.
  endif
  if (present(cscale)) then
    cscale_ = cscale
  else
    cscale_ = .false.
  endif
  if ((rscale_.or.cscale_).and.(present(iren))) then
    info = psb_err_many_optional_arg_
    call psb_errpush(info,name,a_err='iren (rscale.or.cscale)')
    goto 9999
  end if

  call csr_getrow(imin,imax,jmin_,jmax_,a,nz,ia,ja,val,nzin_,append_,info,&
       & iren)

  if (rscale_) then
    do i=nzin_+1, nzin_+nz
      ia(i) = ia(i) - imin + 1
    end do
  end if
  if (cscale_) then
    do i=nzin_+1, nzin_+nz
      ja(i) = ja(i) - jmin_ + 1
    end do
  end if

  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

contains

  subroutine csr_getrow(imin,imax,jmin,jmax,a,nz,ia,ja,val,nzin,append,info,&
       & iren)

    use psb_const_mod
    use psb_error_mod
    use psb_realloc_mod
    use psb_sort_mod
    implicit none

    class(psb_lc_csr_sparse_mat), intent(in)    :: a
    integer(psb_lpk_) :: imin,imax,jmin,jmax
    integer(psb_lpk_), intent(inout)               :: nz
    integer(psb_lpk_), allocatable, intent(inout)  :: ia(:), ja(:)
    complex(psb_spk_), allocatable,  intent(inout)    :: val(:)
    integer(psb_lpk_), intent(in)                  :: nzin
    logical, intent(in)                  :: append
    integer(psb_ipk_) :: info
    integer(psb_lpk_), optional                    :: iren(:)
    integer(psb_lpk_) :: nzin_, nza, idx,i,j,k, nzt, irw, lrw, icl,lcl, nrd, ncd
    integer(psb_ipk_) :: debug_level, debug_unit
    character(len=20) :: name='coo_getrow'

    debug_unit  = psb_get_debug_unit()
    debug_level = psb_get_debug_level()

    nza = a%get_nzeros()
    irw = imin
    lrw = min(imax,a%get_nrows())
    icl = jmin
    lcl = min(jmax,a%get_ncols())
    if (irw<0) then
      info = psb_err_pivot_too_small_
      return
    end if

    if (append) then
      nzin_ = nzin
    else
      nzin_ = 0
    endif

    !
    ! This is a row-oriented routine, so the following is a
    ! good choice.
    !
    nzt = (a%irp(lrw+1)-a%irp(irw))
    nz = 0

    call psb_ensure_size(nzin_+nzt,ia,info)
    if (info == psb_success_) call psb_ensure_size(nzin_+nzt,ja,info)
    if (info == psb_success_) call psb_ensure_size(nzin_+nzt,val,info)

    if (info /= psb_success_) return

    if (present(iren)) then
      do i=irw, lrw
        do j=a%irp(i), a%irp(i+1) - 1
          if ((jmin <= a%ja(j)).and.(a%ja(j)<=jmax)) then
            nzin_ = nzin_ + 1
            nz    = nz + 1
            val(nzin_) = a%val(j)
            ia(nzin_)  = iren(i)
            ja(nzin_)  = iren(a%ja(j))
          end if
        enddo
      end do
    else
      do i=irw, lrw
        do j=a%irp(i), a%irp(i+1) - 1
          if ((jmin <= a%ja(j)).and.(a%ja(j)<=jmax)) then
            nzin_ = nzin_ + 1
            nz    = nz + 1
            val(nzin_) = a%val(j)
            ia(nzin_)  = (i)
            ja(nzin_)  = (a%ja(j))
          end if
        enddo
      end do
    end if

  end subroutine csr_getrow

end subroutine psb_lc_csr_csgetrow


!
! CSR implementation of tril/triu
!
subroutine psb_lc_csr_tril(a,l,info,&
     & diag,imin,imax,jmin,jmax,rscale,cscale,u)
  ! Output is always in  COO format
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_tril
  implicit none

  class(psb_lc_csr_sparse_mat), intent(in) :: a
  class(psb_lc_coo_sparse_mat), intent(out) :: l
  integer(psb_ipk_),intent(out)            :: info
  integer(psb_lpk_), intent(in), optional  :: diag,imin,imax,jmin,jmax
  logical, intent(in), optional            :: rscale,cscale
  class(psb_lc_coo_sparse_mat), optional, intent(out) :: u

  integer(psb_ipk_) :: err_act
  integer(psb_lpk_) :: nzin, nzout, i, j, k
  integer(psb_lpk_) :: imin_, imax_, jmin_, jmax_, mb,nb, diag_, nzlin, nzuin, nz
  integer(psb_ipk_) :: ierr(5)
  character(len=20)  :: name='tril'
  logical :: rscale_, cscale_
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_

  if (present(diag)) then
    diag_ = diag
  else
    diag_ = 0
  end if
  if (present(imin)) then
    imin_ = imin
  else
    imin_ = 1
  end if
  if (present(imax)) then
    imax_ = imax
  else
    imax_ = a%get_nrows()
  end if
  if (present(jmin)) then
    jmin_ = jmin
  else
    jmin_ = 1
  end if
  if (present(jmax)) then
    jmax_ = jmax
  else
    jmax_ = a%get_ncols()
  end if
  if (present(rscale)) then
    rscale_ = rscale
  else
    rscale_ = .true.
  end if
  if (present(cscale)) then
    cscale_ = cscale
  else
    cscale_ = .true.
  end if

  if (rscale_) then
    mb = imax_ - imin_ +1
  else
    mb = imax_
  endif
  if (cscale_) then
    nb = jmax_ - jmin_ +1
  else
    nb = jmax_
  endif


  nz = a%get_nzeros()
  call l%allocate(mb,nb,nz)

  if (present(u)) then
    nzlin = l%get_nzeros() ! At this point it should be 0
    call u%allocate(mb,nb,nz)
    nzuin = u%get_nzeros() ! At this point it should be 0
    associate(val =>a%val, ja => a%ja, irp=>a%irp)
      do i=imin_,imax_
        do k=irp(i),irp(i+1)-1
          j = ja(k)
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((ja(k)-i)<=diag_) then
              nzlin = nzlin + 1
              l%ia(nzlin)  = i
              l%ja(nzlin)  = ja(k)
              l%val(nzlin) = val(k)
            else
              nzuin = nzuin + 1
              u%ia(nzuin)  = i
              u%ja(nzuin)  = ja(k)
              u%val(nzuin) = val(k)
            end if
          end if
        end do
      end do
    end associate

    call l%set_nzeros(nzlin)
    call u%set_nzeros(nzuin)
    call u%fix(info)
    nzout = u%get_nzeros()
    if (rscale_) &
         & u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
    if (cscale_) &
         & u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1
    if ((diag_ >=-1).and.(imin_ == jmin_)) then
      call u%set_triangle(.true.)
      call u%set_lower(.false.)
    end if
  else
    nzin = l%get_nzeros() ! At this point it should be 0
    associate(val =>a%val, ja => a%ja, irp=>a%irp)
      do i=imin_,imax_
        do k=irp(i),irp(i+1)-1
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((ja(k)-i)<=diag_) then
              nzin = nzin + 1
              l%ia(nzin)  = i
              l%ja(nzin)  = ja(k)
              l%val(nzin) = val(k)
            end if
          end if
        end do
      end do
    end associate
    call l%set_nzeros(nzin)
  end if
  call l%fix(info)
  nzout = l%get_nzeros()
  if (rscale_) &
       & l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
  if (cscale_) &
       & l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1

  if ((diag_ <= 0).and.(imin_ == jmin_)) then
    call l%set_triangle(.true.)
    call l%set_lower(.true.)
  end if

  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_lc_csr_tril

subroutine psb_lc_csr_triu(a,u,info,&
     & diag,imin,imax,jmin,jmax,rscale,cscale,l)
  ! Output is always in  COO format
  use psb_error_mod
  use psb_const_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_triu
  implicit none

  class(psb_lc_csr_sparse_mat), intent(in) :: a
  class(psb_lc_coo_sparse_mat), intent(out) :: u
  integer(psb_ipk_),intent(out)            :: info
  integer(psb_lpk_), intent(in), optional  :: diag,imin,imax,jmin,jmax
  logical, intent(in), optional            :: rscale,cscale
  class(psb_lc_coo_sparse_mat), optional, intent(out) :: l

  integer(psb_ipk_) :: err_act
  integer(psb_lpk_) :: nzin, nzout, i, j, k
  integer(psb_lpk_) :: imin_, imax_, jmin_, jmax_, mb,nb, diag_, nzlin, nzuin, nz
  integer(psb_ipk_) :: ierr(5)
  character(len=20)  :: name='triu'
  logical :: rscale_, cscale_
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_

  if (present(diag)) then
    diag_ = diag
  else
    diag_ = 0
  end if
  if (present(imin)) then
    imin_ = imin
  else
    imin_ = 1
  end if
  if (present(imax)) then
    imax_ = imax
  else
    imax_ = a%get_nrows()
  end if
  if (present(jmin)) then
    jmin_ = jmin
  else
    jmin_ = 1
  end if
  if (present(jmax)) then
    jmax_ = jmax
  else
    jmax_ = a%get_ncols()
  end if
  if (present(rscale)) then
    rscale_ = rscale
  else
    rscale_ = .true.
  end if
  if (present(cscale)) then
    cscale_ = cscale
  else
    cscale_ = .true.
  end if

  if (rscale_) then
    mb = imax_ - imin_ +1
  else
    mb = imax_
  endif
  if (cscale_) then
    nb = jmax_ - jmin_ +1
  else
    nb = jmax_
  endif


  nz = a%get_nzeros()
  call u%allocate(mb,nb,nz)

  if (present(l)) then
    nzuin = u%get_nzeros() ! At this point it should be 0
    call l%allocate(mb,nb,nz)
    nzlin = l%get_nzeros() ! At this point it should be 0
    associate(val =>a%val, ja => a%ja, irp=>a%irp)
      do i=imin_,imax_
        do k=irp(i),irp(i+1)-1
          j = ja(k)
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((ja(k)-i)<diag_) then
              nzlin = nzlin + 1
              l%ia(nzlin)  = i
              l%ja(nzlin)  = ja(k)
              l%val(nzlin) = val(k)
            else
              nzuin = nzuin + 1
              u%ia(nzuin)  = i
              u%ja(nzuin)  = ja(k)
              u%val(nzuin) = val(k)
            end if
          end if
        end do
      end do
    end associate
    call u%set_nzeros(nzuin)
    call l%set_nzeros(nzlin)
    call l%fix(info)
    nzout = l%get_nzeros()
    if (rscale_) &
         & l%ia(1:nzout) = l%ia(1:nzout) - imin_ + 1
    if (cscale_) &
         & l%ja(1:nzout) = l%ja(1:nzout) - jmin_ + 1
    if ((diag_ <=1).and.(imin_ == jmin_)) then
      call l%set_triangle(.true.)
      call l%set_lower(.true.)
    end if
  else
    nzin = u%get_nzeros() ! At this point it should be 0
    associate(val =>a%val, ja => a%ja, irp=>a%irp)
      do i=imin_,imax_
        do k=irp(i),irp(i+1)-1
          if ((jmin_<=j).and.(j<=jmax_)) then
            if ((ja(k)-i)>=diag_) then
              nzin = nzin + 1
              u%ia(nzin)  = i
              u%ja(nzin)  = ja(k)
              u%val(nzin) = val(k)
            end if
          end if
        end do
      end do
    end associate
    call u%set_nzeros(nzin)
  end if
  call u%fix(info)
  nzout = u%get_nzeros()
  if (rscale_) &
       & u%ia(1:nzout) = u%ia(1:nzout) - imin_ + 1
  if (cscale_) &
       & u%ja(1:nzout) = u%ja(1:nzout) - jmin_ + 1

  if ((diag_ >= 0).and.(imin_ == jmin_)) then
    call u%set_triangle(.true.)
    call u%set_upper(.true.)
  end if

  if (info /= psb_success_) goto 9999

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_lc_csr_triu


subroutine psb_lc_csr_csput_a(nz,ia,ja,val,a,imin,imax,jmin,jmax,info)
  use psb_error_mod
  use psb_realloc_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_csput_a
  implicit none

  class(psb_lc_csr_sparse_mat), intent(inout) :: a
  complex(psb_spk_), intent(in)      :: val(:)
  integer(psb_lpk_), intent(in)             :: nz, ia(:), ja(:), imin,imax,jmin,jmax
  integer(psb_ipk_), intent(out)            :: info


  integer(psb_ipk_) :: err_act
  character(len=20)  :: name='lc_csr_csput_a'
  logical, parameter :: debug=.false.
  integer(psb_lpk_) :: nza, i,j,k, nzl, isza
  integer(psb_ipk_) :: debug_level, debug_unit


  call psb_erractionsave(err_act)
  info = psb_success_
  debug_unit  = psb_get_debug_unit()
  debug_level = psb_get_debug_level()

  if (nz <= 0) then
    info = psb_err_iarg_neg_;
    call psb_errpush(info,name,m_err=(/1/))
    goto 9999
  end if
  if (size(ia) < nz) then
    info = psb_err_input_asize_invalid_i_;
    call psb_errpush(info,name,m_err=(/2/))
    goto 9999
  end if

  if (size(ja) < nz) then
    info = psb_err_input_asize_invalid_i_;
    call psb_errpush(info,name,m_err=(/3/))
    goto 9999
  end if
  if (size(val) < nz) then
    info = psb_err_input_asize_invalid_i_;
    call psb_errpush(info,name,m_err=(/4/))
    goto 9999
  end if

  if (nz == 0) return
  if (a%is_dev())   call a%sync()

  nza  = a%get_nzeros()

  if (a%is_bld()) then
    ! Build phase should only ever be in COO
    info = psb_err_invalid_mat_state_

  else  if (a%is_upd()) then
    call  psb_lc_csr_srch_upd(nz,ia,ja,val,a,&
         & imin,imax,jmin,jmax,info)

    if (info < 0) then
      info = psb_err_internal_error_
    else if (info > 0) then
      if (debug_level >= psb_debug_serial_) &
           & write(debug_unit,*) trim(name),&
           & ': Discarded entries not  belonging to us.'
      info = psb_success_
    end if
    call a%set_host()

  else
    ! State is wrong.
    info = psb_err_invalid_mat_state_
  end if
  if (info /= psb_success_) then
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return


contains

  subroutine psb_lc_csr_srch_upd(nz,ia,ja,val,a,&
       & imin,imax,jmin,jmax,info)

    use psb_const_mod
    use psb_realloc_mod
    use psb_string_mod
    use psb_sort_mod
    implicit none

    class(psb_lc_csr_sparse_mat), intent(inout) :: a
    integer(psb_lpk_), intent(in) :: nz, imin,imax,jmin,jmax
    integer(psb_lpk_), intent(in) :: ia(:),ja(:)
    complex(psb_spk_), intent(in) :: val(:)
    integer(psb_ipk_), intent(out) :: info
    integer(psb_lpk_) :: i,ir,ic, ilr, ilc, ip, &
         & i1,i2,nr,nc,nnz
    integer(psb_ipk_) :: debug_level, debug_unit,dupl, inc
    character(len=20)    :: name='lc_csr_srch_upd'

    info = psb_success_
    debug_unit  = psb_get_debug_unit()
    debug_level = psb_get_debug_level()

    dupl = a%get_dupl()

    if (.not.a%is_sorted()) then
      info = -4
      return
    end if

    ilr = -1
    ilc = -1
    nnz = a%get_nzeros()
    nr  = a%get_nrows()
    nc  = a%get_ncols()

    select case(dupl)
    case(psb_dupl_ovwrt_,psb_dupl_err_)
      ! Overwrite.
      ! Cannot test for error, should have been caught earlier.

      ilr = -1
      ilc = -1
      do i=1, nz
        ir = ia(i)
        ic = ja(i)

        if ((ir > 0).and.(ir <= nr)) then

          i1 = a%irp(ir)
          i2 = a%irp(ir+1)
          nc=i2-i1
          inc = nc
          ip = psb_bsrch(ic,inc,a%ja(i1:i2-1))
          if (ip>0) then
            a%val(i1+ip-1) = val(i)
          else
            info = max(info,3)
          end if
        else
          info = max(info,2)
        end if
      end do

    case(psb_dupl_add_)
      ! Add
      ilr = -1
      ilc = -1
      do i=1, nz
        ir = ia(i)
        ic = ja(i)
        if ((ir > 0).and.(ir <= nr)) then
          i1 = a%irp(ir)
          i2 = a%irp(ir+1)
          nc = i2-i1
          inc = nc
          ip = psb_bsrch(ic,inc,a%ja(i1:i2-1))
          if (ip>0) then
            a%val(i1+ip-1) = a%val(i1+ip-1) + val(i)
          else
            info = max(info,3)
          end if
        else
          info = max(info,2)
        end if
      end do

    case default
      info = -3
      if (debug_level >= psb_debug_serial_) &
           & write(debug_unit,*) trim(name),&
           & ': Duplicate handling: ',dupl
    end select

  end subroutine psb_lc_csr_srch_upd

end subroutine psb_lc_csr_csput_a


subroutine psb_lc_csr_reinit(a,clear)
  use psb_error_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_reinit
  implicit none

  class(psb_lc_csr_sparse_mat), intent(inout) :: a
  logical, intent(in), optional :: clear

  integer(psb_ipk_) :: err_act, info
  character(len=20)  :: name='reinit'
  logical  :: clear_
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_

  if (a%is_dev())   call a%sync()

  if (present(clear)) then
    clear_ = clear
  else
    clear_ = .true.
  end if

  if (a%is_bld() .or. a%is_upd()) then
    ! do nothing
    return
  else if (a%is_asb()) then
    if (clear_) a%val(:) = czero
    call a%set_upd()
    call a%set_host()
  else
    info = psb_err_invalid_mat_state_
    call psb_errpush(info,name)
    goto 9999
  end if

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_lc_csr_reinit

subroutine  psb_lc_csr_trim(a)
  use psb_realloc_mod
  use psb_error_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_trim
  implicit none
  class(psb_lc_csr_sparse_mat), intent(inout) :: a
  integer(psb_lpk_) :: nz, m
  integer(psb_ipk_) :: err_act, info
  character(len=20)  :: name='trim'
  logical, parameter :: debug=.false.

  call psb_erractionsave(err_act)
  info = psb_success_
  m   = max(1_psb_lpk_,a%get_nrows())
  nz  = max(1_psb_lpk_,a%get_nzeros())
  if (info == psb_success_) call psb_realloc(m+1,a%irp,info)

  if (info == psb_success_) call psb_realloc(nz,a%ja,info)
  if (info == psb_success_) call psb_realloc(nz,a%val,info)

  if (info /= psb_success_) goto 9999
  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

end subroutine psb_lc_csr_trim

subroutine psb_lc_csr_print(iout,a,iv,head,ivr,ivc)
  use psb_string_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_print
  implicit none

  integer(psb_ipk_), intent(in)               :: iout
  class(psb_lc_csr_sparse_mat), intent(in) :: a
  integer(psb_lpk_), intent(in), optional     :: iv(:)
  character(len=*), optional        :: head
  integer(psb_lpk_), intent(in), optional     :: ivr(:), ivc(:)

  integer(psb_ipk_) :: err_act
  character(len=20)  :: name='lc_csr_print'
  logical, parameter :: debug=.false.
  character(len=80)                 :: frmt
  integer(psb_lpk_) :: irs,ics,i,j, ni, nr, nc, nz


  write(iout,'(a)') '%%MatrixMarket matrix coordinate complex general'
  if (present(head)) write(iout,'(a,a)') '% ',head
  write(iout,'(a)') '%'
  write(iout,'(a,a)') '% COO'

  if (a%is_dev())   call a%sync()

  nr = a%get_nrows()
  nc = a%get_ncols()
  nz = a%get_nzeros()
  frmt = psb_lc_get_print_frmt(nr,nc,nz,iv,ivr,ivc)

  write(iout,*) nr, nc, nz
  if(present(iv)) then
    do i=1, nr
      do j=a%irp(i),a%irp(i+1)-1
        write(iout,frmt) iv(i),iv(a%ja(j)),a%val(j)
      end do
    enddo
  else
    if (present(ivr).and..not.present(ivc)) then
      do i=1, nr
        do j=a%irp(i),a%irp(i+1)-1
          write(iout,frmt) ivr(i),(a%ja(j)),a%val(j)
        end do
      enddo
    else if (present(ivr).and.present(ivc)) then
      do i=1, nr
        do j=a%irp(i),a%irp(i+1)-1
          write(iout,frmt) ivr(i),ivc(a%ja(j)),a%val(j)
        end do
      enddo
    else if (.not.present(ivr).and.present(ivc)) then
      do i=1, nr
        do j=a%irp(i),a%irp(i+1)-1
          write(iout,frmt) (i),ivc(a%ja(j)),a%val(j)
        end do
      enddo
    else if (.not.present(ivr).and..not.present(ivc)) then
      do i=1, nr
        do j=a%irp(i),a%irp(i+1)-1
          write(iout,frmt) (i),(a%ja(j)),a%val(j)
        end do
      enddo
    endif
  endif

end subroutine psb_lc_csr_print


subroutine psb_lc_cp_csr_from_coo(a,b,info)
  use psb_const_mod
  use psb_realloc_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_cp_csr_from_coo
  implicit none

  class(psb_lc_csr_sparse_mat), intent(inout) :: a
  class(psb_lc_coo_sparse_mat), intent(in)    :: b
  integer(psb_ipk_), intent(out)               :: info

  type(psb_lc_coo_sparse_mat)   :: tmp
  integer(psb_lpk_), allocatable :: itemp(:)
  !locals
  logical             :: rwshr_
  integer(psb_lpk_) :: nza, nr, nc, i,j,k,ip,irw, ncl
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit, err_act
  character(len=20)   :: name='lc_cp_csr_from_coo'

  info = psb_success_
  debug_unit  = psb_get_debug_unit()
  debug_level = psb_get_debug_level()

  if (.not.b%is_by_rows()) then
    ! This is to have fix_coo called behind the scenes
    call tmp%cp_from_coo(b,info)
    if (info /= psb_success_) return

    nr  = tmp%get_nrows()
    nc  = tmp%get_ncols()
    nza = tmp%get_nzeros()

    a%psb_lc_base_sparse_mat = tmp%psb_lc_base_sparse_mat

    ! Dirty trick: call move_alloc to have the new data allocated just once.
    call move_alloc(tmp%ia,itemp)
    call move_alloc(tmp%ja,a%ja)
    call move_alloc(tmp%val,a%val)
    call psb_realloc(nr+1,a%irp,info)
    call tmp%free()

  else

    if (info /= psb_success_) return
    if (b%is_dev())   call b%sync()

    nr  = b%get_nrows()
    nc  = b%get_ncols()
    nza = b%get_nzeros()

    a%psb_lc_base_sparse_mat = b%psb_lc_base_sparse_mat

    ! Dirty trick: call move_alloc to have the new data allocated just once.
    call psb_safe_ab_cpy(b%ia,itemp,info)
    if (info == psb_success_) call psb_safe_ab_cpy(b%ja,a%ja,info)
    if (info == psb_success_) call psb_safe_ab_cpy(b%val,a%val,info)
    if (info == psb_success_) call psb_realloc(nr+1,a%irp,info)

  endif

  a%irp(:) = 0
  do k=1,nza
    i = itemp(k)
    a%irp(i) = a%irp(i) + 1
  end do
  ip = 1
  do i=1,nr
    ncl = a%irp(i)
    a%irp(i) = ip
    ip = ip + ncl
  end do
  a%irp(nr+1) = ip
  call a%set_host()


end subroutine psb_lc_cp_csr_from_coo



subroutine psb_lc_cp_csr_to_coo(a,b,info)
  use psb_const_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_cp_csr_to_coo
  implicit none

  class(psb_lc_csr_sparse_mat), intent(in)  :: a
  class(psb_lc_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)                      :: info

  integer(psb_lpk_), allocatable :: itemp(:)
  !locals
  logical             :: rwshr_
  integer(psb_lpk_) :: nza, nr, nc,i,j,irw
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit, err_act
  character(len=20)   :: name

  info = psb_success_

  if (a%is_dev())   call a%sync()
  nr  = a%get_nrows()
  nc  = a%get_ncols()
  nza = a%get_nzeros()

  call b%allocate(nr,nc,nza)
  b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat

  do i=1, nr
    do j=a%irp(i),a%irp(i+1)-1
      b%ia(j)  = i
      b%ja(j)  = a%ja(j)
      b%val(j) = a%val(j)
    end do
  end do
  call b%set_nzeros(a%get_nzeros())
  call b%set_sort_status(psb_row_major_)
  call b%set_asb()
  call b%set_host()

end subroutine psb_lc_cp_csr_to_coo


subroutine psb_lc_mv_csr_to_coo(a,b,info)
  use psb_const_mod
  use psb_realloc_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_mv_csr_to_coo
  implicit none

  class(psb_lc_csr_sparse_mat), intent(inout) :: a
  class(psb_lc_coo_sparse_mat), intent(inout)   :: b
  integer(psb_ipk_), intent(out)                        :: info

  integer(psb_lpk_), allocatable :: itemp(:)
  !locals
  logical             :: rwshr_
  integer(psb_lpk_) :: nza, nr, nc,i,j,k,irw
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit, err_act
  character(len=20)   :: name

  info = psb_success_

  if (a%is_dev())   call a%sync()
  nr  = a%get_nrows()
  nc  = a%get_ncols()
  nza = a%get_nzeros()

  b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat
  call b%set_nzeros(a%get_nzeros())
  call move_alloc(a%ja,b%ja)
  call move_alloc(a%val,b%val)
  call psb_realloc(nza,b%ia,info)
  if (info /= psb_success_) return
  do i=1, nr
    do j=a%irp(i),a%irp(i+1)-1
      b%ia(j)  = i
    end do
  end do
  call a%free()
  call b%set_sort_status(psb_row_major_)
  call b%set_asb()
  call b%set_host()

end subroutine psb_lc_mv_csr_to_coo



subroutine psb_lc_mv_csr_from_coo(a,b,info)
  use psb_const_mod
  use psb_realloc_mod
  use psb_error_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_mv_csr_from_coo
  implicit none

  class(psb_lc_csr_sparse_mat), intent(inout) :: a
  class(psb_lc_coo_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)                        :: info

  integer(psb_lpk_), allocatable :: itemp(:)
  !locals
  logical             :: rwshr_
  integer(psb_lpk_) :: nza, nr, nc, i,j,k, ip,irw, ncl
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit, err_act
  character(len=20)   :: name='mv_from_coo'

  info = psb_success_
  debug_unit  = psb_get_debug_unit()
  debug_level = psb_get_debug_level()

  if (b%is_dev())   call b%sync()

  if (.not.b%is_by_rows()) call b%fix(info)
  if (info /= psb_success_) return

  nr  = b%get_nrows()
  nc  = b%get_ncols()
  nza = b%get_nzeros()

  a%psb_lc_base_sparse_mat = b%psb_lc_base_sparse_mat

  ! Dirty trick: call move_alloc to have the new data allocated just once.
  call move_alloc(b%ia,itemp)
  call move_alloc(b%ja,a%ja)
  call move_alloc(b%val,a%val)
  call psb_realloc(nr+1,a%irp,info)
  call b%free()


  a%irp(:) = 0
  do k=1,nza
    i = itemp(k)
    a%irp(i) = a%irp(i) + 1
  end do
  ip = 1
  do i=1,nr
    ncl = a%irp(i)
    a%irp(i) = ip
    ip = ip + ncl
  end do
  a%irp(nr+1) = ip
  call a%set_host()

end subroutine psb_lc_mv_csr_from_coo


subroutine psb_lc_mv_csr_to_fmt(a,b,info)
  use psb_const_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_mv_csr_to_fmt
  implicit none

  class(psb_lc_csr_sparse_mat), intent(inout) :: a
  class(psb_lc_base_sparse_mat), intent(inout)  :: b
  integer(psb_ipk_), intent(out)                        :: info

  !locals
  type(psb_lc_coo_sparse_mat) :: tmp
  logical             :: rwshr_
  integer(psb_lpk_) :: nza, nr, i,j,irw, nc
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit, err_act
  character(len=20)   :: name

  info = psb_success_

  select type (b)
  type is (psb_lc_coo_sparse_mat)
    call a%mv_to_coo(b,info)
    ! Need to fix trivial copies!
  type is (psb_lc_csr_sparse_mat)
    if (a%is_dev())   call a%sync()
    b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat
    call move_alloc(a%irp, b%irp)
    call move_alloc(a%ja,  b%ja)
    call move_alloc(a%val, b%val)
    call a%free()
    call b%set_host()

  class default
    call a%mv_to_coo(tmp,info)
    if (info == psb_success_) call b%mv_from_coo(tmp,info)
  end select

end subroutine psb_lc_mv_csr_to_fmt


subroutine psb_lc_cp_csr_to_fmt(a,b,info)
  use psb_const_mod
  use psb_c_base_mat_mod
  use psb_realloc_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_cp_csr_to_fmt
  implicit none

  class(psb_lc_csr_sparse_mat), intent(in)   :: a
  class(psb_lc_base_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)                       :: info

  !locals
  type(psb_lc_coo_sparse_mat) :: tmp
  logical             :: rwshr_
  integer(psb_lpk_) :: nz, nr, i,j,irw, nc
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit, err_act
  character(len=20)   :: name

  info = psb_success_


  select type (b)
  type is (psb_lc_coo_sparse_mat)
    call a%cp_to_coo(b,info)

  type is (psb_lc_csr_sparse_mat)
    if (a%is_dev())   call a%sync()
    b%psb_lc_base_sparse_mat = a%psb_lc_base_sparse_mat
    nr = a%get_nrows()
    nz = a%get_nzeros()
    if (info == 0) call psb_safe_cpy( a%irp(1:nr+1), b%irp , info)
    if (info == 0) call psb_safe_cpy( a%ja(1:nz),    b%ja  , info)
    if (info == 0) call psb_safe_cpy( a%val(1:nz),   b%val , info)
    call b%set_host()

  class default
    call a%cp_to_coo(tmp,info)
    if (info == psb_success_) call b%mv_from_coo(tmp,info)
  end select

end subroutine psb_lc_cp_csr_to_fmt


subroutine psb_lc_mv_csr_from_fmt(a,b,info)
  use psb_const_mod
  use psb_c_base_mat_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_mv_csr_from_fmt
  implicit none

  class(psb_lc_csr_sparse_mat), intent(inout)  :: a
  class(psb_lc_base_sparse_mat), intent(inout) :: b
  integer(psb_ipk_), intent(out)                         :: info

  !locals
  type(psb_lc_coo_sparse_mat) :: tmp
  logical             :: rwshr_
  integer(psb_lpk_) :: nza, nr, i,j,irw, nc
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit, err_act
  character(len=20)   :: name

  info = psb_success_

  select type (b)
  type is (psb_lc_coo_sparse_mat)
    call a%mv_from_coo(b,info)

  type is (psb_lc_csr_sparse_mat)
    if (b%is_dev())   call b%sync()

    a%psb_lc_base_sparse_mat = b%psb_lc_base_sparse_mat
    call move_alloc(b%irp, a%irp)
    call move_alloc(b%ja,  a%ja)
    call move_alloc(b%val, a%val)
    call b%free()
    call a%set_host()

  class default
    call b%mv_to_coo(tmp,info)
    if (info == psb_success_) call a%mv_from_coo(tmp,info)
  end select

end subroutine psb_lc_mv_csr_from_fmt



subroutine psb_lc_cp_csr_from_fmt(a,b,info)
  use psb_const_mod
  use psb_c_base_mat_mod
  use psb_realloc_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_cp_csr_from_fmt
  implicit none

  class(psb_lc_csr_sparse_mat), intent(inout) :: a
  class(psb_lc_base_sparse_mat), intent(in)   :: b
  integer(psb_ipk_), intent(out)                        :: info

  !locals
  type(psb_lc_coo_sparse_mat) :: tmp
  logical             :: rwshr_
  integer(psb_lpk_) :: nz, nr, i,j,irw, nc
  integer(psb_ipk_), Parameter  :: maxtry=8
  integer(psb_ipk_) :: debug_level, debug_unit, err_act
  character(len=20)   :: name

  info = psb_success_

  select type (b)
  type is (psb_lc_coo_sparse_mat)
    call a%cp_from_coo(b,info)

  type is (psb_lc_csr_sparse_mat)
    if (b%is_dev())   call b%sync()
    a%psb_lc_base_sparse_mat = b%psb_lc_base_sparse_mat
    nr = b%get_nrows()
    nz = b%get_nzeros()
    if (info == 0) call psb_safe_cpy( b%irp(1:nr+1), a%irp , info)
    if (info == 0) call psb_safe_cpy( b%ja(1:nz)   , a%ja  , info)
    if (info == 0) call psb_safe_cpy( b%val(1:nz)  , a%val , info)
    call a%set_host()

  class default
    call b%cp_to_coo(tmp,info)
    if (info == psb_success_) call a%mv_from_coo(tmp,info)
  end select
end subroutine psb_lc_cp_csr_from_fmt


subroutine  psb_lc_csr_clean_zeros(a, info)
  use psb_error_mod
  use psb_c_csr_mat_mod, psb_protect_name => psb_lc_csr_clean_zeros
  implicit none
  class(psb_lc_csr_sparse_mat), intent(inout) :: a
  integer(psb_ipk_), intent(out) :: info
  !
  integer(psb_lpk_) :: i, j, k, nr
  integer(psb_lpk_), allocatable :: ilrp(:)

  info = 0
  call a%sync()
  nr   = a%get_nrows()
  ilrp = a%irp
  a%irp(1) = 1
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
      if (a%val(k) /= czero) then
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
      end if
    end do
    a%irp(i+1) = j
  end do
  call a%trim()
  call a%set_host()
end subroutine psb_lc_csr_clean_zeros

subroutine psb_lccsrspspmm(a,b,c,info)
  use psb_c_mat_mod
  use psb_serial_mod, psb_protect_name => psb_lccsrspspmm

  implicit none

  class(psb_lc_csr_sparse_mat), intent(in) :: a,b
  type(psb_lc_csr_sparse_mat), intent(out)  :: c
  integer(psb_ipk_), intent(out)                     :: info
  integer(psb_lpk_) :: ma,na,mb,nb, nzc, nza, nzb
  character(len=20) :: name
  integer(psb_ipk_) :: err_act
  name='psb_csrspspmm'
  call psb_erractionsave(err_act)
  info = psb_success_

  if (a%is_dev())   call a%sync()
  if (b%is_dev())   call b%sync()

  ma = a%get_nrows()
  na = a%get_ncols()
  mb = b%get_nrows()
  nb = b%get_ncols()


  if ( mb /= na ) then
    write(psb_err_unit,*) 'Mismatch in SPSPMM: ',ma,na,mb,nb
    info = psb_err_invalid_matrix_sizes_
    call psb_errpush(info,name)
    goto 9999
  endif

  nza = a%get_nzeros()
  nzb = b%get_nzeros()
  nzc = 2*(nza+nzb)
  call c%allocate(ma,nb,nzc)

  call csr_spspmm(a,b,c,info)

  call c%set_asb()
  call c%set_host()

  call psb_erractionrestore(err_act)
  return

9999 call psb_error_handler(err_act)

  return

contains

  subroutine csr_spspmm(a,b,c,info)
    implicit none
    type(psb_lc_csr_sparse_mat), intent(in)  :: a,b
    type(psb_lc_csr_sparse_mat), intent(inout) :: c
    integer(psb_ipk_), intent(out)          :: info
    integer(psb_lpk_)              :: ma,na,mb,nb
    integer(psb_lpk_), allocatable :: irow(:), idxs(:)
    complex(psb_spk_), allocatable    :: row(:)
    integer(psb_lpk_)              :: i,j,k,irw,icl,icf, iret, &
         & nzc,nnzre, isz, ipb, irwsz, nrc, nze
    complex(psb_spk_)                 :: cfb


    info = psb_success_
    ma = a%get_nrows()
    na = a%get_ncols()
    mb = b%get_nrows()
    nb = b%get_ncols()

    nze = min(size(c%val),size(c%ja))
    isz = max(ma,na,mb,nb)
    call psb_realloc(isz,row,info)
    if (info == 0) call psb_realloc(isz,idxs,info)
    if (info == 0) call psb_realloc(isz,irow,info)
    if (info /= 0) return
    row  = dzero
    irow = 0
    nzc  = 1
    do j = 1,ma
      c%irp(j) = nzc
      nrc = 0
      do k = a%irp(j), a%irp(j+1)-1
        irw = a%ja(k)
        cfb = a%val(k)
        irwsz = b%irp(irw+1)-b%irp(irw)
        do i = b%irp(irw),b%irp(irw+1)-1
          icl = b%ja(i)
          if (irow(icl)<j) then
            nrc = nrc + 1
            idxs(nrc) = icl
            irow(icl) = j
          end if
          row(icl)  = row(icl)  + cfb*b%val(i)
        end do
      end do
      if (nrc > 0 ) then
        if ((nzc+nrc)>nze) then
          nze = max(ma*((nzc+j-1)/j),nzc+2*nrc)
          call psb_realloc(nze,c%val,info)
          if (info == 0) call psb_realloc(nze,c%ja,info)
          if (info /= 0) return
        end if

        call psb_qsort(idxs(1:nrc))
        do i=1, nrc
          irw        = idxs(i)
          c%ja(nzc)  = irw
          c%val(nzc) = row(irw)
          row(irw)   = dzero
          nzc        = nzc + 1
        end do
      end if
    end do

    c%irp(ma+1) = nzc


  end subroutine csr_spspmm

end subroutine psb_lccsrspspmm