!!$ !!$ !!$ MLD2P4 version 2.0 !!$ MultiLevel Domain Decomposition Parallel Preconditioners Package !!$ based on PSBLAS (Parallel Sparse BLAS version 3.0) !!$ !!$ (C) Copyright 2008,2009,2010 !!$ !!$ Salvatore Filippone University of Rome Tor Vergata !!$ Alfredo Buttari CNRS-IRIT, Toulouse !!$ Pasqua D'Ambra ICAR-CNR, Naples !!$ Daniela di Serafino Second University of Naples !!$ !!$ Redistribution and use in source and binary forms, with or without !!$ modification, are permitted provided that the following conditions !!$ are met: !!$ 1. Redistributions of source code must retain the above copyright !!$ notice, this list of conditions and the following disclaimer. !!$ 2. Redistributions in binary form must reproduce the above copyright !!$ notice, this list of conditions, and the following disclaimer in the !!$ documentation and/or other materials provided with the distribution. !!$ 3. The name of the MLD2P4 group or the names of its contributors may !!$ not be used to endorse or promote products derived from this !!$ software without specific written permission. !!$ !!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS !!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED !!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR !!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS !!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR !!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF !!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS !!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN !!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) !!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE !!$ POSSIBILITY OF SUCH DAMAGE. !!$ !!$ ! File: mld_cilu0_fact.f90 ! ! Subroutine: mld_cilu0_fact ! Version: complex ! Contains: mld_cilu0_factint, ilu_copyin ! ! This routine computes either the ILU(0) or the MILU(0) factorization of ! the diagonal blocks of a distributed matrix. These factorizations are used ! to build the 'base preconditioner' (block-Jacobi preconditioner/solver, ! Additive Schwarz preconditioner) corresponding to a given level of a ! multilevel preconditioner. ! ! Details on the above factorizations can be found in ! Y. Saad, Iterative Methods for Sparse Linear Systems, Second Edition, ! SIAM, 2003, Chapter 10. ! ! The local matrix is stored into a and blck, as specified in the description ! of the arguments below. The storage format for both the L and U factors is CSR. ! The diagonal of the U factor is stored separately (actually, the inverse of the ! diagonal entries is stored; this is then managed in the solve stage associated ! to the ILU(0)/MILU(0) factorization). ! ! The routine copies and factors "on the fly" from a and blck into l (L factor), ! u (U factor, except its diagonal) and d (diagonal of U). ! ! This implementation of ILU(0)/MILU(0) is faster than the implementation in ! mld_diluk_fct (the latter routine performs the more general ILU(k)/MILU(k)). ! ! ! Arguments: ! ialg - integer, input. ! The type of incomplete factorization to be performed. ! The MILU(0) factorization is computed if ialg = 2 (= mld_milu_n_); ! the ILU(0) factorization otherwise. ! a - type(psb_cspmat_type), input. ! The sparse matrix structure containing the local matrix. ! Note that if the 'base' Additive Schwarz preconditioner ! has overlap greater than 0 and the matrix has not been reordered ! (see mld_as_bld), then a contains only the 'original' local part ! of the distributed matrix, i.e. the rows of the matrix held ! by the calling process according to the initial data distribution. ! l - type(psb_cspmat_type), input/output. ! The L factor in the incomplete factorization. ! Note: its allocation is managed by the calling routine mld_ilu_bld, ! hence it cannot be only intent(out). ! u - type(psb_cspmat_type), input/output. ! The U factor (except its diagonal) in the incomplete factorization. ! Note: its allocation is managed by the calling routine mld_ilu_bld, ! hence it cannot be only intent(out). ! d - complex(psb_spk_), dimension(:), input/output. ! The inverse of the diagonal entries of the U factor in the incomplete ! factorization. ! Note: its allocation is managed by the calling routine mld_ilu_bld, ! hence it cannot be only intent(out). ! info - integer, output. ! Error code. ! blck - type(psb_cspmat_type), input, optional, target. ! The sparse matrix structure containing the remote rows of the ! distributed matrix, that have been retrieved by mld_as_bld ! to build an Additive Schwarz base preconditioner with overlap ! greater than 0. If the overlap is 0 or the matrix has been reordered ! (see mld_fact_bld), then blck is empty. ! subroutine mld_cilu0_fact(ialg,a,l,u,d,info,blck,upd) use psb_sparse_mod use mld_inner_mod!, mld_protect_name => mld_cilu0_fact implicit none ! Arguments integer, intent(in) :: ialg type(psb_cspmat_type),intent(in) :: a type(psb_cspmat_type),intent(inout) :: l,u complex(psb_spk_), intent(inout) :: d(:) integer, intent(out) :: info type(psb_cspmat_type),intent(in), optional, target :: blck character, intent(in), optional :: upd ! Local variables integer :: l1, l2, m, err_act type(psb_cspmat_type), pointer :: blck_ type(psb_c_csr_sparse_mat) :: ll, uu character :: upd_ character(len=20) :: name, ch_err name='mld_cilu0_fact' info = psb_success_ call psb_erractionsave(err_act) ! ! Point to / allocate memory for the incomplete factorization ! if (present(blck)) then blck_ => blck else allocate(blck_,stat=info) if (info == psb_success_) call blck_%csall(0,0,info,1) if (info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='csall' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if endif if (present(upd)) then upd_ = psb_toupper(upd) else upd_ = 'F' end if m = a%get_nrows() + blck_%get_nrows() if ((m /= l%get_nrows()).or.(m /= u%get_nrows()).or.& & (m > size(d)) ) then write(0,*) 'Wrong allocation status for L,D,U? ',& & l%get_nrows(),size(d),u%get_nrows() info = -1 return end if call l%mv_to(ll) call u%mv_to(uu) ! ! Compute the ILU(0) or the MILU(0) factorization, depending on ialg ! call mld_cilu0_factint(ialg,a,blck_,& & d,ll%val,ll%ja,ll%irp,uu%val,uu%ja,uu%irp,l1,l2,upd_,info) if(info.ne.0) then info=psb_err_from_subroutine_ ch_err='mld_cilu0_factint' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if ! ! Store information on the L and U sparse matrices ! call l%mv_from(ll) call l%set_triangle() call l%set_unit() call l%set_lower() call u%mv_from(uu) call u%set_triangle() call u%set_unit() call u%set_upper() ! ! Nullify pointer / deallocate memory ! if (present(blck)) then blck_ => null() else call blck_%free() if(info.ne.0) then info=psb_err_from_subroutine_ ch_err='psb_sp_free' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if deallocate(blck_) endif call psb_erractionrestore(err_act) return 9999 continue call psb_erractionrestore(err_act) if (err_act.eq.psb_act_abort_) then call psb_error() return end if return contains ! ! Subroutine: mld_cilu0_factint ! Version: complex ! Note: internal subroutine of mld_cilu0_fact. ! ! This routine computes either the ILU(0) or the MILU(0) factorization of the ! diagonal blocks of a distributed matrix. ! These factorizations are used to build the 'base preconditioner' ! (block-Jacobi preconditioner/solver, Additive Schwarz ! preconditioner) corresponding to a given level of a multilevel preconditioner. ! ! The local matrix is stored into a and b, as specified in the ! description of the arguments below. The storage format for both the L and U ! factors is CSR. The diagonal of the U factor is stored separately (actually, ! the inverse of the diagonal entries is stored; this is then managed in the ! solve stage associated to the ILU(0)/MILU(0) factorization). ! ! The routine copies and factors "on the fly" from the sparse matrix structures a ! and b into the arrays laspk, uaspk, d (L, U without its diagonal, diagonal of U). ! ! ! Arguments: ! ialg - integer, input. ! The type of incomplete factorization to be performed. ! The ILU(0) factorization is computed if ialg = 1 (= mld_ilu_n_), ! the MILU(0) one if ialg = 2 (= mld_milu_n_); other values ! are not allowed. ! m - integer, output. ! The total number of rows of the local matrix to be factorized, ! i.e. ma+mb. ! ma - integer, input ! The number of rows of the local submatrix stored into a. ! a - type(psb_cspmat_type), input. ! The sparse matrix structure containing the local matrix. ! Note that, if the 'base' Additive Schwarz preconditioner ! has overlap greater than 0 and the matrix has not been reordered ! (see mld_fact_bld), then a contains only the 'original' local part ! of the distributed matrix, i.e. the rows of the matrix held ! by the calling process according to the initial data distribution. ! mb - integer, input. ! The number of rows of the local submatrix stored into b. ! b - type(psb_cspmat_type), input. ! The sparse matrix structure containing the remote rows of the ! distributed matrix, that have been retrieved by mld_as_bld ! to build an Additive Schwarz base preconditioner with overlap ! greater than 0. If the overlap is 0 or the matrix has been ! reordered (see mld_fact_bld), then b does not contain any row. ! d - complex(psb_spk_), dimension(:), output. ! The inverse of the diagonal entries of the U factor in the ! incomplete factorization. ! laspk - complex(psb_spk_), dimension(:), input/output. ! The entries of U are stored according to the CSR format. ! The L factor in the incomplete factorization. ! lia1 - integer, dimension(:), input/output. ! The column indices of the nonzero entries of the L factor, ! according to the CSR storage format. ! lia2 - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row ! of the L factor in laspk, according to the CSR storage format. ! uaspk - complex(psb_spk_), dimension(:), input/output. ! The U factor in the incomplete factorization. ! The entries of U are stored according to the CSR format. ! uia1 - integer, dimension(:), input/output. ! The column indices of the nonzero entries of the U factor, ! according to the CSR storage format. ! uia2 - integer, dimension(:), input/output. ! The indices identifying the first nonzero entry of each row ! of the U factor in uaspk, according to the CSR storage format. ! l1 - integer, output. ! The number of nonzero entries in laspk. ! l2 - integer, output. ! The number of nonzero entries in uaspk. ! info - integer, output. ! Error code. ! subroutine mld_cilu0_factint(ialg,a,b,& & d,lval,lja,lirp,uval,uja,uirp,l1,l2,upd,info) implicit none ! Arguments integer, intent(in) :: ialg type(psb_cspmat_type),intent(in) :: a,b integer,intent(inout) :: l1,l2,info integer, intent(inout) :: lja(:),lirp(:),uja(:),uirp(:) complex(psb_spk_), intent(inout) :: lval(:),uval(:),d(:) character, intent(in) :: upd ! Local variables integer :: i,j,k,l,low1,low2,kk,jj,ll, ktrw,err_act, m integer :: ma,mb complex(psb_spk_) :: dia,temp integer, parameter :: nrb=16 type(psb_c_coo_sparse_mat) :: trw integer :: int_err(5) character(len=20) :: name, ch_err name='mld_cilu0_factint' if(psb_get_errstatus().ne.0) return info=psb_success_ call psb_erractionsave(err_act) ma = a%get_nrows() mb = b%get_nrows() select case(ialg) case(mld_ilu_n_,mld_milu_n_) ! Ok case default info=psb_err_input_asize_invalid_i_ call psb_errpush(info,name,i_err=(/1,ialg,0,0,0/)) goto 9999 end select call trw%allocate(0,0,1) if(info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='psb_sp_all' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if m = ma+mb if (psb_toupper(upd) == 'F' ) then lirp(1) = 1 uirp(1) = 1 l1 = 0 l2 = 0 ! ! Cycle over the matrix rows ! do i = 1, m d(i) = szero if (i <= ma) then ! ! Copy the i-th local row of the matrix, stored in a, ! into lval/d(i)/uval ! call ilu_copyin(i,ma,a,i,1,m,l1,lja,lval,& & d(i),l2,uja,uval,ktrw,trw,upd) else ! ! Copy the i-th local row of the matrix, stored in b ! (as (i-ma)-th row), into lval/d(i)/uval ! call ilu_copyin(i-ma,mb,b,i,1,m,l1,lja,lval,& & d(i),l2,uja,uval,ktrw,trw,upd) endif lirp(i+1) = l1 + 1 uirp(i+1) = l2 + 1 dia = d(i) do kk = lirp(i), lirp(i+1) - 1 ! ! Compute entry l(i,k) (lower factor L) of the incomplete ! factorization ! temp = lval(kk) k = lja(kk) lval(kk) = temp*d(k) ! ! Update the rest of row i (lower and upper factors L and U) ! using l(i,k) ! low1 = kk + 1 low2 = uirp(i) ! updateloop: do jj = uirp(k), uirp(k+1) - 1 ! j = uja(jj) ! if (j < i) then ! ! search l(i,*) (i-th row of L) for a matching index j ! do ll = low1, lirp(i+1) - 1 l = lja(ll) if (l > j) then low1 = ll exit else if (l == j) then lval(ll) = lval(ll) - temp*uval(jj) low1 = ll + 1 cycle updateloop end if enddo else if (j == i) then ! ! j=i: update the diagonal ! dia = dia - temp*uval(jj) cycle updateloop ! else if (j > i) then ! ! search u(i,*) (i-th row of U) for a matching index j ! do ll = low2, uirp(i+1) - 1 l = uja(ll) if (l > j) then low2 = ll exit else if (l == j) then uval(ll) = uval(ll) - temp*uval(jj) low2 = ll + 1 cycle updateloop end if enddo end if ! ! If we get here we missed the cycle updateloop, which means ! that this entry does not match; thus we accumulate on the ! diagonal for MILU(0). ! if (ialg == mld_milu_n_) then dia = dia - temp*uval(jj) end if enddo updateloop enddo ! ! Check the pivot size ! if (abs(dia) < s_epstol) then ! ! Too small pivot: unstable factorization ! info = psb_err_pivot_too_small_ int_err(1) = i write(ch_err,'(g20.10)') abs(dia) call psb_errpush(info,name,i_err=int_err,a_err=ch_err) goto 9999 else ! ! Compute 1/pivot ! dia = cone/dia end if d(i) = dia ! ! Scale row i of upper triangle ! do kk = uirp(i), uirp(i+1) - 1 uval(kk) = uval(kk)*dia enddo enddo else write(0,*) 'Update not implemented ' info = 31 call psb_errpush(info,name,i_err=(/13,0,0,0,0/),a_err=upd) goto 9999 end if call trw%free() call psb_erractionrestore(err_act) return 9999 continue call psb_erractionrestore(err_act) if (err_act.eq.psb_act_abort_) then call psb_error() return end if return end subroutine mld_cilu0_factint ! ! Subroutine: ilu_copyin ! Version: complex ! Note: internal subroutine of mld_cilu0_fact ! ! This routine copies a row of a sparse matrix A, stored in the psb_sspmat_type ! data structure a, into the arrays laspk and uaspk and into the scalar variable ! dia, corresponding to the lower and upper triangles of A and to the diagonal ! entry of the row, respectively. The entries in lval and uval are stored ! according to the CSR format; the corresponding column indices are stored in ! the arrays lja and uja. ! ! If the sparse matrix is in CSR format, a 'straight' copy is performed; ! otherwise psb_sp_getblk is used to extract a block of rows, which is then ! copied into lval, dia, uval row by row, through successive calls to ! ilu_copyin. ! ! The routine is used by mld_cilu0_factint in the computation of the ILU(0)/MILU(0) ! factorization of a local sparse matrix. ! ! TODO: modify the routine to allow copying into output L and U that are ! already filled with indices; this would allow computing an ILU(k) pattern, ! then use the ILU(0) internal for subsequent calls with the same pattern. ! ! Arguments: ! i - integer, input. ! The local index of the row to be extracted from the ! sparse matrix structure a. ! m - integer, input. ! The number of rows of the local matrix stored into a. ! a - type(psb_cspmat_type), input. ! The sparse matrix structure containing the row to be copied. ! jd - integer, input. ! The column index of the diagonal entry of the row to be ! copied. ! jmin - integer, input. ! Minimum valid column index. ! jmax - integer, input. ! Maximum valid column index. ! The output matrix will contain a clipped copy taken from ! a(1:m,jmin:jmax). ! l1 - integer, input/output. ! Pointer to the last occupied entry of lval. ! lja - integer, dimension(:), input/output. ! The column indices of the nonzero entries of the lower triangle ! copied in lval row by row (see mld_dilu0_factint), according ! to the CSR storage format. ! lval - complex(psb_spk_), dimension(:), input/output. ! The array where the entries of the row corresponding to the ! lower triangle are copied. ! dia - complex(psb_spk_), output. ! The diagonal entry of the copied row. ! l2 - integer, input/output. ! Pointer to the last occupied entry of uval. ! uja - integer, dimension(:), input/output. ! The column indices of the nonzero entries of the upper triangle ! copied in uval row by row (see mld_dilu0_factint), according ! to the CSR storage format. ! uval - complex(psb_spk_), dimension(:), input/output. ! The array where the entries of the row corresponding to the ! upper triangle are copied. ! ktrw - integer, input/output. ! The index identifying the last entry taken from the ! staging buffer trw. See below. ! trw - type(psb_cspmat_type), input/output. ! A staging buffer. If the matrix A is not in CSR format, we use ! the psb_sp_getblk routine and store its output in trw; when we ! need to call psb_sp_getblk we do it for a block of rows, and then ! we consume them from trw in successive calls to this routine, ! until we empty the buffer. Thus we will make a call to psb_sp_getblk ! every nrb calls to copyin. If A is in CSR format it is unused. ! subroutine ilu_copyin(i,m,a,jd,jmin,jmax,l1,lja,lval,& & dia,l2,uja,uval,ktrw,trw,upd) use psb_sparse_mod implicit none ! Arguments type(psb_cspmat_type), intent(in) :: a type(psb_c_coo_sparse_mat), intent(inout) :: trw integer, intent(in) :: i,m,jd,jmin,jmax integer, intent(inout) :: ktrw,l1,l2 integer, intent(inout) :: lja(:), uja(:) complex(psb_spk_), intent(inout) :: lval(:), uval(:), dia character, intent(in) :: upd ! Local variables integer :: k,j,info,irb, nz integer, parameter :: nrb=40 character(len=20), parameter :: name='ilu_copyin' character(len=20) :: ch_err if (psb_get_errstatus() /= 0) return info=psb_success_ call psb_erractionsave(err_act) if (psb_toupper(upd) == 'F') then select type(aa => a%a) type is (psb_c_csr_sparse_mat) ! ! Take a fast shortcut if the matrix is stored in CSR format ! do j = aa%irp(i), aa%irp(i+1) - 1 k = aa%ja(j) ! write(0,*)'KKKKK',k if ((k < jd).and.(k >= jmin)) then l1 = l1 + 1 lval(l1) = aa%val(j) lja(l1) = k else if (k == jd) then dia = aa%val(j) else if ((k > jd).and.(k <= jmax)) then l2 = l2 + 1 uval(l2) = aa%val(j) uja(l2) = k end if enddo class default ! ! Otherwise use psb_sp_getblk, slower but able (in principle) of ! handling any format. In this case, a block of rows is extracted ! instead of a single row, for performance reasons, and these ! rows are copied one by one into lval, dia, uval, through ! successive calls to ilu_copyin. ! if ((mod(i,nrb) == 1).or.(nrb == 1)) then irb = min(m-i+1,nrb) call aa%csget(i,i+irb-1,trw,info) if(info /= psb_success_) then info=psb_err_from_subroutine_ ch_err='csget' call psb_errpush(info,name,a_err=ch_err) goto 9999 end if ktrw=1 end if nz = trw%get_nzeros() do if (ktrw > nz) exit if (trw%ia(ktrw) > i) exit k = trw%ja(ktrw) if ((k < jd).and.(k >= jmin)) then l1 = l1 + 1 lval(l1) = trw%val(ktrw) lja(l1) = k else if (k == jd) then dia = trw%val(ktrw) else if ((k > jd).and.(k <= jmax)) then l2 = l2 + 1 uval(l2) = trw%val(ktrw) uja(l2) = k end if ktrw = ktrw + 1 enddo end select else write(0,*) 'Update not implemented ' info = 31 call psb_errpush(info,name,i_err=(/13,0,0,0,0/),a_err=upd) goto 9999 end if call psb_erractionrestore(err_act) return 9999 continue call psb_erractionrestore(err_act) if (err_act.eq.psb_act_abort_) then call psb_error() return end if return end subroutine ilu_copyin end subroutine mld_cilu0_fact