Merged ilu(k) branch. Branch still open to play with ILU(P,T).

stopcriterion
Salvatore Filippone 17 years ago
parent 59cd953055
commit 1020eacd6f

@ -1,7 +1,14 @@
Changelog. A lot less detailed than usual, at least for past Changelog. A lot less detailed than usual, at least for past
history. history.
2007/10/12: Renamed all constants with MLD_ prefix. 2007/10/17: Merged ILU(K) into trunk.
2007/10/16: Fixed ILU(K), it now performs satisfactorily. Also updated
ILU(0) to be more legible.
2007/10/11: First working version of ILU(K). Still slow, there should
be room for improvement.
2007/10/09: Added benchmark code. 2007/10/09: Added benchmark code.
2007/10/09: Added MILU_N_. Beware: values for UMF_ etc. have been 2007/10/09: Added MILU_N_. Beware: values for UMF_ etc. have been

@ -22,6 +22,7 @@ F90OBJS=mld_dasmat_bld.o mld_dslu_bld.o mld_dumf_bld.o mld_dilu_fct.o\
mld_zbaseprec_bld.o mld_zdiagsc_bld.o mld_zaggrmap_bld.o \ mld_zbaseprec_bld.o mld_zdiagsc_bld.o mld_zaggrmap_bld.o \
mld_zprec_aply.o mld_zmlprec_aply.o mld_zslud_bld.o\ mld_zprec_aply.o mld_zmlprec_aply.o mld_zslud_bld.o\
mld_zbaseprec_aply.o mld_zbjac_aply.o mld_zaggrmat_asb.o\ mld_zbaseprec_aply.o mld_zbjac_aply.o mld_zaggrmat_asb.o\
mld_diluk_fct.o mld_ziluk_fct.o \
$(MPFOBJS) $(MPFOBJS)
COBJS=mld_slu_impl.o mld_umf_impl.o mld_zslu_impl.o mld_zumf_impl.o COBJS=mld_slu_impl.o mld_umf_impl.o mld_zslu_impl.o mld_zumf_impl.o
OBJS=$(F90OBJS) $(COBJS) $(MPFOBJS) $(MPCOBJS) $(MODOBJS) OBJS=$(F90OBJS) $(COBJS) $(MPFOBJS) $(MPCOBJS) $(MODOBJS)

@ -104,7 +104,6 @@ subroutine mld_dbjac_bld(a,desc_a,p,upd,info)
call psb_nullify_sp(blck) call psb_nullify_sp(blck)
call psb_nullify_sp(atmp) call psb_nullify_sp(atmp)
t1= psb_wtime()
if(debug) write(0,*)me,': calling mld_asmat_bld',& if(debug) write(0,*)me,': calling mld_asmat_bld',&
& p%iprcparm(mld_prec_type_),p%iprcparm(mld_n_ovr_) & p%iprcparm(mld_prec_type_),p%iprcparm(mld_n_ovr_)
@ -126,7 +125,6 @@ subroutine mld_dbjac_bld(a,desc_a,p,upd,info)
goto 9999 goto 9999
end if end if
t2= psb_wtime()
if (debug) write(0,*)me,': out of mld_asmat_bld' if (debug) write(0,*)me,': out of mld_asmat_bld'
if (debug) call psb_barrier(ictxt) if (debug) call psb_barrier(ictxt)
@ -170,7 +168,6 @@ subroutine mld_dbjac_bld(a,desc_a,p,upd,info)
end if end if
t3 = psb_wtime()
if (debugprt) then if (debugprt) then
call psb_barrier(ictxt) call psb_barrier(ictxt)
open(40+me) open(40+me)
@ -499,7 +496,6 @@ subroutine mld_dbjac_bld(a,desc_a,p,upd,info)
end select end select
t6 = psb_wtime()
call psb_sp_free(blck,info) call psb_sp_free(blck,info)
if(info/=0) then if(info/=0) then

@ -145,12 +145,37 @@ subroutine mld_dilu_bld(a,desc_a,p,upd,info,blck)
endif endif
select case(p%iprcparm(mld_sub_solve_))
case(mld_ilu_n_,mld_milu_n_,mld_ilu_t_)
!
! Need to decide what to do with ILU(T).
! Should we pack the functionality together with ILU(K)?
! Maybe for mental sanity, a separate routine would be better.
!
! Decide what to do with MILU(K)
! !
! Ok, factor the matrix. ! Ok, factor the matrix.
! !
t5 = psb_wtime() select case(p%iprcparm(mld_sub_fill_in_))
case(:-1)
! This is an error.
call psb_errpush(30,name,i_err=(/3,p%iprcparm(mld_sub_fill_in_),0,0,0/))
goto 9999
case(0)
call mld_ilu_fct(p%iprcparm(mld_sub_solve_),a,p%av(mld_l_pr_),p%av(mld_u_pr_),& call mld_ilu_fct(p%iprcparm(mld_sub_solve_),a,p%av(mld_l_pr_),p%av(mld_u_pr_),&
& p%d,info,blck=blck) & p%d,info,blck=blck)
case(1:)
call mld_iluk_fct(p%iprcparm(mld_sub_fill_in_),p%iprcparm(mld_sub_solve_),&
& a,p%av(mld_l_pr_),p%av(mld_u_pr_),p%d,info,blck=blck)
end select
case default
! If we end up here, something was wrong up in the call chain.
call psb_errpush(4000,name)
goto 9999
end select
if(info/=0) then if(info/=0) then
info=4010 info=4010
ch_err='mld_ilu_fct' ch_err='mld_ilu_fct'
@ -179,10 +204,7 @@ subroutine mld_dilu_bld(a,desc_a,p,upd,info,blck)
! ierr = MPE_Log_event( ifcte, 0, "st SIMPLE" ) ! ierr = MPE_Log_event( ifcte, 0, "st SIMPLE" )
t6 = psb_wtime()
! !
! write(0,'(i3,1x,a,3(1x,g18.9))') me,'renum/factor time',t3-t2,t6-t5
! if (me==0) write(0,'(a,3(1x,g18.9))') 'renum/factor time',t3-t2,t6-t5
if (psb_sp_getifld(psb_upd_,p%av(mld_u_pr_),info) /= psb_upd_perm_) then if (psb_sp_getifld(psb_upd_,p%av(mld_u_pr_),info) /= psb_upd_perm_) then
call psb_sp_trim(p%av(mld_u_pr_),info) call psb_sp_trim(p%av(mld_u_pr_),info)
@ -192,7 +214,6 @@ subroutine mld_dilu_bld(a,desc_a,p,upd,info,blck)
call psb_sp_trim(p%av(mld_l_pr_),info) call psb_sp_trim(p%av(mld_l_pr_),info)
endif endif
if (debug) write(0,*) me,'End of ilu_bld' if (debug) write(0,*) me,'End of ilu_bld'
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)

@ -167,72 +167,22 @@ contains
if(debug) write(0,*)'LUINT Done Allocating TRW' if(debug) write(0,*)'LUINT Done Allocating TRW'
lia2(1) = 1 lia2(1) = 1
uia2(1) = 1 uia2(1) = 1
l1=0 l1 = 0
l2=0 l2 = 0
m = ma+mb m = ma+mb
if(debug) write(0,*)'In DCSRLU Begin cycle',m,ma,mb if(debug) write(0,*)'In DCSRLU Begin cycle',m,ma,mb
do i = 1, ma do i = 1, m
if(debug) write(0,*)'LUINT: Loop index ',i,ma if(debug) write(0,*)'LUINT: Loop index ',i,ma
d(i) = 0.d0 d(i) = dzero
!
! Here we take a fast shortcut if possible, otherwise
! use spgtblk, slower but able (in principle) to handle
! anything.
!
if (toupper(a%fida)=='CSR') then
do j = a%ia2(i), a%ia2(i+1) - 1
k = a%ia1(j)
! write(0,*)'KKKKK',k
if ((k < i).and.(k >= 1)) then
l1 = l1 + 1
laspk(l1) = a%aspk(j)
lia1(l1) = k
else if (k == i) then
d(i) = a%aspk(j)
else if ((k > i).and.(k <= m)) then
l2 = l2 + 1
uaspk(l2) = a%aspk(j)
uia1(l2) = k
end if
enddo
if (i <= ma) then
call ilu_copyin(i,ma,a,i,m,l1,lia1,lia2,laspk,&
& d(i),l2,uia1,uia2,uaspk,ktrw,trw)
else else
call ilu_copyin(i-ma,mb,b,i,m,l1,lia1,lia2,laspk,&
if ((mod(i,nrb) == 1).or.(nrb==1)) then & d(i),l2,uia1,uia2,uaspk,ktrw,trw)
irb = min(ma-i+1,nrb) endif
call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1)
if(info.ne.0) then
info=4010
ch_err='psb_sp_getblk'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
ktrw=1
end if
do
if (ktrw > trw%infoa(psb_nnz_)) exit
if (trw%ia1(ktrw) > i) exit
k = trw%ia2(ktrw)
if ((k < i).and.(k >= 1)) then
l1 = l1 + 1
laspk(l1) = trw%aspk(ktrw)
lia1(l1) = k
else if (k == i) then
d(i) = trw%aspk(ktrw)
else if ((k > i).and.(k <= m)) then
l2 = l2 + 1
uaspk(l2) = trw%aspk(ktrw)
uia1(l2) = k
end if
ktrw = ktrw + 1
enddo
end if
!!$
lia2(i+1) = l1 + 1 lia2(i+1) = l1 + 1
uia2(i+1) = l2 + 1 uia2(i+1) = l2 + 1
@ -300,17 +250,17 @@ contains
! !
! Non singularity ! Non singularity
! !
if (dabs(dia) < epstol) then if (abs(dia) < epstol) then
! !
! Pivot too small: unstable factorization ! Pivot too small: unstable factorization
! !
info = 2 info = 2
int_err(1) = i int_err(1) = i
write(ch_err,'(g20.10)') dia write(ch_err,'(g20.10)') abs(dia)
call psb_errpush(info,name,i_err=int_err,a_err=ch_err) call psb_errpush(info,name,i_err=int_err,a_err=ch_err)
goto 9999 goto 9999
else else
dia = 1.d0/dia dia = done/dia
end if end if
d(i) = dia d(i) = dia
! write(6,*)'diag(',i,')=',d(i) ! write(6,*)'diag(',i,')=',d(i)
@ -320,35 +270,74 @@ contains
enddo enddo
enddo enddo
do i = ma+1, m call psb_sp_free(trw,info)
d(i) = 0.d0 if(info.ne.0) then
info=4010
ch_err='psb_sp_free'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(debug) write(0,*)'Leaving ilu_fct'
call psb_erractionrestore(err_act)
return
if (toupper(b%fida)=='CSR') then 9999 continue
call psb_erractionrestore(err_act)
if (err_act.eq.psb_act_abort_) then
call psb_error()
return
end if
return
end subroutine mld_dilu_fctint
do j = b%ia2(i-ma), b%ia2(i-ma+1) - 1 subroutine ilu_copyin(i,m,a,jd,jmax,l1,lia1,lia2,laspk,&
k = b%ia1(j) & dia,l2,uia1,uia2,uaspk,ktrw,trw)
! if (me.eq.2) write(0,*)'ecco k=',k use psb_base_mod
if ((k < i).and.(k >= 1)) then implicit none
type(psb_dspmat_type) :: a,trw
integer :: i,m,ktrw,jd,jmax,l1,l2
integer :: lia1(:),lia2(:),uia1(:),uia2(:)
real(kind(1.d0)) :: laspk(:), uaspk(:), dia
type(psb_int_heap) :: heap
integer :: k,j,info,irb
integer, parameter :: nrb=16
character(len=20), parameter :: name='mld_dilu_fctint'
character(len=20) :: ch_err
if (psb_get_errstatus() /= 0) return
info=0
call psb_erractionsave(err_act)
!
! Here we take a fast shortcut if possible, otherwise
! use spgtblk, slower but able (in principle) of
! handling anything.
!
if (toupper(a%fida)=='CSR') then
do j = a%ia2(i), a%ia2(i+1) - 1
k = a%ia1(j)
! write(0,*)'KKKKK',k
if ((k < jd).and.(k >= 1)) then
l1 = l1 + 1 l1 = l1 + 1
laspk(l1) = b%aspk(j) laspk(l1) = a%aspk(j)
lia1(l1) = k lia1(l1) = k
! if(me.eq.2) write(0,*)'scrivo l' else if (k == jd) then
else if (k == i) then dia = a%aspk(j)
d(i) = b%aspk(j) else if ((k > jd).and.(k <= jmax)) then
else if ((k > i).and.(k <= m)) then
l2 = l2 + 1 l2 = l2 + 1
uaspk(l2) = b%aspk(j) uaspk(l2) = a%aspk(j)
! write(0,*)'KKKKK',k
uia1(l2) = k uia1(l2) = k
end if end if
enddo enddo
else else
if ((mod((i-ma),nrb) == 1).or.(nrb==1)) then if ((mod(i,nrb) == 1).or.(nrb==1)) then
irb = min(m-i+1,nrb) irb = min(m-i+1,nrb)
call psb_sp_getblk(i-ma,b,trw,info,lrw=i-ma+irb-1) call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1)
if(info.ne.0) then if(info.ne.0) then
info=4010 info=4010
ch_err='psb_sp_getblk' ch_err='psb_sp_getblk'
@ -362,14 +351,13 @@ contains
if (ktrw > trw%infoa(psb_nnz_)) exit if (ktrw > trw%infoa(psb_nnz_)) exit
if (trw%ia1(ktrw) > i) exit if (trw%ia1(ktrw) > i) exit
k = trw%ia2(ktrw) k = trw%ia2(ktrw)
! write(0,*)'KKKKK',k if ((k < jd).and.(k >= 1)) then
if ((k < i).and.(k >= 1)) then
l1 = l1 + 1 l1 = l1 + 1
laspk(l1) = trw%aspk(ktrw) laspk(l1) = trw%aspk(ktrw)
lia1(l1) = k lia1(l1) = k
else if (k == i) then else if (k == jd) then
d(i) = trw%aspk(ktrw) dia = trw%aspk(ktrw)
else if ((k > i).and.(k <= m)) then else if ((k > jd).and.(k <= jmax)) then
l2 = l2 + 1 l2 = l2 + 1
uaspk(l2) = trw%aspk(ktrw) uaspk(l2) = trw%aspk(ktrw)
uia1(l2) = k uia1(l2) = k
@ -377,100 +365,7 @@ contains
ktrw = ktrw + 1 ktrw = ktrw + 1
enddo enddo
endif
lia2(i+1) = l1 + 1
uia2(i+1) = l2 + 1
dia = d(i)
do kk = lia2(i), lia2(i+1) - 1
!
! compute element alo(i,k) of incomplete factorization
!
temp = laspk(kk)
k = lia1(kk)
laspk(kk) = temp*d(k)
! update the rest of row i using alo(i,k)
low1 = kk + 1
low2 = uia2(i)
updateloopb: do jj = uia2(k), uia2(k+1) - 1
j = uia1(jj)
!
if (j < i) then
! search alo(i,*) for matching index J
do ll = low1, lia2(i+1) - 1
l = lia1(ll)
if (l > j) then
low1 = ll
exit
else if (l == j) then
laspk(ll) = laspk(ll) - temp*uaspk(jj)
low1 = ll + 1
cycle updateloopb
end if
enddo
!
else if (j == i) then
! j=i update diagonal
dia = dia - temp*uaspk(jj)
cycle updateloopb
!
else if (j > i) then
! search aup(i,*) for matching index j
do ll = low2, uia2(i+1) - 1
l = uia1(ll)
if (l > j) then
low2 = ll
exit
else if (l == j) then
uaspk(ll) = uaspk(ll) - temp*uaspk(jj)
low2 = ll + 1
cycle updateloopb
end if
enddo
end if
!
! If we get here we missed the cycle updateloop,
! which means that this entry does not match; thus
! we take it out of diagonal for MILU.
!
if (ialg == mld_milu_n_) then
dia = dia - temp*uaspk(jj)
end if end if
enddo updateloopb
enddo
!
!
! Non singularity
!
if (dabs(dia) < epstol) then
!
! Pivot too small: unstable factorization
!
int_err(1) = i
write(ch_err,'(g20.10)') dia
info = 2
call psb_errpush(info,name,i_err=int_err,a_err=ch_err)
goto 9999
else
dia = 1.d0/dia
end if
d(i) = dia
! Scale row i of upper triangle
do kk = uia2(i), uia2(i+1) - 1
uaspk(kk) = uaspk(kk)*dia
enddo
enddo
call psb_sp_free(trw,info)
if(info.ne.0) then
info=4010
ch_err='psb_sp_free'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(debug) write(0,*)'Leaving ilu_fct'
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -482,5 +377,6 @@ contains
return return
end if end if
return return
end subroutine mld_dilu_fctint end subroutine ilu_copyin
end subroutine mld_dilu_fct end subroutine mld_dilu_fct

@ -0,0 +1,500 @@
!!$
!!$
!!$ MD2P4
!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS
!!$ for
!!$ Parallel Sparse BLAS v2.0
!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata
!!$ Alfredo Buttari University of Rome Tor Vergata
!!$ Daniela di Serafino Second University of Naples
!!$ Pasqua D'Ambra ICAR-CNR
!!$
!!$ Redistribution and use in source and binary forms, with or without
!!$ modification, are permitted provided that the following conditions
!!$ are met:
!!$ 1. Redistributions of source code must retain the above copyright
!!$ notice, this list of conditions and the following disclaimer.
!!$ 2. Redistributions in binary form must reproduce the above copyright
!!$ notice, this list of conditions, and the following disclaimer in the
!!$ documentation and/or other materials provided with the distribution.
!!$ 3. The name of the MD2P4 group or the names of its contributors may
!!$ not be used to endorse or promote products derived from this
!!$ software without specific written permission.
!!$
!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS
!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
!!$ POSSIBILITY OF SUCH DAMAGE.
!!$
!!$
subroutine mld_diluk_fct(fill_in,ialg,a,l,u,d,info,blck)
!
! This routine copies and factors "on the fly" from A and BLCK
! into L/D/U.
!
!
use psb_base_mod
use mld_prec_mod, mld_protect_name => mld_diluk_fct
implicit none
! .. Scalar Arguments ..
integer, intent(in) :: fill_in, ialg
integer, intent(out) :: info
! .. Array Arguments ..
type(psb_dspmat_type),intent(in) :: a
type(psb_dspmat_type),intent(inout) :: l,u
type(psb_dspmat_type),intent(in), optional, target :: blck
real(kind(1.d0)), intent(inout) :: d(:)
! .. Local Scalars ..
real(kind(1.d0)) :: dia, temp
integer :: i, j, jj, k, kk, l1, l2, ll, low1, low2,m,ma,err_act
type(psb_dspmat_type), pointer :: blck_
character(len=20) :: name, ch_err
logical, parameter :: debug=.false.
name='mld_diluk_fct'
info = 0
call psb_erractionsave(err_act)
! .. Executable Statements ..
!
if (debug) write(0,*) 'mld_diluk_fct: start'
if (present(blck)) then
blck_ => blck
else
allocate(blck_,stat=info)
if (info /= 0) then
call psb_errpush(4010,name,a_err='Allocate')
goto 9999
end if
call psb_sp_all(0,0,blck_,1,info)
if (info /= 0) then
info=4010
ch_err='psb_sp_all'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
endif
if (debug) write(0,*) 'mld_diluk_fct: calling fctint'
call mld_diluk_fctint(fill_in,ialg,m,a%m,a,blck_%m,blck_,&
& d,l%aspk,l%ia1,l%ia2,u%aspk,u%ia1,u%ia2,l1,l2,info)
if (info /= 0) then
info=4010
ch_err='mld_diluk_fctint'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
l%infoa(1) = l1
l%fida = 'CSR'
l%descra = 'TLU'
u%infoa(1) = l2
u%fida = 'CSR'
u%descra = 'TUU'
l%m = m
l%k = m
u%m = m
u%k = m
if (present(blck)) then
blck_ => null()
else
call psb_sp_free(blck_,info)
if (info /= 0) then
info=4010
ch_err='psb_sp_free'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
deallocate(blck_)
endif
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act.eq.psb_act_abort_) then
call psb_error()
return
end if
return
contains
subroutine mld_diluk_fctint(fill_in,ialg,m,ma,a,mb,b,&
& d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info)
use psb_base_mod
implicit none
integer, intent(in) :: fill_in, ialg
type(psb_dspmat_type) :: a,b
integer :: m,ma,mb,l1,l2,info
integer, dimension(:), allocatable :: lia1,lia2,uia1,uia2
real(kind(1.d0)), dimension(:), allocatable :: laspk,uaspk
real(kind(1.d0)), dimension(:) :: d
integer :: i,j,k,l,low1,low2,kk,jj,ll, ktrw,err_act, &
& isz,minj,maxj,lrwk,nidx
real(kind(1.d0)) :: dia,temp,rwk
integer, allocatable :: uplevs(:), rowlevs(:),idxs(:)
real(kind(1.d0)), allocatable :: row(:)
type(psb_int_heap) :: heap
logical,parameter :: debug=.false.
type(psb_dspmat_type) :: trw
integer :: int_err(5)
character(len=20), parameter :: name='mld_diluk_fctint'
character(len=20) :: ch_err
if (psb_get_errstatus() /= 0) return
info=0
call psb_erractionsave(err_act)
m = ma+mb
!
! Temp buffer for copyin function.
!
if (debug) write(0,*)'LUINT Allocating TRW'
call psb_sp_all(0,0,trw,1,info)
if (info==0) call psb_ensure_size(m+1,lia2,info)
if (info==0) call psb_ensure_size(m+1,uia2,info)
if (info /= 0) then
info=4010
call psb_errpush(info,name,a_err='psb_sp_all')
goto 9999
end if
if (debug) write(0,*)'LUINT Done Allocating TRW'
l1=0
l2=0
lia2(1) = 1
uia2(1) = 1
if (debug) write(0,*)'In DCSRLU Begin cycle',m,ma,mb
! Initial allocation
allocate(uplevs(size(uaspk)),rowlevs(m),row(m),stat=info)
if (info /= 0) then
info=4010
call psb_errpush(info,name,a_err='Allocate')
goto 9999
end if
uplevs(:) = m+1
row(:) = dzero
rowlevs(:) = m+1
do i = 1, m
if (debug.and.(mod(i,500)==1)) write(0,*)'LUINT: Loop index ',i,ma,minj,maxj
if (debug) write(0,*)'LUINT: Loop index ',i,ma
!
! At each iteration of the loop we keep the indices affected in a heap
! initialized and filled in the copyin function, and updated during
! the elimination. The heap is ideal because at each step we need the
! lowest index, but we also need to insert new items, and the heap allows
! to do both in log time.
!
d(i) = dzero
if (i<=ma) then
call iluk_copyin(i,ma,a,m,row,rowlevs,heap,ktrw,trw)
else
call iluk_copyin(i-ma,mb,b,m,row,rowlevs,heap,ktrw,trw)
endif
if (debug) write(0,*)'LUINT: input Copy done',minj,maxj
! Do an elimination step on current row
! Turns out we only need to keep track of levels
! for the upper triangle, hence no uplevs variable.
!
call iluk_fact(fill_in,i,m,row,rowlevs,heap,&
& d,uia1,uia2,uaspk,uplevs,nidx,idxs)
!
! Copy the row into the lower/diag/upper structures.
!
call iluk_copyout(fill_in,ialg,i,m,row,rowlevs,nidx,idxs,&
& l1,l2,lia1,lia2,laspk,d,uia1,uia2,uaspk,uplevs)
end do
!
! And we're done......hopefully :-)
!
deallocate(uplevs,rowlevs,row,stat=info)
if (info /= 0) then
info=4010
call psb_errpush(info,name,a_err='Deallocate')
goto 9999
end if
if (info == 0) call psb_sp_free(trw,info)
if (info /= 0) then
info=4010
ch_err='psb_sp_free'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if (debug) write(0,*)'Leaving ilu_fct'
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act.eq.psb_act_abort_) then
call psb_error()
return
end if
return
end subroutine mld_diluk_fctint
subroutine iluk_copyin(i,m,a,jmax,row,rowlevs,heap,ktrw,trw)
use psb_base_mod
implicit none
type(psb_dspmat_type) :: a,trw
integer :: i, rowlevs(:),m,ktrw,jmax
real(kind(1.d0)) :: row(:)
type(psb_int_heap) :: heap
integer :: k,j,info,irb
integer, parameter :: nrb=16
character(len=20), parameter :: name='mld_diluk_fctint'
character(len=20) :: ch_err
if (psb_get_errstatus() /= 0) return
info=0
call psb_erractionsave(err_act)
!
! Here we take a fast shortcut if possible, otherwise
! use spgtblk, slower but able (in principle) to handle
! anything.
!
if (toupper(a%fida)=='CSR') then
call psb_init_heap(heap,info)
do j = a%ia2(i), a%ia2(i+1) - 1
k = a%ia1(j)
if ((1<=k).and.(k<=jmax)) then
row(k) = a%aspk(j)
rowlevs(k) = 0
call psb_insert_heap(k,heap,info)
end if
end do
else
if ((mod(i,nrb) == 1).or.(nrb==1)) then
irb = min(m-i+1,nrb)
call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1)
if (info /= 0) then
info=4010
ch_err='psb_sp_getblk'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
ktrw=1
end if
do
if (ktrw > trw%infoa(psb_nnz_)) exit
if (trw%ia1(ktrw) > i) exit
k = trw%ia2(ktrw)
if ((1<=k).and.(k<=jmax)) then
row(k) = trw%aspk(ktrw)
rowlevs(k) = 0
call psb_insert_heap(k,heap,info)
end if
ktrw = ktrw + 1
enddo
end if
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act.eq.psb_act_abort_) then
call psb_error()
return
end if
return
end subroutine iluk_copyin
subroutine iluk_fact(fill_in,i,m,row,rowlevs,heap,d,uia1,uia2,uaspk,uplevs,nidx,idxs)
use psb_base_mod
implicit none
type(psb_dspmat_type) :: a
type(psb_int_heap) :: heap
integer :: i,m, rowlevs(:),minj,maxj,fill_in,nidx
integer, allocatable :: idxs(:)
integer :: uia1(:),uia2(:),uplevs(:)
real(kind(1.d0)) :: row(:), uaspk(:),d(:)
integer :: k,j,lrwk,jj,info, lastk
real(kind(1.d0)) :: rwk
! Do an elimination step on current row
! Turns out we only need to keep track of levels
! for the upper triangle.
if (.not.allocated(idxs)) then
allocate(idxs(200),stat=info)
endif
nidx = 0
lastk = -1
!
! Do while there are indices to be processed
!
do
call psb_heap_get_first(k,heap,info)
if (info < 0) exit
!
! An index may have been put on the heap more than once.
!
if (k == lastk) cycle
lastk = k
nidx = nidx + 1
if (nidx>size(idxs)) then
call psb_realloc(nidx+psb_heap_resize,idxs,info)
end if
idxs(nidx) = k
if ((row(k) /= dzero).and.(rowlevs(k) <= fill_in).and.(k<i)) then
!
! Note: since U is scaled while copying out, we can use rwk
! in the update below
!
rwk = row(k)
row(k) = row(k) * d(k) ! d(k) == 1/a(k,k)
lrwk = rowlevs(k)
do jj=uia2(k),uia2(k+1)-1
j = uia1(jj)
if (j<=k) then
write(0,*) 'Error in accessing upper mat???',j,k,jj
endif
!
! Insert the index for further processing.
! Is there a sensible way to prune the insertion?
!
call psb_insert_heap(j,heap,info)
row(j) = row(j) - rwk * uaspk(jj)
rowlevs(j) = min(rowlevs(j),lrwk+uplevs(jj)+1)
end do
end if
end do
end subroutine iluk_fact
subroutine iluk_copyout(fill_in,ialg,i,m,row,rowlevs,nidx,idxs,&
& l1,l2,lia1,lia2,laspk,d,uia1,uia2,uaspk,uplevs)
use psb_base_mod
implicit none
integer :: fill_in,ialg,i, rowlevs(:),minj,maxj,l1,l2,m,nidx,idxs(:)
integer, allocatable :: uia1(:),uia2(:), lia1(:),lia2(:),uplevs(:)
real(kind(1.d0)),allocatable :: uaspk(:), laspk(:)
real(kind(1.d0)) :: row(:), d(:)
integer :: k,j,isz,info,err_act,int_err(5),idxp
character(len=20), parameter :: name='mld_diluk_fctint'
character(len=20) :: ch_err
if (psb_get_errstatus() /= 0) return
info=0
call psb_erractionsave(err_act)
!
! Copy the row into the lower/diag/upper structures.
!
! Lower part
d(i) = dzero
do idxp=1,nidx
j = idxs(idxp)
if (j<i) then
if (rowlevs(j) <= fill_in) then
l1 = l1 + 1
if (size(laspk) < l1) then
! Figure out a good reallocation size!!
isz = (max((l1/i)*m,int(1.2*l1),l1+100))
call psb_realloc(isz,laspk,info)
if (info == 0) call psb_realloc(isz,lia1,info)
if (info /= 0) then
info=4010
call psb_errpush(info,name,a_err='Allocate')
goto 9999
end if
end if
lia1(l1) = j
laspk(l1) = row(j)
else if (ialg == mld_milu_n_) then
d(i) = d(i) + row(j)
end if
row(j) = dzero
rowlevs(j) = m+1
else if (j==i) then
d(i) = d(i) + row(i)
row(i) = dzero
rowlevs(i) = m+1
else if (j>i) then
! Upper part
if (rowlevs(j) <= fill_in) then
l2 = l2 + 1
if (size(uaspk) < l2) then
! Figure out a good reallocation size!!
isz = max((l2/i)*m,int(1.2*l2),l2+100)
call psb_realloc(isz,uaspk,info)
if (info == 0) call psb_realloc(isz,uia1,info)
if (info == 0) call psb_realloc(isz,uplevs,info,pad=(m+1))
if (info /= 0) then
info=4010
call psb_errpush(info,name,a_err='Allocate')
goto 9999
end if
end if
uia1(l2) = j
uaspk(l2) = row(j)
uplevs(l2) = rowlevs(j)
else if (ialg == mld_milu_n_) then
d(i) = d(i) + row(j)
end if
row(j) = dzero
rowlevs(j) = m+1
end if
end do
lia2(i+1) = l1 + 1
uia2(i+1) = l2 + 1
if (abs(d(i)) < epstol) then
!
! Pivot too small: unstable factorization
!
info = 2
int_err(1) = i
write(ch_err,'(g20.10)') d(i)
call psb_errpush(info,name,i_err=int_err,a_err=ch_err)
goto 9999
else
d(i) = done/d(i)
end if
! Now scale upper part
do j=uia2(i), uia2(i+1)-1
uaspk(j) = d(i)*uaspk(j)
end do
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act.eq.psb_act_abort_) then
call psb_error()
return
end if
end subroutine iluk_copyout
end subroutine mld_diluk_fct

@ -84,7 +84,7 @@ subroutine mld_dmlprec_bld(a,desc_a,p,info)
case(mld_ilu_n_) case(mld_ilu_n_)
call mld_check_def(p%iprcparm(mld_sub_fill_in_),'Level',0,is_legal_ml_lev) call mld_check_def(p%iprcparm(mld_sub_fill_in_),'Level',0,is_legal_ml_lev)
case(mld_ilu_t_) case(mld_ilu_t_)
call mld_check_def(p%dprcparm(mld_fact_eps_),'Eps',dzero,is_legal_ml_eps) call mld_check_def(p%dprcparm(mld_fact_thrs_),'Eps',dzero,is_legal_fact_thrs)
end select end select
call mld_check_def(p%dprcparm(mld_aggr_damp_),'omega',dzero,is_legal_omega) call mld_check_def(p%dprcparm(mld_aggr_damp_),'omega',dzero,is_legal_omega)
call mld_check_def(p%iprcparm(mld_smooth_sweeps_),'Jacobi sweeps',& call mld_check_def(p%iprcparm(mld_smooth_sweeps_),'Jacobi sweeps',&

@ -57,7 +57,7 @@ subroutine mld_dprecinit(p,ptype,info,nlev)
endif endif
select case(toupper(ptype(1:len_trim(ptype)))) select case(toupper(ptype(1:len_trim(ptype))))
case ('NOPREC') case ('NONE','NOPREC')
nlev_ = 1 nlev_ = 1
ilev_ = 1 ilev_ = 1
allocate(p%baseprecv(nlev_),stat=info) allocate(p%baseprecv(nlev_),stat=info)

@ -206,13 +206,15 @@ subroutine mld_dprecsetd(p,what,val,info,ilev)
select case(what) select case(what)
! Right now we don't have any at base level. Will change when ! Right now we don't have any at base level. Will change when
! we implement mld_ilu_t_ ! we implement mld_ilu_t_
case(mld_fact_thrs_)
p%baseprecv(ilev_)%dprcparm(what) = val
case default case default
write(0,*) 'Error: trying to call PRECSET with an invalid WHAT' write(0,*) 'Error: trying to call PRECSET with an invalid WHAT'
info = -2 info = -2
end select end select
else if (ilev_ > 1) then else if (ilev_ > 1) then
select case(what) select case(what)
case(mld_aggr_damp_) case(mld_aggr_damp_,mld_fact_thrs_)
p%baseprecv(ilev_)%dprcparm(what) = val p%baseprecv(ilev_)%dprcparm(what) = val
case default case default
write(0,*) 'Error: trying to call PRECSET with an invalid WHAT' write(0,*) 'Error: trying to call PRECSET with an invalid WHAT'

@ -440,6 +440,27 @@ module mld_prec_mod
end subroutine mld_zilu_fct end subroutine mld_zilu_fct
end interface end interface
interface mld_iluk_fct
subroutine mld_diluk_fct(fill_in,ialg,a,l,u,d,info,blck)
use psb_base_mod
integer, intent(in) :: fill_in,ialg
integer, intent(out) :: info
type(psb_dspmat_type),intent(in) :: a
type(psb_dspmat_type),intent(inout) :: l,u
type(psb_dspmat_type),intent(in), optional, target :: blck
real(kind(1.d0)), intent(inout) :: d(:)
end subroutine mld_diluk_fct
subroutine mld_ziluk_fct(fill_in,ialg,a,l,u,d,info,blck)
use psb_base_mod
integer, intent(in) :: fill_in,ialg
integer, intent(out) :: info
type(psb_zspmat_type),intent(in) :: a
type(psb_zspmat_type),intent(inout) :: l,u
type(psb_zspmat_type),intent(in), optional, target :: blck
complex(kind(1.d0)), intent(inout) :: d(:)
end subroutine mld_ziluk_fct
end interface
interface mld_asmat_bld interface mld_asmat_bld
Subroutine mld_dasmat_bld(ptype,novr,a,blk,desc_data,upd,desc_p,info,outfmt) Subroutine mld_dasmat_bld(ptype,novr,a,blk,desc_data,upd,desc_p,info,outfmt)
use psb_base_mod use psb_base_mod

@ -164,8 +164,8 @@ module mld_prec_type
integer, parameter :: mld_no_ml_=0, mld_add_ml_=1, mld_mult_ml_=2 integer, parameter :: mld_no_ml_=0, mld_add_ml_=1, mld_mult_ml_=2
integer, parameter :: mld_new_ml_prec_=3, mld_max_ml_=mld_new_ml_prec_ integer, parameter :: mld_new_ml_prec_=3, mld_max_ml_=mld_new_ml_prec_
! Legal values for entry: mld_smooth_pos_ ! Legal values for entry: mld_smooth_pos_
integer, parameter :: mld_pre_smooth_=1, mld_post_smooth_=2, mld_twoside_smooth_=3,& integer, parameter :: mld_pre_smooth_=1, mld_post_smooth_=2,&
& mld_max_smooth_=mld_twoside_smooth_ & mld_twoside_smooth_=3, mld_max_smooth_=mld_twoside_smooth_
! Legal values for entry: mld_sub_solve_ ! Legal values for entry: mld_sub_solve_
integer, parameter :: mld_f_none_=0,mld_ilu_n_=1,mld_milu_n_=2, mld_ilu_t_=3 integer, parameter :: mld_f_none_=0,mld_ilu_n_=1,mld_milu_n_=2, mld_ilu_t_=3
integer, parameter :: mld_slu_=4, mld_umf_=5, mld_sludist_=6 integer, parameter :: mld_slu_=4, mld_umf_=5, mld_sludist_=6
@ -185,7 +185,7 @@ module mld_prec_type
integer, parameter :: mld_renum_none_=0, mld_renum_glb_=1, mld_renum_gps_=2 integer, parameter :: mld_renum_none_=0, mld_renum_glb_=1, mld_renum_gps_=2
! Entries in dprcparm: ILU(T) epsilon, smoother omega ! Entries in dprcparm: ILU(T) epsilon, smoother omega
integer, parameter :: mld_fact_eps_=1 integer, parameter :: mld_fact_thrs_=1
integer, parameter :: mld_aggr_damp_=2 integer, parameter :: mld_aggr_damp_=2
integer, parameter :: mld_aggr_thresh_=3 integer, parameter :: mld_aggr_thresh_=3
integer, parameter :: mld_dfpsz_=4 integer, parameter :: mld_dfpsz_=4
@ -379,24 +379,43 @@ contains
write(iout,*) 'Preconditioner description' write(iout,*) 'Preconditioner description'
if (allocated(p%baseprecv)) then if (allocated(p%baseprecv)) then
if (size(p%baseprecv)>=1) then if (size(p%baseprecv)>=1) then
ilev = 1
write(iout,*) 'Base preconditioner' write(iout,*) 'Base preconditioner'
select case(p%baseprecv(1)%iprcparm(mld_prec_type_)) select case(p%baseprecv(ilev)%iprcparm(mld_prec_type_))
case(mld_noprec_) case(mld_noprec_)
write(iout,*) 'No preconditioning' write(iout,*) 'No preconditioning'
case(mld_diag_) case(mld_diag_)
write(iout,*) 'Diagonal scaling' write(iout,*) 'Diagonal scaling'
case(mld_bjac_) case(mld_bjac_)
write(iout,*) 'Block Jacobi with: ',& write(iout,*) 'Block Jacobi with: ',&
& fact_names(p%baseprecv(1)%iprcparm(mld_sub_solve_)) & fact_names(p%baseprecv(ilev)%iprcparm(mld_sub_solve_))
select case(p%baseprecv(ilev)%iprcparm(mld_sub_solve_))
case(mld_ilu_n_)
write(iout,*) 'Fill level:',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_)
case(mld_ilu_t_)
write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_thrs_)
case(mld_slu_,mld_umf_,mld_sludist_)
case default
write(iout,*) 'Should never get here!'
end select
case(mld_as_) case(mld_as_)
write(iout,*) 'Additive Schwarz with: ',& write(iout,*) 'Additive Schwarz with: ',&
& fact_names(p%baseprecv(1)%iprcparm(mld_sub_solve_)) & fact_names(p%baseprecv(ilev)%iprcparm(mld_sub_solve_))
select case(p%baseprecv(ilev)%iprcparm(mld_sub_solve_))
case(mld_ilu_n_)
write(iout,*) 'Fill level:',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_)
case(mld_ilu_t_)
write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_thrs_)
case(mld_slu_,mld_umf_,mld_sludist_)
case default
write(iout,*) 'Should never get here!'
end select
write(iout,*) 'Overlap:',& write(iout,*) 'Overlap:',&
& p%baseprecv(1)%iprcparm(mld_n_ovr_) & p%baseprecv(ilev)%iprcparm(mld_n_ovr_)
write(iout,*) 'Restriction: ',& write(iout,*) 'Restriction: ',&
& restrict_names(p%baseprecv(1)%iprcparm(mld_sub_restr_)) & restrict_names(p%baseprecv(ilev)%iprcparm(mld_sub_restr_))
write(iout,*) 'Prolongation: ',& write(iout,*) 'Prolongation: ',&
& prolong_names(p%baseprecv(1)%iprcparm(mld_sub_prol_)) & prolong_names(p%baseprecv(ilev)%iprcparm(mld_sub_prol_))
end select end select
end if end if
if (size(p%baseprecv)>=2) then if (size(p%baseprecv)>=2) then
@ -430,9 +449,9 @@ contains
& fact_names(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) & fact_names(p%baseprecv(ilev)%iprcparm(mld_sub_solve_))
select case(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) select case(p%baseprecv(ilev)%iprcparm(mld_sub_solve_))
case(mld_ilu_n_) case(mld_ilu_n_)
write(iout,*) 'Fill level :',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_) write(iout,*) 'Fill level:',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_)
case(mld_ilu_t_) case(mld_ilu_t_)
write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_eps_) write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_thrs_)
case(mld_slu_,mld_umf_,mld_sludist_) case(mld_slu_,mld_umf_,mld_sludist_)
case default case default
write(iout,*) 'Should never get here!' write(iout,*) 'Should never get here!'
@ -498,9 +517,9 @@ contains
!!$ & fact_names(p%baseprecv(2)%iprcparm(mld_sub_solve_)) !!$ & fact_names(p%baseprecv(2)%iprcparm(mld_sub_solve_))
!!$ select case(p%baseprecv(2)%iprcparm(mld_sub_solve_)) !!$ select case(p%baseprecv(2)%iprcparm(mld_sub_solve_))
!!$ case(mld_ilu_n_) !!$ case(mld_ilu_n_)
!!$ write(iout,*) 'Fill level :',p%baseprecv(2)%iprcparm(mld_sub_fill_in_) !!$ write(iout,*) 'Fill level:',p%baseprecv(2)%iprcparm(mld_sub_fill_in_)
!!$ case(mld_ilu_t_) !!$ case(mld_ilu_t_)
!!$ write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(mld_fact_eps_) !!$ write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(mld_fact_thrs_)
!!$ case(mld_slu_,mld_umf_,mld_sludist_) !!$ case(mld_slu_,mld_umf_,mld_sludist_)
!!$ case default !!$ case default
!!$ write(iout,*) 'Should never get here!' !!$ write(iout,*) 'Should never get here!'
@ -530,23 +549,42 @@ contains
if (allocated(p%baseprecv)) then if (allocated(p%baseprecv)) then
if (size(p%baseprecv)>=1) then if (size(p%baseprecv)>=1) then
write(iout,*) 'Base preconditioner' write(iout,*) 'Base preconditioner'
select case(p%baseprecv(1)%iprcparm(mld_prec_type_)) ilev=1
select case(p%baseprecv(ilev)%iprcparm(mld_prec_type_))
case(mld_noprec_) case(mld_noprec_)
write(iout,*) 'No preconditioning' write(iout,*) 'No preconditioning'
case(mld_diag_) case(mld_diag_)
write(iout,*) 'Diagonal scaling' write(iout,*) 'Diagonal scaling'
case(mld_bjac_) case(mld_bjac_)
write(iout,*) 'Block Jacobi with: ',& write(iout,*) 'Block Jacobi with: ',&
& fact_names(p%baseprecv(1)%iprcparm(mld_sub_solve_)) & fact_names(p%baseprecv(ilev)%iprcparm(mld_sub_solve_))
select case(p%baseprecv(ilev)%iprcparm(mld_sub_solve_))
case(mld_ilu_n_)
write(iout,*) 'Fill level:',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_)
case(mld_ilu_t_)
write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_thrs_)
case(mld_slu_,mld_umf_,mld_sludist_)
case default
write(iout,*) 'Should never get here!'
end select
case(mld_as_) case(mld_as_)
write(iout,*) 'Additive Schwarz with: ',& write(iout,*) 'Additive Schwarz with: ',&
& fact_names(p%baseprecv(1)%iprcparm(mld_sub_solve_)) & fact_names(p%baseprecv(ilev)%iprcparm(mld_sub_solve_))
select case(p%baseprecv(ilev)%iprcparm(mld_sub_solve_))
case(mld_ilu_n_)
write(iout,*) 'Fill level:',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_)
case(mld_ilu_t_)
write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_thrs_)
case(mld_slu_,mld_umf_,mld_sludist_)
case default
write(iout,*) 'Should never get here!'
end select
write(iout,*) 'Overlap:',& write(iout,*) 'Overlap:',&
& p%baseprecv(1)%iprcparm(mld_n_ovr_) & p%baseprecv(ilev)%iprcparm(mld_n_ovr_)
write(iout,*) 'Restriction: ',& write(iout,*) 'Restriction: ',&
& restrict_names(p%baseprecv(1)%iprcparm(mld_sub_restr_)) & restrict_names(p%baseprecv(ilev)%iprcparm(mld_sub_restr_))
write(iout,*) 'Prolongation: ',& write(iout,*) 'Prolongation: ',&
& prolong_names(p%baseprecv(1)%iprcparm(mld_sub_prol_)) & prolong_names(p%baseprecv(ilev)%iprcparm(mld_sub_prol_))
end select end select
end if end if
if (size(p%baseprecv)>=2) then if (size(p%baseprecv)>=2) then
@ -580,9 +618,9 @@ contains
& fact_names(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) & fact_names(p%baseprecv(ilev)%iprcparm(mld_sub_solve_))
select case(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) select case(p%baseprecv(ilev)%iprcparm(mld_sub_solve_))
case(mld_ilu_n_) case(mld_ilu_n_)
write(iout,*) 'Fill level :',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_) write(iout,*) 'Fill level:',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_)
case(mld_ilu_t_) case(mld_ilu_t_)
write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_eps_) write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_thrs_)
case(mld_slu_,mld_umf_,mld_sludist_) case(mld_slu_,mld_umf_,mld_sludist_)
case default case default
write(iout,*) 'Should never get here!' write(iout,*) 'Should never get here!'
@ -648,9 +686,9 @@ contains
!!$ & fact_names(p%baseprecv(2)%iprcparm(mld_sub_solve_)) !!$ & fact_names(p%baseprecv(2)%iprcparm(mld_sub_solve_))
!!$ select case(p%baseprecv(2)%iprcparm(mld_sub_solve_)) !!$ select case(p%baseprecv(2)%iprcparm(mld_sub_solve_))
!!$ case(mld_ilu_n_) !!$ case(mld_ilu_n_)
!!$ write(iout,*) 'Fill level :',p%baseprecv(2)%iprcparm(mld_sub_fill_in_) !!$ write(iout,*) 'Fill level:',p%baseprecv(2)%iprcparm(mld_sub_fill_in_)
!!$ case(mld_ilu_t_) !!$ case(mld_ilu_t_)
!!$ write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(mld_fact_eps_) !!$ write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(mld_fact_thrs_)
!!$ case(mld_slu_,mld_umf_,mld_sludist_) !!$ case(mld_slu_,mld_umf_,mld_sludist_)
!!$ case default !!$ case default
!!$ write(iout,*) 'Should never get here!' !!$ write(iout,*) 'Should never get here!'
@ -781,14 +819,14 @@ contains
is_legal_omega = ((ip>=0.0d0).and.(ip<=2.0d0)) is_legal_omega = ((ip>=0.0d0).and.(ip<=2.0d0))
return return
end function is_legal_omega end function is_legal_omega
function is_legal_ml_eps(ip) function is_legal_fact_thrs(ip)
use psb_base_mod use psb_base_mod
real(kind(1.d0)), intent(in) :: ip real(kind(1.d0)), intent(in) :: ip
logical :: is_legal_ml_eps logical :: is_legal_fact_thrs
is_legal_ml_eps = (ip>=0.0d0) is_legal_fact_thrs = (ip>=0.0d0)
return return
end function is_legal_ml_eps end function is_legal_fact_thrs
subroutine mld_icheck_def(ip,name,id,is_legal) subroutine mld_icheck_def(ip,name,id,is_legal)

@ -144,12 +144,37 @@ subroutine mld_zilu_bld(a,desc_a,p,upd,info,blck)
endif endif
select case(p%iprcparm(mld_sub_solve_))
case(mld_ilu_n_,mld_milu_n_,mld_ilu_t_)
!
! Need to decide what to do with ILU(T).
! Should we pack the functionality together with ILU(K)?
! Maybe for mental sanity, a separate routine would be better.
!
! Decide what to do with MILU(K)
! !
! Ok, factor the matrix. ! Ok, factor the matrix.
! !
t5 = psb_wtime() t5 = psb_wtime()
select case(p%iprcparm(mld_sub_fill_in_))
case(:-1)
! This is an error.
call psb_errpush(30,name,i_err=(/3,p%iprcparm(mld_sub_fill_in_),0,0,0/))
goto 9999
case(0)
call mld_ilu_fct(p%iprcparm(mld_sub_solve_),a,p%av(mld_l_pr_),p%av(mld_u_pr_),& call mld_ilu_fct(p%iprcparm(mld_sub_solve_),a,p%av(mld_l_pr_),p%av(mld_u_pr_),&
& p%d,info,blck=blck) & p%d,info,blck=blck)
case(1:)
call mld_iluk_fct(p%iprcparm(mld_sub_fill_in_),p%iprcparm(mld_sub_solve_),&
& a,p%av(mld_l_pr_),p%av(mld_u_pr_),p%d,info,blck=blck)
end select
case default
! If we end up here, something was wrong up in the call chain.
call psb_errpush(4000,name)
goto 9999
end select
if(info/=0) then if(info/=0) then
info=4010 info=4010
ch_err='mld_ilu_fct' ch_err='mld_ilu_fct'

@ -164,72 +164,22 @@ contains
if(debug) write(0,*)'LUINT Done Allocating TRW' if(debug) write(0,*)'LUINT Done Allocating TRW'
lia2(1) = 1 lia2(1) = 1
uia2(1) = 1 uia2(1) = 1
l1=0 l1 = 0
l2=0 l2 = 0
m = ma+mb m = ma+mb
if(debug) write(0,*)'In DCSRLU Begin cycle',m,ma,mb if(debug) write(0,*)'In DCSRLU Begin cycle',m,ma,mb
do i = 1, ma do i = 1, m
if(debug) write(0,*)'LUINT: Loop index ',i,ma if(debug) write(0,*)'LUINT: Loop index ',i,ma
d(i) = zzero d(i) = zzero
! if (i <= ma) then
! Here we take a fast shortcut if possible, otherwise call ilu_copyin(i,ma,a,i,m,l1,lia1,lia2,laspk,&
! use spgtblk, slower but able (in principle) to handle & d(i),l2,uia1,uia2,uaspk,ktrw,trw)
! anything.
!
if (toupper(a%fida)=='CSR') then
do j = a%ia2(i), a%ia2(i+1) - 1
k = a%ia1(j)
! write(0,*)'KKKKK',k
if ((k < i).and.(k >= 1)) then
l1 = l1 + 1
laspk(l1) = a%aspk(j)
lia1(l1) = k
else if (k == i) then
d(i) = a%aspk(j)
else if ((k > i).and.(k <= m)) then
l2 = l2 + 1
uaspk(l2) = a%aspk(j)
uia1(l2) = k
end if
enddo
else else
call ilu_copyin(i-ma,mb,b,i,m,l1,lia1,lia2,laspk,&
if ((mod(i,nrb) == 1).or.(nrb==1)) then & d(i),l2,uia1,uia2,uaspk,ktrw,trw)
irb = min(ma-i+1,nrb) endif
call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1)
if(info.ne.0) then
info=4010
ch_err='psb_sp_getblk'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
ktrw=1
end if
do
if (ktrw > trw%infoa(psb_nnz_)) exit
if (trw%ia1(ktrw) > i) exit
k = trw%ia2(ktrw)
if ((k < i).and.(k >= 1)) then
l1 = l1 + 1
laspk(l1) = trw%aspk(ktrw)
lia1(l1) = k
else if (k == i) then
d(i) = trw%aspk(ktrw)
else if ((k > i).and.(k <= m)) then
l2 = l2 + 1
uaspk(l2) = trw%aspk(ktrw)
uia1(l2) = k
end if
ktrw = ktrw + 1
enddo
end if
!!$
lia2(i+1) = l1 + 1 lia2(i+1) = l1 + 1
uia2(i+1) = l2 + 1 uia2(i+1) = l2 + 1
@ -317,32 +267,74 @@ contains
enddo enddo
enddo enddo
do i = ma+1, m call psb_sp_free(trw,info)
d(i) = zzero if(info.ne.0) then
info=4010
ch_err='psb_sp_free'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(debug) write(0,*)'Leaving ilu_fct'
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act.eq.psb_act_abort_) then
call psb_error()
return
end if
return
end subroutine mld_zilu_fctint
subroutine ilu_copyin(i,m,a,jd,jmax,l1,lia1,lia2,laspk,&
& dia,l2,uia1,uia2,uaspk,ktrw,trw)
use psb_base_mod
implicit none
type(psb_zspmat_type) :: a,trw
integer :: i, m,ktrw,jd,jmax,l1,l2
integer :: lia1(:),lia2(:),uia1(:),uia2(:)
complex(kind(1.d0)) :: laspk(:), uaspk(:), dia
type(psb_int_heap) :: heap
integer :: k,j,info,irb
integer, parameter :: nrb=16
character(len=20), parameter :: name='mld_dilu_fctint'
character(len=20) :: ch_err
if (toupper(b%fida)=='CSR') then if (psb_get_errstatus() /= 0) return
info=0
call psb_erractionsave(err_act)
!
! Here we take a fast shortcut if possible, otherwise
! use spgtblk, slower but able (in principle) of
! handling anything.
!
do j = b%ia2(i-ma), b%ia2(i-ma+1) - 1 if (toupper(a%fida)=='CSR') then
k = b%ia1(j) do j = a%ia2(i), a%ia2(i+1) - 1
if ((k < i).and.(k >= 1)) then k = a%ia1(j)
! write(0,*)'KKKKK',k
if ((k < jd).and.(k >= 1)) then
l1 = l1 + 1 l1 = l1 + 1
laspk(l1) = b%aspk(j) laspk(l1) = a%aspk(j)
lia1(l1) = k lia1(l1) = k
else if (k == i) then else if (k == jd) then
d(i) = b%aspk(j) dia = a%aspk(j)
else if ((k > i).and.(k <= m)) then else if ((k > jd).and.(k <= jmax)) then
l2 = l2 + 1 l2 = l2 + 1
uaspk(l2) = b%aspk(j) uaspk(l2) = a%aspk(j)
uia1(l2) = k uia1(l2) = k
end if end if
enddo enddo
else else
if ((mod((i-ma),nrb) == 1).or.(nrb==1)) then if ((mod(i,nrb) == 1).or.(nrb==1)) then
irb = min(m-i+1,nrb) irb = min(m-i+1,nrb)
call psb_sp_getblk(i-ma,b,trw,info,lrw=i-ma+irb-1) call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1)
if(info.ne.0) then if(info.ne.0) then
info=4010 info=4010
ch_err='psb_sp_getblk' ch_err='psb_sp_getblk'
@ -356,13 +348,13 @@ contains
if (ktrw > trw%infoa(psb_nnz_)) exit if (ktrw > trw%infoa(psb_nnz_)) exit
if (trw%ia1(ktrw) > i) exit if (trw%ia1(ktrw) > i) exit
k = trw%ia2(ktrw) k = trw%ia2(ktrw)
if ((k < i).and.(k >= 1)) then if ((k < jd).and.(k >= 1)) then
l1 = l1 + 1 l1 = l1 + 1
laspk(l1) = trw%aspk(ktrw) laspk(l1) = trw%aspk(ktrw)
lia1(l1) = k lia1(l1) = k
else if (k == i) then else if (k == jd) then
d(i) = trw%aspk(ktrw) dia = trw%aspk(ktrw)
else if ((k > i).and.(k <= m)) then else if ((k > jd).and.(k <= jmax)) then
l2 = l2 + 1 l2 = l2 + 1
uaspk(l2) = trw%aspk(ktrw) uaspk(l2) = trw%aspk(ktrw)
uia1(l2) = k uia1(l2) = k
@ -370,100 +362,7 @@ contains
ktrw = ktrw + 1 ktrw = ktrw + 1
enddo enddo
endif
lia2(i+1) = l1 + 1
uia2(i+1) = l2 + 1
dia = d(i)
do kk = lia2(i), lia2(i+1) - 1
!
! compute element alo(i,k) of incomplete factorization
!
temp = laspk(kk)
k = lia1(kk)
laspk(kk) = temp*d(k)
! update the rest of row i using alo(i,k)
low1 = kk + 1
low2 = uia2(i)
updateloopb: do jj = uia2(k), uia2(k+1) - 1
j = uia1(jj)
!
if (j < i) then
! search alo(i,*) for matching index J
do ll = low1, lia2(i+1) - 1
l = lia1(ll)
if (l > j) then
low1 = ll
exit
else if (l == j) then
laspk(ll) = laspk(ll) - temp*uaspk(jj)
low1 = ll + 1
cycle updateloopb
end if
enddo
!
else if (j == i) then
! j=i update diagonal
dia = dia - temp*uaspk(jj)
cycle updateloopb
!
else if (j > i) then
! search aup(i,*) for matching index j
do ll = low2, uia2(i+1) - 1
l = uia1(ll)
if (l > j) then
low2 = ll
exit
else if (l == j) then
uaspk(ll) = uaspk(ll) - temp*uaspk(jj)
low2 = ll + 1
cycle updateloopb
end if end if
enddo
end if
!
! If we get here we missed the cycle updateloop,
! which means that this entry does not match; thus
! we take it out of diagonal for MILU.
!
if (ialg == mld_milu_n_) then
dia = dia - temp*uaspk(jj)
end if
enddo updateloopb
enddo
!
!
! Non singularity
!
if (abs(dia) < epstol) then
!
! Pivot too small: unstable factorization
!
int_err(1) = i
write(ch_err,'(g20.10)') abs(dia)
info = 2
call psb_errpush(info,name,i_err=int_err,a_err=ch_err)
goto 9999
else
dia = zone/dia
end if
d(i) = dia
! Scale row i of upper triangle
do kk = uia2(i), uia2(i+1) - 1
uaspk(kk) = uaspk(kk)*dia
enddo
enddo
call psb_sp_free(trw,info)
if(info.ne.0) then
info=4010
ch_err='psb_sp_free'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if(debug) write(0,*)'Leaving ilu_fct'
call psb_erractionrestore(err_act) call psb_erractionrestore(err_act)
return return
@ -475,5 +374,6 @@ contains
return return
end if end if
return return
end subroutine mld_zilu_fctint end subroutine ilu_copyin
end subroutine mld_zilu_fct end subroutine mld_zilu_fct

@ -0,0 +1,500 @@
!!$
!!$
!!$ MD2P4
!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS
!!$ for
!!$ Parallel Sparse BLAS v2.0
!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata
!!$ Alfredo Buttari University of Rome Tor Vergata
!!$ Daniela di Serafino Second University of Naples
!!$ Pasqua D'Ambra ICAR-CNR
!!$
!!$ Redistribution and use in source and binary forms, with or without
!!$ modification, are permitted provided that the following conditions
!!$ are met:
!!$ 1. Redistributions of source code must retain the above copyright
!!$ notice, this list of conditions and the following disclaimer.
!!$ 2. Redistributions in binary form must reproduce the above copyright
!!$ notice, this list of conditions, and the following disclaimer in the
!!$ documentation and/or other materials provided with the distribution.
!!$ 3. The name of the MD2P4 group or the names of its contributors may
!!$ not be used to endorse or promote products derived from this
!!$ software without specific written permission.
!!$
!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS
!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
!!$ POSSIBILITY OF SUCH DAMAGE.
!!$
!!$
subroutine mld_ziluk_fct(fill_in,ialg,a,l,u,d,info,blck)
!
! This routine copies and factors "on the fly" from A and BLCK
! into L/D/U.
!
!
use psb_base_mod
use mld_prec_mod, mld_protect_name => mld_diluk_fct
implicit none
! .. Scalar Arguments ..
integer, intent(in) :: fill_in, ialg
integer, intent(out) :: info
! .. Array Arguments ..
type(psb_zspmat_type),intent(in) :: a
type(psb_zspmat_type),intent(inout) :: l,u
type(psb_zspmat_type),intent(in), optional, target :: blck
complex(kind(1.d0)), intent(inout) :: d(:)
! .. Local Scalars ..
complex(kind(1.d0)) :: dia, temp
integer :: i, j, jj, k, kk, l1, l2, ll, low1, low2,m,ma,err_act
type(psb_zspmat_type), pointer :: blck_
character(len=20) :: name, ch_err
logical, parameter :: debug=.false.
name='mld_diluk_fct'
info = 0
call psb_erractionsave(err_act)
! .. Executable Statements ..
!
if (debug) write(0,*) 'mld_diluk_fct: start'
if (present(blck)) then
blck_ => blck
else
allocate(blck_,stat=info)
if (info /= 0) then
call psb_errpush(4010,name,a_err='Allocate')
goto 9999
end if
call psb_sp_all(0,0,blck_,1,info)
if (info /= 0) then
info=4010
ch_err='psb_sp_all'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
endif
if (debug) write(0,*) 'mld_diluk_fct: calling fctint'
call mld_ziluk_fctint(fill_in,ialg,m,a%m,a,blck_%m,blck_,&
& d,l%aspk,l%ia1,l%ia2,u%aspk,u%ia1,u%ia2,l1,l2,info)
if (info /= 0) then
info=4010
ch_err='mld_diluk_fctint'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
l%infoa(1) = l1
l%fida = 'CSR'
l%descra = 'TLU'
u%infoa(1) = l2
u%fida = 'CSR'
u%descra = 'TUU'
l%m = m
l%k = m
u%m = m
u%k = m
if (present(blck)) then
blck_ => null()
else
call psb_sp_free(blck_,info)
if (info /= 0) then
info=4010
ch_err='psb_sp_free'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
deallocate(blck_)
endif
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act.eq.psb_act_abort_) then
call psb_error()
return
end if
return
contains
subroutine mld_ziluk_fctint(fill_in,ialg,m,ma,a,mb,b,&
& d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info)
use psb_base_mod
implicit none
integer, intent(in) :: fill_in, ialg
type(psb_zspmat_type) :: a,b
integer :: m,ma,mb,l1,l2,info
integer, dimension(:), allocatable :: lia1,lia2,uia1,uia2
complex(kind(1.d0)), dimension(:), allocatable :: laspk,uaspk
complex(kind(1.d0)), dimension(:) :: d
integer :: i,j,k,l,low1,low2,kk,jj,ll, ktrw,err_act, &
& isz,minj,maxj,lrwk,nidx
complex(kind(1.d0)) :: dia,temp,rwk
integer, allocatable :: uplevs(:), rowlevs(:),idxs(:)
complex(kind(1.d0)), allocatable :: row(:)
type(psb_int_heap) :: heap
logical,parameter :: debug=.false.
type(psb_zspmat_type) :: trw
integer :: int_err(5)
character(len=20), parameter :: name='mld_diluk_fctint'
character(len=20) :: ch_err
if (psb_get_errstatus() /= 0) return
info=0
call psb_erractionsave(err_act)
m = ma+mb
!
! Temp buffer for copyin function.
!
if (debug) write(0,*)'LUINT Allocating TRW'
call psb_sp_all(0,0,trw,1,info)
if (info==0) call psb_ensure_size(m+1,lia2,info)
if (info==0) call psb_ensure_size(m+1,uia2,info)
if (info /= 0) then
info=4010
call psb_errpush(info,name,a_err='psb_sp_all')
goto 9999
end if
if (debug) write(0,*)'LUINT Done Allocating TRW'
l1=0
l2=0
lia2(1) = 1
uia2(1) = 1
if (debug) write(0,*)'In DCSRLU Begin cycle',m,ma,mb
! Initial allocation
allocate(uplevs(size(uaspk)),rowlevs(m),row(m),stat=info)
if (info /= 0) then
info=4010
call psb_errpush(info,name,a_err='Allocate')
goto 9999
end if
uplevs(:) = m+1
row(:) = zzero
rowlevs(:) = m+1
do i = 1, m
if (debug.and.(mod(i,500)==1)) write(0,*)'LUINT: Loop index ',i,ma,minj,maxj
if (debug) write(0,*)'LUINT: Loop index ',i,ma
!
! At each iteration of the loop we keep the indices affected in a heap
! initialized and filled in the copyin function, and updated during
! the elimination. The heap is ideal because at each step we need the
! lowest index, but we also need to insert new items, and the heap allows
! to do both in log time.
!
d(i) = zzero
if (i<=ma) then
call iluk_copyin(i,ma,a,m,row,rowlevs,heap,ktrw,trw)
else
call iluk_copyin(i-ma,mb,b,m,row,rowlevs,heap,ktrw,trw)
endif
if (debug) write(0,*)'LUINT: input Copy done',minj,maxj
! Do an elimination step on current row
! Turns out we only need to keep track of levels
! for the upper triangle, hence no uplevs variable.
!
call iluk_fact(fill_in,i,m,row,rowlevs,heap,&
& d,uia1,uia2,uaspk,uplevs,nidx,idxs)
!
! Copy the row into the lower/diag/upper structures.
!
call iluk_copyout(fill_in,ialg,i,m,row,rowlevs,nidx,idxs,&
& l1,l2,lia1,lia2,laspk,d,uia1,uia2,uaspk,uplevs)
end do
!
! And we're done......hopefully :-)
!
deallocate(uplevs,rowlevs,row,stat=info)
if (info /= 0) then
info=4010
call psb_errpush(info,name,a_err='Deallocate')
goto 9999
end if
if (info == 0) call psb_sp_free(trw,info)
if (info /= 0) then
info=4010
ch_err='psb_sp_free'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
if (debug) write(0,*)'Leaving ilu_fct'
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act.eq.psb_act_abort_) then
call psb_error()
return
end if
return
end subroutine mld_ziluk_fctint
subroutine iluk_copyin(i,m,a,jmax,row,rowlevs,heap,ktrw,trw)
use psb_base_mod
implicit none
type(psb_zspmat_type) :: a,trw
integer :: i, rowlevs(:),m,ktrw,jmax
complex(kind(1.d0)) :: row(:)
type(psb_int_heap) :: heap
integer :: k,j,info,irb
integer, parameter :: nrb=16
character(len=20), parameter :: name='mld_diluk_fctint'
character(len=20) :: ch_err
if (psb_get_errstatus() /= 0) return
info=0
call psb_erractionsave(err_act)
!
! Here we take a fast shortcut if possible, otherwise
! use spgtblk, slower but able (in principle) to handle
! anything.
!
if (toupper(a%fida)=='CSR') then
call psb_init_heap(heap,info)
do j = a%ia2(i), a%ia2(i+1) - 1
k = a%ia1(j)
if ((1<=k).and.(k<=jmax)) then
row(k) = a%aspk(j)
rowlevs(k) = 0
call psb_insert_heap(k,heap,info)
end if
end do
else
if ((mod(i,nrb) == 1).or.(nrb==1)) then
irb = min(m-i+1,nrb)
call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1)
if (info /= 0) then
info=4010
ch_err='psb_sp_getblk'
call psb_errpush(info,name,a_err=ch_err)
goto 9999
end if
ktrw=1
end if
do
if (ktrw > trw%infoa(psb_nnz_)) exit
if (trw%ia1(ktrw) > i) exit
k = trw%ia2(ktrw)
if ((1<=k).and.(k<=jmax)) then
row(k) = trw%aspk(ktrw)
rowlevs(k) = 0
call psb_insert_heap(k,heap,info)
end if
ktrw = ktrw + 1
enddo
end if
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act.eq.psb_act_abort_) then
call psb_error()
return
end if
return
end subroutine iluk_copyin
subroutine iluk_fact(fill_in,i,m,row,rowlevs,heap,d,uia1,uia2,uaspk,uplevs,nidx,idxs)
use psb_base_mod
implicit none
type(psb_zspmat_type) :: a
type(psb_int_heap) :: heap
integer :: i,m, rowlevs(:),minj,maxj,fill_in,nidx
integer, allocatable :: idxs(:)
integer :: uia1(:),uia2(:),uplevs(:)
complex(kind(1.d0)) :: row(:), uaspk(:),d(:)
integer :: k,j,lrwk,jj,info, lastk
complex(kind(1.d0)) :: rwk
! Do an elimination step on current row
! Turns out we only need to keep track of levels
! for the upper triangle.
if (.not.allocated(idxs)) then
allocate(idxs(200),stat=info)
endif
nidx = 0
lastk = -1
!
! Do while there are indices to be processed
!
do
call psb_heap_get_first(k,heap,info)
if (info < 0) exit
!
! An index may have been put on the heap more than once.
!
if (k == lastk) cycle
lastk = k
nidx = nidx + 1
if (nidx>size(idxs)) then
call psb_realloc(nidx+psb_heap_resize,idxs,info)
end if
idxs(nidx) = k
if ((row(k) /= zzero).and.(rowlevs(k) <= fill_in).and.(k<i)) then
!
! Note: since U is scaled while copying out, we can use rwk
! in the update below
!
rwk = row(k)
row(k) = row(k) * d(k) ! d(k) == 1/a(k,k)
lrwk = rowlevs(k)
do jj=uia2(k),uia2(k+1)-1
j = uia1(jj)
if (j<=k) then
write(0,*) 'Error in accessing upper mat???',j,k,jj
endif
!
! Insert the index for further processing.
! Is there a sensible way to prune the insertion?
!
call psb_insert_heap(j,heap,info)
row(j) = row(j) - rwk * uaspk(jj)
rowlevs(j) = min(rowlevs(j),lrwk+uplevs(jj)+1)
end do
end if
end do
end subroutine iluk_fact
subroutine iluk_copyout(fill_in,ialg,i,m,row,rowlevs,nidx,idxs,&
& l1,l2,lia1,lia2,laspk,d,uia1,uia2,uaspk,uplevs)
use psb_base_mod
implicit none
integer :: fill_in,ialg,i, rowlevs(:),minj,maxj,l1,l2,m,nidx,idxs(:)
integer, allocatable :: uia1(:),uia2(:), lia1(:),lia2(:),uplevs(:)
complex(kind(1.d0)),allocatable :: uaspk(:), laspk(:)
complex(kind(1.d0)) :: row(:), d(:)
integer :: k,j,isz,info,err_act,int_err(5),idxp
character(len=20), parameter :: name='mld_diluk_fctint'
character(len=20) :: ch_err
if (psb_get_errstatus() /= 0) return
info=0
call psb_erractionsave(err_act)
!
! Copy the row into the lower/diag/upper structures.
!
! Lower part
d(i) = zzero
do idxp=1,nidx
j = idxs(idxp)
if (j<i) then
if (rowlevs(j) <= fill_in) then
l1 = l1 + 1
if (size(laspk) < l1) then
! Figure out a good reallocation size!!
isz = (max((l1/i)*m,int(1.2*l1),l1+100))
call psb_realloc(isz,laspk,info)
if (info == 0) call psb_realloc(isz,lia1,info)
if (info /= 0) then
info=4010
call psb_errpush(info,name,a_err='Allocate')
goto 9999
end if
end if
lia1(l1) = j
laspk(l1) = row(j)
else if (ialg == mld_milu_n_) then
d(i) = d(i) + row(j)
end if
row(j) = zzero
rowlevs(j) = m+1
else if (j==i) then
d(i) = d(i) + row(i)
row(i) = zzero
rowlevs(i) = m+1
else if (j>i) then
! Upper part
if (rowlevs(j) <= fill_in) then
l2 = l2 + 1
if (size(uaspk) < l2) then
! Figure out a good reallocation size!!
isz = max((l2/i)*m,int(1.2*l2),l2+100)
call psb_realloc(isz,uaspk,info)
if (info == 0) call psb_realloc(isz,uia1,info)
if (info == 0) call psb_realloc(isz,uplevs,info,pad=(m+1))
if (info /= 0) then
info=4010
call psb_errpush(info,name,a_err='Allocate')
goto 9999
end if
end if
uia1(l2) = j
uaspk(l2) = row(j)
uplevs(l2) = rowlevs(j)
else if (ialg == mld_milu_n_) then
d(i) = d(i) + row(j)
end if
row(j) = zzero
rowlevs(j) = m+1
end if
end do
lia2(i+1) = l1 + 1
uia2(i+1) = l2 + 1
if (abs(d(i)) < epstol) then
!
! Pivot too small: unstable factorization
!
info = 2
int_err(1) = i
write(ch_err,'(g20.10)') d(i)
call psb_errpush(info,name,i_err=int_err,a_err=ch_err)
goto 9999
else
d(i) = zone/d(i)
end if
! Now scale upper part
do j=uia2(i), uia2(i+1)-1
uaspk(j) = d(i)*uaspk(j)
end do
call psb_erractionrestore(err_act)
return
9999 continue
call psb_erractionrestore(err_act)
if (err_act.eq.psb_act_abort_) then
call psb_error()
return
end if
end subroutine iluk_copyout
end subroutine mld_ziluk_fct

@ -85,7 +85,7 @@ subroutine mld_zmlprec_bld(a,desc_a,p,info)
case(mld_ilu_n_) case(mld_ilu_n_)
call mld_check_def(p%iprcparm(mld_sub_fill_in_),'Level',0,is_legal_ml_lev) call mld_check_def(p%iprcparm(mld_sub_fill_in_),'Level',0,is_legal_ml_lev)
case(mld_ilu_t_) case(mld_ilu_t_)
call mld_check_def(p%dprcparm(mld_fact_eps_),'Eps',dzero,is_legal_ml_eps) call mld_check_def(p%dprcparm(mld_fact_thrs_),'Eps',dzero,is_legal_fact_thrs)
end select end select
call mld_check_def(p%dprcparm(mld_aggr_damp_),'omega',dzero,is_legal_omega) call mld_check_def(p%dprcparm(mld_aggr_damp_),'omega',dzero,is_legal_omega)
call mld_check_def(p%iprcparm(mld_smooth_sweeps_),'Jacobi sweeps',& call mld_check_def(p%iprcparm(mld_smooth_sweeps_),'Jacobi sweeps',&

@ -80,6 +80,7 @@ subroutine mld_zprecinit(p,ptype,info,nlev)
if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info) if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info)
if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info) if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info)
if (info /= 0) return if (info /= 0) return
p%baseprecv(ilev_)%iprcparm(:) = 0
p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_diag_ p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_diag_
p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_f_none_ p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_f_none_
p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_
@ -105,24 +106,25 @@ subroutine mld_zprecinit(p,ptype,info,nlev)
p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0
p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1 p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1
case ('ASM','AS') case ('AS')
nlev_ = 1 nlev_ = 1
ilev_ = 1 ilev_ = 1
allocate(p%baseprecv(nlev_),stat=info) allocate(p%baseprecv(nlev_),stat=info)
if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info) if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info)
if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info) if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info)
if (info /= 0) return if (info /= 0) return
p%baseprecv(ilev_)%iprcparm(:) = 0
p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_as_ p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_as_
p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_ilu_n_ p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_ilu_n_
p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_halo_ p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_halo_
p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_
p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0 p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0
p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 1 p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0
p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0
p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1 p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1
case ('MLD', 'ML') case ('ML')
if (present(nlev)) then if (present(nlev)) then
nlev_ = max(1,nlev) nlev_ = max(1,nlev)
@ -137,12 +139,13 @@ subroutine mld_zprecinit(p,ptype,info,nlev)
if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info) if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info)
if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info) if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info)
if (info /= 0) return if (info /= 0) return
p%baseprecv(ilev_)%iprcparm(:) = 0
p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_as_ p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_as_
p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_ilu_n_ p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_ilu_n_
p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_halo_ p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_halo_
p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_
p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0 p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0
p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 1 p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0
p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0
p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1 p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1
if (nlev_ == 1) return if (nlev_ == 1) return
@ -151,6 +154,7 @@ subroutine mld_zprecinit(p,ptype,info,nlev)
if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info) if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info)
if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info) if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info)
if (info /= 0) return if (info /= 0) return
p%baseprecv(ilev_)%iprcparm(:) = 0
p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_bjac_ p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_bjac_
p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_
p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_
@ -171,6 +175,7 @@ subroutine mld_zprecinit(p,ptype,info,nlev)
if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info) if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info)
if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info) if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info)
if (info /= 0) return if (info /= 0) return
p%baseprecv(ilev_)%iprcparm(:) = 0
p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_bjac_ p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_bjac_
p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_
p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_

@ -207,13 +207,15 @@ subroutine mld_zprecsetd(p,what,val,info,ilev)
select case(what) select case(what)
! Right now we don't have any at base level. Will change when ! Right now we don't have any at base level. Will change when
! we implement mld_ilu_t_ ! we implement mld_ilu_t_
case(mld_fact_thrs_)
p%baseprecv(ilev_)%dprcparm(what) = val
case default case default
write(0,*) 'Error: trying to call PRECSET with an invalid WHAT' write(0,*) 'Error: trying to call PRECSET with an invalid WHAT'
info = -2 info = -2
end select end select
else if (ilev_ > 1) then else if (ilev_ > 1) then
select case(what) select case(what)
case(mld_aggr_damp_) case(mld_aggr_damp_,mld_fact_thrs_)
p%baseprecv(ilev_)%dprcparm(what) = val p%baseprecv(ilev_)%dprcparm(what) = val
case default case default
write(0,*) 'Error: trying to call PRECSET with an invalid WHAT' write(0,*) 'Error: trying to call PRECSET with an invalid WHAT'

@ -22,6 +22,7 @@ all: df_bench zf_bench
df_bench.o zf_bench.o: getp.o df_bench.o zf_bench.o: getp.o
getp.o: precdata.o getp.o: precdata.o
df_bench: $(DFOBJS) df_bench: $(DFOBJS)
$(F90LINK) $(LINKOPT) $(DFOBJS) -o df_bench \ $(F90LINK) $(LINKOPT) $(DFOBJS) -o df_bench \
$(MLD_LIB) $(PSBLAS_LIB) $(LDLIBS) $(MLD_LIB) $(PSBLAS_LIB) $(LDLIBS)

@ -223,15 +223,19 @@ program df_bench
call mld_precset(pre,mld_coarse_mat_, precs(pp)%cmat, info,ilev=nlev) call mld_precset(pre,mld_coarse_mat_, precs(pp)%cmat, info,ilev=nlev)
call mld_precset(pre,mld_smooth_pos_, precs(pp)%smthpos, info,ilev=nlev) call mld_precset(pre,mld_smooth_pos_, precs(pp)%smthpos, info,ilev=nlev)
call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype2, info,ilev=nlev) call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype2, info,ilev=nlev)
call mld_precset(pre,mld_sub_fill_in_, precs(pp)%fill2, info,ilev=nlev)
call mld_precset(pre,mld_fact_thrs_, precs(pp)%thr2, info,ilev=nlev)
call mld_precset(pre,mld_smooth_sweeps_, precs(pp)%jswp, info,ilev=nlev) call mld_precset(pre,mld_smooth_sweeps_, precs(pp)%jswp, info,ilev=nlev)
call mld_precset(pre,mld_aggr_kind_, precs(pp)%smthkind,info,ilev=nlev) call mld_precset(pre,mld_aggr_kind_, precs(pp)%smthkind, info,ilev=nlev)
else else
call mld_precinit(pre,precs(pp)%lv1,info) call mld_precinit(pre,precs(pp)%lv1,info)
end if end if
call mld_precset(pre,mld_n_ovr_, precs(pp)%novr,info ,ilev=1) call mld_precset(pre,mld_n_ovr_, precs(pp)%novr, info,ilev=1)
call mld_precset(pre,mld_sub_restr_, precs(pp)%restr,info ,ilev=1) call mld_precset(pre,mld_sub_restr_, precs(pp)%restr, info,ilev=1)
call mld_precset(pre,mld_sub_prol_, precs(pp)%prol,info ,ilev=1) call mld_precset(pre,mld_sub_prol_, precs(pp)%prol, info,ilev=1)
call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype1,info ,ilev=1) call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype1, info,ilev=1)
call mld_precset(pre,mld_sub_fill_in_, precs(pp)%fill1, info,ilev=1)
call mld_precset(pre,mld_fact_thrs_, precs(pp)%thr1, info,ilev=1)
! setting initial guess to zero ! setting initial guess to zero
@ -293,7 +297,6 @@ program df_bench
& write(0,'(a20,2(1x,i3),1x,i5,3(1x,g9.4),1x,a8,1x,a)') & & write(0,'(a20,2(1x,i3),1x,i5,3(1x,g9.4),1x,a8,1x,a)') &
& mtrx(nm),np,precs(pp)%novr,iter,tprec,t2,t2+tprec,& & mtrx(nm),np,precs(pp)%novr,iter,tprec,t2,t2+tprec,&
& trim(cmethd),trim(precs(pp)%descr) & trim(cmethd),trim(precs(pp)%descr)
call flush(0)
if (nt.lt.ntry) call mld_precfree(pre,info) if (nt.lt.ntry) call mld_precfree(pre,info)
if((t2+tprec).lt.mttot) then if((t2+tprec).lt.mttot) then
mtslv=t2 mtslv=t2
@ -386,8 +389,6 @@ program df_bench
call psb_gefree(b_col, desc_a,info) call psb_gefree(b_col, desc_a,info)
call psb_gefree(x_col, desc_a,info) call psb_gefree(x_col, desc_a,info)
call psb_spfree(a, desc_a,info) call psb_spfree(a, desc_a,info)
write(0,*) 'Final cdfree'
call flush(0)
call psb_cdfree(desc_a,info) call psb_cdfree(desc_a,info)
deallocate(r_col,stat=info) deallocate(r_col,stat=info)
deallocate(aux_b,stat=info) deallocate(aux_b,stat=info)

@ -21,10 +21,10 @@ contains
type(precdata),allocatable :: precs(:) type(precdata),allocatable :: precs(:)
integer :: iret, istopc,itmax,itrace,ipart,nmat,nprecs,irst,irnum,ntry integer :: iret, istopc,itmax,itrace,ipart,nmat,nprecs,irst,irnum,ntry
character(len=1024) :: charbuf character(len=1024) :: charbuf
real(kind(1.d0)) :: eps, omega real(kind(1.d0)) :: eps, omega,thr1,thr2
character :: afmt*5, lv1*10, lv2*10, pdescr*40 character :: afmt*5, lv1*10, lv2*10, pdescr*40
integer :: iam, nm, np, i, idx integer :: iam, nm, np, i, idx
integer, parameter :: npparms=12 integer, parameter :: npparms=14
integer :: inparms(40), ip, pparms(npparms) integer :: inparms(40), ip, pparms(npparms)
call psb_info(icontxt,iam,np) call psb_info(icontxt,iam,np)
@ -81,7 +81,12 @@ contains
end do end do
idx=index(charbuf," ") idx=index(charbuf," ")
read(charbuf(1:idx),*) omega read(charbuf(1:idx),*) omega
charbuf=adjustl(charbuf(idx:))
idx=index(charbuf," ")
read(charbuf(1:idx),*) thr1
charbuf=adjustl(charbuf(idx:))
idx=index(charbuf," ")
read(charbuf(1:idx),*) thr2
charbuf=adjustl(charbuf(idx:)) charbuf=adjustl(charbuf(idx:))
read(charbuf,'(a)') pdescr read(charbuf,'(a)') pdescr
@ -92,6 +97,8 @@ contains
call psb_bcast(icontxt,lv2) call psb_bcast(icontxt,lv2)
call psb_bcast(icontxt,pparms(1:npparms),0) call psb_bcast(icontxt,pparms(1:npparms),0)
call psb_bcast(icontxt,omega,0) call psb_bcast(icontxt,omega,0)
call psb_bcast(icontxt,thr1,0)
call psb_bcast(icontxt,thr2,0)
precs(np)%lv1 = lv1 precs(np)%lv1 = lv1
precs(np)%lv2 = lv2 precs(np)%lv2 = lv2
@ -99,15 +106,19 @@ contains
precs(np)%restr = pparms(2) precs(np)%restr = pparms(2)
precs(np)%prol = pparms(3) precs(np)%prol = pparms(3)
precs(np)%ftype1 = pparms(4) precs(np)%ftype1 = pparms(4)
precs(np)%mltype = pparms(5) precs(np)%fill1 = pparms(5)
precs(np)%aggr = pparms(6) precs(np)%mltype = pparms(6)
precs(np)%smthkind = pparms(7) precs(np)%aggr = pparms(7)
precs(np)%cmat = pparms(8) precs(np)%smthkind = pparms(8)
precs(np)%smthpos = pparms(9) precs(np)%cmat = pparms(9)
precs(np)%ftype2 = pparms(10) precs(np)%smthpos = pparms(10)
precs(np)%jswp = pparms(11) precs(np)%ftype2 = pparms(11)
precs(np)%nlev = pparms(12) precs(np)%fill2 = pparms(12)
precs(np)%jswp = pparms(13)
precs(np)%nlev = pparms(14)
precs(np)%omega = omega precs(np)%omega = omega
precs(np)%thr1 = thr1
precs(np)%thr2 = thr2
end do end do
read(*,*) nmat read(*,*) nmat
@ -154,6 +165,8 @@ contains
call psb_bcast(icontxt,pparms(1:npparms)) call psb_bcast(icontxt,pparms(1:npparms))
call psb_bcast(icontxt,omega) call psb_bcast(icontxt,omega)
call psb_bcast(icontxt,thr1,0)
call psb_bcast(icontxt,thr2,0)
precs(np)%lv1 = lv1 precs(np)%lv1 = lv1
precs(np)%lv2 = lv2 precs(np)%lv2 = lv2
@ -161,15 +174,19 @@ contains
precs(np)%restr = pparms(2) precs(np)%restr = pparms(2)
precs(np)%prol = pparms(3) precs(np)%prol = pparms(3)
precs(np)%ftype1 = pparms(4) precs(np)%ftype1 = pparms(4)
precs(np)%mltype = pparms(5) precs(np)%fill1 = pparms(5)
precs(np)%aggr = pparms(6) precs(np)%mltype = pparms(6)
precs(np)%smthkind = pparms(7) precs(np)%aggr = pparms(7)
precs(np)%cmat = pparms(8) precs(np)%smthkind = pparms(8)
precs(np)%smthpos = pparms(9) precs(np)%cmat = pparms(9)
precs(np)%ftype2 = pparms(10) precs(np)%smthpos = pparms(10)
precs(np)%jswp = pparms(11) precs(np)%ftype2 = pparms(11)
precs(np)%nlev = pparms(12) precs(np)%fill2 = pparms(12)
precs(np)%jswp = pparms(13)
precs(np)%nlev = pparms(14)
precs(np)%omega = omega precs(np)%omega = omega
precs(np)%thr1 = thr1
precs(np)%thr2 = thr2
end do end do

@ -7,6 +7,8 @@ module precd
integer :: restr ! restriction over application of as integer :: restr ! restriction over application of as
integer :: prol ! prolongation over application of as integer :: prol ! prolongation over application of as
integer :: ftype1 ! Factorization type: ILU, SuperLU, UMFPACK. integer :: ftype1 ! Factorization type: ILU, SuperLU, UMFPACK.
integer :: fill1 ! Fill-in for factorization 1
real(kind(1.d0)) :: thr1 ! Threshold for fact. 1 ILU(T)
integer :: mltype ! additive or multiplicative 2nd level prec integer :: mltype ! additive or multiplicative 2nd level prec
integer :: aggr ! local or global aggregation integer :: aggr ! local or global aggregation
integer :: smthkind ! smoothing type integer :: smthkind ! smoothing type
@ -14,7 +16,9 @@ module precd
integer :: smthpos ! pre, post, both smoothing integer :: smthpos ! pre, post, both smoothing
integer :: glbsmth ! global smoothing integer :: glbsmth ! global smoothing
integer :: ftype2 ! Factorization type: ILU, SuperLU, UMFPACK. integer :: ftype2 ! Factorization type: ILU, SuperLU, UMFPACK.
integer :: jswp ! jacobi sweeps integer :: fill2 ! Fill-in for factorization 1
real(kind(1.d0)) :: thr2 ! Threshold for fact. 1 ILU(T)
integer :: jswp ! Jacobi sweeps
real(kind(1.d0)) :: omega ! smoother omega real(kind(1.d0)) :: omega ! smoother omega
character(len=40) :: descr ! verbose description of the prec character(len=40) :: descr ! verbose description of the prec
end type precdata end type precdata

@ -29,37 +29,37 @@ CSR Matrix format
30 IRST Restart parameter for GMRES and BiCGSTAB(L) 30 IRST Restart parameter for GMRES and BiCGSTAB(L)
0 RENUM: 0: none 1: global indices (2: GPS band reduction) 0 RENUM: 0: none 1: global indices (2: GPS band reduction)
$ntry NTRY for each comb. print out best timings $ntry NTRY for each comb. print out best timings
30 NPRCS/nov rstr prol fty1 mlty agg smth cm smp fty2 jswp nl omg 30 NPRCS nov rst prl fc1 fl1 mlt agg smt cm smp ft2 fl2 jsw nl omg th1 th2 name
noprec none 0 0 0 0 0 0 0 0 0 0 0 1 -1.0 NOPREC none none 0 0 0 0 0 0 0 0 0 0 0 0 0 1 -1.0 1e-4 1e-4 NOPREC
diag none 0 0 0 1 2 0 1 0 2 1 4 1 -1.0 DIAG diag none 0 0 0 1 0 2 0 1 0 2 1 0 4 1 -1.0 1e-4 1e-4 DIAG
bjac none 0 0 0 1 2 0 1 0 2 1 4 1 -1.0 BJAC bjac none 0 0 0 1 0 2 0 1 0 2 1 0 4 1 -1.0 1e-4 1e-4 BJAC
as none 0 1 0 1 2 0 1 0 2 1 4 1 -1.0 RAS as none 0 1 0 1 0 2 0 1 0 2 1 0 4 1 -1.0 1e-4 1e-4 RAS
as none 1 1 0 1 2 0 1 0 2 1 4 1 -1.0 RAS as none 1 1 0 1 0 2 0 1 0 2 1 0 4 1 -1.0 1e-4 1e-4 RAS
as none 2 1 0 1 2 0 1 0 2 1 4 1 -1.0 RAS as none 2 1 0 1 0 2 0 1 0 2 1 0 4 1 -1.0 1e-4 1e-4 RAS
as ml 0 1 0 1 2 0 1 0 2 1 4 2 -1.0 2L-M-RAS-I-D4 as ml 0 1 0 1 0 2 0 1 0 2 1 0 4 2 -1.0 1e-4 1e-4 2L-M-RAS-I-D4
as ml 1 1 0 1 2 0 1 0 2 1 4 2 -1.0 2L-M-RAS-I-D4 as ml 1 1 0 1 0 2 0 1 0 2 1 0 4 2 -1.0 1e-4 1e-4 2L-M-RAS-I-D4
as ml 2 1 0 1 2 0 1 0 2 1 4 2 -1.0 2L-M-RAS-I-D4 as ml 2 1 0 1 0 2 0 1 0 2 1 0 4 2 -1.0 1e-4 1e-4 2L-M-RAS-I-D4
as ml 0 1 0 1 2 0 1 0 2 5 4 2 -1.0 2L-M-RAS-U-D4 as ml 0 1 0 1 0 2 0 1 0 2 5 0 4 2 -1.0 1e-4 1e-4 2L-M-RAS-U-D4
as ml 1 1 0 1 2 0 1 0 2 5 4 2 -1.0 2L-M-RAS-U-D4 as ml 1 1 0 1 0 2 0 1 0 2 5 0 4 2 -1.0 1e-4 1e-4 2L-M-RAS-U-D4
as ml 2 1 0 1 2 0 1 0 2 5 4 2 -1.0 2L-M-RAS-U-D4 as ml 2 1 0 1 0 2 0 1 0 2 5 0 4 2 -1.0 1e-4 1e-4 2L-M-RAS-U-D4
as ml 0 1 0 1 2 0 1 0 2 1 4 3 -1.0 3L-M-RAS-I-D4 as ml 0 1 0 1 0 2 0 1 0 2 1 0 4 3 -1.0 1e-4 1e-4 3L-M-RAS-I-D4
as ml 1 1 0 1 2 0 1 0 2 1 4 3 -1.0 3L-M-RAS-I-D4 as ml 1 1 0 1 0 2 0 1 0 2 1 0 4 3 -1.0 1e-4 1e-4 3L-M-RAS-I-D4
as ml 2 1 0 1 2 0 1 0 2 1 4 3 -1.0 3L-M-RAS-I-D4 as ml 2 1 0 1 0 2 0 1 0 2 1 0 4 3 -1.0 1e-4 1e-4 3L-M-RAS-I-D4
as ml 0 1 0 1 2 0 1 0 2 5 4 3 -1.0 3L-M-RAS-U-D4 as ml 0 1 0 1 0 2 0 1 0 2 5 0 4 3 -1.0 1e-4 1e-4 3L-M-RAS-U-D4
as ml 1 1 0 1 2 0 1 0 2 5 4 3 -1.0 3L-M-RAS-U-D4 as ml 1 1 0 1 0 2 0 1 0 2 5 0 4 3 -1.0 1e-4 1e-4 3L-M-RAS-U-D4
as ml 2 1 0 1 2 0 1 0 2 5 4 3 -1.0 3L-M-RAS-U-D4 as ml 2 1 0 1 0 2 0 1 0 2 5 0 4 3 -1.0 1e-4 1e-4 3L-M-RAS-U-D4
as ml 0 1 0 1 2 0 1 1 2 1 1 2 -1.0 2L-M-RAS-I-R as ml 0 1 0 1 0 2 0 1 1 2 1 0 1 2 -1.0 1e-4 1e-4 2L-M-RAS-I-R
as ml 1 1 0 1 2 0 1 1 2 1 1 2 -1.0 2L-M-RAS-I-R as ml 1 1 0 1 0 2 0 1 1 2 1 0 1 2 -1.0 1e-4 1e-4 2L-M-RAS-I-R
as ml 2 1 0 1 2 0 1 1 2 1 1 2 -1.0 2L-M-RAS-I-R as ml 2 1 0 1 0 2 0 1 1 2 1 0 1 2 -1.0 1e-4 1e-4 2L-M-RAS-I-R
as ml 0 1 0 1 2 0 1 1 2 5 1 2 -1.0 2L-M-RAS-U-R as ml 0 1 0 1 0 2 0 1 1 2 5 0 1 2 -1.0 1e-4 1e-4 2L-M-RAS-U-R
as ml 1 1 0 1 2 0 1 1 2 5 1 2 -1.0 2L-M-RAS-U-R as ml 1 1 0 1 0 2 0 1 1 2 5 0 1 2 -1.0 1e-4 1e-4 2L-M-RAS-U-R
as ml 2 1 0 1 2 0 1 1 2 5 1 2 -1.0 2L-M-RAS-U-R as ml 2 1 0 1 0 2 0 1 1 2 5 0 1 2 -1.0 1e-4 1e-4 2L-M-RAS-U-R
as ml 0 1 0 1 2 0 1 1 2 1 1 3 -1.0 3L-M-RAS-I-R as ml 0 1 0 1 0 2 0 1 1 2 1 0 1 3 -1.0 1e-4 1e-4 3L-M-RAS-I-R
as ml 1 1 0 1 2 0 1 1 2 1 1 3 -1.0 3L-M-RAS-I-R as ml 1 1 0 1 0 2 0 1 1 2 1 0 1 3 -1.0 1e-4 1e-4 3L-M-RAS-I-R
as ml 2 1 0 1 2 0 1 1 2 1 1 3 -1.0 3L-M-RAS-I-R as ml 2 1 0 1 0 2 0 1 1 2 1 0 1 3 -1.0 1e-4 1e-4 3L-M-RAS-I-R
as ml 0 1 0 1 2 0 1 1 2 5 1 3 -1.0 3L-M-RAS-U-R as ml 0 1 0 1 0 2 0 1 1 2 5 0 1 3 -1.0 1e-4 1e-4 3L-M-RAS-U-R
as ml 1 1 0 1 2 0 1 1 2 5 1 3 -1.0 3L-M-RAS-U-R as ml 1 1 0 1 0 2 0 1 1 2 5 0 1 3 -1.0 1e-4 1e-4 3L-M-RAS-U-R
as ml 2 1 0 1 2 0 1 1 2 5 1 3 -1.0 3L-M-RAS-U-R as ml 2 1 0 1 0 2 0 1 1 2 5 0 1 3 -1.0 1e-4 1e-4 3L-M-RAS-U-R
2 Number of matrices 2 Number of matrices
thm50x30.mtx none thm50x30.mtx none
thm200x120.mtx none thm200x120.mtx none

@ -35,8 +35,8 @@ program zf_bench
logical :: amroot, out1, out2 logical :: amroot, out1, out2
! solver paramters ! solver paramters
integer :: iter, itmax, ierr, itrace, ircode, ipart,& integer :: iter, itmax, ierr, itrace, ircode, ipart,nlev,&
& methd, istopc, iprec, ml, irnum, irst, ntry, nmat, ilev,nlev & methd, istopc, iprec, ml, irnum, irst, ntry, nmat, ilev,ipsize,asize,cdsize
real(kind(1.d0)) :: err, eps real(kind(1.d0)) :: err, eps
character(len=5) :: afmt character(len=5) :: afmt
@ -208,16 +208,19 @@ program zf_bench
call mld_precset(pre,mld_coarse_mat_, precs(pp)%cmat, info,ilev=nlev) call mld_precset(pre,mld_coarse_mat_, precs(pp)%cmat, info,ilev=nlev)
call mld_precset(pre,mld_smooth_pos_, precs(pp)%smthpos, info,ilev=nlev) call mld_precset(pre,mld_smooth_pos_, precs(pp)%smthpos, info,ilev=nlev)
call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype2, info,ilev=nlev) call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype2, info,ilev=nlev)
call mld_precset(pre,mld_sub_fill_in_, precs(pp)%fill2, info,ilev=nlev)
call mld_precset(pre,mld_fact_thrs_, precs(pp)%thr2, info,ilev=nlev)
call mld_precset(pre,mld_smooth_sweeps_, precs(pp)%jswp, info,ilev=nlev) call mld_precset(pre,mld_smooth_sweeps_, precs(pp)%jswp, info,ilev=nlev)
call mld_precset(pre,mld_aggr_kind_, precs(pp)%smthkind,info,ilev=nlev) call mld_precset(pre,mld_aggr_kind_, precs(pp)%smthkind, info,ilev=nlev)
else else
call mld_precinit(pre,precs(pp)%lv1,info) call mld_precinit(pre,precs(pp)%lv1,info)
end if end if
call mld_precset(pre,mld_n_ovr_, precs(pp)%novr,info ,ilev=1) call mld_precset(pre,mld_n_ovr_, precs(pp)%novr, info,ilev=1)
call mld_precset(pre,mld_sub_restr_, precs(pp)%restr,info ,ilev=1) call mld_precset(pre,mld_sub_restr_, precs(pp)%restr, info,ilev=1)
call mld_precset(pre,mld_sub_prol_, precs(pp)%prol,info ,ilev=1) call mld_precset(pre,mld_sub_prol_, precs(pp)%prol, info,ilev=1)
call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype1,info ,ilev=1) call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype1, info,ilev=1)
call mld_precset(pre,mld_sub_fill_in_, precs(pp)%fill1, info,ilev=1)
call mld_precset(pre,mld_fact_thrs_, precs(pp)%thr1, info,ilev=1)
! setting initial guess to zero ! setting initial guess to zero
@ -248,8 +251,8 @@ program zf_bench
call psb_barrier(ictxt) call psb_barrier(ictxt)
t1 = psb_wtime() t1 = psb_wtime()
call psb_krylov(cmethd,a,pre,b_col,x_col,eps,desc_a,info,& call psb_krylov(cmethd,a,pre,b_col,x_col,eps,desc_a,info,&
& itmax,iter,err,itrace,irst=ml,istop=istopc) & itmax=itmax,iter=iter,err=err,itrace=itrace,&
& irst=irst,istop=istopc)
call psb_barrier(ictxt) call psb_barrier(ictxt)
t2 = psb_wtime() - t1 t2 = psb_wtime() - t1
call psb_amx(ictxt,t2) call psb_amx(ictxt,t2)
@ -263,15 +266,14 @@ program zf_bench
!!$ call flush(6) !!$ call flush(6)
!!$ call psb_barrier(ictxt) !!$ call psb_barrier(ictxt)
if(amroot.and.out2) & if(amroot.and.out2) &
& write(10,'(a20,2(1x,i3),1x,i5,3(1x,g9.4),1x,a)') & & write(10,'(a20,2(1x,i3),1x,i5,3(1x,g9.4),1x,a8,1x,a)') &
& mtrx(nm),np,precs(pp)%novr,iter,tprec,t2,t2+tprec,& & mtrx(nm),np,precs(pp)%novr,iter,tprec,t2,t2+tprec,&
& trim(precs(pp)%descr) & trim(cmethd),trim(precs(pp)%descr)
if(amroot) & if(amroot) &
& write(0,'(a20,2(1x,i3),1x,i5,3(1x,g9.4),1x,a)') & & write(0,'(a20,2(1x,i3),1x,i5,3(1x,g9.4),1x,a8,1x,a)') &
& mtrx(nm),np,precs(pp)%novr,iter,tprec,t2,t2+tprec,& & mtrx(nm),np,precs(pp)%novr,iter,tprec,t2,t2+tprec,&
& trim(precs(pp)%descr) & trim(cmethd),trim(precs(pp)%descr)
if (nt.lt.ntry) call mld_precfree(pre,info)
if(nt.lt.ntry) call mld_precfree(pre,info)
if((t2+tprec).lt.mttot) then if((t2+tprec).lt.mttot) then
mtslv=t2 mtslv=t2
mtprec=tprec mtprec=tprec
@ -286,6 +288,13 @@ program zf_bench
call psb_genrm2s(resmx,r_col,desc_a,info) call psb_genrm2s(resmx,r_col,desc_a,info)
call psb_geamaxs(resmxp,r_col,desc_a,info) call psb_geamaxs(resmxp,r_col,desc_a,info)
ipsize = mld_sizeof(pre)
asize = psb_sizeof(a)
cdsize = psb_sizeof(desc_a)
call psb_sum(ictxt,ipsize)
call psb_sum(ictxt,asize)
call psb_sum(ictxt,cdsize)
if (amroot) then if (amroot) then
write(*,'("Matrix : ",a)') mtrx(nm) write(*,'("Matrix : ",a)') mtrx(nm)
write(*,'("RHS : ",a)') rhs(nm) write(*,'("RHS : ",a)') rhs(nm)
@ -302,14 +311,19 @@ program zf_bench
write(*,'("Total time : ",es10.4)')mttot write(*,'("Total time : ",es10.4)')mttot
write(*,'("Residual norm 2 : ",es10.4)')resmx write(*,'("Residual norm 2 : ",es10.4)')resmx
write(*,'("Residual norm inf : ",es10.4)')resmxp write(*,'("Residual norm inf : ",es10.4)')resmxp
write(*,'("Total memory occupation for A: ",i10)')asize
write(*,'("Total memory occupation for DESC_A: ",i10)')cdsize
write(*,'("Total memory occupation for PRE: ",i10)')ipsize
write(*,'(72("="))') write(*,'(72("="))')
write(*,'(" ")') write(*,'(" ")')
write(*,'(" ")') write(*,'(" ")')
write(*,'(" ")') write(*,'(" ")')
if(out1) write(8,'(a20,2(1x,i3),1x,i5,5(1x,g9.4),1x,a)') mtrx(nm),& if(out1) write(8,'(a20,2(1x,i3),1x,i5,5(1x,g9.4),1x,a8,1x,a)') mtrx(nm),&
& np,precs(pp)%novr,& & np,precs(pp)%novr,&
& iter,mtprec,mtslv,mttot,resmx,resmxp,trim(precs(pp)%descr) & iter,mtprec,mtslv,mttot,resmx,resmxp,&
& trim(cmethd),trim(precs(pp)%descr)
end if end if
call mld_precfree(pre,info) call mld_precfree(pre,info)

Loading…
Cancel
Save