From 1020eacd6f2df3f747078229d5be46217b1a01f2 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 17 Oct 2007 08:36:16 +0000 Subject: [PATCH] Merged ilu(k) branch. Branch still open to play with ILU(P,T). --- Changelog | 11 +- mlprec/Makefile | 1 + mlprec/mld_dbjac_bld.f90 | 4 - mlprec/mld_dilu_bld.f90 | 41 ++- mlprec/mld_dilu_fct.f90 | 308 ++++++++--------------- mlprec/mld_diluk_fct.f90 | 500 +++++++++++++++++++++++++++++++++++++ mlprec/mld_dmlprec_bld.f90 | 2 +- mlprec/mld_dprecinit.f90 | 20 +- mlprec/mld_dprecset.f90 | 4 +- mlprec/mld_prec_mod.f90 | 21 ++ mlprec/mld_prec_type.f90 | 92 +++++-- mlprec/mld_zilu_bld.f90 | 37 ++- mlprec/mld_zilu_fct.f90 | 296 ++++++++-------------- mlprec/mld_ziluk_fct.f90 | 500 +++++++++++++++++++++++++++++++++++++ mlprec/mld_zmlprec_bld.f90 | 2 +- mlprec/mld_zprecinit.f90 | 115 +++++---- mlprec/mld_zprecset.f90 | 4 +- test/fileread/Makefile | 1 + test/fileread/df_bench.f90 | 29 +-- test/fileread/getp.f90 | 55 ++-- test/fileread/precdata.f90 | 6 +- test/fileread/runs/drt.sh | 62 ++--- test/fileread/zf_bench.f90 | 62 +++-- 23 files changed, 1562 insertions(+), 611 deletions(-) create mode 100644 mlprec/mld_diluk_fct.f90 create mode 100644 mlprec/mld_ziluk_fct.f90 diff --git a/Changelog b/Changelog index 297e9524..c19da326 100644 --- a/Changelog +++ b/Changelog @@ -1,7 +1,14 @@ Changelog. A lot less detailed than usual, at least for past history. -2007/10/12: Renamed all constants with MLD_ prefix. +2007/10/17: Merged ILU(K) into trunk. + +2007/10/16: Fixed ILU(K), it now performs satisfactorily. Also updated + ILU(0) to be more legible. + +2007/10/11: First working version of ILU(K). Still slow, there should + be room for improvement. + 2007/10/09: Added benchmark code. 2007/10/09: Added MILU_N_. Beware: values for UMF_ etc. have been @@ -11,4 +18,4 @@ history. PSB_KRYLOV_MOD. 2007/10/01: Start of this changelog. MLD2P4 now has a different - structure, to enable a build not embedded in PSBLAS. \ No newline at end of file + structure, to enable a build not embedded in PSBLAS. diff --git a/mlprec/Makefile b/mlprec/Makefile index 5bbf59f2..e3a8cbc1 100644 --- a/mlprec/Makefile +++ b/mlprec/Makefile @@ -22,6 +22,7 @@ F90OBJS=mld_dasmat_bld.o mld_dslu_bld.o mld_dumf_bld.o mld_dilu_fct.o\ mld_zbaseprec_bld.o mld_zdiagsc_bld.o mld_zaggrmap_bld.o \ mld_zprec_aply.o mld_zmlprec_aply.o mld_zslud_bld.o\ mld_zbaseprec_aply.o mld_zbjac_aply.o mld_zaggrmat_asb.o\ + mld_diluk_fct.o mld_ziluk_fct.o \ $(MPFOBJS) COBJS=mld_slu_impl.o mld_umf_impl.o mld_zslu_impl.o mld_zumf_impl.o OBJS=$(F90OBJS) $(COBJS) $(MPFOBJS) $(MPCOBJS) $(MODOBJS) diff --git a/mlprec/mld_dbjac_bld.f90 b/mlprec/mld_dbjac_bld.f90 index 29e91705..25772274 100644 --- a/mlprec/mld_dbjac_bld.f90 +++ b/mlprec/mld_dbjac_bld.f90 @@ -104,7 +104,6 @@ subroutine mld_dbjac_bld(a,desc_a,p,upd,info) call psb_nullify_sp(blck) call psb_nullify_sp(atmp) - t1= psb_wtime() if(debug) write(0,*)me,': calling mld_asmat_bld',& & p%iprcparm(mld_prec_type_),p%iprcparm(mld_n_ovr_) @@ -126,7 +125,6 @@ subroutine mld_dbjac_bld(a,desc_a,p,upd,info) goto 9999 end if - t2= psb_wtime() if (debug) write(0,*)me,': out of mld_asmat_bld' if (debug) call psb_barrier(ictxt) @@ -170,7 +168,6 @@ subroutine mld_dbjac_bld(a,desc_a,p,upd,info) end if - t3 = psb_wtime() if (debugprt) then call psb_barrier(ictxt) open(40+me) @@ -499,7 +496,6 @@ subroutine mld_dbjac_bld(a,desc_a,p,upd,info) end select - t6 = psb_wtime() call psb_sp_free(blck,info) if(info/=0) then diff --git a/mlprec/mld_dilu_bld.f90 b/mlprec/mld_dilu_bld.f90 index e2919f7f..eee14805 100644 --- a/mlprec/mld_dilu_bld.f90 +++ b/mlprec/mld_dilu_bld.f90 @@ -145,12 +145,37 @@ subroutine mld_dilu_bld(a,desc_a,p,upd,info,blck) endif - ! - ! Ok, factor the matrix. - ! - t5 = psb_wtime() - call mld_ilu_fct(p%iprcparm(mld_sub_solve_),a,p%av(mld_l_pr_),p%av(mld_u_pr_),& - & p%d,info,blck=blck) + + select case(p%iprcparm(mld_sub_solve_)) + + case(mld_ilu_n_,mld_milu_n_,mld_ilu_t_) + ! + ! Need to decide what to do with ILU(T). + ! Should we pack the functionality together with ILU(K)? + ! Maybe for mental sanity, a separate routine would be better. + ! + ! Decide what to do with MILU(K) + + ! + ! Ok, factor the matrix. + ! + select case(p%iprcparm(mld_sub_fill_in_)) + case(:-1) + ! This is an error. + call psb_errpush(30,name,i_err=(/3,p%iprcparm(mld_sub_fill_in_),0,0,0/)) + goto 9999 + case(0) + call mld_ilu_fct(p%iprcparm(mld_sub_solve_),a,p%av(mld_l_pr_),p%av(mld_u_pr_),& + & p%d,info,blck=blck) + case(1:) + call mld_iluk_fct(p%iprcparm(mld_sub_fill_in_),p%iprcparm(mld_sub_solve_),& + & a,p%av(mld_l_pr_),p%av(mld_u_pr_),p%d,info,blck=blck) + end select + case default + ! If we end up here, something was wrong up in the call chain. + call psb_errpush(4000,name) + goto 9999 + end select if(info/=0) then info=4010 ch_err='mld_ilu_fct' @@ -179,10 +204,7 @@ subroutine mld_dilu_bld(a,desc_a,p,upd,info,blck) ! ierr = MPE_Log_event( ifcte, 0, "st SIMPLE" ) - t6 = psb_wtime() ! - ! write(0,'(i3,1x,a,3(1x,g18.9))') me,'renum/factor time',t3-t2,t6-t5 - ! if (me==0) write(0,'(a,3(1x,g18.9))') 'renum/factor time',t3-t2,t6-t5 if (psb_sp_getifld(psb_upd_,p%av(mld_u_pr_),info) /= psb_upd_perm_) then call psb_sp_trim(p%av(mld_u_pr_),info) @@ -192,7 +214,6 @@ subroutine mld_dilu_bld(a,desc_a,p,upd,info,blck) call psb_sp_trim(p%av(mld_l_pr_),info) endif - if (debug) write(0,*) me,'End of ilu_bld' call psb_erractionrestore(err_act) diff --git a/mlprec/mld_dilu_fct.f90 b/mlprec/mld_dilu_fct.f90 index 295c356c..65458ecb 100644 --- a/mlprec/mld_dilu_fct.f90 +++ b/mlprec/mld_dilu_fct.f90 @@ -167,72 +167,22 @@ contains if(debug) write(0,*)'LUINT Done Allocating TRW' lia2(1) = 1 uia2(1) = 1 - l1=0 - l2=0 + l1 = 0 + l2 = 0 m = ma+mb if(debug) write(0,*)'In DCSRLU Begin cycle',m,ma,mb - do i = 1, ma + do i = 1, m if(debug) write(0,*)'LUINT: Loop index ',i,ma - d(i) = 0.d0 - - ! - ! Here we take a fast shortcut if possible, otherwise - ! use spgtblk, slower but able (in principle) to handle - ! anything. - ! - if (toupper(a%fida)=='CSR') then - do j = a%ia2(i), a%ia2(i+1) - 1 - k = a%ia1(j) - ! write(0,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = a%aspk(j) - lia1(l1) = k - else if (k == i) then - d(i) = a%aspk(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = a%aspk(j) - uia1(l2) = k - end if - enddo + d(i) = dzero + if (i <= ma) then + call ilu_copyin(i,ma,a,i,m,l1,lia1,lia2,laspk,& + & d(i),l2,uia1,uia2,uaspk,ktrw,trw) else - - if ((mod(i,nrb) == 1).or.(nrb==1)) then - irb = min(ma-i+1,nrb) - call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1) - if(info.ne.0) then - info=4010 - ch_err='psb_sp_getblk' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > trw%infoa(psb_nnz_)) exit - if (trw%ia1(ktrw) > i) exit - k = trw%ia2(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%aspk(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%aspk(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%aspk(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - - end if - -!!$ + call ilu_copyin(i-ma,mb,b,i,m,l1,lia1,lia2,laspk,& + & d(i),l2,uia1,uia2,uaspk,ktrw,trw) + endif lia2(i+1) = l1 + 1 uia2(i+1) = l2 + 1 @@ -300,17 +250,17 @@ contains ! ! Non singularity ! - if (dabs(dia) < epstol) then + if (abs(dia) < epstol) then ! ! Pivot too small: unstable factorization ! info = 2 int_err(1) = i - write(ch_err,'(g20.10)') dia + write(ch_err,'(g20.10)') abs(dia) call psb_errpush(info,name,i_err=int_err,a_err=ch_err) goto 9999 else - dia = 1.d0/dia + dia = done/dia end if d(i) = dia ! write(6,*)'diag(',i,')=',d(i) @@ -320,149 +270,6 @@ contains enddo enddo - do i = ma+1, m - d(i) = 0.d0 - - - if (toupper(b%fida)=='CSR') then - - do j = b%ia2(i-ma), b%ia2(i-ma+1) - 1 - k = b%ia1(j) - ! if (me.eq.2) write(0,*)'ecco k=',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = b%aspk(j) - lia1(l1) = k - ! if(me.eq.2) write(0,*)'scrivo l' - else if (k == i) then - d(i) = b%aspk(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = b%aspk(j) - ! write(0,*)'KKKKK',k - uia1(l2) = k - end if - enddo - - else - - if ((mod((i-ma),nrb) == 1).or.(nrb==1)) then - irb = min(m-i+1,nrb) - call psb_sp_getblk(i-ma,b,trw,info,lrw=i-ma+irb-1) - if(info.ne.0) then - info=4010 - ch_err='psb_sp_getblk' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > trw%infoa(psb_nnz_)) exit - if (trw%ia1(ktrw) > i) exit - k = trw%ia2(ktrw) - ! write(0,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%aspk(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%aspk(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%aspk(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - - endif - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! If we get here we missed the cycle updateloop, - ! which means that this entry does not match; thus - ! we take it out of diagonal for MILU. - ! - if (ialg == mld_milu_n_) then - dia = dia - temp*uaspk(jj) - end if - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (dabs(dia) < epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') dia - info = 2 - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = 1.d0/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - call psb_sp_free(trw,info) if(info.ne.0) then info=4010 @@ -483,4 +290,93 @@ contains end if return end subroutine mld_dilu_fctint + + subroutine ilu_copyin(i,m,a,jd,jmax,l1,lia1,lia2,laspk,& + & dia,l2,uia1,uia2,uaspk,ktrw,trw) + use psb_base_mod + implicit none + type(psb_dspmat_type) :: a,trw + integer :: i,m,ktrw,jd,jmax,l1,l2 + integer :: lia1(:),lia2(:),uia1(:),uia2(:) + real(kind(1.d0)) :: laspk(:), uaspk(:), dia + type(psb_int_heap) :: heap + + integer :: k,j,info,irb + integer, parameter :: nrb=16 + character(len=20), parameter :: name='mld_dilu_fctint' + character(len=20) :: ch_err + + if (psb_get_errstatus() /= 0) return + info=0 + call psb_erractionsave(err_act) + + ! + ! Here we take a fast shortcut if possible, otherwise + ! use spgtblk, slower but able (in principle) of + ! handling anything. + ! + + if (toupper(a%fida)=='CSR') then + do j = a%ia2(i), a%ia2(i+1) - 1 + k = a%ia1(j) + ! write(0,*)'KKKKK',k + if ((k < jd).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = a%aspk(j) + lia1(l1) = k + else if (k == jd) then + dia = a%aspk(j) + else if ((k > jd).and.(k <= jmax)) then + l2 = l2 + 1 + uaspk(l2) = a%aspk(j) + uia1(l2) = k + end if + enddo + + else + + if ((mod(i,nrb) == 1).or.(nrb==1)) then + irb = min(m-i+1,nrb) + call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1) + if(info.ne.0) then + info=4010 + ch_err='psb_sp_getblk' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + ktrw=1 + end if + + do + if (ktrw > trw%infoa(psb_nnz_)) exit + if (trw%ia1(ktrw) > i) exit + k = trw%ia2(ktrw) + if ((k < jd).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = trw%aspk(ktrw) + lia1(l1) = k + else if (k == jd) then + dia = trw%aspk(ktrw) + else if ((k > jd).and.(k <= jmax)) then + l2 = l2 + 1 + uaspk(l2) = trw%aspk(ktrw) + uia1(l2) = k + end if + ktrw = ktrw + 1 + enddo + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + return + end subroutine ilu_copyin + end subroutine mld_dilu_fct diff --git a/mlprec/mld_diluk_fct.f90 b/mlprec/mld_diluk_fct.f90 new file mode 100644 index 00000000..9afd3dc0 --- /dev/null +++ b/mlprec/mld_diluk_fct.f90 @@ -0,0 +1,500 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine mld_diluk_fct(fill_in,ialg,a,l,u,d,info,blck) + + ! + ! This routine copies and factors "on the fly" from A and BLCK + ! into L/D/U. + ! + ! + use psb_base_mod + use mld_prec_mod, mld_protect_name => mld_diluk_fct + implicit none + ! .. Scalar Arguments .. + integer, intent(in) :: fill_in, ialg + integer, intent(out) :: info + ! .. Array Arguments .. + type(psb_dspmat_type),intent(in) :: a + type(psb_dspmat_type),intent(inout) :: l,u + type(psb_dspmat_type),intent(in), optional, target :: blck + real(kind(1.d0)), intent(inout) :: d(:) + ! .. Local Scalars .. + real(kind(1.d0)) :: dia, temp + integer :: i, j, jj, k, kk, l1, l2, ll, low1, low2,m,ma,err_act + + type(psb_dspmat_type), pointer :: blck_ + character(len=20) :: name, ch_err + logical, parameter :: debug=.false. + + name='mld_diluk_fct' + info = 0 + call psb_erractionsave(err_act) + ! .. Executable Statements .. + ! + if (debug) write(0,*) 'mld_diluk_fct: start' + if (present(blck)) then + blck_ => blck + else + allocate(blck_,stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + call psb_sp_all(0,0,blck_,1,info) + if (info /= 0) then + info=4010 + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + endif + if (debug) write(0,*) 'mld_diluk_fct: calling fctint' + call mld_diluk_fctint(fill_in,ialg,m,a%m,a,blck_%m,blck_,& + & d,l%aspk,l%ia1,l%ia2,u%aspk,u%ia1,u%ia2,l1,l2,info) + if (info /= 0) then + info=4010 + ch_err='mld_diluk_fctint' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + l%infoa(1) = l1 + l%fida = 'CSR' + l%descra = 'TLU' + u%infoa(1) = l2 + u%fida = 'CSR' + u%descra = 'TUU' + l%m = m + l%k = m + u%m = m + u%k = m + if (present(blck)) then + blck_ => null() + else + call psb_sp_free(blck_,info) + if (info /= 0) then + info=4010 + ch_err='psb_sp_free' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + deallocate(blck_) + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + return + +contains + subroutine mld_diluk_fctint(fill_in,ialg,m,ma,a,mb,b,& + & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) + use psb_base_mod + implicit none + integer, intent(in) :: fill_in, ialg + type(psb_dspmat_type) :: a,b + integer :: m,ma,mb,l1,l2,info + integer, dimension(:), allocatable :: lia1,lia2,uia1,uia2 + real(kind(1.d0)), dimension(:), allocatable :: laspk,uaspk + real(kind(1.d0)), dimension(:) :: d + + integer :: i,j,k,l,low1,low2,kk,jj,ll, ktrw,err_act, & + & isz,minj,maxj,lrwk,nidx + real(kind(1.d0)) :: dia,temp,rwk + integer, allocatable :: uplevs(:), rowlevs(:),idxs(:) + real(kind(1.d0)), allocatable :: row(:) + type(psb_int_heap) :: heap + + logical,parameter :: debug=.false. + type(psb_dspmat_type) :: trw + integer :: int_err(5) + character(len=20), parameter :: name='mld_diluk_fctint' + character(len=20) :: ch_err + + if (psb_get_errstatus() /= 0) return + info=0 + call psb_erractionsave(err_act) + + m = ma+mb + + ! + ! Temp buffer for copyin function. + ! + if (debug) write(0,*)'LUINT Allocating TRW' + call psb_sp_all(0,0,trw,1,info) + if (info==0) call psb_ensure_size(m+1,lia2,info) + if (info==0) call psb_ensure_size(m+1,uia2,info) + + if (info /= 0) then + info=4010 + call psb_errpush(info,name,a_err='psb_sp_all') + goto 9999 + end if + if (debug) write(0,*)'LUINT Done Allocating TRW' + + l1=0 + l2=0 + lia2(1) = 1 + uia2(1) = 1 + + if (debug) write(0,*)'In DCSRLU Begin cycle',m,ma,mb + ! Initial allocation + allocate(uplevs(size(uaspk)),rowlevs(m),row(m),stat=info) + if (info /= 0) then + info=4010 + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + uplevs(:) = m+1 + row(:) = dzero + rowlevs(:) = m+1 + + do i = 1, m + if (debug.and.(mod(i,500)==1)) write(0,*)'LUINT: Loop index ',i,ma,minj,maxj + if (debug) write(0,*)'LUINT: Loop index ',i,ma + ! + ! At each iteration of the loop we keep the indices affected in a heap + ! initialized and filled in the copyin function, and updated during + ! the elimination. The heap is ideal because at each step we need the + ! lowest index, but we also need to insert new items, and the heap allows + ! to do both in log time. + ! + d(i) = dzero + if (i<=ma) then + call iluk_copyin(i,ma,a,m,row,rowlevs,heap,ktrw,trw) + else + call iluk_copyin(i-ma,mb,b,m,row,rowlevs,heap,ktrw,trw) + endif + + if (debug) write(0,*)'LUINT: input Copy done',minj,maxj + ! Do an elimination step on current row + ! Turns out we only need to keep track of levels + ! for the upper triangle, hence no uplevs variable. + ! + call iluk_fact(fill_in,i,m,row,rowlevs,heap,& + & d,uia1,uia2,uaspk,uplevs,nidx,idxs) + ! + ! Copy the row into the lower/diag/upper structures. + ! + call iluk_copyout(fill_in,ialg,i,m,row,rowlevs,nidx,idxs,& + & l1,l2,lia1,lia2,laspk,d,uia1,uia2,uaspk,uplevs) + + end do + + ! + ! And we're done......hopefully :-) + ! + deallocate(uplevs,rowlevs,row,stat=info) + if (info /= 0) then + info=4010 + call psb_errpush(info,name,a_err='Deallocate') + goto 9999 + end if + if (info == 0) call psb_sp_free(trw,info) + if (info /= 0) then + info=4010 + ch_err='psb_sp_free' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (debug) write(0,*)'Leaving ilu_fct' + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + return + end subroutine mld_diluk_fctint + + + subroutine iluk_copyin(i,m,a,jmax,row,rowlevs,heap,ktrw,trw) + use psb_base_mod + implicit none + type(psb_dspmat_type) :: a,trw + integer :: i, rowlevs(:),m,ktrw,jmax + real(kind(1.d0)) :: row(:) + type(psb_int_heap) :: heap + + integer :: k,j,info,irb + integer, parameter :: nrb=16 + character(len=20), parameter :: name='mld_diluk_fctint' + character(len=20) :: ch_err + + if (psb_get_errstatus() /= 0) return + info=0 + call psb_erractionsave(err_act) + + ! + ! Here we take a fast shortcut if possible, otherwise + ! use spgtblk, slower but able (in principle) to handle + ! anything. + ! + if (toupper(a%fida)=='CSR') then + call psb_init_heap(heap,info) + + do j = a%ia2(i), a%ia2(i+1) - 1 + k = a%ia1(j) + if ((1<=k).and.(k<=jmax)) then + row(k) = a%aspk(j) + rowlevs(k) = 0 + call psb_insert_heap(k,heap,info) + end if + end do + else + if ((mod(i,nrb) == 1).or.(nrb==1)) then + irb = min(m-i+1,nrb) + call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1) + if (info /= 0) then + info=4010 + ch_err='psb_sp_getblk' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + ktrw=1 + end if + + do + if (ktrw > trw%infoa(psb_nnz_)) exit + if (trw%ia1(ktrw) > i) exit + k = trw%ia2(ktrw) + if ((1<=k).and.(k<=jmax)) then + row(k) = trw%aspk(ktrw) + rowlevs(k) = 0 + call psb_insert_heap(k,heap,info) + end if + ktrw = ktrw + 1 + enddo + end if + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + return + + end subroutine iluk_copyin + + + subroutine iluk_fact(fill_in,i,m,row,rowlevs,heap,d,uia1,uia2,uaspk,uplevs,nidx,idxs) + use psb_base_mod + implicit none + type(psb_dspmat_type) :: a + type(psb_int_heap) :: heap + integer :: i,m, rowlevs(:),minj,maxj,fill_in,nidx + integer, allocatable :: idxs(:) + integer :: uia1(:),uia2(:),uplevs(:) + real(kind(1.d0)) :: row(:), uaspk(:),d(:) + + integer :: k,j,lrwk,jj,info, lastk + real(kind(1.d0)) :: rwk + + ! Do an elimination step on current row + ! Turns out we only need to keep track of levels + ! for the upper triangle. + if (.not.allocated(idxs)) then + allocate(idxs(200),stat=info) + endif + nidx = 0 + lastk = -1 + ! + ! Do while there are indices to be processed + ! + do + call psb_heap_get_first(k,heap,info) + if (info < 0) exit + ! + ! An index may have been put on the heap more than once. + ! + if (k == lastk) cycle + lastk = k + nidx = nidx + 1 + if (nidx>size(idxs)) then + call psb_realloc(nidx+psb_heap_resize,idxs,info) + end if + idxs(nidx) = k + if ((row(k) /= dzero).and.(rowlevs(k) <= fill_in).and.(ki) then + ! Upper part + if (rowlevs(j) <= fill_in) then + l2 = l2 + 1 + if (size(uaspk) < l2) then + ! Figure out a good reallocation size!! + isz = max((l2/i)*m,int(1.2*l2),l2+100) + call psb_realloc(isz,uaspk,info) + if (info == 0) call psb_realloc(isz,uia1,info) + if (info == 0) call psb_realloc(isz,uplevs,info,pad=(m+1)) + if (info /= 0) then + info=4010 + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + end if + uia1(l2) = j + uaspk(l2) = row(j) + uplevs(l2) = rowlevs(j) + else if (ialg == mld_milu_n_) then + d(i) = d(i) + row(j) + end if + row(j) = dzero + rowlevs(j) = m+1 + end if + end do + + lia2(i+1) = l1 + 1 + uia2(i+1) = l2 + 1 + + if (abs(d(i)) < epstol) then + ! + ! Pivot too small: unstable factorization + ! + info = 2 + int_err(1) = i + write(ch_err,'(g20.10)') d(i) + call psb_errpush(info,name,i_err=int_err,a_err=ch_err) + goto 9999 + else + d(i) = done/d(i) + end if + ! Now scale upper part + do j=uia2(i), uia2(i+1)-1 + uaspk(j) = d(i)*uaspk(j) + end do + + call psb_erractionrestore(err_act) + return +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + + end subroutine iluk_copyout + + +end subroutine mld_diluk_fct diff --git a/mlprec/mld_dmlprec_bld.f90 b/mlprec/mld_dmlprec_bld.f90 index c6e27676..07ff07ba 100644 --- a/mlprec/mld_dmlprec_bld.f90 +++ b/mlprec/mld_dmlprec_bld.f90 @@ -84,7 +84,7 @@ subroutine mld_dmlprec_bld(a,desc_a,p,info) case(mld_ilu_n_) call mld_check_def(p%iprcparm(mld_sub_fill_in_),'Level',0,is_legal_ml_lev) case(mld_ilu_t_) - call mld_check_def(p%dprcparm(mld_fact_eps_),'Eps',dzero,is_legal_ml_eps) + call mld_check_def(p%dprcparm(mld_fact_thrs_),'Eps',dzero,is_legal_fact_thrs) end select call mld_check_def(p%dprcparm(mld_aggr_damp_),'omega',dzero,is_legal_omega) call mld_check_def(p%iprcparm(mld_smooth_sweeps_),'Jacobi sweeps',& diff --git a/mlprec/mld_dprecinit.f90 b/mlprec/mld_dprecinit.f90 index 7f5884a4..0cf7595a 100644 --- a/mlprec/mld_dprecinit.f90 +++ b/mlprec/mld_dprecinit.f90 @@ -57,7 +57,7 @@ subroutine mld_dprecinit(p,ptype,info,nlev) endif select case(toupper(ptype(1:len_trim(ptype)))) - case ('NOPREC') + case ('NONE','NOPREC') nlev_ = 1 ilev_ = 1 allocate(p%baseprecv(nlev_),stat=info) @@ -96,15 +96,15 @@ subroutine mld_dprecinit(p,ptype,info,nlev) if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info) if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info) if (info /= 0) return - p%baseprecv(ilev_)%iprcparm(:) = 0 - p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_bjac_ - p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_ilu_n_ - p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ - p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ - p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1 + p%baseprecv(ilev_)%iprcparm(:) = 0 + p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_bjac_ + p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_ilu_n_ + p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1 case ('AS') nlev_ = 1 diff --git a/mlprec/mld_dprecset.f90 b/mlprec/mld_dprecset.f90 index bef1f496..2c1cce5c 100644 --- a/mlprec/mld_dprecset.f90 +++ b/mlprec/mld_dprecset.f90 @@ -206,13 +206,15 @@ subroutine mld_dprecsetd(p,what,val,info,ilev) select case(what) ! Right now we don't have any at base level. Will change when ! we implement mld_ilu_t_ + case(mld_fact_thrs_) + p%baseprecv(ilev_)%dprcparm(what) = val case default write(0,*) 'Error: trying to call PRECSET with an invalid WHAT' info = -2 end select else if (ilev_ > 1) then select case(what) - case(mld_aggr_damp_) + case(mld_aggr_damp_,mld_fact_thrs_) p%baseprecv(ilev_)%dprcparm(what) = val case default write(0,*) 'Error: trying to call PRECSET with an invalid WHAT' diff --git a/mlprec/mld_prec_mod.f90 b/mlprec/mld_prec_mod.f90 index 11e0aec7..0065d67d 100644 --- a/mlprec/mld_prec_mod.f90 +++ b/mlprec/mld_prec_mod.f90 @@ -440,6 +440,27 @@ module mld_prec_mod end subroutine mld_zilu_fct end interface + interface mld_iluk_fct + subroutine mld_diluk_fct(fill_in,ialg,a,l,u,d,info,blck) + use psb_base_mod + integer, intent(in) :: fill_in,ialg + integer, intent(out) :: info + type(psb_dspmat_type),intent(in) :: a + type(psb_dspmat_type),intent(inout) :: l,u + type(psb_dspmat_type),intent(in), optional, target :: blck + real(kind(1.d0)), intent(inout) :: d(:) + end subroutine mld_diluk_fct + subroutine mld_ziluk_fct(fill_in,ialg,a,l,u,d,info,blck) + use psb_base_mod + integer, intent(in) :: fill_in,ialg + integer, intent(out) :: info + type(psb_zspmat_type),intent(in) :: a + type(psb_zspmat_type),intent(inout) :: l,u + type(psb_zspmat_type),intent(in), optional, target :: blck + complex(kind(1.d0)), intent(inout) :: d(:) + end subroutine mld_ziluk_fct + end interface + interface mld_asmat_bld Subroutine mld_dasmat_bld(ptype,novr,a,blk,desc_data,upd,desc_p,info,outfmt) use psb_base_mod diff --git a/mlprec/mld_prec_type.f90 b/mlprec/mld_prec_type.f90 index 8c91f682..61ae1c6e 100644 --- a/mlprec/mld_prec_type.f90 +++ b/mlprec/mld_prec_type.f90 @@ -164,8 +164,8 @@ module mld_prec_type integer, parameter :: mld_no_ml_=0, mld_add_ml_=1, mld_mult_ml_=2 integer, parameter :: mld_new_ml_prec_=3, mld_max_ml_=mld_new_ml_prec_ ! Legal values for entry: mld_smooth_pos_ - integer, parameter :: mld_pre_smooth_=1, mld_post_smooth_=2, mld_twoside_smooth_=3,& - & mld_max_smooth_=mld_twoside_smooth_ + integer, parameter :: mld_pre_smooth_=1, mld_post_smooth_=2,& + & mld_twoside_smooth_=3, mld_max_smooth_=mld_twoside_smooth_ ! Legal values for entry: mld_sub_solve_ integer, parameter :: mld_f_none_=0,mld_ilu_n_=1,mld_milu_n_=2, mld_ilu_t_=3 integer, parameter :: mld_slu_=4, mld_umf_=5, mld_sludist_=6 @@ -185,7 +185,7 @@ module mld_prec_type integer, parameter :: mld_renum_none_=0, mld_renum_glb_=1, mld_renum_gps_=2 ! Entries in dprcparm: ILU(T) epsilon, smoother omega - integer, parameter :: mld_fact_eps_=1 + integer, parameter :: mld_fact_thrs_=1 integer, parameter :: mld_aggr_damp_=2 integer, parameter :: mld_aggr_thresh_=3 integer, parameter :: mld_dfpsz_=4 @@ -379,24 +379,43 @@ contains write(iout,*) 'Preconditioner description' if (allocated(p%baseprecv)) then if (size(p%baseprecv)>=1) then + ilev = 1 write(iout,*) 'Base preconditioner' - select case(p%baseprecv(1)%iprcparm(mld_prec_type_)) + select case(p%baseprecv(ilev)%iprcparm(mld_prec_type_)) case(mld_noprec_) write(iout,*) 'No preconditioning' case(mld_diag_) write(iout,*) 'Diagonal scaling' case(mld_bjac_) write(iout,*) 'Block Jacobi with: ',& - & fact_names(p%baseprecv(1)%iprcparm(mld_sub_solve_)) + & fact_names(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) + select case(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) + case(mld_ilu_n_) + write(iout,*) 'Fill level:',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_) + case(mld_ilu_t_) + write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_thrs_) + case(mld_slu_,mld_umf_,mld_sludist_) + case default + write(iout,*) 'Should never get here!' + end select case(mld_as_) write(iout,*) 'Additive Schwarz with: ',& - & fact_names(p%baseprecv(1)%iprcparm(mld_sub_solve_)) + & fact_names(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) + select case(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) + case(mld_ilu_n_) + write(iout,*) 'Fill level:',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_) + case(mld_ilu_t_) + write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_thrs_) + case(mld_slu_,mld_umf_,mld_sludist_) + case default + write(iout,*) 'Should never get here!' + end select write(iout,*) 'Overlap:',& - & p%baseprecv(1)%iprcparm(mld_n_ovr_) + & p%baseprecv(ilev)%iprcparm(mld_n_ovr_) write(iout,*) 'Restriction: ',& - & restrict_names(p%baseprecv(1)%iprcparm(mld_sub_restr_)) + & restrict_names(p%baseprecv(ilev)%iprcparm(mld_sub_restr_)) write(iout,*) 'Prolongation: ',& - & prolong_names(p%baseprecv(1)%iprcparm(mld_sub_prol_)) + & prolong_names(p%baseprecv(ilev)%iprcparm(mld_sub_prol_)) end select end if if (size(p%baseprecv)>=2) then @@ -430,9 +449,9 @@ contains & fact_names(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) select case(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) case(mld_ilu_n_) - write(iout,*) 'Fill level :',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_) + write(iout,*) 'Fill level:',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_) case(mld_ilu_t_) - write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_eps_) + write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_thrs_) case(mld_slu_,mld_umf_,mld_sludist_) case default write(iout,*) 'Should never get here!' @@ -498,9 +517,9 @@ contains !!$ & fact_names(p%baseprecv(2)%iprcparm(mld_sub_solve_)) !!$ select case(p%baseprecv(2)%iprcparm(mld_sub_solve_)) !!$ case(mld_ilu_n_) -!!$ write(iout,*) 'Fill level :',p%baseprecv(2)%iprcparm(mld_sub_fill_in_) +!!$ write(iout,*) 'Fill level:',p%baseprecv(2)%iprcparm(mld_sub_fill_in_) !!$ case(mld_ilu_t_) -!!$ write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(mld_fact_eps_) +!!$ write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(mld_fact_thrs_) !!$ case(mld_slu_,mld_umf_,mld_sludist_) !!$ case default !!$ write(iout,*) 'Should never get here!' @@ -530,23 +549,42 @@ contains if (allocated(p%baseprecv)) then if (size(p%baseprecv)>=1) then write(iout,*) 'Base preconditioner' - select case(p%baseprecv(1)%iprcparm(mld_prec_type_)) + ilev=1 + select case(p%baseprecv(ilev)%iprcparm(mld_prec_type_)) case(mld_noprec_) write(iout,*) 'No preconditioning' case(mld_diag_) write(iout,*) 'Diagonal scaling' case(mld_bjac_) write(iout,*) 'Block Jacobi with: ',& - & fact_names(p%baseprecv(1)%iprcparm(mld_sub_solve_)) + & fact_names(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) + select case(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) + case(mld_ilu_n_) + write(iout,*) 'Fill level:',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_) + case(mld_ilu_t_) + write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_thrs_) + case(mld_slu_,mld_umf_,mld_sludist_) + case default + write(iout,*) 'Should never get here!' + end select case(mld_as_) write(iout,*) 'Additive Schwarz with: ',& - & fact_names(p%baseprecv(1)%iprcparm(mld_sub_solve_)) + & fact_names(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) + select case(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) + case(mld_ilu_n_) + write(iout,*) 'Fill level:',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_) + case(mld_ilu_t_) + write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_thrs_) + case(mld_slu_,mld_umf_,mld_sludist_) + case default + write(iout,*) 'Should never get here!' + end select write(iout,*) 'Overlap:',& - & p%baseprecv(1)%iprcparm(mld_n_ovr_) + & p%baseprecv(ilev)%iprcparm(mld_n_ovr_) write(iout,*) 'Restriction: ',& - & restrict_names(p%baseprecv(1)%iprcparm(mld_sub_restr_)) + & restrict_names(p%baseprecv(ilev)%iprcparm(mld_sub_restr_)) write(iout,*) 'Prolongation: ',& - & prolong_names(p%baseprecv(1)%iprcparm(mld_sub_prol_)) + & prolong_names(p%baseprecv(ilev)%iprcparm(mld_sub_prol_)) end select end if if (size(p%baseprecv)>=2) then @@ -580,9 +618,9 @@ contains & fact_names(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) select case(p%baseprecv(ilev)%iprcparm(mld_sub_solve_)) case(mld_ilu_n_) - write(iout,*) 'Fill level :',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_) + write(iout,*) 'Fill level:',p%baseprecv(ilev)%iprcparm(mld_sub_fill_in_) case(mld_ilu_t_) - write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_eps_) + write(iout,*) 'Fill threshold :',p%baseprecv(ilev)%dprcparm(mld_fact_thrs_) case(mld_slu_,mld_umf_,mld_sludist_) case default write(iout,*) 'Should never get here!' @@ -648,9 +686,9 @@ contains !!$ & fact_names(p%baseprecv(2)%iprcparm(mld_sub_solve_)) !!$ select case(p%baseprecv(2)%iprcparm(mld_sub_solve_)) !!$ case(mld_ilu_n_) -!!$ write(iout,*) 'Fill level :',p%baseprecv(2)%iprcparm(mld_sub_fill_in_) +!!$ write(iout,*) 'Fill level:',p%baseprecv(2)%iprcparm(mld_sub_fill_in_) !!$ case(mld_ilu_t_) -!!$ write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(mld_fact_eps_) +!!$ write(iout,*) 'Fill threshold :',p%baseprecv(2)%dprcparm(mld_fact_thrs_) !!$ case(mld_slu_,mld_umf_,mld_sludist_) !!$ case default !!$ write(iout,*) 'Should never get here!' @@ -781,14 +819,14 @@ contains is_legal_omega = ((ip>=0.0d0).and.(ip<=2.0d0)) return end function is_legal_omega - function is_legal_ml_eps(ip) + function is_legal_fact_thrs(ip) use psb_base_mod real(kind(1.d0)), intent(in) :: ip - logical :: is_legal_ml_eps + logical :: is_legal_fact_thrs - is_legal_ml_eps = (ip>=0.0d0) + is_legal_fact_thrs = (ip>=0.0d0) return - end function is_legal_ml_eps + end function is_legal_fact_thrs subroutine mld_icheck_def(ip,name,id,is_legal) diff --git a/mlprec/mld_zilu_bld.f90 b/mlprec/mld_zilu_bld.f90 index 4ccd46ab..b47bf726 100644 --- a/mlprec/mld_zilu_bld.f90 +++ b/mlprec/mld_zilu_bld.f90 @@ -144,12 +144,37 @@ subroutine mld_zilu_bld(a,desc_a,p,upd,info,blck) endif - ! - ! Ok, factor the matrix. - ! - t5 = psb_wtime() - call mld_ilu_fct(p%iprcparm(mld_sub_solve_),a,p%av(mld_l_pr_),p%av(mld_u_pr_),& - & p%d,info,blck=blck) + + select case(p%iprcparm(mld_sub_solve_)) + + case(mld_ilu_n_,mld_milu_n_,mld_ilu_t_) + ! + ! Need to decide what to do with ILU(T). + ! Should we pack the functionality together with ILU(K)? + ! Maybe for mental sanity, a separate routine would be better. + ! + ! Decide what to do with MILU(K) + ! + ! Ok, factor the matrix. + ! + t5 = psb_wtime() + select case(p%iprcparm(mld_sub_fill_in_)) + case(:-1) + ! This is an error. + call psb_errpush(30,name,i_err=(/3,p%iprcparm(mld_sub_fill_in_),0,0,0/)) + goto 9999 + case(0) + call mld_ilu_fct(p%iprcparm(mld_sub_solve_),a,p%av(mld_l_pr_),p%av(mld_u_pr_),& + & p%d,info,blck=blck) + case(1:) + call mld_iluk_fct(p%iprcparm(mld_sub_fill_in_),p%iprcparm(mld_sub_solve_),& + & a,p%av(mld_l_pr_),p%av(mld_u_pr_),p%d,info,blck=blck) + end select + case default + ! If we end up here, something was wrong up in the call chain. + call psb_errpush(4000,name) + goto 9999 + end select if(info/=0) then info=4010 ch_err='mld_ilu_fct' diff --git a/mlprec/mld_zilu_fct.f90 b/mlprec/mld_zilu_fct.f90 index af5ed90f..302f41dc 100644 --- a/mlprec/mld_zilu_fct.f90 +++ b/mlprec/mld_zilu_fct.f90 @@ -164,72 +164,22 @@ contains if(debug) write(0,*)'LUINT Done Allocating TRW' lia2(1) = 1 uia2(1) = 1 - l1=0 - l2=0 + l1 = 0 + l2 = 0 m = ma+mb if(debug) write(0,*)'In DCSRLU Begin cycle',m,ma,mb - do i = 1, ma + do i = 1, m if(debug) write(0,*)'LUINT: Loop index ',i,ma d(i) = zzero - ! - ! Here we take a fast shortcut if possible, otherwise - ! use spgtblk, slower but able (in principle) to handle - ! anything. - ! - if (toupper(a%fida)=='CSR') then - do j = a%ia2(i), a%ia2(i+1) - 1 - k = a%ia1(j) - ! write(0,*)'KKKKK',k - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = a%aspk(j) - lia1(l1) = k - else if (k == i) then - d(i) = a%aspk(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = a%aspk(j) - uia1(l2) = k - end if - enddo - + if (i <= ma) then + call ilu_copyin(i,ma,a,i,m,l1,lia1,lia2,laspk,& + & d(i),l2,uia1,uia2,uaspk,ktrw,trw) else - - if ((mod(i,nrb) == 1).or.(nrb==1)) then - irb = min(ma-i+1,nrb) - call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1) - if(info.ne.0) then - info=4010 - ch_err='psb_sp_getblk' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > trw%infoa(psb_nnz_)) exit - if (trw%ia1(ktrw) > i) exit - k = trw%ia2(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%aspk(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%aspk(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%aspk(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - - end if - -!!$ + call ilu_copyin(i-ma,mb,b,i,m,l1,lia1,lia2,laspk,& + & d(i),l2,uia1,uia2,uaspk,ktrw,trw) + endif lia2(i+1) = l1 + 1 uia2(i+1) = l2 + 1 @@ -317,145 +267,6 @@ contains enddo enddo - do i = ma+1, m - d(i) = zzero - - - if (toupper(b%fida)=='CSR') then - - do j = b%ia2(i-ma), b%ia2(i-ma+1) - 1 - k = b%ia1(j) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = b%aspk(j) - lia1(l1) = k - else if (k == i) then - d(i) = b%aspk(j) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = b%aspk(j) - uia1(l2) = k - end if - enddo - - else - - if ((mod((i-ma),nrb) == 1).or.(nrb==1)) then - irb = min(m-i+1,nrb) - call psb_sp_getblk(i-ma,b,trw,info,lrw=i-ma+irb-1) - if(info.ne.0) then - info=4010 - ch_err='psb_sp_getblk' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - ktrw=1 - end if - - do - if (ktrw > trw%infoa(psb_nnz_)) exit - if (trw%ia1(ktrw) > i) exit - k = trw%ia2(ktrw) - if ((k < i).and.(k >= 1)) then - l1 = l1 + 1 - laspk(l1) = trw%aspk(ktrw) - lia1(l1) = k - else if (k == i) then - d(i) = trw%aspk(ktrw) - else if ((k > i).and.(k <= m)) then - l2 = l2 + 1 - uaspk(l2) = trw%aspk(ktrw) - uia1(l2) = k - end if - ktrw = ktrw + 1 - enddo - - endif - - - lia2(i+1) = l1 + 1 - uia2(i+1) = l2 + 1 - - dia = d(i) - do kk = lia2(i), lia2(i+1) - 1 - ! - ! compute element alo(i,k) of incomplete factorization - ! - temp = laspk(kk) - k = lia1(kk) - laspk(kk) = temp*d(k) - ! update the rest of row i using alo(i,k) - low1 = kk + 1 - low2 = uia2(i) - updateloopb: do jj = uia2(k), uia2(k+1) - 1 - j = uia1(jj) - ! - if (j < i) then - ! search alo(i,*) for matching index J - do ll = low1, lia2(i+1) - 1 - l = lia1(ll) - if (l > j) then - low1 = ll - exit - else if (l == j) then - laspk(ll) = laspk(ll) - temp*uaspk(jj) - low1 = ll + 1 - cycle updateloopb - end if - enddo - ! - else if (j == i) then - ! j=i update diagonal - dia = dia - temp*uaspk(jj) - cycle updateloopb - ! - else if (j > i) then - ! search aup(i,*) for matching index j - do ll = low2, uia2(i+1) - 1 - l = uia1(ll) - if (l > j) then - low2 = ll - exit - else if (l == j) then - uaspk(ll) = uaspk(ll) - temp*uaspk(jj) - low2 = ll + 1 - cycle updateloopb - end if - enddo - end if - ! - ! If we get here we missed the cycle updateloop, - ! which means that this entry does not match; thus - ! we take it out of diagonal for MILU. - ! - if (ialg == mld_milu_n_) then - dia = dia - temp*uaspk(jj) - end if - enddo updateloopb - enddo - ! - ! - ! Non singularity - ! - if (abs(dia) < epstol) then - ! - ! Pivot too small: unstable factorization - ! - int_err(1) = i - write(ch_err,'(g20.10)') abs(dia) - info = 2 - call psb_errpush(info,name,i_err=int_err,a_err=ch_err) - goto 9999 - else - dia = zone/dia - end if - d(i) = dia - ! Scale row i of upper triangle - do kk = uia2(i), uia2(i+1) - 1 - uaspk(kk) = uaspk(kk)*dia - enddo - enddo - call psb_sp_free(trw,info) if(info.ne.0) then info=4010 @@ -476,4 +287,93 @@ contains end if return end subroutine mld_zilu_fctint + + subroutine ilu_copyin(i,m,a,jd,jmax,l1,lia1,lia2,laspk,& + & dia,l2,uia1,uia2,uaspk,ktrw,trw) + use psb_base_mod + implicit none + type(psb_zspmat_type) :: a,trw + integer :: i, m,ktrw,jd,jmax,l1,l2 + integer :: lia1(:),lia2(:),uia1(:),uia2(:) + complex(kind(1.d0)) :: laspk(:), uaspk(:), dia + type(psb_int_heap) :: heap + + integer :: k,j,info,irb + integer, parameter :: nrb=16 + character(len=20), parameter :: name='mld_dilu_fctint' + character(len=20) :: ch_err + + if (psb_get_errstatus() /= 0) return + info=0 + call psb_erractionsave(err_act) + + ! + ! Here we take a fast shortcut if possible, otherwise + ! use spgtblk, slower but able (in principle) of + ! handling anything. + ! + + if (toupper(a%fida)=='CSR') then + do j = a%ia2(i), a%ia2(i+1) - 1 + k = a%ia1(j) + ! write(0,*)'KKKKK',k + if ((k < jd).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = a%aspk(j) + lia1(l1) = k + else if (k == jd) then + dia = a%aspk(j) + else if ((k > jd).and.(k <= jmax)) then + l2 = l2 + 1 + uaspk(l2) = a%aspk(j) + uia1(l2) = k + end if + enddo + + else + + if ((mod(i,nrb) == 1).or.(nrb==1)) then + irb = min(m-i+1,nrb) + call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1) + if(info.ne.0) then + info=4010 + ch_err='psb_sp_getblk' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + ktrw=1 + end if + + do + if (ktrw > trw%infoa(psb_nnz_)) exit + if (trw%ia1(ktrw) > i) exit + k = trw%ia2(ktrw) + if ((k < jd).and.(k >= 1)) then + l1 = l1 + 1 + laspk(l1) = trw%aspk(ktrw) + lia1(l1) = k + else if (k == jd) then + dia = trw%aspk(ktrw) + else if ((k > jd).and.(k <= jmax)) then + l2 = l2 + 1 + uaspk(l2) = trw%aspk(ktrw) + uia1(l2) = k + end if + ktrw = ktrw + 1 + enddo + + end if + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + return + end subroutine ilu_copyin + end subroutine mld_zilu_fct diff --git a/mlprec/mld_ziluk_fct.f90 b/mlprec/mld_ziluk_fct.f90 new file mode 100644 index 00000000..1428762b --- /dev/null +++ b/mlprec/mld_ziluk_fct.f90 @@ -0,0 +1,500 @@ +!!$ +!!$ +!!$ MD2P4 +!!$ Multilevel Domain Decomposition Parallel Preconditioner Package for PSBLAS +!!$ for +!!$ Parallel Sparse BLAS v2.0 +!!$ (C) Copyright 2006 Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Daniela di Serafino Second University of Naples +!!$ Pasqua D'Ambra ICAR-CNR +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +subroutine mld_ziluk_fct(fill_in,ialg,a,l,u,d,info,blck) + + ! + ! This routine copies and factors "on the fly" from A and BLCK + ! into L/D/U. + ! + ! + use psb_base_mod + use mld_prec_mod, mld_protect_name => mld_diluk_fct + implicit none + ! .. Scalar Arguments .. + integer, intent(in) :: fill_in, ialg + integer, intent(out) :: info + ! .. Array Arguments .. + type(psb_zspmat_type),intent(in) :: a + type(psb_zspmat_type),intent(inout) :: l,u + type(psb_zspmat_type),intent(in), optional, target :: blck + complex(kind(1.d0)), intent(inout) :: d(:) + ! .. Local Scalars .. + complex(kind(1.d0)) :: dia, temp + integer :: i, j, jj, k, kk, l1, l2, ll, low1, low2,m,ma,err_act + + type(psb_zspmat_type), pointer :: blck_ + character(len=20) :: name, ch_err + logical, parameter :: debug=.false. + + name='mld_diluk_fct' + info = 0 + call psb_erractionsave(err_act) + ! .. Executable Statements .. + ! + if (debug) write(0,*) 'mld_diluk_fct: start' + if (present(blck)) then + blck_ => blck + else + allocate(blck_,stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + + call psb_sp_all(0,0,blck_,1,info) + if (info /= 0) then + info=4010 + ch_err='psb_sp_all' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + endif + if (debug) write(0,*) 'mld_diluk_fct: calling fctint' + call mld_ziluk_fctint(fill_in,ialg,m,a%m,a,blck_%m,blck_,& + & d,l%aspk,l%ia1,l%ia2,u%aspk,u%ia1,u%ia2,l1,l2,info) + if (info /= 0) then + info=4010 + ch_err='mld_diluk_fctint' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + l%infoa(1) = l1 + l%fida = 'CSR' + l%descra = 'TLU' + u%infoa(1) = l2 + u%fida = 'CSR' + u%descra = 'TUU' + l%m = m + l%k = m + u%m = m + u%k = m + if (present(blck)) then + blck_ => null() + else + call psb_sp_free(blck_,info) + if (info /= 0) then + info=4010 + ch_err='psb_sp_free' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + deallocate(blck_) + endif + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + return + +contains + subroutine mld_ziluk_fctint(fill_in,ialg,m,ma,a,mb,b,& + & d,laspk,lia1,lia2,uaspk,uia1,uia2,l1,l2,info) + use psb_base_mod + implicit none + integer, intent(in) :: fill_in, ialg + type(psb_zspmat_type) :: a,b + integer :: m,ma,mb,l1,l2,info + integer, dimension(:), allocatable :: lia1,lia2,uia1,uia2 + complex(kind(1.d0)), dimension(:), allocatable :: laspk,uaspk + complex(kind(1.d0)), dimension(:) :: d + + integer :: i,j,k,l,low1,low2,kk,jj,ll, ktrw,err_act, & + & isz,minj,maxj,lrwk,nidx + complex(kind(1.d0)) :: dia,temp,rwk + integer, allocatable :: uplevs(:), rowlevs(:),idxs(:) + complex(kind(1.d0)), allocatable :: row(:) + type(psb_int_heap) :: heap + + logical,parameter :: debug=.false. + type(psb_zspmat_type) :: trw + integer :: int_err(5) + character(len=20), parameter :: name='mld_diluk_fctint' + character(len=20) :: ch_err + + if (psb_get_errstatus() /= 0) return + info=0 + call psb_erractionsave(err_act) + + m = ma+mb + + ! + ! Temp buffer for copyin function. + ! + if (debug) write(0,*)'LUINT Allocating TRW' + call psb_sp_all(0,0,trw,1,info) + if (info==0) call psb_ensure_size(m+1,lia2,info) + if (info==0) call psb_ensure_size(m+1,uia2,info) + + if (info /= 0) then + info=4010 + call psb_errpush(info,name,a_err='psb_sp_all') + goto 9999 + end if + if (debug) write(0,*)'LUINT Done Allocating TRW' + + l1=0 + l2=0 + lia2(1) = 1 + uia2(1) = 1 + + if (debug) write(0,*)'In DCSRLU Begin cycle',m,ma,mb + ! Initial allocation + allocate(uplevs(size(uaspk)),rowlevs(m),row(m),stat=info) + if (info /= 0) then + info=4010 + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + uplevs(:) = m+1 + row(:) = zzero + rowlevs(:) = m+1 + + do i = 1, m + if (debug.and.(mod(i,500)==1)) write(0,*)'LUINT: Loop index ',i,ma,minj,maxj + if (debug) write(0,*)'LUINT: Loop index ',i,ma + ! + ! At each iteration of the loop we keep the indices affected in a heap + ! initialized and filled in the copyin function, and updated during + ! the elimination. The heap is ideal because at each step we need the + ! lowest index, but we also need to insert new items, and the heap allows + ! to do both in log time. + ! + d(i) = zzero + if (i<=ma) then + call iluk_copyin(i,ma,a,m,row,rowlevs,heap,ktrw,trw) + else + call iluk_copyin(i-ma,mb,b,m,row,rowlevs,heap,ktrw,trw) + endif + + if (debug) write(0,*)'LUINT: input Copy done',minj,maxj + ! Do an elimination step on current row + ! Turns out we only need to keep track of levels + ! for the upper triangle, hence no uplevs variable. + ! + call iluk_fact(fill_in,i,m,row,rowlevs,heap,& + & d,uia1,uia2,uaspk,uplevs,nidx,idxs) + ! + ! Copy the row into the lower/diag/upper structures. + ! + call iluk_copyout(fill_in,ialg,i,m,row,rowlevs,nidx,idxs,& + & l1,l2,lia1,lia2,laspk,d,uia1,uia2,uaspk,uplevs) + + end do + + ! + ! And we're done......hopefully :-) + ! + deallocate(uplevs,rowlevs,row,stat=info) + if (info /= 0) then + info=4010 + call psb_errpush(info,name,a_err='Deallocate') + goto 9999 + end if + if (info == 0) call psb_sp_free(trw,info) + if (info /= 0) then + info=4010 + ch_err='psb_sp_free' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (debug) write(0,*)'Leaving ilu_fct' + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + return + end subroutine mld_ziluk_fctint + + + subroutine iluk_copyin(i,m,a,jmax,row,rowlevs,heap,ktrw,trw) + use psb_base_mod + implicit none + type(psb_zspmat_type) :: a,trw + integer :: i, rowlevs(:),m,ktrw,jmax + complex(kind(1.d0)) :: row(:) + type(psb_int_heap) :: heap + + integer :: k,j,info,irb + integer, parameter :: nrb=16 + character(len=20), parameter :: name='mld_diluk_fctint' + character(len=20) :: ch_err + + if (psb_get_errstatus() /= 0) return + info=0 + call psb_erractionsave(err_act) + + ! + ! Here we take a fast shortcut if possible, otherwise + ! use spgtblk, slower but able (in principle) to handle + ! anything. + ! + if (toupper(a%fida)=='CSR') then + call psb_init_heap(heap,info) + + do j = a%ia2(i), a%ia2(i+1) - 1 + k = a%ia1(j) + if ((1<=k).and.(k<=jmax)) then + row(k) = a%aspk(j) + rowlevs(k) = 0 + call psb_insert_heap(k,heap,info) + end if + end do + else + if ((mod(i,nrb) == 1).or.(nrb==1)) then + irb = min(m-i+1,nrb) + call psb_sp_getblk(i,a,trw,info,lrw=i+irb-1) + if (info /= 0) then + info=4010 + ch_err='psb_sp_getblk' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + ktrw=1 + end if + + do + if (ktrw > trw%infoa(psb_nnz_)) exit + if (trw%ia1(ktrw) > i) exit + k = trw%ia2(ktrw) + if ((1<=k).and.(k<=jmax)) then + row(k) = trw%aspk(ktrw) + rowlevs(k) = 0 + call psb_insert_heap(k,heap,info) + end if + ktrw = ktrw + 1 + enddo + end if + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + return + + end subroutine iluk_copyin + + + subroutine iluk_fact(fill_in,i,m,row,rowlevs,heap,d,uia1,uia2,uaspk,uplevs,nidx,idxs) + use psb_base_mod + implicit none + type(psb_zspmat_type) :: a + type(psb_int_heap) :: heap + integer :: i,m, rowlevs(:),minj,maxj,fill_in,nidx + integer, allocatable :: idxs(:) + integer :: uia1(:),uia2(:),uplevs(:) + complex(kind(1.d0)) :: row(:), uaspk(:),d(:) + + integer :: k,j,lrwk,jj,info, lastk + complex(kind(1.d0)) :: rwk + + ! Do an elimination step on current row + ! Turns out we only need to keep track of levels + ! for the upper triangle. + if (.not.allocated(idxs)) then + allocate(idxs(200),stat=info) + endif + nidx = 0 + lastk = -1 + ! + ! Do while there are indices to be processed + ! + do + call psb_heap_get_first(k,heap,info) + if (info < 0) exit + ! + ! An index may have been put on the heap more than once. + ! + if (k == lastk) cycle + lastk = k + nidx = nidx + 1 + if (nidx>size(idxs)) then + call psb_realloc(nidx+psb_heap_resize,idxs,info) + end if + idxs(nidx) = k + if ((row(k) /= zzero).and.(rowlevs(k) <= fill_in).and.(ki) then + ! Upper part + if (rowlevs(j) <= fill_in) then + l2 = l2 + 1 + if (size(uaspk) < l2) then + ! Figure out a good reallocation size!! + isz = max((l2/i)*m,int(1.2*l2),l2+100) + call psb_realloc(isz,uaspk,info) + if (info == 0) call psb_realloc(isz,uia1,info) + if (info == 0) call psb_realloc(isz,uplevs,info,pad=(m+1)) + if (info /= 0) then + info=4010 + call psb_errpush(info,name,a_err='Allocate') + goto 9999 + end if + end if + uia1(l2) = j + uaspk(l2) = row(j) + uplevs(l2) = rowlevs(j) + else if (ialg == mld_milu_n_) then + d(i) = d(i) + row(j) + end if + row(j) = zzero + rowlevs(j) = m+1 + end if + end do + + lia2(i+1) = l1 + 1 + uia2(i+1) = l2 + 1 + + if (abs(d(i)) < epstol) then + ! + ! Pivot too small: unstable factorization + ! + info = 2 + int_err(1) = i + write(ch_err,'(g20.10)') d(i) + call psb_errpush(info,name,i_err=int_err,a_err=ch_err) + goto 9999 + else + d(i) = zone/d(i) + end if + ! Now scale upper part + do j=uia2(i), uia2(i+1)-1 + uaspk(j) = d(i)*uaspk(j) + end do + + call psb_erractionrestore(err_act) + return +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + + end subroutine iluk_copyout + + +end subroutine mld_ziluk_fct diff --git a/mlprec/mld_zmlprec_bld.f90 b/mlprec/mld_zmlprec_bld.f90 index 622d5409..3dd46834 100644 --- a/mlprec/mld_zmlprec_bld.f90 +++ b/mlprec/mld_zmlprec_bld.f90 @@ -85,7 +85,7 @@ subroutine mld_zmlprec_bld(a,desc_a,p,info) case(mld_ilu_n_) call mld_check_def(p%iprcparm(mld_sub_fill_in_),'Level',0,is_legal_ml_lev) case(mld_ilu_t_) - call mld_check_def(p%dprcparm(mld_fact_eps_),'Eps',dzero,is_legal_ml_eps) + call mld_check_def(p%dprcparm(mld_fact_thrs_),'Eps',dzero,is_legal_fact_thrs) end select call mld_check_def(p%dprcparm(mld_aggr_damp_),'omega',dzero,is_legal_omega) call mld_check_def(p%iprcparm(mld_smooth_sweeps_),'Jacobi sweeps',& diff --git a/mlprec/mld_zprecinit.f90 b/mlprec/mld_zprecinit.f90 index 1d70870c..1e41af90 100644 --- a/mlprec/mld_zprecinit.f90 +++ b/mlprec/mld_zprecinit.f90 @@ -67,10 +67,10 @@ subroutine mld_zprecinit(p,ptype,info,nlev) p%baseprecv(ilev_)%iprcparm(:) = 0 p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_noprec_ p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_f_none_ - p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ - p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0 p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1 case ('DIAG') @@ -80,12 +80,13 @@ subroutine mld_zprecinit(p,ptype,info,nlev) if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info) if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info) if (info /= 0) return + p%baseprecv(ilev_)%iprcparm(:) = 0 p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_diag_ p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_f_none_ - p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ - p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0 p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1 case ('BJAC') @@ -95,34 +96,35 @@ subroutine mld_zprecinit(p,ptype,info,nlev) if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info) if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info) if (info /= 0) return - p%baseprecv(ilev_)%iprcparm(:) = 0 - p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_bjac_ - p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_ilu_n_ - p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ - p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ - p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1 + p%baseprecv(ilev_)%iprcparm(:) = 0 + p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_bjac_ + p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_ilu_n_ + p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1 - case ('ASM','AS') + case ('AS') nlev_ = 1 ilev_ = 1 allocate(p%baseprecv(nlev_),stat=info) if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info) if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info) if (info /= 0) return + p%baseprecv(ilev_)%iprcparm(:) = 0 p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_as_ p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_ilu_n_ - p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_halo_ - p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_halo_ + p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 1 - p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1 - case ('MLD', 'ML') + case ('ML') if (present(nlev)) then nlev_ = max(1,nlev) @@ -137,13 +139,14 @@ subroutine mld_zprecinit(p,ptype,info,nlev) if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info) if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info) if (info /= 0) return + p%baseprecv(ilev_)%iprcparm(:) = 0 p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_as_ p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_ilu_n_ - p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_halo_ - p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_halo_ + p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 1 - p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1 if (nlev_ == 1) return @@ -151,41 +154,43 @@ subroutine mld_zprecinit(p,ptype,info,nlev) if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info) if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info) if (info /= 0) return - p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_bjac_ - p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ - p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ - p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_ml_type_) = mld_mult_ml_ - p%baseprecv(ilev_)%iprcparm(mld_aggr_alg_) = mld_dec_aggr_ - p%baseprecv(ilev_)%iprcparm(mld_aggr_kind_) = mld_smooth_prol_ - p%baseprecv(ilev_)%iprcparm(mld_coarse_mat_) = mld_distr_mat_ - p%baseprecv(ilev_)%iprcparm(mld_smooth_pos_) = mld_post_smooth_ - p%baseprecv(ilev_)%iprcparm(mld_aggr_eig_) = mld_max_norm_ - p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_ilu_n_ - p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1 - p%baseprecv(ilev_)%dprcparm(mld_aggr_damp_) = 4.d0/3.d0 + p%baseprecv(ilev_)%iprcparm(:) = 0 + p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_bjac_ + p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_ml_type_) = mld_mult_ml_ + p%baseprecv(ilev_)%iprcparm(mld_aggr_alg_) = mld_dec_aggr_ + p%baseprecv(ilev_)%iprcparm(mld_aggr_kind_) = mld_smooth_prol_ + p%baseprecv(ilev_)%iprcparm(mld_coarse_mat_) = mld_distr_mat_ + p%baseprecv(ilev_)%iprcparm(mld_smooth_pos_) = mld_post_smooth_ + p%baseprecv(ilev_)%iprcparm(mld_aggr_eig_) = mld_max_norm_ + p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_ilu_n_ + p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 1 + p%baseprecv(ilev_)%dprcparm(mld_aggr_damp_) = 4.d0/3.d0 end do ilev_ = nlev_ if (info == 0) call psb_realloc(mld_ifpsz_,p%baseprecv(ilev_)%iprcparm,info) if (info == 0) call psb_realloc(mld_dfpsz_,p%baseprecv(ilev_)%dprcparm,info) if (info /= 0) return - p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_bjac_ - p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ - p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ - p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_ml_type_) = mld_mult_ml_ - p%baseprecv(ilev_)%iprcparm(mld_aggr_alg_) = mld_dec_aggr_ - p%baseprecv(ilev_)%iprcparm(mld_aggr_kind_) = mld_smooth_prol_ - p%baseprecv(ilev_)%iprcparm(mld_coarse_mat_) = mld_distr_mat_ - p%baseprecv(ilev_)%iprcparm(mld_smooth_pos_) = mld_post_smooth_ - p%baseprecv(ilev_)%iprcparm(mld_aggr_eig_) = mld_max_norm_ - p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_umf_ - p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 - p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 4 - p%baseprecv(ilev_)%dprcparm(mld_aggr_damp_) = 4.d0/3.d0 + p%baseprecv(ilev_)%iprcparm(:) = 0 + p%baseprecv(ilev_)%iprcparm(mld_prec_type_) = mld_bjac_ + p%baseprecv(ilev_)%iprcparm(mld_sub_restr_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(mld_sub_prol_) = psb_none_ + p%baseprecv(ilev_)%iprcparm(mld_sub_ren_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_n_ovr_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_ml_type_) = mld_mult_ml_ + p%baseprecv(ilev_)%iprcparm(mld_aggr_alg_) = mld_dec_aggr_ + p%baseprecv(ilev_)%iprcparm(mld_aggr_kind_) = mld_smooth_prol_ + p%baseprecv(ilev_)%iprcparm(mld_coarse_mat_) = mld_distr_mat_ + p%baseprecv(ilev_)%iprcparm(mld_smooth_pos_) = mld_post_smooth_ + p%baseprecv(ilev_)%iprcparm(mld_aggr_eig_) = mld_max_norm_ + p%baseprecv(ilev_)%iprcparm(mld_sub_solve_) = mld_umf_ + p%baseprecv(ilev_)%iprcparm(mld_sub_fill_in_) = 0 + p%baseprecv(ilev_)%iprcparm(mld_smooth_sweeps_) = 4 + p%baseprecv(ilev_)%dprcparm(mld_aggr_damp_) = 4.d0/3.d0 case default write(0,*) 'Unknown preconditioner type request "',ptype,'"' diff --git a/mlprec/mld_zprecset.f90 b/mlprec/mld_zprecset.f90 index 4f44ee13..6472adbe 100644 --- a/mlprec/mld_zprecset.f90 +++ b/mlprec/mld_zprecset.f90 @@ -207,13 +207,15 @@ subroutine mld_zprecsetd(p,what,val,info,ilev) select case(what) ! Right now we don't have any at base level. Will change when ! we implement mld_ilu_t_ + case(mld_fact_thrs_) + p%baseprecv(ilev_)%dprcparm(what) = val case default write(0,*) 'Error: trying to call PRECSET with an invalid WHAT' info = -2 end select else if (ilev_ > 1) then select case(what) - case(mld_aggr_damp_) + case(mld_aggr_damp_,mld_fact_thrs_) p%baseprecv(ilev_)%dprcparm(what) = val case default write(0,*) 'Error: trying to call PRECSET with an invalid WHAT' diff --git a/test/fileread/Makefile b/test/fileread/Makefile index 4f3e21ce..d18ca639 100644 --- a/test/fileread/Makefile +++ b/test/fileread/Makefile @@ -22,6 +22,7 @@ all: df_bench zf_bench df_bench.o zf_bench.o: getp.o getp.o: precdata.o + df_bench: $(DFOBJS) $(F90LINK) $(LINKOPT) $(DFOBJS) -o df_bench \ $(MLD_LIB) $(PSBLAS_LIB) $(LDLIBS) diff --git a/test/fileread/df_bench.f90 b/test/fileread/df_bench.f90 index 1aa8ddab..b84ff5ef 100644 --- a/test/fileread/df_bench.f90 +++ b/test/fileread/df_bench.f90 @@ -218,20 +218,24 @@ program df_bench if (precs(pp)%omega>=0.0) then call mld_precset(pre,mld_aggr_damp_,precs(pp)%omega,info,ilev=nlev) end if - call mld_precset(pre,mld_ml_type_, precs(pp)%mltype, info,ilev=nlev) - call mld_precset(pre,mld_aggr_alg_, precs(pp)%aggr, info,ilev=nlev) - call mld_precset(pre,mld_coarse_mat_, precs(pp)%cmat, info,ilev=nlev) - call mld_precset(pre,mld_smooth_pos_, precs(pp)%smthpos, info,ilev=nlev) - call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype2, info,ilev=nlev) - call mld_precset(pre,mld_smooth_sweeps_, precs(pp)%jswp, info,ilev=nlev) - call mld_precset(pre,mld_aggr_kind_, precs(pp)%smthkind,info,ilev=nlev) + call mld_precset(pre,mld_ml_type_, precs(pp)%mltype, info,ilev=nlev) + call mld_precset(pre,mld_aggr_alg_, precs(pp)%aggr, info,ilev=nlev) + call mld_precset(pre,mld_coarse_mat_, precs(pp)%cmat, info,ilev=nlev) + call mld_precset(pre,mld_smooth_pos_, precs(pp)%smthpos, info,ilev=nlev) + call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype2, info,ilev=nlev) + call mld_precset(pre,mld_sub_fill_in_, precs(pp)%fill2, info,ilev=nlev) + call mld_precset(pre,mld_fact_thrs_, precs(pp)%thr2, info,ilev=nlev) + call mld_precset(pre,mld_smooth_sweeps_, precs(pp)%jswp, info,ilev=nlev) + call mld_precset(pre,mld_aggr_kind_, precs(pp)%smthkind, info,ilev=nlev) else call mld_precinit(pre,precs(pp)%lv1,info) end if - call mld_precset(pre,mld_n_ovr_, precs(pp)%novr,info ,ilev=1) - call mld_precset(pre,mld_sub_restr_, precs(pp)%restr,info ,ilev=1) - call mld_precset(pre,mld_sub_prol_, precs(pp)%prol,info ,ilev=1) - call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype1,info ,ilev=1) + call mld_precset(pre,mld_n_ovr_, precs(pp)%novr, info,ilev=1) + call mld_precset(pre,mld_sub_restr_, precs(pp)%restr, info,ilev=1) + call mld_precset(pre,mld_sub_prol_, precs(pp)%prol, info,ilev=1) + call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype1, info,ilev=1) + call mld_precset(pre,mld_sub_fill_in_, precs(pp)%fill1, info,ilev=1) + call mld_precset(pre,mld_fact_thrs_, precs(pp)%thr1, info,ilev=1) ! setting initial guess to zero @@ -293,7 +297,6 @@ program df_bench & write(0,'(a20,2(1x,i3),1x,i5,3(1x,g9.4),1x,a8,1x,a)') & & mtrx(nm),np,precs(pp)%novr,iter,tprec,t2,t2+tprec,& & trim(cmethd),trim(precs(pp)%descr) - call flush(0) if (nt.lt.ntry) call mld_precfree(pre,info) if((t2+tprec).lt.mttot) then mtslv=t2 @@ -386,8 +389,6 @@ program df_bench call psb_gefree(b_col, desc_a,info) call psb_gefree(x_col, desc_a,info) call psb_spfree(a, desc_a,info) - write(0,*) 'Final cdfree' - call flush(0) call psb_cdfree(desc_a,info) deallocate(r_col,stat=info) deallocate(aux_b,stat=info) diff --git a/test/fileread/getp.f90 b/test/fileread/getp.f90 index 45c35110..1b4d2f24 100644 --- a/test/fileread/getp.f90 +++ b/test/fileread/getp.f90 @@ -21,10 +21,10 @@ contains type(precdata),allocatable :: precs(:) integer :: iret, istopc,itmax,itrace,ipart,nmat,nprecs,irst,irnum,ntry character(len=1024) :: charbuf - real(kind(1.d0)) :: eps, omega + real(kind(1.d0)) :: eps, omega,thr1,thr2 character :: afmt*5, lv1*10, lv2*10, pdescr*40 integer :: iam, nm, np, i, idx - integer, parameter :: npparms=12 + integer, parameter :: npparms=14 integer :: inparms(40), ip, pparms(npparms) call psb_info(icontxt,iam,np) @@ -81,7 +81,12 @@ contains end do idx=index(charbuf," ") read(charbuf(1:idx),*) omega - + charbuf=adjustl(charbuf(idx:)) + idx=index(charbuf," ") + read(charbuf(1:idx),*) thr1 + charbuf=adjustl(charbuf(idx:)) + idx=index(charbuf," ") + read(charbuf(1:idx),*) thr2 charbuf=adjustl(charbuf(idx:)) read(charbuf,'(a)') pdescr @@ -92,6 +97,8 @@ contains call psb_bcast(icontxt,lv2) call psb_bcast(icontxt,pparms(1:npparms),0) call psb_bcast(icontxt,omega,0) + call psb_bcast(icontxt,thr1,0) + call psb_bcast(icontxt,thr2,0) precs(np)%lv1 = lv1 precs(np)%lv2 = lv2 @@ -99,15 +106,19 @@ contains precs(np)%restr = pparms(2) precs(np)%prol = pparms(3) precs(np)%ftype1 = pparms(4) - precs(np)%mltype = pparms(5) - precs(np)%aggr = pparms(6) - precs(np)%smthkind = pparms(7) - precs(np)%cmat = pparms(8) - precs(np)%smthpos = pparms(9) - precs(np)%ftype2 = pparms(10) - precs(np)%jswp = pparms(11) - precs(np)%nlev = pparms(12) + precs(np)%fill1 = pparms(5) + precs(np)%mltype = pparms(6) + precs(np)%aggr = pparms(7) + precs(np)%smthkind = pparms(8) + precs(np)%cmat = pparms(9) + precs(np)%smthpos = pparms(10) + precs(np)%ftype2 = pparms(11) + precs(np)%fill2 = pparms(12) + precs(np)%jswp = pparms(13) + precs(np)%nlev = pparms(14) precs(np)%omega = omega + precs(np)%thr1 = thr1 + precs(np)%thr2 = thr2 end do read(*,*) nmat @@ -154,6 +165,8 @@ contains call psb_bcast(icontxt,pparms(1:npparms)) call psb_bcast(icontxt,omega) + call psb_bcast(icontxt,thr1,0) + call psb_bcast(icontxt,thr2,0) precs(np)%lv1 = lv1 precs(np)%lv2 = lv2 @@ -161,15 +174,19 @@ contains precs(np)%restr = pparms(2) precs(np)%prol = pparms(3) precs(np)%ftype1 = pparms(4) - precs(np)%mltype = pparms(5) - precs(np)%aggr = pparms(6) - precs(np)%smthkind = pparms(7) - precs(np)%cmat = pparms(8) - precs(np)%smthpos = pparms(9) - precs(np)%ftype2 = pparms(10) - precs(np)%jswp = pparms(11) - precs(np)%nlev = pparms(12) + precs(np)%fill1 = pparms(5) + precs(np)%mltype = pparms(6) + precs(np)%aggr = pparms(7) + precs(np)%smthkind = pparms(8) + precs(np)%cmat = pparms(9) + precs(np)%smthpos = pparms(10) + precs(np)%ftype2 = pparms(11) + precs(np)%fill2 = pparms(12) + precs(np)%jswp = pparms(13) + precs(np)%nlev = pparms(14) precs(np)%omega = omega + precs(np)%thr1 = thr1 + precs(np)%thr2 = thr2 end do diff --git a/test/fileread/precdata.f90 b/test/fileread/precdata.f90 index 08bbb563..245ea83c 100644 --- a/test/fileread/precdata.f90 +++ b/test/fileread/precdata.f90 @@ -7,6 +7,8 @@ module precd integer :: restr ! restriction over application of as integer :: prol ! prolongation over application of as integer :: ftype1 ! Factorization type: ILU, SuperLU, UMFPACK. + integer :: fill1 ! Fill-in for factorization 1 + real(kind(1.d0)) :: thr1 ! Threshold for fact. 1 ILU(T) integer :: mltype ! additive or multiplicative 2nd level prec integer :: aggr ! local or global aggregation integer :: smthkind ! smoothing type @@ -14,7 +16,9 @@ module precd integer :: smthpos ! pre, post, both smoothing integer :: glbsmth ! global smoothing integer :: ftype2 ! Factorization type: ILU, SuperLU, UMFPACK. - integer :: jswp ! jacobi sweeps + integer :: fill2 ! Fill-in for factorization 1 + real(kind(1.d0)) :: thr2 ! Threshold for fact. 1 ILU(T) + integer :: jswp ! Jacobi sweeps real(kind(1.d0)) :: omega ! smoother omega character(len=40) :: descr ! verbose description of the prec end type precdata diff --git a/test/fileread/runs/drt.sh b/test/fileread/runs/drt.sh index a56ab302..b9149753 100755 --- a/test/fileread/runs/drt.sh +++ b/test/fileread/runs/drt.sh @@ -29,37 +29,37 @@ CSR Matrix format 30 IRST Restart parameter for GMRES and BiCGSTAB(L) 0 RENUM: 0: none 1: global indices (2: GPS band reduction) $ntry NTRY for each comb. print out best timings -30 NPRCS/nov rstr prol fty1 mlty agg smth cm smp fty2 jswp nl omg -noprec none 0 0 0 0 0 0 0 0 0 0 0 1 -1.0 NOPREC -diag none 0 0 0 1 2 0 1 0 2 1 4 1 -1.0 DIAG -bjac none 0 0 0 1 2 0 1 0 2 1 4 1 -1.0 BJAC -as none 0 1 0 1 2 0 1 0 2 1 4 1 -1.0 RAS -as none 1 1 0 1 2 0 1 0 2 1 4 1 -1.0 RAS -as none 2 1 0 1 2 0 1 0 2 1 4 1 -1.0 RAS -as ml 0 1 0 1 2 0 1 0 2 1 4 2 -1.0 2L-M-RAS-I-D4 -as ml 1 1 0 1 2 0 1 0 2 1 4 2 -1.0 2L-M-RAS-I-D4 -as ml 2 1 0 1 2 0 1 0 2 1 4 2 -1.0 2L-M-RAS-I-D4 -as ml 0 1 0 1 2 0 1 0 2 5 4 2 -1.0 2L-M-RAS-U-D4 -as ml 1 1 0 1 2 0 1 0 2 5 4 2 -1.0 2L-M-RAS-U-D4 -as ml 2 1 0 1 2 0 1 0 2 5 4 2 -1.0 2L-M-RAS-U-D4 -as ml 0 1 0 1 2 0 1 0 2 1 4 3 -1.0 3L-M-RAS-I-D4 -as ml 1 1 0 1 2 0 1 0 2 1 4 3 -1.0 3L-M-RAS-I-D4 -as ml 2 1 0 1 2 0 1 0 2 1 4 3 -1.0 3L-M-RAS-I-D4 -as ml 0 1 0 1 2 0 1 0 2 5 4 3 -1.0 3L-M-RAS-U-D4 -as ml 1 1 0 1 2 0 1 0 2 5 4 3 -1.0 3L-M-RAS-U-D4 -as ml 2 1 0 1 2 0 1 0 2 5 4 3 -1.0 3L-M-RAS-U-D4 -as ml 0 1 0 1 2 0 1 1 2 1 1 2 -1.0 2L-M-RAS-I-R -as ml 1 1 0 1 2 0 1 1 2 1 1 2 -1.0 2L-M-RAS-I-R -as ml 2 1 0 1 2 0 1 1 2 1 1 2 -1.0 2L-M-RAS-I-R -as ml 0 1 0 1 2 0 1 1 2 5 1 2 -1.0 2L-M-RAS-U-R -as ml 1 1 0 1 2 0 1 1 2 5 1 2 -1.0 2L-M-RAS-U-R -as ml 2 1 0 1 2 0 1 1 2 5 1 2 -1.0 2L-M-RAS-U-R -as ml 0 1 0 1 2 0 1 1 2 1 1 3 -1.0 3L-M-RAS-I-R -as ml 1 1 0 1 2 0 1 1 2 1 1 3 -1.0 3L-M-RAS-I-R -as ml 2 1 0 1 2 0 1 1 2 1 1 3 -1.0 3L-M-RAS-I-R -as ml 0 1 0 1 2 0 1 1 2 5 1 3 -1.0 3L-M-RAS-U-R -as ml 1 1 0 1 2 0 1 1 2 5 1 3 -1.0 3L-M-RAS-U-R -as ml 2 1 0 1 2 0 1 1 2 5 1 3 -1.0 3L-M-RAS-U-R +30 NPRCS nov rst prl fc1 fl1 mlt agg smt cm smp ft2 fl2 jsw nl omg th1 th2 name +none none 0 0 0 0 0 0 0 0 0 0 0 0 0 1 -1.0 1e-4 1e-4 NOPREC +diag none 0 0 0 1 0 2 0 1 0 2 1 0 4 1 -1.0 1e-4 1e-4 DIAG +bjac none 0 0 0 1 0 2 0 1 0 2 1 0 4 1 -1.0 1e-4 1e-4 BJAC +as none 0 1 0 1 0 2 0 1 0 2 1 0 4 1 -1.0 1e-4 1e-4 RAS +as none 1 1 0 1 0 2 0 1 0 2 1 0 4 1 -1.0 1e-4 1e-4 RAS +as none 2 1 0 1 0 2 0 1 0 2 1 0 4 1 -1.0 1e-4 1e-4 RAS +as ml 0 1 0 1 0 2 0 1 0 2 1 0 4 2 -1.0 1e-4 1e-4 2L-M-RAS-I-D4 +as ml 1 1 0 1 0 2 0 1 0 2 1 0 4 2 -1.0 1e-4 1e-4 2L-M-RAS-I-D4 +as ml 2 1 0 1 0 2 0 1 0 2 1 0 4 2 -1.0 1e-4 1e-4 2L-M-RAS-I-D4 +as ml 0 1 0 1 0 2 0 1 0 2 5 0 4 2 -1.0 1e-4 1e-4 2L-M-RAS-U-D4 +as ml 1 1 0 1 0 2 0 1 0 2 5 0 4 2 -1.0 1e-4 1e-4 2L-M-RAS-U-D4 +as ml 2 1 0 1 0 2 0 1 0 2 5 0 4 2 -1.0 1e-4 1e-4 2L-M-RAS-U-D4 +as ml 0 1 0 1 0 2 0 1 0 2 1 0 4 3 -1.0 1e-4 1e-4 3L-M-RAS-I-D4 +as ml 1 1 0 1 0 2 0 1 0 2 1 0 4 3 -1.0 1e-4 1e-4 3L-M-RAS-I-D4 +as ml 2 1 0 1 0 2 0 1 0 2 1 0 4 3 -1.0 1e-4 1e-4 3L-M-RAS-I-D4 +as ml 0 1 0 1 0 2 0 1 0 2 5 0 4 3 -1.0 1e-4 1e-4 3L-M-RAS-U-D4 +as ml 1 1 0 1 0 2 0 1 0 2 5 0 4 3 -1.0 1e-4 1e-4 3L-M-RAS-U-D4 +as ml 2 1 0 1 0 2 0 1 0 2 5 0 4 3 -1.0 1e-4 1e-4 3L-M-RAS-U-D4 +as ml 0 1 0 1 0 2 0 1 1 2 1 0 1 2 -1.0 1e-4 1e-4 2L-M-RAS-I-R +as ml 1 1 0 1 0 2 0 1 1 2 1 0 1 2 -1.0 1e-4 1e-4 2L-M-RAS-I-R +as ml 2 1 0 1 0 2 0 1 1 2 1 0 1 2 -1.0 1e-4 1e-4 2L-M-RAS-I-R +as ml 0 1 0 1 0 2 0 1 1 2 5 0 1 2 -1.0 1e-4 1e-4 2L-M-RAS-U-R +as ml 1 1 0 1 0 2 0 1 1 2 5 0 1 2 -1.0 1e-4 1e-4 2L-M-RAS-U-R +as ml 2 1 0 1 0 2 0 1 1 2 5 0 1 2 -1.0 1e-4 1e-4 2L-M-RAS-U-R +as ml 0 1 0 1 0 2 0 1 1 2 1 0 1 3 -1.0 1e-4 1e-4 3L-M-RAS-I-R +as ml 1 1 0 1 0 2 0 1 1 2 1 0 1 3 -1.0 1e-4 1e-4 3L-M-RAS-I-R +as ml 2 1 0 1 0 2 0 1 1 2 1 0 1 3 -1.0 1e-4 1e-4 3L-M-RAS-I-R +as ml 0 1 0 1 0 2 0 1 1 2 5 0 1 3 -1.0 1e-4 1e-4 3L-M-RAS-U-R +as ml 1 1 0 1 0 2 0 1 1 2 5 0 1 3 -1.0 1e-4 1e-4 3L-M-RAS-U-R +as ml 2 1 0 1 0 2 0 1 1 2 5 0 1 3 -1.0 1e-4 1e-4 3L-M-RAS-U-R 2 Number of matrices thm50x30.mtx none thm200x120.mtx none diff --git a/test/fileread/zf_bench.f90 b/test/fileread/zf_bench.f90 index d63f159c..9b35b373 100644 --- a/test/fileread/zf_bench.f90 +++ b/test/fileread/zf_bench.f90 @@ -35,8 +35,8 @@ program zf_bench logical :: amroot, out1, out2 ! solver paramters - integer :: iter, itmax, ierr, itrace, ircode, ipart,& - & methd, istopc, iprec, ml, irnum, irst, ntry, nmat, ilev,nlev + integer :: iter, itmax, ierr, itrace, ircode, ipart,nlev,& + & methd, istopc, iprec, ml, irnum, irst, ntry, nmat, ilev,ipsize,asize,cdsize real(kind(1.d0)) :: err, eps character(len=5) :: afmt @@ -203,21 +203,24 @@ program zf_bench if (precs(pp)%omega>=0.0) then call mld_precset(pre,mld_aggr_damp_,precs(pp)%omega,info,ilev=nlev) end if - call mld_precset(pre,mld_ml_type_, precs(pp)%mltype, info,ilev=nlev) - call mld_precset(pre,mld_aggr_alg_, precs(pp)%aggr, info,ilev=nlev) - call mld_precset(pre,mld_coarse_mat_, precs(pp)%cmat, info,ilev=nlev) - call mld_precset(pre,mld_smooth_pos_, precs(pp)%smthpos, info,ilev=nlev) - call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype2, info,ilev=nlev) - call mld_precset(pre,mld_smooth_sweeps_, precs(pp)%jswp, info,ilev=nlev) - call mld_precset(pre,mld_aggr_kind_, precs(pp)%smthkind,info,ilev=nlev) + call mld_precset(pre,mld_ml_type_, precs(pp)%mltype, info,ilev=nlev) + call mld_precset(pre,mld_aggr_alg_, precs(pp)%aggr, info,ilev=nlev) + call mld_precset(pre,mld_coarse_mat_, precs(pp)%cmat, info,ilev=nlev) + call mld_precset(pre,mld_smooth_pos_, precs(pp)%smthpos, info,ilev=nlev) + call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype2, info,ilev=nlev) + call mld_precset(pre,mld_sub_fill_in_, precs(pp)%fill2, info,ilev=nlev) + call mld_precset(pre,mld_fact_thrs_, precs(pp)%thr2, info,ilev=nlev) + call mld_precset(pre,mld_smooth_sweeps_, precs(pp)%jswp, info,ilev=nlev) + call mld_precset(pre,mld_aggr_kind_, precs(pp)%smthkind, info,ilev=nlev) else call mld_precinit(pre,precs(pp)%lv1,info) end if - call mld_precset(pre,mld_n_ovr_, precs(pp)%novr,info ,ilev=1) - call mld_precset(pre,mld_sub_restr_, precs(pp)%restr,info ,ilev=1) - call mld_precset(pre,mld_sub_prol_, precs(pp)%prol,info ,ilev=1) - call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype1,info ,ilev=1) - + call mld_precset(pre,mld_n_ovr_, precs(pp)%novr, info,ilev=1) + call mld_precset(pre,mld_sub_restr_, precs(pp)%restr, info,ilev=1) + call mld_precset(pre,mld_sub_prol_, precs(pp)%prol, info,ilev=1) + call mld_precset(pre,mld_sub_solve_, precs(pp)%ftype1, info,ilev=1) + call mld_precset(pre,mld_sub_fill_in_, precs(pp)%fill1, info,ilev=1) + call mld_precset(pre,mld_fact_thrs_, precs(pp)%thr1, info,ilev=1) ! setting initial guess to zero @@ -248,8 +251,8 @@ program zf_bench call psb_barrier(ictxt) t1 = psb_wtime() call psb_krylov(cmethd,a,pre,b_col,x_col,eps,desc_a,info,& - & itmax,iter,err,itrace,irst=ml,istop=istopc) - + & itmax=itmax,iter=iter,err=err,itrace=itrace,& + & irst=irst,istop=istopc) call psb_barrier(ictxt) t2 = psb_wtime() - t1 call psb_amx(ictxt,t2) @@ -263,15 +266,14 @@ program zf_bench !!$ call flush(6) !!$ call psb_barrier(ictxt) if(amroot.and.out2) & - & write(10,'(a20,2(1x,i3),1x,i5,3(1x,g9.4),1x,a)') & + & write(10,'(a20,2(1x,i3),1x,i5,3(1x,g9.4),1x,a8,1x,a)') & & mtrx(nm),np,precs(pp)%novr,iter,tprec,t2,t2+tprec,& - & trim(precs(pp)%descr) + & trim(cmethd),trim(precs(pp)%descr) if(amroot) & - & write(0,'(a20,2(1x,i3),1x,i5,3(1x,g9.4),1x,a)') & + & write(0,'(a20,2(1x,i3),1x,i5,3(1x,g9.4),1x,a8,1x,a)') & & mtrx(nm),np,precs(pp)%novr,iter,tprec,t2,t2+tprec,& - & trim(precs(pp)%descr) - - if(nt.lt.ntry) call mld_precfree(pre,info) + & trim(cmethd),trim(precs(pp)%descr) + if (nt.lt.ntry) call mld_precfree(pre,info) if((t2+tprec).lt.mttot) then mtslv=t2 mtprec=tprec @@ -285,6 +287,13 @@ program zf_bench call psb_spmm(-zone,a,x_col,zone,r_col,desc_a,info) call psb_genrm2s(resmx,r_col,desc_a,info) call psb_geamaxs(resmxp,r_col,desc_a,info) + + ipsize = mld_sizeof(pre) + asize = psb_sizeof(a) + cdsize = psb_sizeof(desc_a) + call psb_sum(ictxt,ipsize) + call psb_sum(ictxt,asize) + call psb_sum(ictxt,cdsize) if (amroot) then write(*,'("Matrix : ",a)') mtrx(nm) @@ -302,14 +311,19 @@ program zf_bench write(*,'("Total time : ",es10.4)')mttot write(*,'("Residual norm 2 : ",es10.4)')resmx write(*,'("Residual norm inf : ",es10.4)')resmxp + write(*,'("Total memory occupation for A: ",i10)')asize + write(*,'("Total memory occupation for DESC_A: ",i10)')cdsize + write(*,'("Total memory occupation for PRE: ",i10)')ipsize + write(*,'(72("="))') write(*,'(" ")') write(*,'(" ")') write(*,'(" ")') - if(out1) write(8,'(a20,2(1x,i3),1x,i5,5(1x,g9.4),1x,a)') mtrx(nm),& + if(out1) write(8,'(a20,2(1x,i3),1x,i5,5(1x,g9.4),1x,a8,1x,a)') mtrx(nm),& & np,precs(pp)%novr,& - & iter,mtprec,mtslv,mttot,resmx,resmxp,trim(precs(pp)%descr) + & iter,mtprec,mtslv,mttot,resmx,resmxp,& + & trim(cmethd),trim(precs(pp)%descr) end if call mld_precfree(pre,info)