From e8b8c53ffeb944b9d4edec10dbc70e28521b3c59 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Wed, 24 Jun 2009 11:13:52 +0000 Subject: [PATCH] mlprec/Makefile mlprec/mld_daggrmat_asb.f90 mlprec/mld_daggrmat_minnrg_asb.F90 mlprec/mld_dmlprec_aply.f90 mlprec/mld_dprecinit.F90 mlprec/mld_dprecset.f90 mlprec/mld_inner_mod.f90 mlprec/mld_prec_type.f90 mld2p4: First attempt at implementing MIN ENERGY smoother. Only for double real so far. --- mlprec/Makefile | 2 +- mlprec/mld_daggrmat_asb.f90 | 8 + mlprec/mld_daggrmat_minnrg_asb.F90 | 854 +++++++++++++++++++++++++++++ mlprec/mld_dmlprec_aply.f90 | 656 ++++++++++++++++++---- mlprec/mld_dprecinit.F90 | 8 + mlprec/mld_dprecset.f90 | 6 +- mlprec/mld_inner_mod.f90 | 12 + mlprec/mld_prec_type.f90 | 11 +- 8 files changed, 1451 insertions(+), 106 deletions(-) create mode 100644 mlprec/mld_daggrmat_minnrg_asb.F90 diff --git a/mlprec/Makefile b/mlprec/Makefile index 814d55bc..5f7a6d0c 100644 --- a/mlprec/Makefile +++ b/mlprec/Makefile @@ -8,7 +8,7 @@ FINCLUDES=$(FMFLAG). $(FMFLAG)$(LIBDIR) $(FMFLAG)$(PSBLIBDIR) MODOBJS=mld_prec_type.o mld_prec_mod.o mld_inner_mod.o mld_move_alloc_mod.o MPFOBJS=mld_saggrmat_nosmth_asb.o mld_saggrmat_smth_asb.o \ - mld_daggrmat_nosmth_asb.o mld_daggrmat_smth_asb.o \ + mld_daggrmat_nosmth_asb.o mld_daggrmat_smth_asb.o mld_daggrmat_minnrg_asb.o\ mld_caggrmat_nosmth_asb.o mld_caggrmat_smth_asb.o \ mld_zaggrmat_nosmth_asb.o mld_zaggrmat_smth_asb.o MPCOBJS=mld_sslud_interface.o mld_dslud_interface.o mld_cslud_interface.o mld_zslud_interface.o diff --git a/mlprec/mld_daggrmat_asb.f90 b/mlprec/mld_daggrmat_asb.f90 index 2c4fb063..58914e5d 100644 --- a/mlprec/mld_daggrmat_asb.f90 +++ b/mlprec/mld_daggrmat_asb.f90 @@ -143,6 +143,14 @@ subroutine mld_daggrmat_asb(a,desc_a,ilaggr,nlaggr,p,info) goto 9999 end if + case(mld_min_energy_) + + call mld_aggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,p,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='mld_aggrmat_smth_asb') + goto 9999 + end if + case default call psb_errpush(4001,name,a_err='Invalid aggr kind') diff --git a/mlprec/mld_daggrmat_minnrg_asb.F90 b/mlprec/mld_daggrmat_minnrg_asb.F90 new file mode 100644 index 00000000..5ae31b83 --- /dev/null +++ b/mlprec/mld_daggrmat_minnrg_asb.F90 @@ -0,0 +1,854 @@ +!!$ +!!$ +!!$ MLD2P4 version 1.1 +!!$ MultiLevel Domain Decomposition Parallel Preconditioners Package +!!$ based on PSBLAS (Parallel Sparse BLAS version 2.3.1) +!!$ +!!$ (C) Copyright 2008,2009 +!!$ +!!$ Salvatore Filippone University of Rome Tor Vergata +!!$ Alfredo Buttari University of Rome Tor Vergata +!!$ Pasqua D'Ambra ICAR-CNR, Naples +!!$ Daniela di Serafino Second University of Naples +!!$ +!!$ Redistribution and use in source and binary forms, with or without +!!$ modification, are permitted provided that the following conditions +!!$ are met: +!!$ 1. Redistributions of source code must retain the above copyright +!!$ notice, this list of conditions and the following disclaimer. +!!$ 2. Redistributions in binary form must reproduce the above copyright +!!$ notice, this list of conditions, and the following disclaimer in the +!!$ documentation and/or other materials provided with the distribution. +!!$ 3. The name of the MLD2P4 group or the names of its contributors may +!!$ not be used to endorse or promote products derived from this +!!$ software without specific written permission. +!!$ +!!$ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +!!$ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +!!$ TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +!!$ PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE MLD2P4 GROUP OR ITS CONTRIBUTORS +!!$ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +!!$ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +!!$ SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +!!$ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +!!$ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +!!$ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +!!$ POSSIBILITY OF SUCH DAMAGE. +!!$ +!!$ +! File: mld_daggrmat_minnrg_asb.F90 +! +! Subroutine: mld_daggrmat_minnrg_asb +! Version: real +! +! This routine builds a coarse-level matrix A_C from a fine-level matrix A +! by using the Galerkin approach, i.e. +! +! A_C = P_C^T A P_C, +! +! where P_C is a prolongator from the coarse level to the fine one. +! +! The prolongator P_C is built according to a smoothed aggregation algorithm, +! i.e. it is obtained by applying a damped Jacobi smoother to the piecewise +! constant interpolation operator P corresponding to the fine-to-coarse level +! mapping built by the mld_aggrmap_bld subroutine: +! +! P_C = (I - omega*D^(-1)A) * P, +! +! where D is the diagonal matrix with main diagonal equal to the main diagonal +! of A, and omega is a suitable smoothing parameter. An estimate of the spectral +! radius of D^(-1)A, to be used in the computation of omega, is provided, +! according to the value of p%iprcparm(mld_aggr_omega_alg_), specified by the user +! through mld_dprecinit and mld_dprecset. +! +! This routine can also build A_C according to a "bizarre" aggregation algorithm, +! using a "naive" prolongator proposed by the authors of MLD2P4. However, this +! prolongator still requires a deep analysis and testing and its use is not +! recommended. +! +! The coarse-level matrix A_C is distributed among the parallel processes or +! replicated on each of them, according to the value of p%iprcparm(mld_coarse_mat_), +! specified by the user through mld_dprecinit and mld_dprecset. +! +! For more details see +! M. Brezina and P. Vanek, A black-box iterative solver based on a +! two-level Schwarz method, Computing, 63 (1999), 233-263. +! P. D'Ambra, D. di Serafino and S. Filippone, On the development of +! PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math. +! 57 (2007), 1181-1196. +! +! Arguments: +! a - type(psb_dspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! p - type(mld_donelev_type), input/output. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! ilaggr - integer, dimension(:), allocatable. +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. +! nlaggr - integer, dimension(:), allocatable. +! nlaggr(i) contains the aggregates held by process i. +! info - integer, output. +! Error code. +! +subroutine mld_daggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,p,info) + use psb_base_mod + use mld_inner_mod, mld_protect_name => mld_daggrmat_minnrg_asb + +#ifdef MPI_MOD + use mpi +#endif + implicit none +#ifdef MPI_H + include 'mpif.h' +#endif + + ! Arguments + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, intent(inout) :: ilaggr(:), nlaggr(:) + type(mld_donelev_type), intent(inout), target :: p + integer, intent(out) :: info + + ! Local variables + type(psb_dspmat_type) :: b + integer, allocatable :: nzbr(:), idisp(:) + integer :: nrow, nglob, ncol, ntaggr, nzac, ip, ndx,& + & naggr, nzl,naggrm1,naggrp1, i, j, k, jd, icolF, nrt + integer ::ictxt,np,me, err_act, icomm + character(len=20) :: name + type(psb_dspmat_type) :: am1,am2, af, ptilde, rtilde + type(psb_dspmat_type) :: am3,am4, ap, adap,atmp,rada, ra, atmp2 + real(psb_dpk_), allocatable :: adiag(:), pj(:), xj(:), yj(:), omf(:),omp(:),omi(:),& + & oden(:), adinv(:) + logical :: ml_global_nmb, filter_mat + integer :: debug_level, debug_unit + integer, parameter :: ncmax=16 + real(psb_dpk_) :: omega, anorm, tmp, dg, theta, alpha,beta, ommx + + name='mld_aggrmat_minnrg_asb' + if(psb_get_errstatus().ne.0) return + info=0 + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt = psb_cd_get_context(desc_a) + icomm = psb_cd_get_mpic(desc_a) + ictxt = psb_cd_get_context(desc_a) + + call psb_info(ictxt, me, np) + + + call psb_nullify_sp(b) + call psb_nullify_sp(am3) + call psb_nullify_sp(am4) + call psb_nullify_sp(am1) + call psb_nullify_sp(am2) + call psb_nullify_sp(Ap) + call psb_nullify_sp(Adap) + call psb_nullify_sp(Atmp) + call psb_nullify_sp(Atmp2) + call psb_nullify_sp(AF) + call psb_nullify_sp(ra) + call psb_nullify_sp(rada) + call psb_nullify_sp(ptilde) + call psb_nullify_sp(rtilde) + + nglob = psb_cd_get_global_rows(desc_a) + nrow = psb_cd_get_local_rows(desc_a) + ncol = psb_cd_get_local_cols(desc_a) + + theta = p%rprcparm(mld_aggr_thresh_) + + naggr = nlaggr(me+1) + ntaggr = sum(nlaggr) + + allocate(nzbr(np), idisp(np),stat=info) + if (info /= 0) then + info=4025 + call psb_errpush(info,name,i_err=(/2*np,0,0,0,0/),& + & a_err='integer') + goto 9999 + end if + + naggrm1 = sum(nlaggr(1:me)) + naggrp1 = sum(nlaggr(1:me+1)) + ml_global_nmb = .true. + + filter_mat = (p%iprcparm(mld_aggr_filter_) == mld_filter_mat_) + + if (ml_global_nmb) then + ilaggr(1:nrow) = ilaggr(1:nrow) + naggrm1 + call psb_halo(ilaggr,desc_a,info) + + if (info /= 0) then + call psb_errpush(4010,name,a_err='psb_halo') + goto 9999 + end if + end if + + ! naggr: number of local aggregates + ! nrow: local rows. + ! + allocate(adiag(ncol),adinv(ncol),xj(ncol),& + & yj(ncol),omf(ncol),omp(ntaggr),oden(ntaggr),omi(ncol),stat=info) + + if (info /= 0) then + info=4025 + call psb_errpush(info,name,i_err=(/6*ncol+ntaggr,0,0,0,0/),& + & a_err='real(psb_dpk_)') + goto 9999 + end if + + ! Get the diagonal D + call psb_sp_getdiag(a,adiag,info) + if (info == 0) & + & call psb_halo(adiag,desc_a,info) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='sp_getdiag') + goto 9999 + end if + + ! 1. Allocate Ptilde in sparse matrix form + ptilde%fida='COO' + ptilde%m=ncol + if (ml_global_nmb) then + ptilde%k=ntaggr + call psb_sp_all(ncol,ntaggr,ptilde,ncol,info) + else + ptilde%k=naggr + call psb_sp_all(ncol,naggr,ptilde,ncol,info) + endif + + if (info /= 0) then + call psb_errpush(4010,name,a_err='spall') + goto 9999 + end if + + if (ml_global_nmb) then + do i=1,ncol + ptilde%aspk(i) = done + ptilde%ia1(i) = i + ptilde%ia2(i) = ilaggr(i) + end do + ptilde%infoa(psb_nnz_) = ncol + else + do i=1,nrow + ptilde%aspk(i) = done + ptilde%ia1(i) = i + ptilde%ia2(i) = ilaggr(i) + end do + ptilde%infoa(psb_nnz_) = nrow + endif + + call psb_spcnv(ptilde,info,afmt='csr',dupl=psb_dupl_add_) + if (info==0) call psb_spcnv(a,am3,info,afmt='csr',dupl=psb_dupl_add_) + if (info /= 0) then + call psb_errpush(4010,name,a_err='spcnv') + goto 9999 + end if + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ' Initial copies done.' + + call psb_symbmm(am3,ptilde,ap,info) + if (info == 0) call psb_numbmm(am3,ptilde,ap) + + if(info /= 0) then + call psb_errpush(4010,name,a_err='symbmm 1') + goto 9999 + end if + + call psb_sp_clone(ap,atmp,info) + + + do i=1,size(adiag) + if (adiag(i) /= dzero) then + adinv(i) = done / adiag(i) + else + adinv(i) = done + end if + end do + call psb_sp_scal(adinv,atmp,info) + call psb_sphalo(atmp,desc_a,am4,info,& + & colcnv=.false.,rowscale=.true.,outfmt='CSR ') + if (info == 0) call psb_rwextd(ncol,atmp,info,b=am4) + if (info == 0) call psb_sp_free(am4,info) + + call psb_symbmm(am3,atmp,adap,info) + call psb_numbmm(am3,atmp,adap) + call psb_sp_free(atmp,info) + +!!$ write(0,*) 'Columns of AP',psb_sp_get_ncols(ap) +!!$ write(0,*) 'Columns of ADAP',psb_sp_get_ncols(adap) + call psb_spcnv(ap,info,afmt='coo') + if (info == 0) call psb_spcnv(ap,info,afmt='csc') + if (info == 0) call psb_spcnv(adap,info,afmt='coo') + if (info == 0) call psb_spcnv(adap,info,afmt='csc') + if (info /= 0) then + write(0,*) 'Failed conversion to CSC' + end if + + call csc_mat_col_prod(ap,adap,omp,info) + call csc_mat_col_prod(adap,adap,oden,info) + call psb_sum(ictxt,omp) + call psb_sum(ictxt,oden) + + omp = omp/oden +!!$ write(0,*) 'Check on output prolongator ',omp(1:min(size(omp),10)) + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done NUMBMM 1' + + ! Compute omega_int + ommx = -1d300 + do i=1, ncol + omi(i) = omp(ilaggr(i)) + ommx = max(ommx,omi(i)) + end do + ! Compute omega_fine + do i=1, nrow + omf(i) = ommx + do j=am3%ia2(i),am3%ia2(i+1)-1 + omf(i) = min(omf(i),omi(am3%ia1(j))) + end do + omf(i) = max(dzero,omf(i)) + end do + + + if (filter_mat) then + ! + ! Build the filtered matrix Af from A + ! + call psb_spcnv(a,af,info,afmt='csr',dupl=psb_dupl_add_) + + do i=1,nrow + tmp = dzero + jd = -1 + do j=af%ia2(i),af%ia2(i+1)-1 + if (af%ia1(j) == i) jd = j + if (abs(af%aspk(j)) < theta*sqrt(abs(adiag(i)*adiag(af%ia1(j))))) then + tmp=tmp+af%aspk(j) + af%aspk(j)=dzero + endif + enddo + if (jd == -1) then + write(0,*) 'Wrong input: we need the diagonal!!!!', i + else + af%aspk(jd)=af%aspk(jd)-tmp + end if + enddo + ! Take out zeroed terms + call psb_spcnv(af,info,afmt='coo') + k = 0 + do j=1,psb_sp_get_nnzeros(af) + if ((af%aspk(j) /= dzero) .or. (af%ia1(j)==af%ia2(j))) then + k = k + 1 + af%aspk(k) = af%aspk(j) + af%ia1(k) = af%ia1(j) + af%ia2(k) = af%ia2(j) + end if + end do +!!$ write(debug_unit,*) me,' ',trim(name),' Non zeros from filtered matrix:',k,af%m,af%k + call psb_sp_setifld(k,psb_nnz_,af,info) + call psb_spcnv(af,info,afmt='csr') + end if + + omf(1:nrow) = omf(1:nrow) * adinv(1:nrow) +!!$ if (filter_mat) call psb_sp_scal(adinv,af,info) +!!$ +!!$ call psb_sp_scal(adinv,am3,info) +!!$ if (info /= 0) goto 9999 + + if (filter_mat) then + ! + ! Build the smoothed prolongator using the filtered matrix + ! + if (psb_toupper(af%fida)=='CSR') then + do i=1,af%m + do j=af%ia2(i),af%ia2(i+1)-1 + if (af%ia1(j) == i) then + af%aspk(j) = done - omf(i)*af%aspk(j) + else + af%aspk(j) = - omf(i)*af%aspk(j) + end if + end do + end do + else + call psb_errpush(4001,name,a_err='Invalid AF storage format') + goto 9999 + end if + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done gather, going for SYMBMM 1' + ! + ! Symbmm90 does the allocation for its result. + ! + ! am1 = (I-w*D*Af)Ptilde + ! Doing it this way means to consider diag(Af_i) + ! + ! + call psb_symbmm(af,ptilde,am1,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='symbmm 1') + goto 9999 + end if + + call psb_numbmm(af,ptilde,am1) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done NUMBMM 1' + else + ! + ! Build the smoothed prolongator using the original matrix + ! + if (psb_toupper(am3%fida)=='CSR') then + do i=1,am3%m + do j=am3%ia2(i),am3%ia2(i+1)-1 + if (am3%ia1(j) == i) then + am3%aspk(j) = done - omf(i)*am3%aspk(j) + else + am3%aspk(j) = - omf(i)*am3%aspk(j) + end if + end do + end do + else + call psb_errpush(4001,name,a_err='Invalid AM3 storage format') + goto 9999 + end if + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done gather, going for SYMBMM 1' + ! + ! Symbmm90 does the allocation for its result. + ! + ! am1 = (I-w*D*A)Ptilde + ! + ! + call psb_symbmm(am3,ptilde,am1,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='symbmm 1') + goto 9999 + end if + + call psb_numbmm(am3,ptilde,am1) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done NUMBMM 1' + + end if + + ! + ! Ok, let's start over with the restrictor + ! + call psb_transp(ptilde,rtilde,fmt='CSR') + + call psb_spcnv(a,atmp,info,afmt='CSR') + call psb_sphalo(atmp,desc_a,am4,info,& + & colcnv=.true.,rowscale=.true.) + nrt = psb_sp_get_nrows(am4) + call psb_sp_clip(am4,atmp2,info,1,nrt,1,ncol) + call psb_spcnv(atmp2,info,afmt='CSR') + if (info == 0) call psb_rwextd(ncol,atmp,info,b=atmp2) + if (info == 0) call psb_sp_free(am4,info) + if (info == 0) call psb_sp_free(atmp2,info) + call psb_symbmm(rtilde,atmp,ra,info) + if (info == 0) call psb_numbmm(rtilde,atmp,ra) + if (info /= 0) then + write(0,*) 'From symbmm :',info + goto 9999 + end if + call psb_sp_scal(adinv,atmp,info) + call psb_symbmm(ra,atmp,rada,info) + call psb_numbmm(ra,atmp,rada) + + call csr_mat_row_prod(ra,rada,omp,info) + call csr_mat_row_prod(rada,rada,oden,info) + call psb_sum(ictxt,omp) + call psb_sum(ictxt,oden) + + omp = omp/oden +!!$ write(0,*) 'Check on output restrictor',omp(1:min(size(omp),10)) + ! Compute omega_int + ommx = -1d300 + do i=1, ncol + omi(i) = omp(ilaggr(i)) + ommx = max(ommx,omi(i)) + end do + ! Compute omega_fine + ! Going over the columns of atmp means going over the rows + ! of A^T. Hopefully ;-) + call psb_spcnv(atmp,atmp2,info,afmt='coo') + if (info == 0) call psb_spcnv(atmp2,info,afmt='csc') + + do i=1, ncol + omf(i) = ommx + do j=atmp2%ia2(i),atmp2%ia2(i+1)-1 + omf(i) = min(omf(i),omi(atmp2%ia1(j))) + end do + omf(i) = max(dzero,omf(i)) + end do + omf(1:ncol) = omf(1:ncol)*adinv(1:ncol) + call psb_sp_free(atmp2,info) + + + if (psb_toupper(atmp%fida)=='CSR') then + do i=1,atmp%m + do j=atmp%ia2(i),atmp%ia2(i+1)-1 + if (atmp%ia1(j) == i) then + atmp%aspk(j) = done - atmp%aspk(j)*omf(atmp%ia1(j)) + else + atmp%aspk(j) = - atmp%aspk(j)*omf(atmp%ia1(j)) + end if + end do + end do + else + call psb_errpush(4001,name,a_err='Invalid ATMP storage format') + goto 9999 + end if + call psb_symbmm(rtilde,atmp,am2,info) + call psb_numbmm(rtilde,atmp,am2) + + ! + ! Now we have to gather the halo of am1, and add it to itself + ! to multiply it by A, + ! + call psb_sphalo(am1,desc_a,am4,info,& + & colcnv=.false.,rowscale=.true.) + if (info == 0) call psb_rwextd(ncol,am1,info,b=am4) + if (info == 0) call psb_sp_free(am4,info) + + if(info /= 0) then + call psb_errpush(4001,name,a_err='Halo of am1') + goto 9999 + end if + + + + call psb_symbmm(a,am1,am3,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='symbmm 2') + goto 9999 + end if + + call psb_numbmm(a,am1,am3) + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done NUMBMM 2' + + ! + ! Now we have to fix this. The only rows of B that are correct + ! are those corresponding to "local" aggregates, i.e. indices in ilaggr(:) + ! + call psb_spcnv(am2,info,afmt='COO') + nzl = psb_sp_get_nnzeros(am2) + i=0 + do k=1, nzl + if ((naggrm1 < am2%ia1(k)) .and. (am2%ia1(k) <= naggrp1)) then + i = i+1 + am2%aspk(i) = am2%aspk(k) + am2%ia1(i) = am2%ia1(k) + am2%ia2(i) = am2%ia2(k) + end if + end do + am2%infoa(psb_nnz_) = i + call psb_spcnv(am2,info,afmt='csr',dupl=psb_dupl_add_) + if (info /=0) then + call psb_errpush(4010,name,a_err='spcnv am2') + goto 9999 + end if + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'starting sphalo/ rwxtd' + + ! am2 = ((i-wDA)Ptilde)^T + call psb_sphalo(am3,desc_a,am4,info,& + & colcnv=.false.,rowscale=.true.) + if (info == 0) call psb_rwextd(ncol,am3,info,b=am4) + if (info == 0) call psb_sp_free(am4,info) + + if(info /= 0) then + call psb_errpush(4001,name,a_err='Extend am3') + goto 9999 + end if + + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'starting symbmm 3' + call psb_symbmm(am2,am3,b,info) + if (info == 0) call psb_numbmm(am2,am3,b) + if (info == 0) call psb_sp_free(am3,info) + if (info == 0) call psb_spcnv(b,info,afmt='coo',dupl=psb_dupl_add_) + if (info /= 0) then + call psb_errpush(4001,name,a_err='Build b = am2 x am3') + goto 9999 + end if + + + + select case(p%iprcparm(mld_coarse_mat_)) + + case(mld_distr_mat_) + + call psb_sp_clone(b,p%ac,info) + nzac = p%ac%infoa(psb_nnz_) + nzl = p%ac%infoa(psb_nnz_) + if (info == 0) call psb_cdall(ictxt,p%desc_ac,info,nl=nlaggr(me+1)) + if (info == 0) call psb_cdins(nzl,p%ac%ia1,p%ac%ia2,p%desc_ac,info) + if (info == 0) call psb_cdasb(p%desc_ac,info) + if (info == 0) call psb_glob_to_loc(p%ac%ia1(1:nzl),p%desc_ac,info,iact='I') + if (info == 0) call psb_glob_to_loc(p%ac%ia2(1:nzl),p%desc_ac,info,iact='I') + if (info /= 0) then + call psb_errpush(4001,name,a_err='Creating p%desc_ac and converting ac') + goto 9999 + end if + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Assembld aux descr. distr.' + + + p%ac%m=psb_cd_get_local_rows(p%desc_ac) + p%ac%k=psb_cd_get_local_cols(p%desc_ac) + p%ac%fida='COO' + p%ac%descra='GUN' + + call psb_sp_free(b,info) + if (info == 0) deallocate(nzbr,idisp,stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='psb_sp_free') + goto 9999 + end if + + if (np>1) then + nzl = psb_sp_get_nnzeros(am1) + call psb_glob_to_loc(am1%ia1(1:nzl),p%desc_ac,info,'I') + if(info /= 0) then + call psb_errpush(4010,name,a_err='psb_glob_to_loc') + goto 9999 + end if + endif + am1%k=psb_cd_get_local_cols(p%desc_ac) + + if (np>1) then + call psb_spcnv(am2,info,afmt='coo',dupl=psb_dupl_add_) + nzl = am2%infoa(psb_nnz_) + if (info == 0) call psb_glob_to_loc(am2%ia1(1:nzl),p%desc_ac,info,'I') + if (info == 0) call psb_spcnv(am2,info,afmt='csr',dupl=psb_dupl_add_) + if(info /= 0) then + call psb_errpush(4001,name,a_err='Converting am2 to local') + goto 9999 + end if + end if + am2%m=psb_cd_get_local_cols(p%desc_ac) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done ac ' + + case(mld_repl_mat_) + ! + ! + call psb_cdall(ictxt,p%desc_ac,info,mg=ntaggr,repl=.true.) + nzbr(:) = 0 + nzbr(me+1) = b%infoa(psb_nnz_) + + call psb_sum(ictxt,nzbr(1:np)) + nzac = sum(nzbr) + if (info == 0) call psb_sp_all(ntaggr,ntaggr,p%ac,nzac,info) + if (info /= 0) goto 9999 + + do ip=1,np + idisp(ip) = sum(nzbr(1:ip-1)) + enddo + ndx = nzbr(me+1) + + call mpi_allgatherv(b%aspk,ndx,mpi_double_precision,p%ac%aspk,nzbr,idisp,& + & mpi_double_precision,icomm,info) + if (info == 0) call mpi_allgatherv(b%ia1,ndx,mpi_integer,p%ac%ia1,nzbr,idisp,& + & mpi_integer,icomm,info) + if (info == 0) call mpi_allgatherv(b%ia2,ndx,mpi_integer,p%ac%ia2,nzbr,idisp,& + & mpi_integer,icomm,info) + + if (info /= 0) then + call psb_errpush(4001,name,a_err=' from mpi_allgatherv') + goto 9999 + end if + + p%ac%m = ntaggr + p%ac%k = ntaggr + p%ac%infoa(psb_nnz_) = nzac + p%ac%fida='COO' + p%ac%descra='GUN' + call psb_spcnv(p%ac,info,afmt='coo',dupl=psb_dupl_add_) + if(info /= 0) goto 9999 + call psb_sp_free(b,info) + if(info /= 0) goto 9999 + + deallocate(nzbr,idisp,stat=info) + if (info /= 0) then + info = 4000 + call psb_errpush(info,name) + goto 9999 + end if + case default + info = 4001 + call psb_errpush(info,name,a_err='invalid mld_coarse_mat_') + goto 9999 + end select + + + + call psb_spcnv(p%ac,info,afmt='csr',dupl=psb_dupl_add_) + if(info /= 0) then + call psb_errpush(4010,name,a_err='spcnv') + goto 9999 + end if + + ! + ! Copy the prolongation/restriction matrices into the descriptor map. + ! am2 => R i.e. restriction operator + ! am1 => P i.e. prolongation operator + ! + p%map = psb_linmap(psb_map_aggr_,desc_a,& + & p%desc_ac,am2,am1,ilaggr,nlaggr) + if (info == 0) call psb_sp_free(am1,info) + if (info == 0) call psb_sp_free(am2,info) + if(info /= 0) then + call psb_errpush(4010,name,a_err='sp_Free') + goto 9999 + end if + + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done smooth_aggregate ' + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + return + + +contains + + subroutine csc_mat_col_prod(a,b,v,info) + type(psb_dspmat_type), intent(in) :: a, b + real(psb_dpk_), intent(out) :: v(:) + integer, intent(out) :: info + + integer :: i,j,k, nr, nc,iap,nra,ibp,nrb + logical :: csca, cscb + + info = 0 + nc = psb_sp_get_ncols(a) + if (nc /= psb_sp_get_ncols(b)) then + write(0,*) 'Matrices A and B should have same columns' + info = -1 + return + end if + csca = (psb_toupper(a%fida(1:3))=='CSC') + cscb = (psb_toupper(b%fida(1:3))=='CSC') + + if (.not.(csca.and.cscb)) then + write(0,*) 'Matrices A and B should be in CSC' + info = -2 + return + end if + + do j=1, nc + iap = a%ia2(j) + nra = a%ia2(j+1)-iap + ibp = b%ia2(j) + nrb = b%ia2(j+1)-ibp + v(j) = sparse_srtd_dot(nra,a%ia1(iap:iap+nra-1),a%aspk(iap:iap+nra-1),& + & nrb,b%ia1(ibp:ibp+nrb-1),b%aspk(ibp:ibp+nrb-1)) + end do + + end subroutine csc_mat_col_prod + + + subroutine csr_mat_row_prod(a,b,v,info) + type(psb_dspmat_type), intent(in) :: a, b + real(psb_dpk_), intent(out) :: v(:) + integer, intent(out) :: info + + integer :: i,j,k, nr, nc,iap,nca,ibp,ncb + logical :: csra, csrb + + info = 0 + nr = psb_sp_get_nrows(a) + if (nr /= psb_sp_get_nrows(b)) then + write(0,*) 'Matrices A and B should have same rows' + info = -1 + return + end if + csra = (psb_toupper(a%fida(1:3))=='CSR') + csrb = (psb_toupper(b%fida(1:3))=='CSR') + + if (.not.(csra.and.csrb)) then + write(0,*) 'Matrices A and B should be in CSR' + info = -2 + return + end if + + do j=1, nr + iap = a%ia2(j) + nca = a%ia2(j+1)-iap + ibp = b%ia2(j) + ncb = b%ia2(j+1)-ibp + v(j) = sparse_srtd_dot(nca,a%ia1(iap:iap+nca-1),a%aspk(iap:iap+nca-1),& + & ncb,b%ia1(ibp:ibp+ncb-1),b%aspk(ibp:ibp+ncb-1)) + end do + + end subroutine csr_mat_row_prod + + function sparse_srtd_dot(nv1,iv1,v1,nv2,iv2,v2) result(dot) + integer, intent(in) :: nv1,nv2 + integer, intent(in) :: iv1(:), iv2(:) + real(psb_dpk_), intent(in) :: v1(:),v2(:) + real(psb_dpk_) :: dot + + integer :: i,j,k, ip1, ip2 + + dot = dzero + ip1 = 1 + ip2 = 1 + + do + if (ip1 > nv1) exit + if (ip2 > nv2) exit + if (iv1(ip1) == iv2(ip2)) then + dot = dot + v1(ip1)*v2(ip2) + ip1 = ip1 + 1 + ip2 = ip2 + 1 + else if (iv1(ip1) < iv2(ip2)) then + ip1 = ip1 + 1 + else + ip2 = ip2 + 1 + end if + end do + + end function sparse_srtd_dot + +end subroutine mld_daggrmat_minnrg_asb diff --git a/mlprec/mld_dmlprec_aply.f90 b/mlprec/mld_dmlprec_aply.f90 index 26b83901..2d75dec4 100644 --- a/mlprec/mld_dmlprec_aply.f90 +++ b/mlprec/mld_dmlprec_aply.f90 @@ -158,9 +158,13 @@ subroutine mld_dmlprec_aply(alpha,p,x,beta,y,desc_data,trans,work,info) ! Local variables integer :: ictxt, np, me, err_act - integer :: debug_level, debug_unit + integer :: debug_level, debug_unit, nlev,nc2l,nr2l,level character(len=20) :: name character :: trans_ + type psb_mlprec_wrk_type + real(psb_dpk_), allocatable :: tx(:), ty(:), x2l(:), y2l(:) + end type psb_mlprec_wrk_type + type(psb_mlprec_wrk_type), allocatable :: mlprec_wrk(:) name='mld_dmlprec_aply' info = 0 @@ -177,79 +181,120 @@ subroutine mld_dmlprec_aply(alpha,p,x,beta,y,desc_data,trans,work,info) trans_ = psb_toupper(trans) - select case(p%precv(2)%iprcparm(mld_ml_type_)) + if (.false.) then + nlev = size(p%precv) + allocate(mlprec_wrk(nlev),stat=info) + if (info /= 0) then + call psb_errpush(4010,name,a_err='Allocate') + goto 9999 + end if + level = 1 - case(mld_no_ml_) - ! - ! No preconditioning, should not really get here - ! - call psb_errpush(4001,name,a_err='mld_no_ml_ in mlprc_aply?') - goto 9999 + nc2l = psb_cd_get_local_cols(p%precv(level)%base_desc) + nr2l = psb_cd_get_local_rows(p%precv(level)%base_desc) + allocate(mlprec_wrk(level)%x2l(nc2l),mlprec_wrk(level)%y2l(nc2l),& + & stat=info) + if (info /= 0) then + info=4025 + call psb_errpush(info,name,i_err=(/size(x)+size(y),0,0,0,0/),& + & a_err='real(psb_dpk_)') + goto 9999 + end if - case(mld_add_ml_) - ! - ! Additive multilevel - ! + mlprec_wrk(level)%x2l(:) = x(:) + mlprec_wrk(level)%y2l(:) = dzero + + call inner_ml_aply(level,p,mlprec_wrk,trans_,work,info) + + if (info /= 0) then + call psb_errpush(4001,name,a_err='Inner prec aply') + goto 9999 + end if + + call psb_geaxpby(alpha,mlprec_wrk(level)%y2l,beta,y,& + & p%precv(level)%base_desc,info) - call add_ml_aply(alpha,p,x,beta,y,desc_data,trans_,work,info) + if (info /= 0) then + call psb_errpush(4001,name,a_err='Error final update') + goto 9999 + end if + + + else + select case(p%precv(2)%iprcparm(mld_ml_type_)) - case(mld_mult_ml_) - ! - ! Multiplicative multilevel (multiplicative among the levels, additive inside - ! each level) - ! - ! Pre/post-smoothing versions. - ! Note that the transpose switches pre <-> post. - ! + case(mld_no_ml_) + ! + ! No preconditioning, should not really get here + ! + call psb_errpush(4001,name,a_err='mld_no_ml_ in mlprc_aply?') + goto 9999 - select case(p%precv(2)%iprcparm(mld_smoother_pos_)) + case(mld_add_ml_) + ! + ! Additive multilevel + ! - case(mld_post_smooth_) + call add_ml_aply(alpha,p,x,beta,y,desc_data,trans_,work,info) - select case (trans_) - case('N') - call mlt_post_ml_aply(alpha,p,x,beta,y,desc_data,trans_,work,info) - case('T','C') - call mlt_pre_ml_aply(alpha,p,x,beta,y,desc_data,trans_,work,info) - case default - info = 4001 - call psb_errpush(info,name,a_err='invalid trans') - goto 9999 - end select + case(mld_mult_ml_) + ! + ! Multiplicative multilevel (multiplicative among the levels, additive inside + ! each level) + ! + ! Pre/post-smoothing versions. + ! Note that the transpose switches pre <-> post. + ! + + select case(p%precv(2)%iprcparm(mld_smoother_pos_)) + + case(mld_post_smooth_) + + select case (trans_) + case('N') + call mlt_post_ml_aply(alpha,p,x,beta,y,desc_data,trans_,work,info) + + case('T','C') + call mlt_pre_ml_aply(alpha,p,x,beta,y,desc_data,trans_,work,info) + case default + info = 4001 + call psb_errpush(info,name,a_err='invalid trans') + goto 9999 + end select + + case(mld_pre_smooth_) + + select case (trans_) + case('N') + call mlt_pre_ml_aply(alpha,p,x,beta,y,desc_data,trans_,work,info) + case('T','C') + call mlt_post_ml_aply(alpha,p,x,beta,y,desc_data,trans_,work,info) + case default + info = 4001 + call psb_errpush(info,name,a_err='invalid trans') + goto 9999 + end select - case(mld_pre_smooth_) + case(mld_twoside_smooth_) + + call mlt_twoside_ml_aply(alpha,p,x,beta,y,desc_data,trans_,work,info) - select case (trans_) - case('N') - call mlt_pre_ml_aply(alpha,p,x,beta,y,desc_data,trans_,work,info) - case('T','C') - call mlt_post_ml_aply(alpha,p,x,beta,y,desc_data,trans_,work,info) case default - info = 4001 - call psb_errpush(info,name,a_err='invalid trans') + info = 4013 + call psb_errpush(info,name,a_err='invalid smooth_pos',& + & i_Err=(/p%precv(2)%iprcparm(mld_smoother_pos_),0,0,0,0/)) goto 9999 - end select - - case(mld_twoside_smooth_) - call mlt_twoside_ml_aply(alpha,p,x,beta,y,desc_data,trans_,work,info) + end select case default info = 4013 - call psb_errpush(info,name,a_err='invalid smooth_pos',& - & i_Err=(/p%precv(2)%iprcparm(mld_smoother_pos_),0,0,0,0/)) + call psb_errpush(info,name,a_err='invalid mltype',& + & i_Err=(/p%precv(2)%iprcparm(mld_ml_type_),0,0,0,0/)) goto 9999 end select - - case default - info = 4013 - call psb_errpush(info,name,a_err='invalid mltype',& - & i_Err=(/p%precv(2)%iprcparm(mld_ml_type_),0,0,0,0/)) - goto 9999 - - end select - + endif call psb_erractionrestore(err_act) return @@ -262,6 +307,422 @@ subroutine mld_dmlprec_aply(alpha,p,x,beta,y,desc_data,trans,work,info) return contains + + recursive subroutine inner_ml_aply(level,p,mlprec_wrk,trans,work,info) + + implicit none + + ! Arguments + integer :: level + type(mld_dprec_type), intent(in) :: p + type(psb_mlprec_wrk_type), intent(inout) :: mlprec_wrk(:) + character, intent(in) :: trans + real(psb_dpk_),target :: work(:) + integer, intent(out) :: info + + ! Local variables + integer :: ictxt,np,me,i, nr2l,nc2l,err_act + integer :: debug_level, debug_unit + integer :: nlev, ilev + character(len=20) :: name + + name = 'inner_ml_aply' + info = 0 + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + nlev = size(p%precv) + if ((level < 1) .or. (level > nlev)) then + call psb_errpush(4001,name,a_err='wrong call level to inner_ml') + goto 9999 + end if + ictxt = psb_cd_get_context(p%precv(level)%base_desc) + call psb_info(ictxt, me, np) + + + if (level > 1) then + nc2l = psb_cd_get_local_cols(p%precv(level)%base_desc) + nr2l = psb_cd_get_local_rows(p%precv(level)%base_desc) + allocate(mlprec_wrk(level)%x2l(nc2l),mlprec_wrk(level)%y2l(nc2l),& + & stat=info) + if (info /= 0) then + info=4025 + call psb_errpush(info,name,i_err=(/2*nc2l,0,0,0,0/),& + & a_err='real(psb_dpk_)') + goto 9999 + end if + end if + + select case(p%precv(level)%iprcparm(mld_ml_type_)) + + case(mld_no_ml_) + ! + ! No preconditioning, should not really get here + ! + write(0,*) 'MLD_NO_ML_ in inner_ml ',level + call psb_errpush(4001,name,a_err='mld_no_ml_ in mlprc_aply?') + goto 9999 + + case(mld_add_ml_) + ! + ! Additive multilevel + ! + + if (level > 1) then + ! Apply the restriction + call psb_map_X2Y(done,mlprec_wrk(level-1)%x2l,& + & dzero,mlprec_wrk(level)%x2l,& + & p%precv(level)%map,info,work=work) + + if (info /=0) then + call psb_errpush(4001,name,a_err='Error during restriction') + goto 9999 + end if + + end if + + + call mld_baseprec_aply(done,p%precv(level)%prec,& + & mlprec_wrk(level)%x2l,dzero,mlprec_wrk(level)%y2l,& + & p%precv(level)%base_desc, trans,work,info) + if (info /= 0) goto 9999 + if (level < nlev) then + call inner_ml_aply(level+1,p,mlprec_wrk,trans,work,info) + if (info /= 0) goto 9999 + ! + ! Apply the prolongator + ! + call psb_map_Y2X(done,mlprec_wrk(level+1)%y2l,& + & done,mlprec_wrk(level)%y2l,& + & p%precv(level+1)%map,info,work=work) + if (info /= 0) goto 9999 + + end if + + case(mld_mult_ml_) + ! + ! Multiplicative multilevel (multiplicative among the levels, additive inside + ! each level) + ! + ! Pre/post-smoothing versions. + ! Note that the transpose switches pre <-> post. + ! + + select case(p%precv(level)%iprcparm(mld_smoother_pos_)) + + case(mld_post_smooth_) + + select case (trans_) + case('N') + + if (level > 1) then + ! Apply the restriction + call psb_map_X2Y(done,mlprec_wrk(level-1)%x2l,& + & dzero,mlprec_wrk(level)%x2l,& + & p%precv(level)%map,info,work=work) + + if (info /=0) then + call psb_errpush(4001,name,a_err='Error during restriction') + goto 9999 + end if + end if + + ! This is one step of post-smoothing + + if (level < nlev) then + call inner_ml_aply(level+1,p,mlprec_wrk,trans,work,info) + if (info /= 0) goto 9999 + ! + ! Apply the prolongator + ! + call psb_map_Y2X(done,mlprec_wrk(level+1)%y2l,& + & dzero,mlprec_wrk(level)%y2l,& + & p%precv(level+1)%map,info,work=work) + if (info /= 0) goto 9999 + ! + ! Compute the residual + ! + call psb_spmm(-done,p%precv(level)%base_a,mlprec_wrk(level)%y2l,& + & done,mlprec_wrk(level)%x2l,p%precv(level)%base_desc,info,& + & work=work,trans=trans) + if (info /= 0) goto 9999 + + + call mld_baseprec_aply(done,p%precv(level)%prec,& + & mlprec_wrk(level)%x2l,done,mlprec_wrk(level)%y2l,& + & p%precv(level)%base_desc,& + & trans,work,info) + else + call mld_baseprec_aply(done,p%precv(level)%prec,& + & mlprec_wrk(level)%x2l,dzero,mlprec_wrk(level)%y2l,& + & p%precv(level)%base_desc,& + & trans,work,info) + + end if + + + case('T','C') + + ! Post-smoothing transpose is pre-smoothing + + + if (level > 1) then + ! Apply the restriction + call psb_map_X2Y(done,mlprec_wrk(level-1)%x2l,& + & dzero,mlprec_wrk(level)%x2l,& + & p%precv(level)%map,info,work=work) + + if (info /=0) then + call psb_errpush(4001,name,a_err='Error during restriction') + goto 9999 + end if + + + end if + + ! + ! Apply the base preconditioner + ! + call mld_baseprec_aply(done,p%precv(ilev)%prec,mlprec_wrk(ilev)%x2l,& + & dzero,mlprec_wrk(ilev)%y2l,p%precv(ilev)%base_desc,trans,work,info) + + if (info /= 0) goto 9999 + + ! + ! Compute the residual (at all levels but the coarsest one) + ! + if (ilev < nlev) then + call psb_spmm(-done,p%precv(ilev)%base_a,& + & mlprec_wrk(ilev)%y2l,done,mlprec_wrk(ilev)%x2l,& + & p%precv(ilev)%base_desc,info,work=work,trans=trans) + if (info /= 0) goto 9999 + call inner_ml_aply(level+1,p,mlprec_wrk,trans,work,info) + if (info /= 0) goto 9999 + + call psb_map_Y2X(done,mlprec_wrk(level+1)%y2l,& + & done,mlprec_wrk(level)%y2l,& + & p%precv(level+1)%map,info,work=work) + if (info /= 0) goto 9999 + + end if + + case default + info = 4001 + call psb_errpush(info,name,a_err='invalid trans') + goto 9999 + end select + + case(mld_pre_smooth_) + + select case (trans_) + case('N') + ! One step of pre-smoothing + + + if (level > 1) then + ! Apply the restriction + call psb_map_X2Y(done,mlprec_wrk(level-1)%x2l,& + & dzero,mlprec_wrk(level)%x2l,& + & p%precv(level)%map,info,work=work) + + if (info /=0) then + call psb_errpush(4001,name,a_err='Error during restriction') + goto 9999 + end if + + end if + + ! + ! Apply the base preconditioner + ! + call mld_baseprec_aply(done,p%precv(level)%prec,& + & mlprec_wrk(level)%x2l,& + & dzero,mlprec_wrk(level)%y2l,& + & p%precv(level)%base_desc,trans,work,info) + + if (info /= 0) goto 9999 + + ! + ! Compute the residual (at all levels but the coarsest one) + ! + if (level < nlev) then + call psb_spmm(-done,p%precv(level)%base_a,& + & mlprec_wrk(level)%y2l,done,mlprec_wrk(level)%x2l,& + & p%precv(level)%base_desc,info,work=work,trans=trans) + if (info /= 0) goto 9999 + call inner_ml_aply(level+1,p,mlprec_wrk,trans,work,info) + if (info /= 0) goto 9999 + + call psb_map_Y2X(done,mlprec_wrk(level+1)%y2l,& + & done,mlprec_wrk(level)%y2l,& + & p%precv(level+1)%map,info,work=work) + if (info /= 0) goto 9999 + + end if + + + case('T','C') + + ! pre-smooth transpose is post-smoothing + + + if (level > 1) then + ! Apply the restriction + call psb_map_X2Y(done,mlprec_wrk(level-1)%x2l,& + & dzero,mlprec_wrk(level)%x2l,& + & p%precv(level)%map,info,work=work) + + if (info /=0) then + call psb_errpush(4001,name,a_err='Error during restriction') + goto 9999 + end if + + end if + + if (level < nlev) then + call inner_ml_aply(level+1,p,mlprec_wrk,trans,work,info) + if (info /= 0) goto 9999 + ! + ! Apply the prolongator + ! + call psb_map_Y2X(done,mlprec_wrk(level+1)%y2l,& + & dzero,mlprec_wrk(level)%y2l,& + & p%precv(level+1)%map,info,work=work) + if (info /= 0) goto 9999 + ! + ! Compute the residual + ! + call psb_spmm(-done,p%precv(level)%base_a,mlprec_wrk(level)%y2l,& + & done,mlprec_wrk(level)%x2l,p%precv(level)%base_desc,info,& + & work=work,trans=trans) + if (info /= 0) goto 9999 + + + call mld_baseprec_aply(done,p%precv(level)%prec,& + & mlprec_wrk(level)%x2l,done,mlprec_wrk(level)%y2l,& + & p%precv(level)%base_desc,& + & trans,work,info) + else + + call mld_baseprec_aply(done,p%precv(level)%prec,& + & mlprec_wrk(level)%x2l,dzero,mlprec_wrk(level)%y2l,& + & p%precv(level)%base_desc,& + & trans,work,info) + end if + + case default + info = 4001 + call psb_errpush(info,name,a_err='invalid trans') + goto 9999 + end select + + case(mld_twoside_smooth_) + + nc2l = psb_cd_get_local_cols(p%precv(level)%base_desc) + nr2l = psb_cd_get_local_rows(p%precv(level)%base_desc) + allocate(mlprec_wrk(level)%ty(nc2l), mlprec_wrk(level)%tx(nc2l), stat=info) + if (info /= 0) then + info=4025 + call psb_errpush(info,name,i_err=(/2*nc2l,0,0,0,0/),& + & a_err='real(psb_dpk_)') + goto 9999 + end if + + if (level > 1) then + ! Apply the restriction + call psb_map_X2Y(done,mlprec_wrk(level-1)%ty,& + & dzero,mlprec_wrk(level)%x2l,& + & p%precv(level)%map,info,work=work) + + if (info /=0) then + call psb_errpush(4001,name,a_err='Error during restriction') + goto 9999 + end if + end if + call psb_geaxpby(done,mlprec_wrk(level)%x2l,dzero,mlprec_wrk(level)%tx,& + & p%precv(level)%base_desc,info) + ! + ! Apply the base preconditioner + ! + if (info == 0) call mld_baseprec_aply(done,p%precv(level)%prec,& + & mlprec_wrk(level)%x2l,dzero,mlprec_wrk(level)%y2l,& + & p%precv(level)%base_desc,trans,work,info) + ! + ! Compute the residual (at all levels but the coarsest one) + ! and call recursively + ! + if(level < nlev) then + mlprec_wrk(level)%ty = mlprec_wrk(level)%x2l + if (info == 0) call psb_spmm(-done,p%precv(level)%base_a,& + & mlprec_wrk(level)%y2l,done,mlprec_wrk(level)%ty,& + & p%precv(level)%base_desc,info,work=work,trans=trans) + + call inner_ml_aply(level+1,p,mlprec_wrk,trans,work,info) + + + ! + ! Apply the prolongator + ! + call psb_map_Y2X(done,mlprec_wrk(level+1)%y2l,& + & done,mlprec_wrk(level)%y2l,& + & p%precv(level+1)%map,info,work=work) + + if (info /=0 ) then + call psb_errpush(4001,name,a_err='Error during restriction') + goto 9999 + end if + + ! + ! Compute the residual + ! + call psb_spmm(-done,p%precv(level)%base_a,mlprec_wrk(level)%y2l,& + & done,mlprec_wrk(level)%tx,p%precv(level)%base_desc,info,& + & work=work,trans=trans) + ! + ! Apply the base preconditioner + ! + if (info == 0) call mld_baseprec_aply(done,p%precv(level)%prec,& + & mlprec_wrk(level)%tx,& + & done,mlprec_wrk(level)%y2l,p%precv(level)%base_desc,& + & trans, work,info) + if (info /= 0) then + call psb_errpush(4001,name,a_err='Error: residual/baseprec_aply') + goto 9999 + end if + + endif + + case default + info = 4013 + call psb_errpush(info,name,a_err='invalid smooth_pos',& + & i_Err=(/p%precv(level)%iprcparm(mld_smoother_pos_),0,0,0,0/)) + goto 9999 + + end select + + case default + info = 4013 + call psb_errpush(info,name,a_err='invalid mltype',& + & i_Err=(/p%precv(level)%iprcparm(mld_ml_type_),0,0,0,0/)) + goto 9999 + + end select + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_erractionrestore(err_act) + if (err_act.eq.psb_act_abort_) then + call psb_error() + return + end if + return + + end subroutine inner_ml_aply + + ! ! Subroutine: add_ml_aply ! Version: real @@ -330,7 +791,7 @@ contains ! 3. DO ilev=nlev-1,1,-1 ! ! ! Transfer Y(ilev+1) to the next finer level. - ! Y(ilev) = P(ilev+1)*Y(ilev+1) + ! Y(ilev) = Y(ilev) + P(ilev+1)*Y(ilev+1) ! ! ENDDO ! @@ -356,9 +817,9 @@ contains integer :: nlev, ilev character(len=20) :: name - type psb_mlprec_wrk_type - real(psb_dpk_), allocatable :: tx(:), ty(:), x2l(:), y2l(:) - end type psb_mlprec_wrk_type +!!$ type psb_mlprec_wrk_type +!!$ real(psb_dpk_), allocatable :: tx(:), ty(:), x2l(:), y2l(:) +!!$ end type psb_mlprec_wrk_type type(psb_mlprec_wrk_type), allocatable :: mlprec_wrk(:) name='add_ml_aply' @@ -396,38 +857,33 @@ contains mlprec_wrk(1)%x2l(:) = x(:) mlprec_wrk(1)%y2l(:) = dzero - - call mld_baseprec_aply(alpha,p%precv(1)%prec,x,beta,y,& - & p%precv(1)%base_desc,trans,work,info) - if (info /=0) then - call psb_errpush(4010,name,a_err='baseprec_aply') - goto 9999 - end if ! ! STEP 2 ! ! For each level except the finest one ... ! - do ilev = 2, nlev - nc2l = psb_cd_get_local_cols(p%precv(ilev)%base_desc) - nr2l = psb_cd_get_local_rows(p%precv(ilev)%base_desc) - allocate(mlprec_wrk(ilev)%x2l(nc2l),mlprec_wrk(ilev)%y2l(nc2l),& - & stat=info) - if (info /= 0) then - info=4025 - call psb_errpush(info,name,i_err=(/2*nc2l,0,0,0,0/),& - & a_err='real(psb_dpk_)') - goto 9999 - end if - - ! Apply the prolongator transpose, i.e. the restriction - call psb_map_X2Y(done,mlprec_wrk(ilev-1)%x2l,& - & dzero,mlprec_wrk(ilev)%x2l,& - & p%precv(ilev)%map,info,work=work) - - if (info /=0) then - call psb_errpush(4001,name,a_err='Error during restriction') - goto 9999 + do ilev = 1, nlev + if (ilev > 1) then + nc2l = psb_cd_get_local_cols(p%precv(ilev)%base_desc) + nr2l = psb_cd_get_local_rows(p%precv(ilev)%base_desc) + allocate(mlprec_wrk(ilev)%x2l(nc2l),mlprec_wrk(ilev)%y2l(nc2l),& + & stat=info) + if (info /= 0) then + info=4025 + call psb_errpush(info,name,i_err=(/2*nc2l,0,0,0,0/),& + & a_err='real(psb_dpk_)') + goto 9999 + end if + + ! Apply the prolongator transpose, i.e. the restriction + call psb_map_X2Y(done,mlprec_wrk(ilev-1)%x2l,& + & dzero,mlprec_wrk(ilev)%x2l,& + & p%precv(ilev)%map,info,work=work) + + if (info /=0) then + call psb_errpush(4001,name,a_err='Error during restriction') + goto 9999 + end if end if ! @@ -467,7 +923,7 @@ contains ! ! Compute the output vector Y ! - call psb_geaxpby(alpha,mlprec_wrk(1)%y2l,done,y,p%precv(1)%base_desc,info) + call psb_geaxpby(alpha,mlprec_wrk(1)%y2l,beta,y,p%precv(1)%base_desc,info) if (info /= 0) then call psb_errpush(4001,name,a_err='Error on final update') goto 9999 @@ -593,9 +1049,9 @@ contains integer :: nlev, ilev character(len=20) :: name - type psb_mlprec_wrk_type - real(psb_dpk_), allocatable :: tx(:), ty(:), x2l(:), y2l(:) - end type psb_mlprec_wrk_type +!!$ type psb_mlprec_wrk_type +!!$ real(psb_dpk_), allocatable :: tx(:), ty(:), x2l(:), y2l(:) +!!$ end type psb_mlprec_wrk_type type(psb_mlprec_wrk_type), allocatable :: mlprec_wrk(:) name='mlt_pre_ml_aply' @@ -686,7 +1142,7 @@ contains call psb_map_X2Y(done,mlprec_wrk(ilev-1)%tx,& & dzero,mlprec_wrk(ilev)%x2l,& & p%precv(ilev)%map,info,work=work) - + if (info /=0) then call psb_errpush(4001,name,a_err='Error during restriction') goto 9999 @@ -854,9 +1310,9 @@ contains integer :: nlev, ilev character(len=20) :: name - type psb_mlprec_wrk_type - real(psb_dpk_), allocatable :: tx(:), ty(:), x2l(:), y2l(:) - end type psb_mlprec_wrk_type +!!$ type psb_mlprec_wrk_type +!!$ real(psb_dpk_), allocatable :: tx(:), ty(:), x2l(:), y2l(:) +!!$ end type psb_mlprec_wrk_type type(psb_mlprec_wrk_type), allocatable :: mlprec_wrk(:) name='mlt_post_ml_aply' @@ -933,7 +1389,7 @@ contains call psb_map_X2Y(done,mlprec_wrk(ilev-1)%x2l,& & dzero,mlprec_wrk(ilev)%x2l,& & p%precv(ilev)%map,info,work=work) - + if (info /=0) then call psb_errpush(4001,name,a_err='Error during restriction') goto 9999 @@ -1156,9 +1612,9 @@ contains integer :: nlev, ilev character(len=20) :: name - type psb_mlprec_wrk_type - real(psb_dpk_), allocatable :: tx(:), ty(:), x2l(:), y2l(:) - end type psb_mlprec_wrk_type +!!$ type psb_mlprec_wrk_type +!!$ real(psb_dpk_), allocatable :: tx(:), ty(:), x2l(:), y2l(:) +!!$ end type psb_mlprec_wrk_type type(psb_mlprec_wrk_type), allocatable :: mlprec_wrk(:) name='mlt_twoside_ml_aply' @@ -1248,7 +1704,7 @@ contains call psb_map_X2Y(done,mlprec_wrk(ilev-1)%ty,& & dzero,mlprec_wrk(ilev)%x2l,& & p%precv(ilev)%map,info,work=work) - + if (info /=0) then call psb_errpush(4001,name,a_err='Error during restriction') goto 9999 diff --git a/mlprec/mld_dprecinit.F90 b/mlprec/mld_dprecinit.F90 index 353076d3..3584152f 100644 --- a/mlprec/mld_dprecinit.F90 +++ b/mlprec/mld_dprecinit.F90 @@ -217,6 +217,14 @@ subroutine mld_dprecinit(p,ptype,info,nlev) p%precv(ilev_)%rprcparm(:) = dzero p%precv(ilev_)%prec%iprcparm(:) = 0 p%precv(ilev_)%prec%rprcparm(:) = dzero + p%precv(ilev_)%iprcparm(mld_ml_type_) = mld_mult_ml_ + p%precv(ilev_)%iprcparm(mld_aggr_alg_) = mld_dec_aggr_ + p%precv(ilev_)%iprcparm(mld_aggr_kind_) = mld_smooth_prol_ + p%precv(ilev_)%iprcparm(mld_coarse_mat_) = mld_distr_mat_ + p%precv(ilev_)%iprcparm(mld_smoother_pos_) = mld_post_smooth_ + p%precv(ilev_)%iprcparm(mld_aggr_omega_alg_) = mld_eig_est_ + p%precv(ilev_)%iprcparm(mld_aggr_eig_) = mld_max_norm_ + p%precv(ilev_)%iprcparm(mld_aggr_filter_) = mld_no_filter_mat_ p%precv(ilev_)%prec%iprcparm(mld_smoother_type_) = mld_as_ p%precv(ilev_)%prec%iprcparm(mld_sub_solve_) = mld_ilu_n_ p%precv(ilev_)%prec%iprcparm(mld_sub_restr_) = psb_halo_ diff --git a/mlprec/mld_dprecset.f90 b/mlprec/mld_dprecset.f90 index 5eb0868b..fa0a8757 100644 --- a/mlprec/mld_dprecset.f90 +++ b/mlprec/mld_dprecset.f90 @@ -140,6 +140,9 @@ subroutine mld_dprecseti(p,what,val,info,ilev) case(mld_smoother_type_,mld_sub_solve_,mld_sub_restr_,mld_sub_prol_,& & mld_sub_ren_,mld_sub_ovr_,mld_sub_fillin_,mld_smoother_sweeps_) p%precv(ilev_)%prec%iprcparm(what) = val + case(mld_ml_type_,mld_aggr_alg_,mld_aggr_kind_,& + & mld_smoother_pos_,mld_aggr_omega_alg_,mld_aggr_eig_) + p%precv(ilev_)%iprcparm(what) = val case default write(0,*) name,': Error: invalid WHAT' info = -2 @@ -228,7 +231,7 @@ subroutine mld_dprecseti(p,what,val,info,ilev) end do case(mld_ml_type_,mld_aggr_alg_,mld_aggr_kind_,& & mld_smoother_pos_,mld_aggr_omega_alg_,mld_aggr_eig_) - do ilev_=2,nlev_ + do ilev_=1,nlev_ if (.not.allocated(p%precv(ilev_)%iprcparm)) then write(0,*) name,& &': Error: uninitialized preconditioner component, should call MLD_PRECINIT' @@ -237,6 +240,7 @@ subroutine mld_dprecseti(p,what,val,info,ilev) endif p%precv(ilev_)%iprcparm(what) = val end do + case(mld_coarse_mat_) if (.not.allocated(p%precv(nlev_)%iprcparm)) then write(0,*) name,& diff --git a/mlprec/mld_inner_mod.f90 b/mlprec/mld_inner_mod.f90 index 29847c0d..a1b394c3 100644 --- a/mlprec/mld_inner_mod.f90 +++ b/mlprec/mld_inner_mod.f90 @@ -630,6 +630,18 @@ module mld_inner_mod end subroutine mld_zaggrmat_smth_asb end interface + interface mld_aggrmat_minnrg_asb + subroutine mld_daggrmat_minnrg_asb(a,desc_a,ilaggr,nlaggr,p,info) + use psb_base_mod, only : psb_dspmat_type, psb_desc_type, psb_dpk_ + use mld_prec_type, only : mld_dbaseprec_type, mld_donelev_type + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer, intent(inout) :: ilaggr(:), nlaggr(:) + type(mld_donelev_type), intent(inout), target :: p + integer, intent(out) :: info + end subroutine mld_daggrmat_minnrg_asb + end interface + interface mld_baseprec_bld subroutine mld_sbaseprec_bld(a,desc_a,p,info,upd) use psb_base_mod, only : psb_sspmat_type, psb_desc_type, psb_spk_ diff --git a/mlprec/mld_prec_type.f90 b/mlprec/mld_prec_type.f90 index 799beade..e5c3aba5 100644 --- a/mlprec/mld_prec_type.f90 +++ b/mlprec/mld_prec_type.f90 @@ -353,9 +353,10 @@ module mld_prec_type ! ! Legal values for entry: mld_aggr_kind_ ! - integer, parameter :: mld_no_smooth_=0, mld_smooth_prol_=1, mld_biz_prol_=2 + integer, parameter :: mld_no_smooth_=0, mld_smooth_prol_=1 + integer, parameter :: mld_min_energy_=2, mld_biz_prol_=3 ! Disabling biz_prol for the time being. - integer, parameter :: mld_max_aggr_kind_=mld_smooth_prol_ + integer, parameter :: mld_max_aggr_kind_=mld_min_energy_ ! ! Legal values for entry: mld_aggr_filter_ ! @@ -412,8 +413,8 @@ module mld_prec_type & smooth_names(1:3)=(/'pre-smoothing ','post-smoothing ',& & 'pre/post-smoothing'/) character(len=15), parameter, private :: & - & aggr_kinds(0:2)=(/'nonsmoothed ','smoothed ',& - & 'bizr. smoothed'/) + & aggr_kinds(0:3)=(/'nonsmoothed ','smoothed ',& + & 'min energy ','bizr. smoothed'/) character(len=15), parameter, private :: & & matrix_names(0:1)=(/'distributed ','replicated '/) character(len=18), parameter, private :: & @@ -546,6 +547,8 @@ contains val = mld_no_smooth_ case('SMOOTHED') val = mld_smooth_prol_ + case('MINENERGY') + val = mld_min_energy_ case('PRE') val = mld_pre_smooth_ case('POST')