From e9ba51c7b36d75318a6d5244ce4f49de0f6a191d Mon Sep 17 00:00:00 2001 From: Cirdans-Home Date: Sat, 3 Apr 2021 00:01:47 +0200 Subject: [PATCH] merged with parmatch from amg-ext --- Make.inc.in | 2 +- amgprec/Makefile | 20 +- amgprec/amg_d_matchboxp_mod.f90 | 1768 +++++++++++ amgprec/amg_d_parmatch_aggregator_mod.F90 | 713 +++++ amgprec/amg_s_matchboxp_mod.f90 | 1768 +++++++++++ amgprec/amg_s_parmatch_aggregator_mod.F90 | 713 +++++ amgprec/impl/aggregator/Makefile | 32 +- amgprec/impl/aggregator/MatchBoxPC.cpp | 97 + amgprec/impl/aggregator/MatchBoxPC.h | 178 ++ ...DomEdgesLinearSearchMesgBndlSmallMateC.cpp | 2574 +++++++++++++++++ ...mg_d_parmatch_aggregator_inner_mat_asb.f90 | 231 ++ .../amg_d_parmatch_aggregator_mat_asb.f90 | 277 ++ .../amg_d_parmatch_aggregator_mat_bld.f90 | 275 ++ .../amg_d_parmatch_aggregator_tprol.f90 | 565 ++++ .../aggregator/amg_d_parmatch_smth_bld.f90 | 414 +++ .../aggregator/amg_d_parmatch_spmm_bld.f90 | 194 ++ .../amg_d_parmatch_spmm_bld_inner.f90 | 210 ++ .../aggregator/amg_d_parmatch_spmm_bld_ov.f90 | 180 ++ .../aggregator/amg_d_parmatch_unsmth_bld.f90 | 251 ++ ...mg_s_parmatch_aggregator_inner_mat_asb.f90 | 231 ++ .../amg_s_parmatch_aggregator_mat_asb.f90 | 277 ++ .../amg_s_parmatch_aggregator_mat_bld.f90 | 275 ++ .../amg_s_parmatch_aggregator_tprol.f90 | 565 ++++ .../aggregator/amg_s_parmatch_smth_bld.f90 | 414 +++ .../aggregator/amg_s_parmatch_spmm_bld.f90 | 194 ++ .../amg_s_parmatch_spmm_bld_inner.f90 | 210 ++ .../aggregator/amg_s_parmatch_spmm_bld_ov.f90 | 180 ++ .../aggregator/amg_s_parmatch_unsmth_bld.f90 | 251 ++ amgprec/impl/aggregator/dataStrStaticQueue.h | 199 ++ .../impl/aggregator/preProcessorDirectives.h | 79 + .../aggregator/primitiveDataTypeDefinitions.h | 156 + tests/fileread/amg_cf_sample.f90 | 2 +- tests/fileread/amg_df_sample.f90 | 2 +- tests/fileread/amg_sf_sample.f90 | 2 +- tests/fileread/amg_zf_sample.f90 | 2 +- tests/pdegen/amg_d_pde2d.f90 | 2 +- tests/pdegen/amg_d_pde3d.f90 | 2 +- tests/pdegen/amg_s_pde2d.f90 | 2 +- tests/pdegen/amg_s_pde3d.f90 | 2 +- 39 files changed, 13486 insertions(+), 23 deletions(-) create mode 100644 amgprec/amg_d_matchboxp_mod.f90 create mode 100644 amgprec/amg_d_parmatch_aggregator_mod.F90 create mode 100644 amgprec/amg_s_matchboxp_mod.f90 create mode 100644 amgprec/amg_s_parmatch_aggregator_mod.F90 create mode 100644 amgprec/impl/aggregator/MatchBoxPC.cpp create mode 100644 amgprec/impl/aggregator/MatchBoxPC.h create mode 100644 amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp create mode 100644 amgprec/impl/aggregator/amg_d_parmatch_aggregator_inner_mat_asb.f90 create mode 100644 amgprec/impl/aggregator/amg_d_parmatch_aggregator_mat_asb.f90 create mode 100644 amgprec/impl/aggregator/amg_d_parmatch_aggregator_mat_bld.f90 create mode 100644 amgprec/impl/aggregator/amg_d_parmatch_aggregator_tprol.f90 create mode 100644 amgprec/impl/aggregator/amg_d_parmatch_smth_bld.f90 create mode 100644 amgprec/impl/aggregator/amg_d_parmatch_spmm_bld.f90 create mode 100644 amgprec/impl/aggregator/amg_d_parmatch_spmm_bld_inner.f90 create mode 100644 amgprec/impl/aggregator/amg_d_parmatch_spmm_bld_ov.f90 create mode 100644 amgprec/impl/aggregator/amg_d_parmatch_unsmth_bld.f90 create mode 100644 amgprec/impl/aggregator/amg_s_parmatch_aggregator_inner_mat_asb.f90 create mode 100644 amgprec/impl/aggregator/amg_s_parmatch_aggregator_mat_asb.f90 create mode 100644 amgprec/impl/aggregator/amg_s_parmatch_aggregator_mat_bld.f90 create mode 100644 amgprec/impl/aggregator/amg_s_parmatch_aggregator_tprol.f90 create mode 100644 amgprec/impl/aggregator/amg_s_parmatch_smth_bld.f90 create mode 100644 amgprec/impl/aggregator/amg_s_parmatch_spmm_bld.f90 create mode 100644 amgprec/impl/aggregator/amg_s_parmatch_spmm_bld_inner.f90 create mode 100644 amgprec/impl/aggregator/amg_s_parmatch_spmm_bld_ov.f90 create mode 100644 amgprec/impl/aggregator/amg_s_parmatch_unsmth_bld.f90 create mode 100755 amgprec/impl/aggregator/dataStrStaticQueue.h create mode 100755 amgprec/impl/aggregator/preProcessorDirectives.h create mode 100755 amgprec/impl/aggregator/primitiveDataTypeDefinitions.h diff --git a/Make.inc.in b/Make.inc.in index f52f0a60..04af9802 100644 --- a/Make.inc.in +++ b/Make.inc.in @@ -2,7 +2,7 @@ .mod=@MODEXT@ .fh=.fh .SUFFIXES: -.SUFFIXES: .f90 .F90 .f .F .c .o +.SUFFIXES: .f90 .F90 .f .F .c .cpp .o ########################################################## # # # Note: directories external to the MLD2P4 subtree # diff --git a/amgprec/Makefile b/amgprec/Makefile index 0bbd7eed..22a645c3 100644 --- a/amgprec/Makefile +++ b/amgprec/Makefile @@ -15,7 +15,8 @@ DMODOBJS=amg_d_prec_type.o \ amg_d_base_aggregator_mod.o \ amg_d_dec_aggregator_mod.o amg_d_symdec_aggregator_mod.o \ amg_d_ainv_solver.o amg_d_base_ainv_mod.o \ - amg_d_invk_solver.o amg_d_invt_solver.o amg_d_krm_solver.o + amg_d_invk_solver.o amg_d_invt_solver.o amg_d_krm_solver.o \ + amg_d_matchboxp_mod.o amg_d_parmatch_aggregator_mod.o #amg_d_bcmatch_aggregator_mod.o SMODOBJS=amg_s_prec_type.o amg_s_ilu_fact_mod.o \ @@ -26,7 +27,8 @@ SMODOBJS=amg_s_prec_type.o amg_s_ilu_fact_mod.o \ amg_s_base_aggregator_mod.o \ amg_s_dec_aggregator_mod.o amg_s_symdec_aggregator_mod.o \ amg_s_ainv_solver.o amg_s_base_ainv_mod.o \ - amg_s_invk_solver.o amg_s_invt_solver.o amg_s_krm_solver.o + amg_s_invk_solver.o amg_s_invt_solver.o amg_s_krm_solver.o \ + amg_s_matchboxp_mod.o amg_s_parmatch_aggregator_mod.o ZMODOBJS=amg_z_prec_type.o amg_z_ilu_fact_mod.o \ amg_z_inner_mod.o amg_z_ilu_solver.o amg_z_diag_solver.o amg_z_jac_smoother.o amg_z_as_smoother.o \ @@ -80,15 +82,15 @@ amg_base_prec_type.o: amg_const.h amg_s_prec_type.o amg_d_prec_type.o amg_c_prec_type.o amg_z_prec_type.o : amg_base_prec_type.o amg_prec_type.o: amg_s_prec_type.o amg_d_prec_type.o amg_c_prec_type.o amg_z_prec_type.o amg_prec_mod.o: amg_prec_type.o amg_s_prec_mod.o amg_d_prec_mod.o amg_c_prec_mod.o amg_z_prec_mod.o -amg_s_krm_solver.o: amg_s_prec_type.o amg_s_base_solver_mod.o -amg_d_krm_solver.o: amg_d_prec_type.o amg_d_base_solver_mod.o -amg_c_krm_solver.o: amg_c_prec_type.o amg_c_base_solver_mod.o -amg_z_krm_solver.o: amg_z_prec_type.o amg_z_base_solver_mod.o +amg_s_krm_solver.o: amg_s_prec_type.o amg_s_base_solver_mod.o +amg_d_krm_solver.o: amg_d_prec_type.o amg_d_base_solver_mod.o +amg_c_krm_solver.o: amg_c_prec_type.o amg_c_base_solver_mod.o +amg_z_krm_solver.o: amg_z_prec_type.o amg_z_base_solver_mod.o amg_s_prec_mod.o: amg_s_krm_solver.o -amg_d_prec_mod.o: amg_d_krm_solver.o -amg_c_prec_mod.o: amg_c_krm_solver.o -amg_z_prec_mod.o: amg_z_krm_solver.o +amg_d_prec_mod.o: amg_d_krm_solver.o +amg_c_prec_mod.o: amg_c_krm_solver.o +amg_z_prec_mod.o: amg_z_krm_solver.o $(SINNEROBJS) $(SOUTEROBJS): $(SMODOBJS) $(DINNEROBJS) $(DOUTEROBJS): $(DMODOBJS) diff --git a/amgprec/amg_d_matchboxp_mod.f90 b/amgprec/amg_d_matchboxp_mod.f90 new file mode 100644 index 00000000..2578be73 --- /dev/null +++ b/amgprec/amg_d_matchboxp_mod.f90 @@ -0,0 +1,1768 @@ +! +! +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! +! (C) Copyright 2021 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! moved here from amg4psblas-extension +! +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module dmatchboxp_mod + + use iso_c_binding + use psb_base_cbind_mod + + interface MatchBoxPC + subroutine dMatchBoxPC(nlver,nledge,verlocptr,verlocind,edgelocweight,& + & verdistance, mate, myrank, numprocs, icomm,& + & msgindsent,msgactualsent,msgpercent,& + & ph0_time, ph1_time, ph2_time, ph1_card, ph2_card) bind(c,name='MatchBoxPC') + use iso_c_binding + import :: psb_c_ipk_, psb_c_lpk_, psb_c_mpk_, psb_c_epk_ + implicit none + + integer(psb_c_lpk_), value :: nlver,nledge + integer(psb_c_mpk_), value :: myrank, numprocs, icomm + integer(psb_c_lpk_) :: verlocptr(*),verlocind(*), verdistance(*) + integer(psb_c_lpk_) :: mate(*) + integer(psb_c_lpk_) :: msgindsent(*),msgactualsent(*) + real(c_double) :: ph0_time, ph1_time, ph2_time + integer(psb_c_lpk_) :: ph1_card(*),ph2_card(*) + real(c_double) :: edgelocweight(*) + real(c_double) :: msgpercent(*) + end subroutine dMatchBoxPC + end interface MatchBoxPC + + interface i_aggr_assign + module procedure i_daggr_assign + end interface i_aggr_assign + + interface build_matching + module procedure dbuild_matching + end interface build_matching + + interface build_ahat + module procedure dbuild_ahat + end interface build_ahat + + interface psb_gtranspose + module procedure psb_dgtranspose + end interface psb_gtranspose + + interface psb_htranspose + module procedure psb_dhtranspose + end interface psb_htranspose + + interface PMatchBox + module procedure dPMatchBox + end interface PMatchBox + +contains + + subroutine dmatchboxp_build_prol(w,a,desc_a,ilaggr,nlaggr,prol,info,& + & symmetrize,reproducible,display_inp, display_out, print_out) + use psb_base_mod + use psb_util_mod + use iso_c_binding + implicit none + real(psb_dpk_), allocatable, intent(inout) :: w(:) + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type) :: desc_a + integer(psb_lpk_), allocatable, intent(out) :: ilaggr(:) + integer(psb_lpk_), allocatable, intent(out) :: nlaggr(:) + type(psb_ldspmat_type), intent(out) :: prol + integer(psb_ipk_), intent(out) :: info + logical, optional, intent(in) :: display_inp, display_out, reproducible + logical, optional, intent(in) :: symmetrize, print_out + + ! + ! + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: iam, np, iown + integer(psb_ipk_) :: nr, nc, sweep, nzl, ncsave, nct, idx + integer(psb_lpk_) :: i, k, kg, idxg, ntaggr, naggrm1, naggrp1, & + & ip, nlpairs, nlsingl, nunmatched, lnr + real(psb_dpk_) :: wk, widx, wmax, nrmagg + real(psb_dpk_), allocatable :: wtemp(:) + integer(psb_lpk_), allocatable :: mate(:), ilv(:) + integer(psb_ipk_), save :: cnt=1 + character(len=256) :: aname + type(psb_ld_coo_sparse_mat) :: tmpcoo + logical :: display_out_, print_out_, reproducible_ + logical, parameter :: dump=.false., debug=.false., dump_mate=.false., & + & debug_ilaggr=.false., debug_sync=.false. + integer(psb_ipk_), save :: idx_bldmtc=-1, idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 + logical, parameter :: do_timings=.true. + + ictxt = desc_a%get_ctxt() + call psb_info(ictxt,iam,np) + + if ((do_timings).and.(idx_phase1==-1)) & + & idx_phase1 = psb_get_timer_idx("MBP_BLDP: phase1 ") + if ((do_timings).and.(idx_bldmtc==-1)) & + & idx_bldmtc = psb_get_timer_idx("MBP_BLDP: buil_matching") + if ((do_timings).and.(idx_phase2==-1)) & + & idx_phase2 = psb_get_timer_idx("MBP_BLDP: phase2 ") + if ((do_timings).and.(idx_phase3==-1)) & + & idx_phase3 = psb_get_timer_idx("MBP_BLDP: phase3 ") + + if (do_timings) call psb_tic(idx_phase1) + + if (present(display_out)) then + display_out_ = display_out + else + display_out_ = .false. + end if + if (present(print_out)) then + print_out_ = print_out + else + print_out_ = .false. + end if + if (present(reproducible)) then + reproducible_ = reproducible + else + reproducible_ = .false. + end if + + allocate(nlaggr(0:np-1),stat=info) + if (info /= 0) then + return + end if + + nlaggr = 0 + ilv = [(i,i=1,desc_a%get_local_cols())] + call desc_a%l2gip(ilv,info,owned=.false.) + +!!$ if (dump) then +!!$ write(aname,'(a,i3.3,a)') 'amat',iam,'.mtx' +!!$ call a%print(fname=aname,head='Test ',iv=ilv) +!!$ end if + + call psb_geall(ilaggr,desc_a,info) + ilaggr = -1 + call psb_geasb(ilaggr,desc_a,info) + nr = a%get_nrows() + nc = a%get_ncols() + if (size(w) < nc) then + call psb_realloc(nc,w,info) + end if + call psb_halo(w,desc_a,info) + + if (debug) write(0,*) iam,' buildprol into buildmatching:',& + & nr, nc + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' buildprol into buildmatching:',& + & nr, nc + end if + if (do_timings) call psb_toc(idx_phase1) + if (do_timings) call psb_tic(idx_bldmtc) + call build_matching(w,a,desc_a,mate,info,display_inp=display_inp,symmetrize=symmetrize) + if (do_timings) call psb_toc(idx_bldmtc) + if (debug) write(0,*) iam,' buildprol from buildmatching:',& + & info + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' out from buildmatching:', info + end if + + if (info == 0) then + if (do_timings) call psb_tic(idx_phase2) + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' Into building the tentative prol:' + end if + + call psb_geall(wtemp,desc_a,info) + wtemp = dzero + call psb_geasb(wtemp,desc_a,info) + + nlaggr(iam) = 0 + nlpairs = 0 + nlsingl = 0 + nunmatched = 0 + ! + ! First sweep + ! On return from build_matching, mate has been converted to local numbering, + ! so assigning to idx is OK. + ! + do k=1, nr + idx = mate(k) + ! + ! Figure out an allocation of aggregates to processes + ! + if (idx < 0) then + ! + ! Unmatched vertex, potential singleton. + ! + nunmatched = nunmatched + 1 + if (abs(w(k)) nc) then + write(0,*) 'Impossible: mate(k) > nc' + cycle + else + + if (ilaggr(k) == -1) then + + wk = w(k) + widx = w(idx) + wmax = max(abs(wk),abs(widx)) + nrmagg = wmax*sqrt((wk/wmax)**2+(widx/wmax)**2) + if (nrmagg > epsilon(nrmagg)) then + if (idx <= nr) then + if (ilaggr(idx) == -1) then + ! Now, if both vertices are local, the aggregate is local + ! (kinda obvious). + nlaggr(iam) = nlaggr(iam) + 1 + ilaggr(k) = nlaggr(iam) + ilaggr(idx) = nlaggr(iam) + wtemp(k) = w(k)/nrmagg + wtemp(idx) = w(idx)/nrmagg + end if + nlpairs = nlpairs+1 + else if (idx <= nc) then + ! + ! This pair involves a non-local vertex. + ! Set wtemp, then apply a tie-breaking algorithm + wtemp(k) = w(k)/nrmagg + idxg = ilv(idx) + kg = ilv(k) + if (reproducible_) then + ! + ! Tie-break by assigning to the lowest-index process + ! so that the numbering is the same as with NP=1 + ! + if (kg < idxg) then + nlaggr(iam) = nlaggr(iam) + 1 + ilaggr(k) = nlaggr(iam) + nlpairs = nlpairs+1 + else + ilaggr(k) = -2 + end if + else + ! Use a statistically unbiased tie-breaking rule, + ! this will give an even spread. + ! Delegate to i_aggr_assign. + ! Should be a symmetric function. + ! + call desc_a%indxmap%qry_halo_owner(idx,iown,info) + ip = i_aggr_assign(iam, iown, kg, idxg, wk, widx, nrmagg) + if (iam == ip) then + nlaggr(iam) = nlaggr(iam) + 1 + ilaggr(k) = nlaggr(iam) + nlpairs = nlpairs+1 + else + ilaggr(k) = -2 + end if + end if + end if + else + if (abs(w(k))0) + ! + do k=1,nr + if (ilaggr(k) > 0) ilaggr(k) = ilaggr(k) + naggrm1 + end do + call psb_halo(ilaggr,desc_a,info) + call psb_halo(wtemp,desc_a,info) + ! Cleanup as yet unmarked entries + do k=1,nr + if (ilaggr(k) == -2) then + idx = mate(k) + if (idx > nr) then + i = ilaggr(idx) + if (i > 0) then + ilaggr(k) = i + else + write(0,*) 'Error : unresolved (paired) index ',k,idx,i,nr,nc, ilv(k),ilv(idx) + end if + else + write(0,*) 'Error : unresolved (paired) index ',k,idx,i,nr,nc, ilv(k),ilv(idx) + end if + end if + if (ilaggr(k) <0) then + write(0,*) 'Matchboxp: Funny number: ',k,ilv(k),ilaggr(k),wtemp(k) + end if + end do + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' Done building the tentative prol:' + end if + + + if (dump_mate) then + block + integer(psb_lpk_), allocatable :: glaggr(:) + write(aname,'(a,i3.3,a,i3.3,a)') 'mateg-',cnt,'-p',iam,'.mtx' + open(20,file=aname) + write(20,'(a,I3,a)') '% sparse vector on process ',iam,' ' + do k=1, nr + write(20,'(3(I8,1X))') ilv(k),ilv(mate(k)) + end do + close(20) + write(aname,'(a,i3.3,a,i3.3,a)') 'nloc-',cnt,'-p',iam,'.mtx' + open(20,file=aname) + write(20,'(a,I3,a)') '% sparse vector on process ',iam,' ' + write(20,'(a,I12,a)') 'nlpairs ',nlpairs + write(20,'(a,I12,a)') 'nlsingl ',nlsingl + write(20,'(a,I12,a)') 'nlaggr(iam) ',nlaggr(iam) + close(20) + write(aname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'ilaggr-',cnt,'-i',iam,'-p',np,'.mtx' + open(20,file=aname) + write(20,'(a,I3,a)') '% sparse vector on process ',iam,' ' + do k=1, nr + write(20,'(3(I8,1X))') ilv(k),ilaggr(k) + end do + close(20) + + write(aname,'(a,i3.3,a,i3.3,a)') 'glaggr-',cnt,'-p',np,'.mtx' + call psb_gather(glaggr,ilaggr,desc_a,info,root=izero) + if (iam==0) call mm_array_write(glaggr,'Aggregates ',info,filename=aname) + + cnt=cnt+1 + end block + end if + block + integer(psb_lpk_) :: v(3) + v(1) = nunmatched + v(2) = nlsingl + v(3) = nlpairs + call psb_sum(ictxt,v) + nunmatched = v(1) + nlsingl = v(2) + nlpairs = v(3) + +!!$ call psb_sum(ictxt,nunmatched) +!!$ call psb_sum(ictxt,nlsingl) +!!$ call psb_sum(ictxt,nlpairs) + end block + if (iam == 0) then + write(0,*) 'Matching statistics: Unmatched nodes ',& + & nunmatched,' Singletons:',nlsingl,' Pairs:',nlpairs + end if + + if (display_out_) then + block + integer(psb_ipk_) :: idx + ! + ! And finally print out + ! + do i=0,np-1 + call psb_barrier(ictxt) + if (iam == i) then + write(0,*) 'Process ', iam,' hosts aggregates: (',naggrm1+1,' : ',naggrp1-1,')' + do k=1, nr + idx = mate(k) + kg = ilv(k) + if (idx >0) then + idxg = ilv(idx) + else + idxg = -1 + end if + if (idx < 0) then + write(0,*) kg,': singleton (',kg,' ( Proc',iam,') ) into aggregate => ', ilaggr(k) + else if (idx <= nr) then + write(0,*) kg,': paired with (',idxg,' ( Proc',iam,') ) into aggregate => ', ilaggr(k) + else + call desc_a%indxmap%qry_halo_owner(idx,iown,info) + write(0,*) kg,': paired with (',idxg,' ( Proc',iown,') ) into aggregate => ', ilaggr(k) + end if + end do + flush(0) + end if + end do + end block + end if + + ! Dirty trick: allocate tmpcoo with local + ! number of aggregates, then change to ntaggr. + ! Just to make sure the allocation is not global + lnr = nr + call tmpcoo%allocate(lnr,nlaggr(iam),lnr) + k = 0 + do i=1,nr + ! + ! Note: at this point, a value ilaggr(i)<=0 + ! tags an unaggregated row, and it has to be + ! left alone (i.e.: it should stay at fine level only) + ! + if (ilaggr(i)>0) then + k = k + 1 + tmpcoo%val(k) = wtemp(i) + tmpcoo%ia(k) = i + tmpcoo%ja(k) = ilaggr(i) + end if + end do + call tmpcoo%set_nzeros(k) + call tmpcoo%set_dupl(psb_dupl_add_) + call tmpcoo%set_sorted() ! This is now in row-major + + if (display_out_) then + call psb_barrier(ictxt) + flush(0) + + if (iam == 0) write(0,*) 'Prolongator: ' + flush(0) + do i=0,np-1 + call psb_barrier(ictxt) + if (iam == i) then + do k=1, nr + write(0,*) ilv(tmpcoo%ia(k)),tmpcoo%ja(k), tmpcoo%val(k) + end do + flush(0) + end if + end do + end if + + call prol%mv_from(tmpcoo) + if (do_timings) call psb_toc(idx_phase3) + + if (print_out_) then + write(aname,'(a,i3.3,a)') 'prol-g-',iam,'.mtx' + call prol%print(fname=aname,head='Test ',ivr=ilv) + write(aname,'(a,i3.3,a)') 'prol-',iam,'.mtx' + call prol%print(fname=aname,head='Test ') + end if + + else + write(0,*) iam,' : error from Matching: ',info + end if + + end subroutine dmatchboxp_build_prol + + function i_daggr_assign(iam, iown, kg, idxg, wk, widx, nrmagg) & + & result(iproc) + ! + ! How to break ties? This + ! must be a symmetric function, i.e. + ! the result value iproc MUST be the same upon + ! swapping simultaneously all pairs + ! (iam,iown) (kg,idxg) (wk,widx) + ! + implicit none + integer(psb_ipk_) :: iproc + integer(psb_ipk_), intent(in) :: iam, iown + integer(psb_lpk_), intent(in) :: kg, idxg + real(psb_dpk_), intent(in) :: wk, widx, nrmagg + ! + integer(psb_lpk_) :: kg2, idxg2 + + idxg2 = mod(idxg,2) + kg2 = mod(kg,2) + + ! + ! In this particular tie-breaking rule we are ignoring WK, WIDX. + ! This should statistically entail an even spread of aggregates. + ! + if ((kg2/=0).and.(idxg2/=0)) then + ! If both global indices are odd, + ! assign to the higher number process + iproc = max(iam,iown) + + else if ((kg2==0).and.(idxg2==0)) then + ! If both global indices are even, + ! assign to the lower number process; + iproc = min(iam,iown) + else + ! If the global indices are mixed, + ! then assign to the owner of the odd index + if (kg2 /= 0) then + iproc = iam + else + iproc = iown + end if + end if + end function i_daggr_assign + + + subroutine dbuild_matching(w,a,desc_a,mate,info,display_inp, symmetrize) + use psb_base_mod + use psb_util_mod + use iso_c_binding + implicit none + real(psb_dpk_) :: w(:) + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type) :: desc_a + integer(psb_lpk_), allocatable, intent(out) :: mate(:) + integer(psb_ipk_), intent(out) :: info + logical, optional :: display_inp, symmetrize + + type(psb_dspmat_type) :: ahatnd + type(psb_ld_csr_sparse_mat) :: tcsr + type(psb_desc_type) :: desc_blk + integer(psb_lpk_), allocatable :: vnl(:) + integer(psb_lpk_), allocatable :: ph1crd(:), ph2crd(:) + integer(psb_lpk_), allocatable :: msgis(:),msgas(:) + real(psb_dpk_), allocatable :: msgprc(:) + real(psb_dpk_) :: ph0t,ph1t,ph2t + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: iam, np + integer(psb_lpk_) :: nr, nc, nz, i, nunmatch + integer(psb_ipk_), save :: cnt=2 + logical, parameter :: debug=.false., dump_ahat=.false., debug_sync=.false. + logical, parameter :: old_style=.false., sort_minp=.true. + character(len=40) :: name='build_matching', fname + integer(psb_ipk_), save :: idx_cmboxp=-1, idx_bldahat=-1, idx_phase2=-1, idx_phase3=-1 + logical, parameter :: do_timings=.true. + + ictxt = desc_a%get_ctxt() + call psb_info(ictxt,iam,np) + + if (debug) write(0,*) iam,' buildmatching into build_ahat:' + if ((do_timings).and.(idx_bldahat==-1)) & + & idx_bldahat = psb_get_timer_idx("BLD_MTCH: bld_ahat") + if ((do_timings).and.(idx_cmboxp==-1)) & + & idx_cmboxp = psb_get_timer_idx("BLD_MTCH: PMatchBox") + if ((do_timings).and.(idx_phase2==-1)) & + & idx_phase2 = psb_get_timer_idx("BLD_MTCH: phase2") + if ((do_timings).and.(idx_phase3==-1)) & + & idx_phase3 = psb_get_timer_idx("BLD_MTCH: phase3") + + if (debug) write(0,*) iam,' buildmatching from cd_renum:',info + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' Into build_ahat:' + end if + if (do_timings) call psb_tic(idx_bldahat) + call build_ahat(w,a,ahatnd,desc_a,info,symmetrize=symmetrize) + if (do_timings) call psb_toc(idx_bldahat) + if (info /= 0) then + write(0,*) 'Error from build_ahat ', info + end if + if (debug) write(0,*) iam,' buildmatching from build_ahat:',info + if (dump_ahat) then + block + character(len=40) :: fname + integer(psb_ipk_) :: k, nr + type(psb_ldspmat_type) :: tmp_mat + integer(psb_lpk_), allocatable :: ilv(:) + if (.false.) then + ilv = desc_a%get_global_indices(owned=.false.) + write(fname,'(a,i3.3,a,i3.3,a)') 'w-i',cnt,'-p',iam,'.mtx' + open(20,file=fname) + write(20,'(a,I3,a)') '% W vector ',iam,' ' + do k=1, nr + write(20,'(I8,1X,es26.18)') ilv(k),w(k) + end do + close(20) + write(fname,'(a,i3.3,a,i3.3,a)') 'aa-i',cnt,'-p',iam,'.mtx' +!!$ call a%print(fname=fname,head='Original matrix ',iv=ilv) + write(fname,'(a,i3.3,a,i3.3,a)') 'ahat-i',cnt,'-p',iam,'.mtx' + call ahatnd%print(fname=fname,head='Input to matching ') + else + write(fname,'(a,i3.3,a,i3.3,a)') 'ahat-i',cnt,'-p',np,'.mtx' + call psb_gather(tmp_mat,ahatnd,desc_a,info,root=izero) + if (iam == 0) call tmp_mat%print(fname=fname,head='Input to matching ') + end if + + end block + end if + if (do_timings) call psb_tic(idx_phase2) + ! + ! Now AHATND should be symmetric and positive, without a diagonal. + ! Almost ready to call matching routine. + ! + call ahatnd%mv_to(tcsr) + nr = tcsr%get_nrows() + nc = tcsr%get_ncols() + nz = tcsr%get_nzeros() + allocate(vnl(0:np),mate(max(nr,nc)),& + & msgis(np),msgas(np),msgprc(np),& + & ph1crd(np),ph2crd(np),stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + vnl = 0 + vnl(iam) = nr + call psb_sum(ictxt,vnl) + vnl(1:np) = vnl(0:np-1) + vnl(0) = 1 + do i=1,np + vnl(i) = vnl(i-1)+vnl(i) + end do + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' renum_blk' + end if + + call psb_cd_renum_block(desc_a,desc_blk,info) + associate (vlptr => tcsr%irp, vlind => tcsr%ja, ewght => tcsr%val) + ! Put column indices in global numbering + call psb_loc_to_glob(vlind,desc_blk,info,iact='E') + mate = -1 + + if (sort_minp) then + block + integer(psb_ipk_) :: ir1,ir2,nrz + do i=1,nr + ir1 = vlptr(i) + ir2 = vlptr(i+1) -1 + nrz = (ir2-ir1+1) + call fix_order(nrz,vlind(ir1:ir2),ewght(ir1:ir2),info) + end do + end block + end if + + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' into PMatchBox' + end if + if (do_timings) call psb_toc(idx_phase2) + ! + ! Now call matching! + ! + if (debug) write(0,*) iam,' buildmatching into PMatchBox:' + if (do_timings) call psb_tic(idx_cmboxp) + call PMatchBox(nr,nz,vlptr,vlind,ewght,& + & vnl, mate, iam, np,ictxt,& + & msgis,msgas,msgprc,ph0t,ph1t,ph2t,ph1crd,ph2crd,info,display_inp) + if (do_timings) call psb_toc(idx_cmboxp) + if (debug) write(0,*) iam,' buildmatching from PMatchBox:', info + if (debug_sync) then + call psb_max(ictxt,info) + if (iam == 0) write(0,*)' done PMatchBox', info + end if + end associate + if (do_timings) call psb_tic(idx_phase3) + nunmatch = count(mate(1:nr)<=0) + call psb_glob_to_loc(mate(1:nr),desc_blk,info,iact='I',owned=.false.) + nunmatch = abs(nunmatch - count(mate(1:nr)<=0)) + ! call psb_sum(ictxt,nunmatch) + if (nunmatch /= 0) write(0,*) iam,' Unmatched nodes local imbalance ',nunmatch +!!$ if (count(mate(1:nr)<0) /= nunmatch) write(0,*) iam,' Matching results ?',& +!!$ & nunmatch, count(mate(1:nr)<0) + call desc_blk%free(info) + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' done build_matching ' + end if + + if (dump_ahat) then + block + integer(psb_lpk_), allocatable :: mg(:), ml(:) + real(psb_dpk_), allocatable :: wl(:), wg(:) + ml=mate + call psb_loc_to_glob(ml(1:nr),desc_a,info,iact='E') + write(fname,'(a,i3.3,a,i3.3,a)') 'mate-i',cnt,'-p',np,'.mtx' + call psb_gather(mg,ml,desc_a,info,root=izero) + if (iam==0) call mm_array_write(mg,'Output from matching ',info,filename=fname) + wl=w + write(fname,'(a,i3.3,a,i3.3,a)') 'w-',cnt,'-p',np,'.mtx' + call psb_gather(wg,wl,desc_a,info,root=izero) + if (iam==0) call mm_array_write(wg,'Input smooth vector ',info,filename=fname) + + end block + cnt = cnt + 1 + end if + if (do_timings) call psb_toc(idx_phase3) + return + +9999 continue + call psb_error(ictxt) + contains + subroutine fix_order(n,ja,val,iret) + use psb_base_mod + implicit none + integer(psb_ipk_), intent(in) :: n + integer(psb_lpk_), intent(inout) :: ja(:) + real(psb_dpk_), intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: iret + integer(psb_lpk_), allocatable :: ix(:) + real(psb_dpk_), allocatable :: tmp(:) + + allocate(ix(n), tmp(n),stat=iret) + if (iret /= 0) return + call psb_msort(ja(1:n),ix=ix,dir=psb_sort_up_) + tmp(1:n) = val(ix(1:n)) + val(1:n) = tmp(1:n) + end subroutine fix_order + + end subroutine dbuild_matching + + subroutine dbuild_ahat(w,a,ahat,desc_a,info,symmetrize) + use psb_base_mod + implicit none + real(psb_dpk_), intent(in) :: w(:) + type(psb_dspmat_type), intent(inout) :: a + type(psb_dspmat_type), intent(out) :: ahat + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + logical, optional :: symmetrize + + type(psb_dspmat_type) :: atnd + type(psb_d_coo_sparse_mat) :: tcoo1, tcoo2, tcoo3 + real(psb_dpk_), allocatable :: diag(:) + integer(psb_lpk_), allocatable :: ilv(:) + logical, parameter :: debug=.false., dump=.false., dump_ahat=.false., dump_inp=.false. + logical :: symmetrize_ + real(psb_dpk_) :: aii, ajj, aij, wii, wjj, tmp1, tmp2, minabs, edgnrm + integer(psb_ipk_) :: nr, nc, nz, i, nz2, nrg, ii, jj, k, k2, ncols + integer(psb_lpk_) :: nzglob + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: me, np + character(len=80) :: aname + real(psb_dpk_), parameter :: eps=epsilon(1.d0) + integer(psb_ipk_), save :: idx_glbt=-1, idx_phase1=-1, idx_phase2=-1 + logical, parameter :: do_timings=.true. + logical, parameter :: debug_symmetry = .false., check_size=.false. + logical, parameter :: unroll_logtrans=.false. + + ictxt = desc_a%get_ctxt() + call psb_info(ictxt,me,np) + if (present(symmetrize)) then + symmetrize_ = symmetrize + else + symmetrize_ = .false. + end if + if ((do_timings).and.(idx_phase1==-1)) & + & idx_phase1 = psb_get_timer_idx("BLD_AHAT: phase1 ") + if ((do_timings).and.(idx_glbt==-1)) & + & idx_glbt = psb_get_timer_idx("BLD_AHAT: glob_transp") + if ((do_timings).and.(idx_phase2==-1)) & + & idx_phase2 = psb_get_timer_idx("BLD_AHAT: phase2 ") + if (do_timings) call psb_tic(idx_phase1) + if (dump_inp) then + block + type(psb_ldspmat_type) :: amglob + write(aname,'(a,i3.3,a)') 'a-bld-inp-',me,'.mtx' + call a%print(fname=aname,head='Test ') + call psb_gather(amglob,a,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i0,a)') 'a-bld-inp-g-',amglob%get_nrows(),'.mtx' + call amglob%print(fname=aname,head='Test ') + end if + end block + end if + + ! + ! Extract diagonal of A + ! + call a%cp_to(tcoo1) +!!$ call a%triu(atnd,info) +!!$ call atnd%mv_to(tcoo1) + nr = tcoo1%get_nrows() + nc = tcoo1%get_ncols() + nz = tcoo1%get_nzeros() + diag = a%get_diag(info) + call psb_realloc(nc,diag,info) + call psb_halo(diag,desc_a,info) + + ncols = desc_a%get_local_cols() + call psb_realloc(ncols,ilv,info) + do i=1, ncols + ilv(i) = i + end do + call desc_a%l2gip(ilv,info,owned=.false.) + nr = tcoo1%get_nrows() + nc = tcoo1%get_ncols() + nz = tcoo1%get_nzeros() + call tcoo2%allocate(nr,nc,int(1.25*nz)) + k2 = 0 + ! + ! Build the entries of \^A for matching + ! + minabs = huge(dzero) + do k = 1, nz + ii = tcoo1%ia(k) + jj = tcoo1%ja(k) + ! + ! Run over only the upper triangle, then transpose. + ! In theory, A was SPD; in practice, since we + ! are building the hierarchy with Galerkin products P^T A P, + ! A will be affected by round-off + ! + if (ilv(ii) eps) then + tcoo2%val(k2) = done - (2*done*aij*wii*wjj)/(aii*(wii**2) + ajj*(wjj**2)) + else + tcoo2%val(k2) = eps + end if +!!$ tcoo2%val(k2) = abs( tcoo2%val(k2) ) +!!$ minabs = min(minabs, tcoo2%val(k2) ) + end if + !else + ! write(0,*) 'build_ahat: index error :',ii,jj,' : boundaries :',nr,nc + !end if + end do + call tcoo2%set_nzeros(k2) + + if (dump) then + block + type(psb_ldspmat_type) :: amglob + call ahat%cp_from(tcoo2) + call psb_gather(amglob,ahat,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i0,a)') 'ahat-gu-',desc_a%get_global_rows(),'.mtx' + call amglob%print(fname=aname,head='Test ') + end if + write(aname,'(a,i0,a,i0,a)') 'ahat-gu-',desc_a%get_global_rows(),'-p',me,'.mtx' + call ahat%print(fname=aname,head='Test ',iv=ilv) + write(aname,'(a,i0,a,i0,a)') 'ainp-ahat-',desc_a%get_global_rows(),'-p',me,'.mtx' + call a%print(fname=aname,head='Test ',iv=ilv) + call psb_gather(amglob,a,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i0,a)') 'ainp-ahat-g-',desc_a%get_global_rows(),'.mtx' + call amglob%print(fname=aname,head='Test ') + end if + write(0,*) 'Done build_ahat' + end block + end if + if (do_timings) call psb_toc(idx_phase1) + if (do_timings) call psb_tic(idx_glbt) + + call psb_glob_transpose(tcoo2,desc_a,info,atrans=tcoo1) + if (do_timings) call psb_toc(idx_glbt) + if (do_timings) call psb_tic(idx_phase2) + if (dump) then + block + type(psb_ldspmat_type) :: amglob + call atnd%cp_from(tcoo1) + call psb_gather(amglob,atnd,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i0,a)') 'ahat-gl-',desc_a%get_global_rows(),'.mtx' + call amglob%print(fname=aname,head='Test ') + end if + write(aname,'(a,i0,a,i0,a)') 'ahat-gl-',desc_a%get_global_rows(),'-p',me,'.mtx' + call ahat%print(fname=aname,head='Test ',iv=ilv) + write(0,*) 'Done build_ahat' + end block + end if + + nz = tcoo1%get_nzeros() + nz2 = tcoo2%get_nzeros() + call tcoo2%ensure_size(nz+nz2) + tcoo2%ia(nz2+1:nz2+nz) = tcoo1%ia(1:nz) + tcoo2%ja(nz2+1:nz2+nz) = tcoo1%ja(1:nz) + tcoo2%val(nz2+1:nz2+nz) = tcoo1%val(1:nz) + call tcoo2%set_nzeros(nz+nz2) + call tcoo2%fix(info) + call tcoo1%free() + nz = tcoo2%get_nzeros() + if (unroll_logtrans) then + minabs = huge(dzero) + do k = 1, nz + tcoo2%val(k) = abs(tcoo2%val(k)) + minabs = min(minabs,tcoo2%val(k)) + end do + else + minabs = minval(abs(tcoo2%val(1:nz))) + end if + call psb_min(ictxt,minabs) + if (minabs <= dzero) then + if (me == 0) write(0,*) me, 'Min value for log correction is <=zero! ' + minabs = done + end if + if (unroll_logtrans) then + do k = 1, nz + ! tcoo2%val has already been subject to abs() above. + tcoo2%val(k) = log(tcoo2%val(k)/(0.999*minabs)) + if (tcoo2%val(k)<0) write(0,*) me, 'Warning: negative log output!' + end do + else + tcoo2%val(1:nz) = log(abs(tcoo2%val(1:nz))/(0.999*minabs)) + if (any(tcoo2%val(1:nz)<0)) write(0,*) me, 'Warning: negative log output!' + end if + + call ahat%mv_from(tcoo2) + + if (do_timings) call psb_toc(idx_phase2) + + if (check_size) then + nzglob = ahat%get_nzeros() + call psb_sum(ictxt,nzglob) + if (me==0) write(0,*) 'From build_ahat: global nzeros ',desc_a%get_global_rows(),nzglob + end if + if (debug_symmetry) then + block + integer(psb_ipk_) :: nz1, nz2 + real(psb_dpk_) :: mxv + call ahat%cp_to(tcoo1) + call psb_glob_transpose(tcoo1,desc_a,info,atrans=tcoo2) + nz1 = tcoo1%get_nzeros() + nz2 = tcoo2%get_nzeros() + call tcoo1%reallocate(nz1+nz2) + tcoo1%ia(nz1+1:nz1+nz2) = tcoo2%ia(1:nz2) + tcoo1%ja(nz1+1:nz1+nz2) = tcoo2%ja(1:nz2) + tcoo1%val(nz1+1:nz1+nz2) = -tcoo2%val(1:nz2) + call tcoo1%set_nzeros(nz1+nz2) + call tcoo1%set_dupl(psb_dupl_add_) + call tcoo1%fix(info) + nz1 = tcoo1%get_nzeros() + mxv = maxval(abs(tcoo1%val(1:nz1))) + call psb_max(ictxt,mxv) + if (me==0) write(0,*) 'Maximum disp from symmetry:',mxv + end block + end if + + if (dump_ahat) then + block + type(psb_ldspmat_type) :: amglob + write(aname,'(a,i3.3,a)') 'ahat-hfa-l-',me,'.mtx' + call ahat%print(fname=aname,head='Test ') + call psb_gather(amglob,ahat,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i0,a)') 'ahat-hfa-g-',amglob%get_nrows(),'.mtx' + call amglob%print(fname=aname,head='Test ') + end if + write(0,*) 'Done build_ahat' + end block + end if + + end subroutine dbuild_ahat + + +! subroutine build_ahat_old(w,a,ahat,desc_a,info,symmetrize) +! use psb_base_mod +! implicit none +! real(psb_dpk_) :: w(:) +! type(psb_ldspmat_type), intent(inout) :: a +! type(psb_ldspmat_type), intent(out) :: ahat +! type(psb_desc_type) :: desc_a +! integer(psb_ipk_), intent(out) :: info +! logical, optional :: symmetrize +! +! type(psb_ldspmat_type) :: atnd +! type(psb_ld_coo_sparse_mat) :: tcoo1, tcoo2, tcoo3 +! real(psb_dpk_), allocatable :: diag(:) +! integer(psb_lpk_), allocatable :: ilv(:) +! logical, parameter :: debug=.false., dump=.false., dump_ahat=.false. +! logical :: symmetrize_ +! logical, parameter :: half_ahat=.true. +! real(psb_dpk_) :: aii, ajj, aij, wii, wjj, tmp1, tmp2, minabs, edgnrm +! integer(psb_lpk_) :: nr, nc, nz, i, nz2, nrg, ii, jj, k, k2 +! type(psb_ctxt_type) :: ictxt +! integer(psb_ipk_) :: me, np +! character(len=80) :: aname +! real(psb_dpk_), parameter :: eps=epsilon(1.d0) +! +! ictxt = desc_a%get_ctxt() +! call psb_info(ictxt,me,np) +! if (present(symmetrize)) then +! symmetrize_ = symmetrize +! else +! symmetrize_ = .false. +! end if +! +! ! +! ! Extract off-diagonal part of A into ahat +! ! Extract diagonal of A +! ! +! call a%clip_diag(ahat,info) +! call ahat%mv_to(tcoo1) +! nr = tcoo1%get_nrows() +! nc = tcoo1%get_ncols() +! nz = tcoo1%get_nzeros() +! diag = a%get_diag(info) +! call psb_realloc(nc,diag,info) +! call psb_halo(diag,desc_a,info) +! +! if (half_ahat) then +! !!$ write(0,*) me,' Temp placeholder ' +! ilv = [(i,i=1,desc_a%get_local_cols())] +! call desc_a%l2gip(ilv,info,owned=.false.) +! ! +! ! At this point the matrix is symmetric, hence we will encounter +! ! all entries as appropriate. +! ! +! nr = tcoo1%get_nrows() +! nc = tcoo1%get_ncols() +! nz = tcoo1%get_nzeros() +! call tcoo2%allocate(nr,nc,nz) +! k2 = 0 +! do k = 1, nz +! ii = tcoo1%ia(k) +! jj = tcoo1%ja(k) +! ! +! ! Run over only one strict triangle +! ! +! if (ilv(ii) eps) then +! tcoo2%val(k2) = done - (2*done*aij*wii*wjj)/(aii*(wii**2) + ajj*(wjj**2)) +! else +! tcoo2%val(k2) = eps +! end if +! +! end if +! !else +! ! write(0,*) 'build_ahat: index error :',ii,jj,' : boundaries :',nr,nc +! !end if +! end do +! call tcoo2%set_nzeros(k2) +! if (dump) then +! block +! type(psb_ldspmat_type) :: amglob +! call ahat%cp_from(tcoo2) +! call psb_gather(amglob,ahat,desc_a,info) +! if (me==psb_root_) then +! write(aname,'(a,i0,a)') 'ahat-gu-',amglob%get_nrows(),'.mtx' +! call amglob%print(fname=aname,head='Test ') +! end if +! write(0,*) 'Done build_ahat' +! end block +! end if +! +! call psb_glob_transpose(tcoo2,desc_a,info,atrans=tcoo1) +! +! if (dump) then +! block +! type(psb_ldspmat_type) :: amglob +! call atnd%cp_from(tcoo1) +! call psb_gather(amglob,atnd,desc_a,info) +! if (me==psb_root_) then +! write(aname,'(a,i0,a)') 'ahat-gl-',amglob%get_nrows(),'.mtx' +! call amglob%print(fname=aname,head='Test ') +! end if +! write(0,*) 'Done build_ahat' +! end block +! end if +! +! nz = tcoo1%get_nzeros() +! nz2 = tcoo2%get_nzeros() +! call tcoo2%reallocate(nz+nz2) +! tcoo2%ia(nz2+1:nz2+nz) = tcoo1%ia(1:nz) +! tcoo2%ja(nz2+1:nz2+nz) = tcoo1%ja(1:nz) +! tcoo2%val(nz2+1:nz2+nz) = tcoo1%val(1:nz) +! call tcoo2%set_nzeros(nz+nz2) +! call tcoo2%fix(info) +! call tcoo1%free() +! nz = tcoo2%get_nzeros() +! minabs = minval(abs(tcoo2%val(1:nz))) +! call psb_min(ictxt,minabs) +! if (minabs == dzero) then +! if (me == 0) write(0,*) me, 'Min value for log correction is zero! ' +! minabs = done +! end if +! tcoo2%val(1:nz) = log(abs(tcoo2%val(1:nz))/(0.999*minabs)) +! if (any(tcoo2%val(1:nz)<0)) write(0,*) me, 'Warning: negative log output!' +! call ahat%mv_from(tcoo2) +! if (dump_ahat) then +! block +! character(len=40) :: fname +! integer(psb_ipk_) :: k, nr +! integer(psb_lpk_), allocatable :: ilv(:) +! nr = desc_a%get_local_rows() +! ilv = desc_a%get_global_indices(owned=.false.) +! write(fname,'(a,i3.3,a,i3.3,a)') 'aa-i',me,'-p',np,'.mtx' +! !!$ call a%print(fname=fname,head='Original matrix ',iv=ilv) +! write(fname,'(a,i3.3,a,i3.3,a)') 'ah2-inp-i',me,'-p',np,'.mtx' +! call ahat%print(fname=fname,head='ahat ',iv=ilv) +! end block +! end if +! +! if (dump) then +! write(aname,'(a,i3.3,a)') 'ahat-hfa-l-',me,'.mtx' +! call ahat%print(fname=aname,head='Test ',iv=ilv) +! end if +! +! if (dump) then +! block +! type(psb_ldspmat_type) :: amglob +! !!$ call psb_gather(amglob,a,desc_a,info) +! !!$ if (me==psb_root_) then +! !!$ write(aname,'(a,i3.3,a)') 'a-g-',amglob%get_nrows(),'.mtx' +! !!$ call amglob%print(fname=aname,head='Test ') +! !!$ end if +! call psb_gather(amglob,ahat,desc_a,info) +! if (me==psb_root_) then +! write(aname,'(a,i0,a)') 'ahat-hfa-g-',amglob%get_nrows(),'.mtx' +! call amglob%print(fname=aname,head='Test ') +! end if +! write(0,*) 'Done build_ahat' +! end block +! end if +! +! +! else +! +! if (debug) then +! ilv = [(i,i=1,desc_a%get_local_cols())] +! call desc_a%l2gip(ilv,info,owned=.false.) +! end if +! if (symmetrize_) then +! ! Is this faster than storing by CSR, going over the +! ! upper triangle, searching for lower triangle entry and +! ! then duplicating the output? Probably yes. +! if (debug) write(0,*) me,' Build_ahat: symmetrize :',nr,nc,nz +! call ahat%cp_from(tcoo1) +! call psb_htranspose(ahat,atnd,desc_a,info) +! if (debug) write(0,*) me,' Build_ahat: done transpose' +! +! if (dump) then +! write(aname,'(a,i3.3,a)') 'ainp-',me,'.mtx' +! call ahat%print(fname=aname,head='Test ',iv=ilv) +! write(aname,'(a,i3.3,a)') 'atnd-',me,'.mtx' +! call atnd%print(fname=aname,head='Test ',iv=ilv) +! end if +! call ahat%free() +! call atnd%mv_to(tcoo2) +! nz2 = tcoo2%get_nzeros() +! +! if (debug) then +! write(0,*) me,':',tcoo1%get_nrows(),tcoo1%get_ncols(),tcoo1%get_nzeros(),& +! & tcoo2%get_nrows(),tcoo2%get_ncols(),tcoo2%get_nzeros() +! flush(0) +! end if +! +! call tcoo3%allocate(nr, nc, max(2*nz,nz+nz2)) +! tcoo3%ia(1:nz) = tcoo1%ia(1:nz) +! tcoo3%ja(1:nz) = tcoo1%ja(1:nz) +! tcoo3%val(1:nz) = tcoo1%val(1:nz) +! tcoo3%ia(nz+1:nz+nz2) = tcoo2%ia(1:nz2) +! tcoo3%ja(nz+1:nz+nz2) = tcoo2%ja(1:nz2) +! tcoo3%val(nz+1:nz+nz2) = tcoo2%val(1:nz2) +! call tcoo3%set_nzeros(nz+nz2) +! call tcoo3%set_dupl(psb_dupl_add_) +! call tcoo3%fix(info) +! nz = tcoo3%get_nzeros() +! tcoo3%val(1:nz) = 0.5d0 * tcoo3%val(1:nz) +! call tcoo3%mv_to_coo(tcoo1,info) +! end if +! +! if (dump_ahat) then +! block +! character(len=40) :: fname +! integer(psb_ipk_) :: k, nr +! integer(psb_lpk_), allocatable :: ilv(:) +! nr = desc_a%get_local_rows() +! ilv = desc_a%get_global_indices(owned=.false.) +! write(fname,'(a,i3.3,a,i3.3,a)') 'aa-i',me,'-p',np,'.mtx' +! !!$ call a%print(fname=fname,head='Original matrix ',iv=ilv) +! write(fname,'(a,i3.3,a,i3.3,a)') 'ahat-inp-i',me,'-p',np,'.mtx' +! call ahat%print(fname=fname,head='Before building_ahat ',iv=ilv) +! end block +! end if +! +! +! ! +! ! At this point the matrix is symmetric, hence we will encounter +! ! all entries as appropriate. +! ! +! call tcoo1%cp_to_coo(tcoo2,info) +! nz = tcoo1%get_nzeros() +! do k = 1, nz +! ii = tcoo1%ia(k) +! jj = tcoo1%ja(k) +! aij = tcoo1%val(k) +! !if ((ii<= nr).and.(jj<=nc).and.(ii/=jj)) then +! ! This is already guaranteed by construction +! ! +! aii = diag(ii) +! ajj = diag(jj) +! wii = w(ii) +! wjj = w(jj) +! edgnrm = aii*(wii**2) + ajj*(wjj**2) +! if (edgnrm > eps) then +! tcoo2%val(k) = done - (2*done*aij*wii*wjj)/(aii*(wii**2) + ajj*(wjj**2)) +! else +! tcoo2%val(k)=eps +! end if +! if (debug) then +! block +! integer(psb_ipk_), parameter :: nr1=329, nc1=393 +! integer(psb_ipk_), parameter :: nr2=13, nc2=77 +! integer(psb_ipk_), parameter :: nr3=313, nc3=249 +! if ( ((ilv(ii)==nr1).and.(ilv(jj)==nc1)).or.((ilv(ii)==nc1).and.(ilv(jj)==nr1))& +! &.or.((ilv(ii)==nr2).and.(ilv(jj)==nc2)).or.((ilv(ii)==nc2).and.(ilv(jj)==nr2)) & +! &.or.((ilv(ii)==nr3).and.(ilv(jj)==nc3)).or.((ilv(ii)==nc3).and.(ilv(jj)==nr3)) & +! &) then +! write(0,*)me, 'Check on ahat:',ii,jj,ilv(ii),ilv(jj),aij,aii,ajj,wii,wjj,tcoo2%val(k) +! end if +! end block +! end if +! +! !else +! ! write(0,*) 'build_ahat: index error :',ii,jj,' : boundaries :',nr,nc +! !end if +! end do +! nz = tcoo2%get_nzeros() +! minabs = minval(abs(tcoo2%val(1:nz))) +! call psb_min(ictxt,minabs) +! if (minabs == dzero) then +! if (me == 0) write(0,*) me, 'Min value for log correction is zero! ' +! minabs = done +! end if +! tcoo2%val(1:nz) = log(abs(tcoo2%val(1:nz))/(0.999*minabs)) +! if (any(tcoo2%val(1:nz)<0)) write(0,*) me, 'Warning: negative log output!' +! call ahat%mv_from(tcoo2) +! +! if (dump) then +! write(aname,'(a,i3.3,a)') 'ahat-l-',me,'.mtx' +! call ahat%print(fname=aname,head='Test ',iv=ilv) +! end if +! +! if (dump) then +! block +! type(psb_ldspmat_type) :: amglob +! !!$ call psb_gather(amglob,a,desc_a,info) +! !!$ if (me==psb_root_) then +! !!$ write(aname,'(a,i3.3,a)') 'a-g-',amglob%get_nrows(),'.mtx' +! !!$ call amglob%print(fname=aname,head='Test ') +! !!$ end if +! call psb_gather(amglob,ahat,desc_a,info) +! if (me==psb_root_) then +! write(aname,'(a,i0,a)') 'ahat-g-',amglob%get_nrows(),'.mtx' +! call amglob%print(fname=aname,head='Test ') +! end if +! write(0,*) 'Done build_ahat' +! end block +! end if +! end if +! +! end subroutine build_ahat_old + + subroutine psb_dgtranspose(ain,aout,desc_a,info) + use psb_base_mod + implicit none + type(psb_ldspmat_type), intent(in) :: ain + type(psb_ldspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + + ! + ! BEWARE: This routine works under the assumption + ! that the same DESC_A works for both A and A^T, which + ! essentially means that A has a symmetric pattern. + ! + type(psb_ldspmat_type) :: atmp, ahalo, aglb + type(psb_ld_coo_sparse_mat) :: tmpcoo + type(psb_ld_csr_sparse_mat) :: tmpcsr + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: me, np + integer(psb_lpk_) :: i, j, k, nrow, ncol + integer(psb_lpk_), allocatable :: ilv(:) + character(len=80) :: aname + logical, parameter :: debug=.false., dump=.false., debug_sync=.false. + + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Start gtranspose ' + end if + call ain%cscnv(tmpcsr,info) + + if (debug) then + ilv = [(i,i=1,ncol)] + call desc_a%l2gip(ilv,info,owned=.false.) + write(aname,'(a,i3.3,a)') 'atmp-preh-',me,'.mtx' + call ain%print(fname=aname,head='atmp before haloTest ',iv=ilv) + end if + if (dump) then + call ain%cscnv(atmp,info) + call psb_gather(aglb,atmp,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'aglob-prehalo.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end if + + !call psb_loc_to_glob(tmpcsr%ja,desc_a,info) + call atmp%mv_from(tmpcsr) + + if (debug) then + write(aname,'(a,i3.3,a)') 'tmpcsr-',me,'.mtx' + call atmp%print(fname=aname,head='tmpcsr ',iv=ilv) + !call psb_set_debug_level(9999) + end if + + ! FIXME THIS NEEDS REWORKING + if (debug) write(0,*) me,' Gtranspose into sphalo :',atmp%get_nrows(),atmp%get_ncols() + call psb_sphalo(atmp,desc_a,ahalo,info,rowscale=.true.) +!!$ call psb_sphalo(atmp,desc_a,ahalo,info,& +!!$ & colcnv=.false.,rowscale=.true.) + if (debug) write(0,*) me,' Gtranspose from sphalo :',ahalo%get_nrows(),ahalo%get_ncols() +!!$ call psb_set_debug_level(0) + if (info == psb_success_) call psb_rwextd(ncol,atmp,info,b=ahalo) + + if (debug) then + write(aname,'(a,i3.3,a)') 'ahalo-',me,'.mtx' + call ahalo%print(fname=aname,head='ahalo after haloTest ',iv=ilv) + write(aname,'(a,i3.3,a)') 'atmp-h-',me,'.mtx' + call atmp%print(fname=aname,head='atmp after haloTest ',iv=ilv) + end if + + if (info == psb_success_) call ahalo%free() + + call atmp%cp_to(tmpcoo) + call tmpcoo%transp() + !call psb_glob_to_loc(tmpcoo%ia,desc_a,info,iact='I') + if (debug) write(0,*) 'Before cleanup:',tmpcoo%get_nzeros() + + j = 0 + do k=1, tmpcoo%get_nzeros() + if ((tmpcoo%ia(k) > 0).and.(tmpcoo%ja(k)>0)) then + j = j+1 + tmpcoo%ia(j) = tmpcoo%ia(k) + tmpcoo%ja(j) = tmpcoo%ja(k) + tmpcoo%val(j) = tmpcoo%val(k) + end if + end do + call tmpcoo%set_nzeros(j) + + if (debug) write(0,*) 'After cleanup:',tmpcoo%get_nzeros() + + call ahalo%mv_from(tmpcoo) + if (dump) then + call psb_gather(aglb,ahalo,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran-preclip.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end if + + + call ahalo%csclip(aout,info,imax=nrow) + + if (debug) write(0,*) 'After clip:',aout%get_nzeros() + + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'End gtranspose ' + end if + !call aout%cscnv(info,type='csr') + + if (dump) then + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call aout%print(fname=aname,head='atrans ',iv=ilv) + call psb_gather(aglb,aout,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end if + + end subroutine psb_dgtranspose + + subroutine psb_dhtranspose(ain,aout,desc_a,info) + use psb_base_mod + implicit none + type(psb_ldspmat_type), intent(in) :: ain + type(psb_ldspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + + ! + ! BEWARE: This routine works under the assumption + ! that the same DESC_A works for both A and A^T, which + ! essentially means that A has a symmetric pattern. + ! + type(psb_ldspmat_type) :: atmp, ahalo, aglb + type(psb_ld_coo_sparse_mat) :: tmpcoo, tmpc1, tmpc2, tmpch + type(psb_ld_csr_sparse_mat) :: tmpcsr + integer(psb_ipk_) :: nz1, nz2, nzh, nz + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: me, np + integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz + integer(psb_lpk_), allocatable :: ilv(:) + character(len=80) :: aname + logical, parameter :: debug=.false., dump=.false., debug_sync=.false. + + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Start htranspose ' + end if + call ain%cscnv(tmpcsr,info) + + if (debug) then + ilv = [(i,i=1,ncol)] + call desc_a%l2gip(ilv,info,owned=.false.) + write(aname,'(a,i3.3,a)') 'atmp-preh-',me,'.mtx' + call ain%print(fname=aname,head='atmp before haloTest ',iv=ilv) + end if + if (dump) then + call ain%cscnv(atmp,info) + call psb_gather(aglb,atmp,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'aglob-prehalo.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end if + + !call psb_loc_to_glob(tmpcsr%ja,desc_a,info) + call atmp%mv_from(tmpcsr) + + if (debug) then + write(aname,'(a,i3.3,a)') 'tmpcsr-',me,'.mtx' + call atmp%print(fname=aname,head='tmpcsr ',iv=ilv) + !call psb_set_debug_level(9999) + end if + + ! FIXME THIS NEEDS REWORKING + if (debug) write(0,*) me,' Htranspose into sphalo :',atmp%get_nrows(),atmp%get_ncols() + if (.true.) then + call psb_sphalo(atmp,desc_a,ahalo,info, outfmt='coo ') + call atmp%mv_to(tmpc1) + call ahalo%mv_to(tmpch) + nz1 = tmpc1%get_nzeros() + call psb_loc_to_glob(tmpc1%ia(1:nz1),desc_a,info,iact='I') + call psb_loc_to_glob(tmpc1%ja(1:nz1),desc_a,info,iact='I') + nzh = tmpch%get_nzeros() + call psb_loc_to_glob(tmpch%ia(1:nzh),desc_a,info,iact='I') + call psb_loc_to_glob(tmpch%ja(1:nzh),desc_a,info,iact='I') + nlz = nz1+nzh + call tmpcoo%allocate(ncol,ncol,nlz) + tmpcoo%ia(1:nz1) = tmpc1%ia(1:nz1) + tmpcoo%ja(1:nz1) = tmpc1%ja(1:nz1) + tmpcoo%val(1:nz1) = tmpc1%val(1:nz1) + tmpcoo%ia(nz1+1:nz1+nzh) = tmpch%ia(1:nzh) + tmpcoo%ja(nz1+1:nz1+nzh) = tmpch%ja(1:nzh) + tmpcoo%val(nz1+1:nz1+nzh) = tmpch%val(1:nzh) + call tmpcoo%set_nzeros(nlz) + call tmpcoo%transp() + nz = tmpcoo%get_nzeros() + call psb_glob_to_loc(tmpcoo%ia(1:nz),desc_a,info,iact='I') + call psb_glob_to_loc(tmpcoo%ja(1:nz),desc_a,info,iact='I') + if (.true.) then + call tmpcoo%clean_negidx(info) + else + j = 0 + do k=1, tmpcoo%get_nzeros() + if ((tmpcoo%ia(k) > 0).and.(tmpcoo%ja(k)>0)) then + j = j+1 + tmpcoo%ia(j) = tmpcoo%ia(k) + tmpcoo%ja(j) = tmpcoo%ja(k) + tmpcoo%val(j) = tmpcoo%val(k) + end if + end do + call tmpcoo%set_nzeros(j) + end if + call ahalo%mv_from(tmpcoo) + call ahalo%csclip(aout,info,imax=nrow) + + else + call psb_sphalo(atmp,desc_a,ahalo,info, rowscale=.true.) +!!$ call psb_sphalo(atmp,desc_a,ahalo,info,& +!!$ & colcnv=.false.,rowscale=.true.) + if (debug) write(0,*) me,' Htranspose from sphalo :',ahalo%get_nrows(),ahalo%get_ncols() +!!$ call psb_set_debug_level(0) + if (info == psb_success_) call psb_rwextd(ncol,atmp,info,b=ahalo) + + if (debug) then + write(aname,'(a,i3.3,a)') 'ahalo-',me,'.mtx' + call ahalo%print(fname=aname,head='ahalo after haloTest ',iv=ilv) + write(aname,'(a,i3.3,a)') 'atmp-h-',me,'.mtx' + call atmp%print(fname=aname,head='atmp after haloTest ',iv=ilv) + end if + + if (info == psb_success_) call ahalo%free() + + call atmp%cp_to(tmpcoo) + call tmpcoo%transp() + !call psb_glob_to_loc(tmpcoo%ia,desc_a,info,iact='I') + if (debug) write(0,*) 'Before cleanup:',tmpcoo%get_nzeros() + if (.true.) then + call tmpcoo%clean_negidx(info) + else + + j = 0 + do k=1, tmpcoo%get_nzeros() + if ((tmpcoo%ia(k) > 0).and.(tmpcoo%ja(k)>0)) then + j = j+1 + tmpcoo%ia(j) = tmpcoo%ia(k) + tmpcoo%ja(j) = tmpcoo%ja(k) + tmpcoo%val(j) = tmpcoo%val(k) + end if + end do + call tmpcoo%set_nzeros(j) + end if + + if (debug) write(0,*) 'After cleanup:',tmpcoo%get_nzeros() + + call ahalo%mv_from(tmpcoo) + if (dump) then + call psb_gather(aglb,ahalo,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran-preclip.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end if + + + call ahalo%csclip(aout,info,imax=nrow) + end if + + if (debug) write(0,*) 'After clip:',aout%get_nzeros() + + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'End htranspose ' + end if + !call aout%cscnv(info,type='csr') + + if (dump) then + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call aout%print(fname=aname,head='atrans ',iv=ilv) + call psb_gather(aglb,aout,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end if + + end subroutine psb_dhtranspose + + subroutine dPMatchBox(nlver,nledge,verlocptr,verlocind,edgelocweight,& + & verdistance, mate, myrank, numprocs, ictxt,& + & msgindsent,msgactualsent,msgpercent,& + & ph0_time, ph1_time, ph2_time, ph1_card, ph2_card,info,display_inp) + use psb_base_mod + implicit none + type(psb_ctxt_type) :: ictxt + integer(psb_c_ipk_), value :: myrank, numprocs + integer(psb_c_lpk_), value :: nlver,nledge + integer(psb_c_lpk_) :: verlocptr(:),verlocind(:), verdistance(:) + integer(psb_c_lpk_) :: mate(:) + integer(psb_c_lpk_) :: msgindsent(*),msgactualsent(*) + real(c_double) :: ph0_time, ph1_time, ph2_time + integer(psb_c_lpk_) :: ph1_card(*),ph2_card(*) + real(c_double) :: edgelocweight(:) + real(c_double) :: msgpercent(*) + integer(psb_ipk_) :: info, me, np + integer(psb_c_mpk_) :: icomm, mrank, mnp + logical, optional :: display_inp + ! + logical, parameter :: debug=.false., debug_out=.false., debug_sync=.false., dump_input=.false. + logical :: display_ + integer(psb_lpk_) :: i,k + integer(psb_ipk_), save :: cnt=1 + + call psb_info(ictxt,me,np) + icomm = psb_get_mpi_comm(ictxt) + mrank = psb_get_mpi_rank(ictxt,me) + mnp = np + if (present(display_inp)) then + display_ = display_inp + else + display_ = .false. + end if + + verlocptr(:) = verlocptr(:) - 1 + verlocind(:) = verlocind(:) - 1 + verdistance(:) = verdistance(:) -1 + + if (dump_input) then + block + integer(psb_ipk_) :: iout=20,info,i,j,k,nr + character(len=80) :: fname + write(fname,'(a,i4.4,a,i4.4,a)') 'verlocptr-l',cnt,'-i',me,'.mtx' + open(iout,file=fname,iostat=info) + if (info == 0) then + write(iout,'(a)') '%verlocptr ' + write(iout,*) nlver + do i=1, nlver+1 + write(iout,*) verlocptr(i) + end do + close(iout) + else + write(psb_err_unit,*) 'Error: could not open ',fname,' for output' + end if + write(fname,'(a,i4.4,a,i4.4,a)') 'edges-l',cnt,'-i',me,'.mtx' + open(iout,file=fname,iostat=info) + if (info == 0) then + write(iout,'(a)') '%verlocind/edgeweight ' + write(iout,*) nlver + do i=1, nlver + do j=verlocptr(i)+1,verlocptr(i+1) + write(iout,*) i-1, verlocind(j),edgelocweight(j) + end do + end do + close(iout) + else + write(psb_err_unit,*) 'Error: could not open ',fname,' for output' + end if + if (me==0) then + write(fname,'(a,i4.4,a,i4.4,a)') 'verdistance-l',cnt,'-i',me,'.mtx' + open(iout,file=fname,iostat=info) + if (info == 0) then + write(iout,'(a)') '%verdistance' + write(iout,*) np + do i=1, np+1 + write(iout,*) verdistance(i) + end do + close(iout) + else + write(psb_err_unit,*) 'Error: could not open ',fname,' for output' + end if + end if + end block + cnt = cnt + 1 + end if + + if (debug.or.display_) then + do i=0,np-1 + if (me == i) then + write(6,*) 'Process: ',me,' : Input into matching: nlver ',nlver,' nledge ',nledge + write(6,*) 'Process: ',me,' : VERDISTANCE 0-base : ',verdistance(1:np+1) + write(6,*) 'Process: ',me,' : VERLOCPTR 0-base : ',verlocptr(1:nlver+1) + write(6,*) 'Process: ',me,' : VERLOCIND 0-base : ',verlocind(1:nledge) + write(6,*) 'Process: ',me,' : EDGELOCWEIGHT : ',edgelocweight(1:nledge) + write(6,*) 'Process: ',me,' : Initial MATE : ',mate(1:nlver) + flush(6) + end if + call psb_barrier(ictxt) + end do + end if + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*)' Calling MatchBoxP ' + end if + + call MatchBoxPC(nlver,nledge,verlocptr,verlocind,edgelocweight,& + & verdistance, mate, mrank, mnp, icomm,& + & msgindsent,msgactualsent,msgpercent,& + & ph0_time, ph1_time, ph2_time, ph1_card, ph2_card) + verlocptr(:) = verlocptr(:) + 1 + verlocind(:) = verlocind(:) + 1 + verdistance(:) = verdistance(:) + 1 + + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*)' Done MatchBoxP ' + end if + + if (debug_out) then + do k=0,np-1 + if (me == k) then + write(6,*) 'Process: ',me,' : from Matching (0-base): ',info + do i=1,nlver + write(6,*) '(',i,',',mate(i),')' + !mate(i) = mate(i) +1 + ! + end do + flush(6) + end if + call psb_barrier(ictxt) + end do + end if + where(mate>=0) mate = mate + 1 + + end subroutine dPMatchBox + +end module dmatchboxp_mod diff --git a/amgprec/amg_d_parmatch_aggregator_mod.F90 b/amgprec/amg_d_parmatch_aggregator_mod.F90 new file mode 100644 index 00000000..e6cf4b1f --- /dev/null +++ b/amgprec/amg_d_parmatch_aggregator_mod.F90 @@ -0,0 +1,713 @@ +! +! +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! +! (C) Copyright 2021 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! moved here from amg4psblas-extension +! +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! +! The aggregator object hosts the aggregation method for building +! the multilevel hierarchy. This variant is based on the hybrid method +! presented in +! +! +! sm - class(amg_T_base_smoother_type), allocatable +! The current level preconditioner (aka smoother). +! parms - type(amg_RTml_parms) +! The parameters defining the multilevel strategy. +! ac - The local part of the current-level matrix, built by +! coarsening the previous-level matrix. +! desc_ac - type(psb_desc_type). +! The communication descriptor associated to the matrix +! stored in ac. +! base_a - type(psb_Tspmat_type), pointer. +! Pointer (really a pointer!) to the local part of the current +! matrix (so we have a unified treatment of residuals). +! We need this to avoid passing explicitly the current matrix +! to the routine which applies the preconditioner. +! base_desc - type(psb_desc_type), pointer. +! Pointer to the communication descriptor associated to the +! matrix pointed by base_a. +! map - Stores the maps (restriction and prolongation) between the +! vector spaces associated to the index spaces of the previous +! and current levels. +! +! Methods: +! Most methods follow the encapsulation hierarchy: they take whatever action +! is appropriate for the current object, then call the corresponding method for +! the contained object. +! As an example: the descr() method prints out a description of the +! level. It starts by invoking the descr() method of the parms object, +! then calls the descr() method of the smoother object. +! +! descr - Prints a description of the object. +! default - Set default values +! dump - Dump to file object contents +! set - Sets various parameters; when a request is unknown +! it is passed to the smoother object for further processing. +! check - Sanity checks. +! sizeof - Total memory occupation in bytes +! get_nzeros - Number of nonzeros +! +! + +module amg_d_parmatch_aggregator_mod + use amg_d_base_aggregator_mod + use dmatchboxp_mod + + type, extends(amg_d_base_aggregator_type) :: amg_d_parmatch_aggregator_type + integer(psb_ipk_) :: matching_alg + integer(psb_ipk_) :: n_sweeps ! When n_sweeps >1 we need an auxiliary descriptor + integer(psb_ipk_) :: orig_aggr_size + integer(psb_ipk_) :: jacobi_sweeps + real(psb_dpk_), allocatable :: w(:), w_nxt(:) + type(psb_dspmat_type), allocatable :: prol, restr + type(psb_dspmat_type), allocatable :: ac, base_a, rwa + type(psb_desc_type), allocatable :: desc_ac, desc_ax, base_desc, rwdesc + integer(psb_ipk_) :: max_csize + integer(psb_ipk_) :: max_nlevels + logical :: reproducible_matching = .false. + logical :: need_symmetrize = .false. + logical :: unsmoothed_hierarchy = .true. + contains + procedure, pass(ag) :: bld_tprol => amg_d_parmatch_aggregator_build_tprol + procedure, pass(ag) :: mat_bld => amg_d_parmatch_aggregator_mat_bld + procedure, pass(ag) :: mat_asb => amg_d_parmatch_aggregator_mat_asb + procedure, pass(ag) :: inner_mat_asb => amg_d_parmatch_aggregator_inner_mat_asb + procedure, pass(ag) :: bld_map => amg_d_parmatch_aggregator_bld_map + procedure, pass(ag) :: csetc => d_parmatch_aggr_csetc + procedure, pass(ag) :: cseti => d_parmatch_aggr_cseti + procedure, pass(ag) :: default => d_parmatch_aggr_set_default + procedure, pass(ag) :: sizeof => d_parmatch_aggregator_sizeof + procedure, pass(ag) :: update_next => d_parmatch_aggregator_update_next + procedure, pass(ag) :: bld_wnxt => d_parmatch_bld_wnxt + procedure, pass(ag) :: bld_default_w => d_bld_default_w + procedure, pass(ag) :: set_c_default_w => d_set_prm_c_default_w + procedure, pass(ag) :: descr => d_parmatch_aggregator_descr + procedure, pass(ag) :: clone => d_parmatch_aggregator_clone + procedure, pass(ag) :: free => d_parmatch_aggregator_free + procedure, nopass :: fmt => d_parmatch_aggregator_fmt + procedure, nopass :: xt_desc => amg_d_parmatch_aggregator_xt_desc + end type amg_d_parmatch_aggregator_type + +!!$ interface +!!$ subroutine glob_transpose(ain,desc_r,desc_c,atrans,desc_rx,info) +!!$ import :: psb_desc_type, psb_ld_coo_sparse_mat, psb_ipk_ +!!$ implicit none +!!$ type(psb_ld_coo_sparse_mat), intent(in) :: ain +!!$ type(psb_ld_coo_sparse_mat), intent(out) :: atrans +!!$ type(psb_desc_type), intent(inout) :: desc_r, desc_c +!!$ type(psb_desc_type), intent(out) :: desc_rx +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine glob_transpose +!!$ end interface + + interface + subroutine amg_d_parmatch_aggregator_build_tprol(ag,parms,ag_data,& + & a,desc_a,ilaggr,nlaggr,t_prol,info) + import :: amg_d_parmatch_aggregator_type, psb_desc_type, psb_dspmat_type,& + & psb_ldspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_dml_parms, amg_daggr_data + implicit none + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_dml_parms), intent(inout) :: parms + type(amg_daggr_data), intent(in) :: ag_data + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), allocatable, intent(out) :: ilaggr(:),nlaggr(:) + type(psb_ldspmat_type), intent(out) :: t_prol + integer(psb_ipk_), intent(out) :: info + end subroutine amg_d_parmatch_aggregator_build_tprol + end interface + + interface + subroutine amg_d_parmatch_aggregator_mat_bld(ag,parms,a,desc_a,ilaggr,nlaggr,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + import :: amg_d_parmatch_aggregator_type, psb_desc_type, psb_dspmat_type,& + & psb_ldspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_dml_parms, amg_daggr_data + implicit none + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_dml_parms), intent(inout) :: parms + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(psb_ldspmat_type), intent(inout) :: t_prol + type(psb_dspmat_type), intent(out) :: op_prol,ac,op_restr + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_d_parmatch_aggregator_mat_bld + end interface + + interface + subroutine amg_d_parmatch_aggregator_mat_asb(ag,parms,a,desc_a,& + & ac,desc_ac, op_prol,op_restr,info) + import :: amg_d_parmatch_aggregator_type, psb_desc_type, psb_dspmat_type,& + & psb_ldspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_dml_parms, amg_daggr_data + implicit none + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_dml_parms), intent(inout) :: parms + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + type(psb_dspmat_type), intent(inout) :: op_prol,ac,op_restr + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_d_parmatch_aggregator_mat_asb + end interface + + + interface + subroutine amg_d_parmatch_aggregator_inner_mat_asb(ag,parms,a,desc_a,& + & ac,desc_ac, op_prol,op_restr,info) + import :: amg_d_parmatch_aggregator_type, psb_desc_type, psb_dspmat_type,& + & psb_ldspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_dml_parms, amg_daggr_data + implicit none + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_dml_parms), intent(inout) :: parms + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_dspmat_type), intent(inout) :: op_prol,op_restr + type(psb_dspmat_type), intent(inout) :: ac + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_d_parmatch_aggregator_inner_mat_asb + end interface + + + interface + subroutine amg_d_parmatch_spmm_bld(a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + import :: amg_d_parmatch_aggregator_type, psb_desc_type, psb_dspmat_type,& + & psb_ldspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_dml_parms, amg_daggr_data + implicit none + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_dml_parms), intent(inout) :: parms + type(psb_ldspmat_type), intent(inout) :: t_prol + type(psb_dspmat_type), intent(inout) :: ac, op_prol, op_restr + type(psb_desc_type), intent(out) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_d_parmatch_spmm_bld + end interface + + interface + subroutine amg_d_parmatch_unsmth_bld(ag,a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + import :: amg_d_parmatch_aggregator_type, psb_desc_type, psb_dspmat_type,& + & psb_ldspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_dml_parms, amg_daggr_data + implicit none + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_dml_parms), intent(inout) :: parms + type(psb_ldspmat_type), intent(inout) :: t_prol + type(psb_dspmat_type), intent(out) :: op_prol,ac, op_restr + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_d_parmatch_unsmth_bld + end interface + + interface + subroutine amg_d_parmatch_smth_bld(ag,a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + import :: amg_d_parmatch_aggregator_type, psb_desc_type, psb_dspmat_type,& + & psb_ldspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_dml_parms, amg_daggr_data + implicit none + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_dml_parms), intent(inout) :: parms + type(psb_ldspmat_type), intent(inout) :: t_prol + type(psb_dspmat_type), intent(out) :: op_prol,ac, op_restr + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_d_parmatch_smth_bld + end interface + + interface + subroutine amg_d_parmatch_spmm_bld_ov(a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + import :: amg_d_parmatch_aggregator_type, psb_desc_type, psb_dspmat_type,& + & psb_ldspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_dml_parms, amg_daggr_data + implicit none + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_dml_parms), intent(inout) :: parms + type(psb_ldspmat_type), intent(inout) :: t_prol + type(psb_dspmat_type), intent(out) :: op_prol,ac, op_restr + type(psb_desc_type), intent(out) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_d_parmatch_spmm_bld_ov + end interface + + interface + subroutine amg_d_parmatch_spmm_bld_inner(a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + import :: amg_d_parmatch_aggregator_type, psb_desc_type, psb_dspmat_type,& + & psb_ldspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_dml_parms, amg_daggr_data,& + & psb_d_csr_sparse_mat, psb_ld_csr_sparse_mat + implicit none + type(psb_d_csr_sparse_mat), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_dml_parms), intent(inout) :: parms + type(psb_ldspmat_type), intent(inout) :: t_prol + type(psb_dspmat_type), intent(out) :: op_prol,ac, op_restr + type(psb_desc_type), intent(out) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_d_parmatch_spmm_bld_inner + end interface + +!!$ interface +!!$ Subroutine amg_d_p_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) +!!$ import +!!$ Implicit None +!!$ type(psb_ld_csr_sparse_mat),intent(in) :: acsr +!!$ type(psb_ld_csr_sparse_mat),intent(inout) :: bcsr +!!$ type(psb_ld_csr_sparse_mat),intent(out) :: ccsr +!!$ type(psb_desc_type),intent(in) :: desc_a +!!$ type(psb_desc_type),intent(inout) :: desc_c +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_), intent(in), optional :: data +!!$ end Subroutine amg_d_p_csr_spspmm +!!$ end interface + + private :: is_legal_malg, is_legal_csize, is_legal_nsweeps, is_legal_nlevels + +contains + + subroutine d_bld_default_w(ag,nr) + use psb_realloc_mod + implicit none + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + integer(psb_ipk_), intent(in) :: nr + integer(psb_ipk_) :: info + call psb_realloc(nr,ag%w,info) + if (info /= psb_success_) return + ag%w = done + !call ag%set_c_default_w() + end subroutine d_bld_default_w + + subroutine d_set_prm_c_default_w(ag) + use psb_realloc_mod + use iso_c_binding + implicit none + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + integer(psb_ipk_) :: info + + !write(0,*) 'prm_c_deafult_w ' + call psb_safe_ab_cpy(ag%w,ag%w_nxt,info) + + end subroutine d_set_prm_c_default_w + + subroutine d_parmatch_bld_wnxt(ag,ilaggr,valaggr,nx) + use psb_realloc_mod + implicit none + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + integer(psb_lpk_), intent(in) :: ilaggr(:) + real(psb_dpk_), intent(in) :: valaggr(:) + integer(psb_ipk_), intent(in) :: nx + + integer(psb_ipk_) :: info,i,j + + ! The vector was already fixed in the call to BCMatch. + !write(0,*) 'Executing bld_wnxt ',nx + call psb_realloc(nx,ag%w_nxt,info) + + end subroutine d_parmatch_bld_wnxt + + function d_parmatch_aggregator_fmt() result(val) + implicit none + character(len=32) :: val + + val = "Parallel Matching aggregation" + end function d_parmatch_aggregator_fmt + + function amg_d_parmatch_aggregator_xt_desc() result(val) + implicit none + logical :: val + + val = .true. + end function amg_d_parmatch_aggregator_xt_desc + + function d_parmatch_aggregator_sizeof(ag) result(val) + use psb_realloc_mod + implicit none + class(amg_d_parmatch_aggregator_type), intent(in) :: ag + integer(psb_epk_) :: val + + val = 4 + val = val + psb_size(ag%w) + psb_size(ag%w_nxt) + if (allocated(ag%ac)) val = val + ag%ac%sizeof() + if (allocated(ag%base_a)) val = val + ag%base_a%sizeof() + if (allocated(ag%prol)) val = val + ag%prol%sizeof() + if (allocated(ag%restr)) val = val + ag%restr%sizeof() + if (allocated(ag%desc_ac)) val = val + ag%desc_ac%sizeof() + if (allocated(ag%base_desc)) val = val + ag%base_desc%sizeof() + if (allocated(ag%desc_ax)) val = val + ag%desc_ax%sizeof() + + end function d_parmatch_aggregator_sizeof + + subroutine d_parmatch_aggregator_descr(ag,parms,iout,info) + implicit none + class(amg_d_parmatch_aggregator_type), intent(in) :: ag + type(amg_dml_parms), intent(in) :: parms + integer(psb_ipk_), intent(in) :: iout + integer(psb_ipk_), intent(out) :: info + + write(iout,*) 'Parallel Matching Aggregator' + write(iout,*) ' Number of matching sweeps: ',ag%n_sweeps + write(iout,*) ' Matching algorithm : MatchBoxP (PREIS)' + write(iout,*) 'Aggregator object type: ',ag%fmt() + call parms%mldescr(iout,info) + + return + end subroutine d_parmatch_aggregator_descr + + function is_legal_malg(alg) result(val) + logical :: val + integer(psb_ipk_) :: alg + + val = (0==alg) + end function is_legal_malg + + function is_legal_csize(csize) result(val) + logical :: val + integer(psb_ipk_) :: csize + + val = ((-1==csize).or.(csize >0)) + end function is_legal_csize + + function is_legal_nsweeps(nsw) result(val) + logical :: val + integer(psb_ipk_) :: nsw + + val = (1<=nsw) + end function is_legal_nsweeps + + function is_legal_nlevels(nlv) result(val) + logical :: val + integer(psb_ipk_) :: nlv + + val = (1<=nlv) + end function is_legal_nlevels + + + subroutine d_parmatch_aggregator_update_next(ag,agnext,info) + use psb_realloc_mod + implicit none + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + class(amg_d_base_aggregator_type), target, intent(inout) :: agnext + integer(psb_ipk_), intent(out) :: info + + ! + ! + select type(agnext) + class is (amg_d_parmatch_aggregator_type) + if (.not.is_legal_malg(agnext%matching_alg)) & + & agnext%matching_alg = ag%matching_alg + if (.not.is_legal_nsweeps(agnext%n_sweeps))& + & agnext%n_sweeps = ag%n_sweeps + if (.not.is_legal_csize(agnext%max_csize))& + & agnext%max_csize = ag%max_csize + if (.not.is_legal_nlevels(agnext%max_nlevels))& + & agnext%max_nlevels = ag%max_nlevels + ! Is this going to generate shallow copies/memory leaks/double frees? + ! To be investigated further. + call psb_safe_ab_cpy(ag%w_nxt,agnext%w,info) + call agnext%set_c_default_w() + if (ag%unsmoothed_hierarchy) then + agnext%unsmoothed_hierarchy = .true. + call move_alloc(ag%rwdesc,agnext%base_desc) + call move_alloc(ag%rwa,agnext%base_a) + end if + + class default + ! What should we do here? + end select + info = 0 + end subroutine d_parmatch_aggregator_update_next + + subroutine d_parmatch_aggr_csetc(ag,what,val,info,idx) + + Implicit None + + ! Arguments + class(amg_d_parmatch_aggregator_type), intent(inout) :: ag + character(len=*), intent(in) :: what + character(len=*), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: idx + integer(psb_ipk_) :: err_act, iwhat + character(len=20) :: name='d_parmatch_aggr_cseti' + info = psb_success_ + + ! For now we ignore IDX + + select case(psb_toupper(trim(what))) + case('PRMC_REPRODUCIBLE_MATCHING') + select case(psb_toupper(trim(val))) + case('F','FALSE') + ag%reproducible_matching = .false. + case('REPRODUCIBLE','TRUE','T') + ag%reproducible_matching =.true. + end select + case('PRMC_NEED_SYMMETRIZE') + select case(psb_toupper(trim(val))) + case('FALSE','F') + ag%need_symmetrize = .false. + case('SYMMETRIZE','TRUE','T') + ag%need_symmetrize =.true. + end select + case('PRMC_UNSMOOTHED_HIERARCHY') + select case(psb_toupper(trim(val))) + case('F','FALSE') + ag%unsmoothed_hierarchy = .false. + case('T','TRUE') + ag%unsmoothed_hierarchy =.true. + end select + case default + ! Do nothing + end select + return + end subroutine d_parmatch_aggr_csetc + + subroutine d_parmatch_aggr_cseti(ag,what,val,info,idx) + + Implicit None + + ! Arguments + class(amg_d_parmatch_aggregator_type), intent(inout) :: ag + character(len=*), intent(in) :: what + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: idx + integer(psb_ipk_) :: err_act, iwhat + character(len=20) :: name='d_parmatch_aggr_cseti' + info = psb_success_ + + ! For now we ignore IDX + + select case(psb_toupper(trim(what))) + case('PRMC_MATCH_ALG') + ag%matching_alg=val + case('PRMC_SWEEPS') + ag%n_sweeps=val + case('AGGR_SIZE') + ag%orig_aggr_size = val + ag%n_sweeps=max(1,ceiling(log(val*1.0)/log(2.0))) + case('PRMC_MAX_CSIZE') + ag%max_csize=val + case('PRMC_MAX_NLEVELS') + ag%max_nlevels=val + case('PRMC_W_SIZE') + call ag%bld_default_w(val) + case('PRMC_REPRODUCIBLE_MATCHING') + ag%reproducible_matching = (val == 1) + case('PRMC_NEED_SYMMETRIZE') + ag%need_symmetrize = (val == 1) + case('PRMC_UNSMOOTHED_HIERARCHY') + ag%unsmoothed_hierarchy = (val == 1) + case default + ! Do nothing + end select + return + end subroutine d_parmatch_aggr_cseti + + subroutine d_parmatch_aggr_set_default(ag) + + Implicit None + + ! Arguments + class(amg_d_parmatch_aggregator_type), intent(inout) :: ag + character(len=20) :: name='d_parmatch_aggr_set_default' + ag%matching_alg = 0 + ag%n_sweeps = 1 + ag%jacobi_sweeps = 0 + ag%max_nlevels = 36 + ag%max_csize = -1 + ! + ! Apparently BootCMatch works better + ! by keeping all entries + ! + ag%do_clean_zeros = .false. + + return + + end subroutine d_parmatch_aggr_set_default + + subroutine d_parmatch_aggregator_free(ag,info) + use iso_c_binding + implicit none + class(amg_d_parmatch_aggregator_type), intent(inout) :: ag + integer(psb_ipk_), intent(out) :: info + + info = 0 + if ((info == 0).and.allocated(ag%w)) deallocate(ag%w,stat=info) + if ((info == 0).and.allocated(ag%w_nxt)) deallocate(ag%w_nxt,stat=info) + if ((info == 0).and.allocated(ag%prol)) then + call ag%prol%free(); deallocate(ag%prol,stat=info) + end if + if ((info == 0).and.allocated(ag%restr)) then + call ag%restr%free(); deallocate(ag%restr,stat=info) + end if + if ((info == 0).and.allocated(ag%ac)) then + call ag%ac%free(); deallocate(ag%ac,stat=info) + end if + if ((info == 0).and.allocated(ag%base_a)) then + call ag%base_a%free(); deallocate(ag%base_a,stat=info) + end if + if ((info == 0).and.allocated(ag%rwa)) then + call ag%rwa%free(); deallocate(ag%rwa,stat=info) + end if + if ((info == 0).and.allocated(ag%desc_ac)) then + call ag%desc_ac%free(info); deallocate(ag%desc_ac,stat=info) + end if + if ((info == 0).and.allocated(ag%desc_ax)) then + call ag%desc_ax%free(info); deallocate(ag%desc_ax,stat=info) + end if + if ((info == 0).and.allocated(ag%base_desc)) then + call ag%base_desc%free(info); deallocate(ag%base_desc,stat=info) + end if + if ((info == 0).and.allocated(ag%rwdesc)) then + call ag%rwdesc%free(info); deallocate(ag%rwdesc,stat=info) + end if + + end subroutine d_parmatch_aggregator_free + + subroutine d_parmatch_aggregator_clone(ag,agnext,info) + implicit none + class(amg_d_parmatch_aggregator_type), intent(inout) :: ag + class(amg_d_base_aggregator_type), allocatable, intent(inout) :: agnext + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(agnext)) then + call agnext%free(info) + if (info == 0) deallocate(agnext,stat=info) + end if + if (info /= 0) return + allocate(agnext,source=ag,stat=info) + select type(agnext) + class is (amg_d_parmatch_aggregator_type) + call agnext%set_c_default_w() + class default + ! Should never ever get here + info = -1 + end select + end subroutine d_parmatch_aggregator_clone + + subroutine amg_d_parmatch_aggregator_bld_map(ag,desc_a,desc_ac,ilaggr,nlaggr,& + & op_restr,op_prol,map,info) + use psb_base_mod + implicit none + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + type(psb_desc_type), intent(in), target :: desc_a, desc_ac + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(psb_dspmat_type), intent(inout) :: op_prol, op_restr + type(psb_dlinmap_type), intent(out) :: map + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_parmatch_aggregator_bld_map' + + call psb_erractionsave(err_act) + ! + ! Copy the prolongation/restriction matrices into the descriptor map. + ! op_restr => PR^T i.e. restriction operator + ! op_prol => PR i.e. prolongation operator + ! + ! For parmatch have an explicit copy of the descriptors + ! + if (allocated(ag%desc_ax)) then +!!$ write(0,*) 'Building linmap with ag%desc_ax ',ag%desc_ax%get_local_rows(),ag%desc_ax%get_local_cols(),& +!!$ & desc_ac%get_local_rows(),desc_ac%get_local_cols() + map = psb_linmap(psb_map_gen_linear_,ag%desc_ax,& + & desc_ac,op_restr,op_prol,ilaggr,nlaggr) + else + map = psb_linmap(psb_map_gen_linear_,desc_a,& + & desc_ac,op_restr,op_prol,ilaggr,nlaggr) + end if + if(info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='sp_Free') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + end subroutine amg_d_parmatch_aggregator_bld_map + +end module amg_d_parmatch_aggregator_mod diff --git a/amgprec/amg_s_matchboxp_mod.f90 b/amgprec/amg_s_matchboxp_mod.f90 new file mode 100644 index 00000000..123061f1 --- /dev/null +++ b/amgprec/amg_s_matchboxp_mod.f90 @@ -0,0 +1,1768 @@ +! +! +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! +! (C) Copyright 2021 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! moved here from amg4psblas-extension +! +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +module smatchboxp_mod + + use iso_c_binding + use psb_base_cbind_mod + + interface MatchBoxPC + subroutine sMatchBoxPC(nlver,nledge,verlocptr,verlocind,edgelocweight,& + & verdistance, mate, myrank, numprocs, icomm,& + & msgindsent,msgactualsent,msgpercent,& + & ph0_time, ph1_time, ph2_time, ph1_card, ph2_card) bind(c,name='MatchBoxPC') + use iso_c_binding + import :: psb_c_ipk_, psb_c_lpk_, psb_c_mpk_, psb_c_epk_ + implicit none + + integer(psb_c_lpk_), value :: nlver,nledge + integer(psb_c_mpk_), value :: myrank, numprocs, icomm + integer(psb_c_lpk_) :: verlocptr(*),verlocind(*), verdistance(*) + integer(psb_c_lpk_) :: mate(*) + integer(psb_c_lpk_) :: msgindsent(*),msgactualsent(*) + real(c_double) :: ph0_time, ph1_time, ph2_time + integer(psb_c_lpk_) :: ph1_card(*),ph2_card(*) + real(c_float) :: edgelocweight(*) + real(c_double) :: msgpercent(*) + end subroutine sMatchBoxPC + end interface MatchBoxPC + + interface i_aggr_assign + module procedure i_saggr_assign + end interface i_aggr_assign + + interface build_matching + module procedure sbuild_matching + end interface build_matching + + interface build_ahat + module procedure sbuild_ahat + end interface build_ahat + + interface psb_gtranspose + module procedure psb_sgtranspose + end interface psb_gtranspose + + interface psb_htranspose + module procedure psb_shtranspose + end interface psb_htranspose + + interface PMatchBox + module procedure sPMatchBox + end interface PMatchBox + +contains + + subroutine smatchboxp_build_prol(w,a,desc_a,ilaggr,nlaggr,prol,info,& + & symmetrize,reproducible,display_inp, display_out, print_out) + use psb_base_mod + use psb_util_mod + use iso_c_binding + implicit none + real(psb_spk_), allocatable, intent(inout) :: w(:) + type(psb_sspmat_type), intent(inout) :: a + type(psb_desc_type) :: desc_a + integer(psb_lpk_), allocatable, intent(out) :: ilaggr(:) + integer(psb_lpk_), allocatable, intent(out) :: nlaggr(:) + type(psb_lsspmat_type), intent(out) :: prol + integer(psb_ipk_), intent(out) :: info + logical, optional, intent(in) :: display_inp, display_out, reproducible + logical, optional, intent(in) :: symmetrize, print_out + + ! + ! + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: iam, np, iown + integer(psb_ipk_) :: nr, nc, sweep, nzl, ncsave, nct, idx + integer(psb_lpk_) :: i, k, kg, idxg, ntaggr, naggrm1, naggrp1, & + & ip, nlpairs, nlsingl, nunmatched, lnr + real(psb_spk_) :: wk, widx, wmax, nrmagg + real(psb_spk_), allocatable :: wtemp(:) + integer(psb_lpk_), allocatable :: mate(:), ilv(:) + integer(psb_ipk_), save :: cnt=1 + character(len=256) :: aname + type(psb_ls_coo_sparse_mat) :: tmpcoo + logical :: display_out_, print_out_, reproducible_ + logical, parameter :: dump=.false., debug=.false., dump_mate=.false., & + & debug_ilaggr=.false., debug_sync=.false. + integer(psb_ipk_), save :: idx_bldmtc=-1, idx_phase1=-1, idx_phase2=-1, idx_phase3=-1 + logical, parameter :: do_timings=.true. + + ictxt = desc_a%get_ctxt() + call psb_info(ictxt,iam,np) + + if ((do_timings).and.(idx_phase1==-1)) & + & idx_phase1 = psb_get_timer_idx("MBP_BLDP: phase1 ") + if ((do_timings).and.(idx_bldmtc==-1)) & + & idx_bldmtc = psb_get_timer_idx("MBP_BLDP: buil_matching") + if ((do_timings).and.(idx_phase2==-1)) & + & idx_phase2 = psb_get_timer_idx("MBP_BLDP: phase2 ") + if ((do_timings).and.(idx_phase3==-1)) & + & idx_phase3 = psb_get_timer_idx("MBP_BLDP: phase3 ") + + if (do_timings) call psb_tic(idx_phase1) + + if (present(display_out)) then + display_out_ = display_out + else + display_out_ = .false. + end if + if (present(print_out)) then + print_out_ = print_out + else + print_out_ = .false. + end if + if (present(reproducible)) then + reproducible_ = reproducible + else + reproducible_ = .false. + end if + + allocate(nlaggr(0:np-1),stat=info) + if (info /= 0) then + return + end if + + nlaggr = 0 + ilv = [(i,i=1,desc_a%get_local_cols())] + call desc_a%l2gip(ilv,info,owned=.false.) + +!!$ if (dump) then +!!$ write(aname,'(a,i3.3,a)') 'amat',iam,'.mtx' +!!$ call a%print(fname=aname,head='Test ',iv=ilv) +!!$ end if + + call psb_geall(ilaggr,desc_a,info) + ilaggr = -1 + call psb_geasb(ilaggr,desc_a,info) + nr = a%get_nrows() + nc = a%get_ncols() + if (size(w) < nc) then + call psb_realloc(nc,w,info) + end if + call psb_halo(w,desc_a,info) + + if (debug) write(0,*) iam,' buildprol into buildmatching:',& + & nr, nc + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' buildprol into buildmatching:',& + & nr, nc + end if + if (do_timings) call psb_toc(idx_phase1) + if (do_timings) call psb_tic(idx_bldmtc) + call build_matching(w,a,desc_a,mate,info,display_inp=display_inp,symmetrize=symmetrize) + if (do_timings) call psb_toc(idx_bldmtc) + if (debug) write(0,*) iam,' buildprol from buildmatching:',& + & info + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' out from buildmatching:', info + end if + + if (info == 0) then + if (do_timings) call psb_tic(idx_phase2) + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' Into building the tentative prol:' + end if + + call psb_geall(wtemp,desc_a,info) + wtemp = szero + call psb_geasb(wtemp,desc_a,info) + + nlaggr(iam) = 0 + nlpairs = 0 + nlsingl = 0 + nunmatched = 0 + ! + ! First sweep + ! On return from build_matching, mate has been converted to local numbering, + ! so assigning to idx is OK. + ! + do k=1, nr + idx = mate(k) + ! + ! Figure out an allocation of aggregates to processes + ! + if (idx < 0) then + ! + ! Unmatched vertex, potential singleton. + ! + nunmatched = nunmatched + 1 + if (abs(w(k)) nc) then + write(0,*) 'Impossible: mate(k) > nc' + cycle + else + + if (ilaggr(k) == -1) then + + wk = w(k) + widx = w(idx) + wmax = max(abs(wk),abs(widx)) + nrmagg = wmax*sqrt((wk/wmax)**2+(widx/wmax)**2) + if (nrmagg > epsilon(nrmagg)) then + if (idx <= nr) then + if (ilaggr(idx) == -1) then + ! Now, if both vertices are local, the aggregate is local + ! (kinda obvious). + nlaggr(iam) = nlaggr(iam) + 1 + ilaggr(k) = nlaggr(iam) + ilaggr(idx) = nlaggr(iam) + wtemp(k) = w(k)/nrmagg + wtemp(idx) = w(idx)/nrmagg + end if + nlpairs = nlpairs+1 + else if (idx <= nc) then + ! + ! This pair involves a non-local vertex. + ! Set wtemp, then apply a tie-breaking algorithm + wtemp(k) = w(k)/nrmagg + idxg = ilv(idx) + kg = ilv(k) + if (reproducible_) then + ! + ! Tie-break by assigning to the lowest-index process + ! so that the numbering is the same as with NP=1 + ! + if (kg < idxg) then + nlaggr(iam) = nlaggr(iam) + 1 + ilaggr(k) = nlaggr(iam) + nlpairs = nlpairs+1 + else + ilaggr(k) = -2 + end if + else + ! Use a statistically unbiased tie-breaking rule, + ! this will give an even spread. + ! Delegate to i_aggr_assign. + ! Should be a symmetric function. + ! + call desc_a%indxmap%qry_halo_owner(idx,iown,info) + ip = i_aggr_assign(iam, iown, kg, idxg, wk, widx, nrmagg) + if (iam == ip) then + nlaggr(iam) = nlaggr(iam) + 1 + ilaggr(k) = nlaggr(iam) + nlpairs = nlpairs+1 + else + ilaggr(k) = -2 + end if + end if + end if + else + if (abs(w(k))0) + ! + do k=1,nr + if (ilaggr(k) > 0) ilaggr(k) = ilaggr(k) + naggrm1 + end do + call psb_halo(ilaggr,desc_a,info) + call psb_halo(wtemp,desc_a,info) + ! Cleanup as yet unmarked entries + do k=1,nr + if (ilaggr(k) == -2) then + idx = mate(k) + if (idx > nr) then + i = ilaggr(idx) + if (i > 0) then + ilaggr(k) = i + else + write(0,*) 'Error : unresolved (paired) index ',k,idx,i,nr,nc, ilv(k),ilv(idx) + end if + else + write(0,*) 'Error : unresolved (paired) index ',k,idx,i,nr,nc, ilv(k),ilv(idx) + end if + end if + if (ilaggr(k) <0) then + write(0,*) 'Matchboxp: Funny number: ',k,ilv(k),ilaggr(k),wtemp(k) + end if + end do + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' Done building the tentative prol:' + end if + + + if (dump_mate) then + block + integer(psb_lpk_), allocatable :: glaggr(:) + write(aname,'(a,i3.3,a,i3.3,a)') 'mateg-',cnt,'-p',iam,'.mtx' + open(20,file=aname) + write(20,'(a,I3,a)') '% sparse vector on process ',iam,' ' + do k=1, nr + write(20,'(3(I8,1X))') ilv(k),ilv(mate(k)) + end do + close(20) + write(aname,'(a,i3.3,a,i3.3,a)') 'nloc-',cnt,'-p',iam,'.mtx' + open(20,file=aname) + write(20,'(a,I3,a)') '% sparse vector on process ',iam,' ' + write(20,'(a,I12,a)') 'nlpairs ',nlpairs + write(20,'(a,I12,a)') 'nlsingl ',nlsingl + write(20,'(a,I12,a)') 'nlaggr(iam) ',nlaggr(iam) + close(20) + write(aname,'(a,i3.3,a,i3.3,a,i3.3,a)') 'ilaggr-',cnt,'-i',iam,'-p',np,'.mtx' + open(20,file=aname) + write(20,'(a,I3,a)') '% sparse vector on process ',iam,' ' + do k=1, nr + write(20,'(3(I8,1X))') ilv(k),ilaggr(k) + end do + close(20) + + write(aname,'(a,i3.3,a,i3.3,a)') 'glaggr-',cnt,'-p',np,'.mtx' + call psb_gather(glaggr,ilaggr,desc_a,info,root=izero) + if (iam==0) call mm_array_write(glaggr,'Aggregates ',info,filename=aname) + + cnt=cnt+1 + end block + end if + block + integer(psb_lpk_) :: v(3) + v(1) = nunmatched + v(2) = nlsingl + v(3) = nlpairs + call psb_sum(ictxt,v) + nunmatched = v(1) + nlsingl = v(2) + nlpairs = v(3) + +!!$ call psb_sum(ictxt,nunmatched) +!!$ call psb_sum(ictxt,nlsingl) +!!$ call psb_sum(ictxt,nlpairs) + end block + if (iam == 0) then + write(0,*) 'Matching statistics: Unmatched nodes ',& + & nunmatched,' Singletons:',nlsingl,' Pairs:',nlpairs + end if + + if (display_out_) then + block + integer(psb_ipk_) :: idx + ! + ! And finally print out + ! + do i=0,np-1 + call psb_barrier(ictxt) + if (iam == i) then + write(0,*) 'Process ', iam,' hosts aggregates: (',naggrm1+1,' : ',naggrp1-1,')' + do k=1, nr + idx = mate(k) + kg = ilv(k) + if (idx >0) then + idxg = ilv(idx) + else + idxg = -1 + end if + if (idx < 0) then + write(0,*) kg,': singleton (',kg,' ( Proc',iam,') ) into aggregate => ', ilaggr(k) + else if (idx <= nr) then + write(0,*) kg,': paired with (',idxg,' ( Proc',iam,') ) into aggregate => ', ilaggr(k) + else + call desc_a%indxmap%qry_halo_owner(idx,iown,info) + write(0,*) kg,': paired with (',idxg,' ( Proc',iown,') ) into aggregate => ', ilaggr(k) + end if + end do + flush(0) + end if + end do + end block + end if + + ! Dirty trick: allocate tmpcoo with local + ! number of aggregates, then change to ntaggr. + ! Just to make sure the allocation is not global + lnr = nr + call tmpcoo%allocate(lnr,nlaggr(iam),lnr) + k = 0 + do i=1,nr + ! + ! Note: at this point, a value ilaggr(i)<=0 + ! tags an unaggregated row, and it has to be + ! left alone (i.e.: it should stay at fine level only) + ! + if (ilaggr(i)>0) then + k = k + 1 + tmpcoo%val(k) = wtemp(i) + tmpcoo%ia(k) = i + tmpcoo%ja(k) = ilaggr(i) + end if + end do + call tmpcoo%set_nzeros(k) + call tmpcoo%set_dupl(psb_dupl_add_) + call tmpcoo%set_sorted() ! This is now in row-major + + if (display_out_) then + call psb_barrier(ictxt) + flush(0) + + if (iam == 0) write(0,*) 'Prolongator: ' + flush(0) + do i=0,np-1 + call psb_barrier(ictxt) + if (iam == i) then + do k=1, nr + write(0,*) ilv(tmpcoo%ia(k)),tmpcoo%ja(k), tmpcoo%val(k) + end do + flush(0) + end if + end do + end if + + call prol%mv_from(tmpcoo) + if (do_timings) call psb_toc(idx_phase3) + + if (print_out_) then + write(aname,'(a,i3.3,a)') 'prol-g-',iam,'.mtx' + call prol%print(fname=aname,head='Test ',ivr=ilv) + write(aname,'(a,i3.3,a)') 'prol-',iam,'.mtx' + call prol%print(fname=aname,head='Test ') + end if + + else + write(0,*) iam,' : error from Matching: ',info + end if + + end subroutine smatchboxp_build_prol + + function i_saggr_assign(iam, iown, kg, idxg, wk, widx, nrmagg) & + & result(iproc) + ! + ! How to break ties? This + ! must be a symmetric function, i.e. + ! the result value iproc MUST be the same upon + ! swapping simultaneously all pairs + ! (iam,iown) (kg,idxg) (wk,widx) + ! + implicit none + integer(psb_ipk_) :: iproc + integer(psb_ipk_), intent(in) :: iam, iown + integer(psb_lpk_), intent(in) :: kg, idxg + real(psb_spk_), intent(in) :: wk, widx, nrmagg + ! + integer(psb_lpk_) :: kg2, idxg2 + + idxg2 = mod(idxg,2) + kg2 = mod(kg,2) + + ! + ! In this particular tie-breaking rule we are ignoring WK, WIDX. + ! This should statistically entail an even spread of aggregates. + ! + if ((kg2/=0).and.(idxg2/=0)) then + ! If both global indices are odd, + ! assign to the higher number process + iproc = max(iam,iown) + + else if ((kg2==0).and.(idxg2==0)) then + ! If both global indices are even, + ! assign to the lower number process; + iproc = min(iam,iown) + else + ! If the global indices are mixed, + ! then assign to the owner of the odd index + if (kg2 /= 0) then + iproc = iam + else + iproc = iown + end if + end if + end function i_saggr_assign + + + subroutine sbuild_matching(w,a,desc_a,mate,info,display_inp, symmetrize) + use psb_base_mod + use psb_util_mod + use iso_c_binding + implicit none + real(psb_spk_) :: w(:) + type(psb_sspmat_type), intent(inout) :: a + type(psb_desc_type) :: desc_a + integer(psb_lpk_), allocatable, intent(out) :: mate(:) + integer(psb_ipk_), intent(out) :: info + logical, optional :: display_inp, symmetrize + + type(psb_sspmat_type) :: ahatnd + type(psb_ls_csr_sparse_mat) :: tcsr + type(psb_desc_type) :: desc_blk + integer(psb_lpk_), allocatable :: vnl(:) + integer(psb_lpk_), allocatable :: ph1crd(:), ph2crd(:) + integer(psb_lpk_), allocatable :: msgis(:),msgas(:) + real(psb_dpk_), allocatable :: msgprc(:) + real(psb_dpk_) :: ph0t,ph1t,ph2t + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: iam, np + integer(psb_lpk_) :: nr, nc, nz, i, nunmatch + integer(psb_ipk_), save :: cnt=2 + logical, parameter :: debug=.false., dump_ahat=.false., debug_sync=.false. + logical, parameter :: old_style=.false., sort_minp=.true. + character(len=40) :: name='build_matching', fname + integer(psb_ipk_), save :: idx_cmboxp=-1, idx_bldahat=-1, idx_phase2=-1, idx_phase3=-1 + logical, parameter :: do_timings=.true. + + ictxt = desc_a%get_ctxt() + call psb_info(ictxt,iam,np) + + if (debug) write(0,*) iam,' buildmatching into build_ahat:' + if ((do_timings).and.(idx_bldahat==-1)) & + & idx_bldahat = psb_get_timer_idx("BLD_MTCH: bld_ahat") + if ((do_timings).and.(idx_cmboxp==-1)) & + & idx_cmboxp = psb_get_timer_idx("BLD_MTCH: PMatchBox") + if ((do_timings).and.(idx_phase2==-1)) & + & idx_phase2 = psb_get_timer_idx("BLD_MTCH: phase2") + if ((do_timings).and.(idx_phase3==-1)) & + & idx_phase3 = psb_get_timer_idx("BLD_MTCH: phase3") + + if (debug) write(0,*) iam,' buildmatching from cd_renum:',info + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' Into build_ahat:' + end if + if (do_timings) call psb_tic(idx_bldahat) + call build_ahat(w,a,ahatnd,desc_a,info,symmetrize=symmetrize) + if (do_timings) call psb_toc(idx_bldahat) + if (info /= 0) then + write(0,*) 'Error from build_ahat ', info + end if + if (debug) write(0,*) iam,' buildmatching from build_ahat:',info + if (dump_ahat) then + block + character(len=40) :: fname + integer(psb_ipk_) :: k, nr + type(psb_lsspmat_type) :: tmp_mat + integer(psb_lpk_), allocatable :: ilv(:) + if (.false.) then + ilv = desc_a%get_global_indices(owned=.false.) + write(fname,'(a,i3.3,a,i3.3,a)') 'w-i',cnt,'-p',iam,'.mtx' + open(20,file=fname) + write(20,'(a,I3,a)') '% W vector ',iam,' ' + do k=1, nr + write(20,'(I8,1X,es26.18)') ilv(k),w(k) + end do + close(20) + write(fname,'(a,i3.3,a,i3.3,a)') 'aa-i',cnt,'-p',iam,'.mtx' +!!$ call a%print(fname=fname,head='Original matrix ',iv=ilv) + write(fname,'(a,i3.3,a,i3.3,a)') 'ahat-i',cnt,'-p',iam,'.mtx' + call ahatnd%print(fname=fname,head='Input to matching ') + else + write(fname,'(a,i3.3,a,i3.3,a)') 'ahat-i',cnt,'-p',np,'.mtx' + call psb_gather(tmp_mat,ahatnd,desc_a,info,root=izero) + if (iam == 0) call tmp_mat%print(fname=fname,head='Input to matching ') + end if + + end block + end if + if (do_timings) call psb_tic(idx_phase2) + ! + ! Now AHATND should be symmetric and positive, without a diagonal. + ! Almost ready to call matching routine. + ! + call ahatnd%mv_to(tcsr) + nr = tcsr%get_nrows() + nc = tcsr%get_ncols() + nz = tcsr%get_nzeros() + allocate(vnl(0:np),mate(max(nr,nc)),& + & msgis(np),msgas(np),msgprc(np),& + & ph1crd(np),ph2crd(np),stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + vnl = 0 + vnl(iam) = nr + call psb_sum(ictxt,vnl) + vnl(1:np) = vnl(0:np-1) + vnl(0) = 1 + do i=1,np + vnl(i) = vnl(i-1)+vnl(i) + end do + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' renum_blk' + end if + + call psb_cd_renum_block(desc_a,desc_blk,info) + associate (vlptr => tcsr%irp, vlind => tcsr%ja, ewght => tcsr%val) + ! Put column indices in global numbering + call psb_loc_to_glob(vlind,desc_blk,info,iact='E') + mate = -1 + + if (sort_minp) then + block + integer(psb_ipk_) :: ir1,ir2,nrz + do i=1,nr + ir1 = vlptr(i) + ir2 = vlptr(i+1) -1 + nrz = (ir2-ir1+1) + call fix_order(nrz,vlind(ir1:ir2),ewght(ir1:ir2),info) + end do + end block + end if + + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' into PMatchBox' + end if + if (do_timings) call psb_toc(idx_phase2) + ! + ! Now call matching! + ! + if (debug) write(0,*) iam,' buildmatching into PMatchBox:' + if (do_timings) call psb_tic(idx_cmboxp) + call PMatchBox(nr,nz,vlptr,vlind,ewght,& + & vnl, mate, iam, np,ictxt,& + & msgis,msgas,msgprc,ph0t,ph1t,ph2t,ph1crd,ph2crd,info,display_inp) + if (do_timings) call psb_toc(idx_cmboxp) + if (debug) write(0,*) iam,' buildmatching from PMatchBox:', info + if (debug_sync) then + call psb_max(ictxt,info) + if (iam == 0) write(0,*)' done PMatchBox', info + end if + end associate + if (do_timings) call psb_tic(idx_phase3) + nunmatch = count(mate(1:nr)<=0) + call psb_glob_to_loc(mate(1:nr),desc_blk,info,iact='I',owned=.false.) + nunmatch = abs(nunmatch - count(mate(1:nr)<=0)) + ! call psb_sum(ictxt,nunmatch) + if (nunmatch /= 0) write(0,*) iam,' Unmatched nodes local imbalance ',nunmatch +!!$ if (count(mate(1:nr)<0) /= nunmatch) write(0,*) iam,' Matching results ?',& +!!$ & nunmatch, count(mate(1:nr)<0) + call desc_blk%free(info) + if (debug_sync) then + call psb_barrier(ictxt) + if (iam == 0) write(0,*)' done build_matching ' + end if + + if (dump_ahat) then + block + integer(psb_lpk_), allocatable :: mg(:), ml(:) + real(psb_spk_), allocatable :: wl(:), wg(:) + ml=mate + call psb_loc_to_glob(ml(1:nr),desc_a,info,iact='E') + write(fname,'(a,i3.3,a,i3.3,a)') 'mate-i',cnt,'-p',np,'.mtx' + call psb_gather(mg,ml,desc_a,info,root=izero) + if (iam==0) call mm_array_write(mg,'Output from matching ',info,filename=fname) + wl=w + write(fname,'(a,i3.3,a,i3.3,a)') 'w-',cnt,'-p',np,'.mtx' + call psb_gather(wg,wl,desc_a,info,root=izero) + if (iam==0) call mm_array_write(wg,'Input smooth vector ',info,filename=fname) + + end block + cnt = cnt + 1 + end if + if (do_timings) call psb_toc(idx_phase3) + return + +9999 continue + call psb_error(ictxt) + contains + subroutine fix_order(n,ja,val,iret) + use psb_base_mod + implicit none + integer(psb_ipk_), intent(in) :: n + integer(psb_lpk_), intent(inout) :: ja(:) + real(psb_spk_), intent(inout) :: val(:) + integer(psb_ipk_),intent(out) :: iret + integer(psb_lpk_), allocatable :: ix(:) + real(psb_spk_), allocatable :: tmp(:) + + allocate(ix(n), tmp(n),stat=iret) + if (iret /= 0) return + call psb_msort(ja(1:n),ix=ix,dir=psb_sort_up_) + tmp(1:n) = val(ix(1:n)) + val(1:n) = tmp(1:n) + end subroutine fix_order + + end subroutine sbuild_matching + + subroutine sbuild_ahat(w,a,ahat,desc_a,info,symmetrize) + use psb_base_mod + implicit none + real(psb_spk_), intent(in) :: w(:) + type(psb_sspmat_type), intent(inout) :: a + type(psb_sspmat_type), intent(out) :: ahat + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + logical, optional :: symmetrize + + type(psb_sspmat_type) :: atnd + type(psb_s_coo_sparse_mat) :: tcoo1, tcoo2, tcoo3 + real(psb_spk_), allocatable :: diag(:) + integer(psb_lpk_), allocatable :: ilv(:) + logical, parameter :: debug=.false., dump=.false., dump_ahat=.false., dump_inp=.false. + logical :: symmetrize_ + real(psb_spk_) :: aii, ajj, aij, wii, wjj, tmp1, tmp2, minabs, edgnrm + integer(psb_ipk_) :: nr, nc, nz, i, nz2, nrg, ii, jj, k, k2, ncols + integer(psb_lpk_) :: nzglob + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: me, np + character(len=80) :: aname + real(psb_spk_), parameter :: eps=epsilon(1.d0) + integer(psb_ipk_), save :: idx_glbt=-1, idx_phase1=-1, idx_phase2=-1 + logical, parameter :: do_timings=.true. + logical, parameter :: debug_symmetry = .false., check_size=.false. + logical, parameter :: unroll_logtrans=.false. + + ictxt = desc_a%get_ctxt() + call psb_info(ictxt,me,np) + if (present(symmetrize)) then + symmetrize_ = symmetrize + else + symmetrize_ = .false. + end if + if ((do_timings).and.(idx_phase1==-1)) & + & idx_phase1 = psb_get_timer_idx("BLD_AHAT: phase1 ") + if ((do_timings).and.(idx_glbt==-1)) & + & idx_glbt = psb_get_timer_idx("BLD_AHAT: glob_transp") + if ((do_timings).and.(idx_phase2==-1)) & + & idx_phase2 = psb_get_timer_idx("BLD_AHAT: phase2 ") + if (do_timings) call psb_tic(idx_phase1) + if (dump_inp) then + block + type(psb_lsspmat_type) :: amglob + write(aname,'(a,i3.3,a)') 'a-bld-inp-',me,'.mtx' + call a%print(fname=aname,head='Test ') + call psb_gather(amglob,a,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i0,a)') 'a-bld-inp-g-',amglob%get_nrows(),'.mtx' + call amglob%print(fname=aname,head='Test ') + end if + end block + end if + + ! + ! Extract diagonal of A + ! + call a%cp_to(tcoo1) +!!$ call a%triu(atnd,info) +!!$ call atnd%mv_to(tcoo1) + nr = tcoo1%get_nrows() + nc = tcoo1%get_ncols() + nz = tcoo1%get_nzeros() + diag = a%get_diag(info) + call psb_realloc(nc,diag,info) + call psb_halo(diag,desc_a,info) + + ncols = desc_a%get_local_cols() + call psb_realloc(ncols,ilv,info) + do i=1, ncols + ilv(i) = i + end do + call desc_a%l2gip(ilv,info,owned=.false.) + nr = tcoo1%get_nrows() + nc = tcoo1%get_ncols() + nz = tcoo1%get_nzeros() + call tcoo2%allocate(nr,nc,int(1.25*nz)) + k2 = 0 + ! + ! Build the entries of \^A for matching + ! + minabs = huge(szero) + do k = 1, nz + ii = tcoo1%ia(k) + jj = tcoo1%ja(k) + ! + ! Run over only the upper triangle, then transpose. + ! In theory, A was SPD; in practice, since we + ! are building the hierarchy with Galerkin products P^T A P, + ! A will be affected by round-off + ! + if (ilv(ii) eps) then + tcoo2%val(k2) = done - (2*done*aij*wii*wjj)/(aii*(wii**2) + ajj*(wjj**2)) + else + tcoo2%val(k2) = eps + end if +!!$ tcoo2%val(k2) = abs( tcoo2%val(k2) ) +!!$ minabs = min(minabs, tcoo2%val(k2) ) + end if + !else + ! write(0,*) 'build_ahat: index error :',ii,jj,' : boundaries :',nr,nc + !end if + end do + call tcoo2%set_nzeros(k2) + + if (dump) then + block + type(psb_lsspmat_type) :: amglob + call ahat%cp_from(tcoo2) + call psb_gather(amglob,ahat,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i0,a)') 'ahat-gu-',desc_a%get_global_rows(),'.mtx' + call amglob%print(fname=aname,head='Test ') + end if + write(aname,'(a,i0,a,i0,a)') 'ahat-gu-',desc_a%get_global_rows(),'-p',me,'.mtx' + call ahat%print(fname=aname,head='Test ',iv=ilv) + write(aname,'(a,i0,a,i0,a)') 'ainp-ahat-',desc_a%get_global_rows(),'-p',me,'.mtx' + call a%print(fname=aname,head='Test ',iv=ilv) + call psb_gather(amglob,a,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i0,a)') 'ainp-ahat-g-',desc_a%get_global_rows(),'.mtx' + call amglob%print(fname=aname,head='Test ') + end if + write(0,*) 'Done build_ahat' + end block + end if + if (do_timings) call psb_toc(idx_phase1) + if (do_timings) call psb_tic(idx_glbt) + + call psb_glob_transpose(tcoo2,desc_a,info,atrans=tcoo1) + if (do_timings) call psb_toc(idx_glbt) + if (do_timings) call psb_tic(idx_phase2) + if (dump) then + block + type(psb_lsspmat_type) :: amglob + call atnd%cp_from(tcoo1) + call psb_gather(amglob,atnd,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i0,a)') 'ahat-gl-',desc_a%get_global_rows(),'.mtx' + call amglob%print(fname=aname,head='Test ') + end if + write(aname,'(a,i0,a,i0,a)') 'ahat-gl-',desc_a%get_global_rows(),'-p',me,'.mtx' + call ahat%print(fname=aname,head='Test ',iv=ilv) + write(0,*) 'Done build_ahat' + end block + end if + + nz = tcoo1%get_nzeros() + nz2 = tcoo2%get_nzeros() + call tcoo2%ensure_size(nz+nz2) + tcoo2%ia(nz2+1:nz2+nz) = tcoo1%ia(1:nz) + tcoo2%ja(nz2+1:nz2+nz) = tcoo1%ja(1:nz) + tcoo2%val(nz2+1:nz2+nz) = tcoo1%val(1:nz) + call tcoo2%set_nzeros(nz+nz2) + call tcoo2%fix(info) + call tcoo1%free() + nz = tcoo2%get_nzeros() + if (unroll_logtrans) then + minabs = huge(szero) + do k = 1, nz + tcoo2%val(k) = abs(tcoo2%val(k)) + minabs = min(minabs,tcoo2%val(k)) + end do + else + minabs = minval(abs(tcoo2%val(1:nz))) + end if + call psb_min(ictxt,minabs) + if (minabs <= szero) then + if (me == 0) write(0,*) me, 'Min value for log correction is <=zero! ' + minabs = done + end if + if (unroll_logtrans) then + do k = 1, nz + ! tcoo2%val has already been subject to abs() above. + tcoo2%val(k) = log(tcoo2%val(k)/(0.999*minabs)) + if (tcoo2%val(k)<0) write(0,*) me, 'Warning: negative log output!' + end do + else + tcoo2%val(1:nz) = log(abs(tcoo2%val(1:nz))/(0.999*minabs)) + if (any(tcoo2%val(1:nz)<0)) write(0,*) me, 'Warning: negative log output!' + end if + + call ahat%mv_from(tcoo2) + + if (do_timings) call psb_toc(idx_phase2) + + if (check_size) then + nzglob = ahat%get_nzeros() + call psb_sum(ictxt,nzglob) + if (me==0) write(0,*) 'From build_ahat: global nzeros ',desc_a%get_global_rows(),nzglob + end if + if (debug_symmetry) then + block + integer(psb_ipk_) :: nz1, nz2 + real(psb_spk_) :: mxv + call ahat%cp_to(tcoo1) + call psb_glob_transpose(tcoo1,desc_a,info,atrans=tcoo2) + nz1 = tcoo1%get_nzeros() + nz2 = tcoo2%get_nzeros() + call tcoo1%reallocate(nz1+nz2) + tcoo1%ia(nz1+1:nz1+nz2) = tcoo2%ia(1:nz2) + tcoo1%ja(nz1+1:nz1+nz2) = tcoo2%ja(1:nz2) + tcoo1%val(nz1+1:nz1+nz2) = -tcoo2%val(1:nz2) + call tcoo1%set_nzeros(nz1+nz2) + call tcoo1%set_dupl(psb_dupl_add_) + call tcoo1%fix(info) + nz1 = tcoo1%get_nzeros() + mxv = maxval(abs(tcoo1%val(1:nz1))) + call psb_max(ictxt,mxv) + if (me==0) write(0,*) 'Maximum disp from symmetry:',mxv + end block + end if + + if (dump_ahat) then + block + type(psb_lsspmat_type) :: amglob + write(aname,'(a,i3.3,a)') 'ahat-hfa-l-',me,'.mtx' + call ahat%print(fname=aname,head='Test ') + call psb_gather(amglob,ahat,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i0,a)') 'ahat-hfa-g-',amglob%get_nrows(),'.mtx' + call amglob%print(fname=aname,head='Test ') + end if + write(0,*) 'Done build_ahat' + end block + end if + + end subroutine sbuild_ahat + + +! subroutine build_ahat_old(w,a,ahat,desc_a,info,symmetrize) +! use psb_base_mod +! implicit none +! real(psb_dpk_) :: w(:) +! type(psb_ldspmat_type), intent(inout) :: a +! type(psb_ldspmat_type), intent(out) :: ahat +! type(psb_desc_type) :: desc_a +! integer(psb_ipk_), intent(out) :: info +! logical, optional :: symmetrize +! +! type(psb_ldspmat_type) :: atnd +! type(psb_ld_coo_sparse_mat) :: tcoo1, tcoo2, tcoo3 +! real(psb_dpk_), allocatable :: diag(:) +! integer(psb_lpk_), allocatable :: ilv(:) +! logical, parameter :: debug=.false., dump=.false., dump_ahat=.false. +! logical :: symmetrize_ +! logical, parameter :: half_ahat=.true. +! real(psb_dpk_) :: aii, ajj, aij, wii, wjj, tmp1, tmp2, minabs, edgnrm +! integer(psb_lpk_) :: nr, nc, nz, i, nz2, nrg, ii, jj, k, k2 +! type(psb_ctxt_type) :: ictxt +! integer(psb_ipk_) :: me, np +! character(len=80) :: aname +! real(psb_dpk_), parameter :: eps=epsilon(1.d0) +! +! ictxt = desc_a%get_ctxt() +! call psb_info(ictxt,me,np) +! if (present(symmetrize)) then +! symmetrize_ = symmetrize +! else +! symmetrize_ = .false. +! end if +! +! ! +! ! Extract off-diagonal part of A into ahat +! ! Extract diagonal of A +! ! +! call a%clip_diag(ahat,info) +! call ahat%mv_to(tcoo1) +! nr = tcoo1%get_nrows() +! nc = tcoo1%get_ncols() +! nz = tcoo1%get_nzeros() +! diag = a%get_diag(info) +! call psb_realloc(nc,diag,info) +! call psb_halo(diag,desc_a,info) +! +! if (half_ahat) then +! !!$ write(0,*) me,' Temp placeholder ' +! ilv = [(i,i=1,desc_a%get_local_cols())] +! call desc_a%l2gip(ilv,info,owned=.false.) +! ! +! ! At this point the matrix is symmetric, hence we will encounter +! ! all entries as appropriate. +! ! +! nr = tcoo1%get_nrows() +! nc = tcoo1%get_ncols() +! nz = tcoo1%get_nzeros() +! call tcoo2%allocate(nr,nc,nz) +! k2 = 0 +! do k = 1, nz +! ii = tcoo1%ia(k) +! jj = tcoo1%ja(k) +! ! +! ! Run over only one strict triangle +! ! +! if (ilv(ii) eps) then +! tcoo2%val(k2) = done - (2*done*aij*wii*wjj)/(aii*(wii**2) + ajj*(wjj**2)) +! else +! tcoo2%val(k2) = eps +! end if +! +! end if +! !else +! ! write(0,*) 'build_ahat: index error :',ii,jj,' : boundaries :',nr,nc +! !end if +! end do +! call tcoo2%set_nzeros(k2) +! if (dump) then +! block +! type(psb_ldspmat_type) :: amglob +! call ahat%cp_from(tcoo2) +! call psb_gather(amglob,ahat,desc_a,info) +! if (me==psb_root_) then +! write(aname,'(a,i0,a)') 'ahat-gu-',amglob%get_nrows(),'.mtx' +! call amglob%print(fname=aname,head='Test ') +! end if +! write(0,*) 'Done build_ahat' +! end block +! end if +! +! call psb_glob_transpose(tcoo2,desc_a,info,atrans=tcoo1) +! +! if (dump) then +! block +! type(psb_ldspmat_type) :: amglob +! call atnd%cp_from(tcoo1) +! call psb_gather(amglob,atnd,desc_a,info) +! if (me==psb_root_) then +! write(aname,'(a,i0,a)') 'ahat-gl-',amglob%get_nrows(),'.mtx' +! call amglob%print(fname=aname,head='Test ') +! end if +! write(0,*) 'Done build_ahat' +! end block +! end if +! +! nz = tcoo1%get_nzeros() +! nz2 = tcoo2%get_nzeros() +! call tcoo2%reallocate(nz+nz2) +! tcoo2%ia(nz2+1:nz2+nz) = tcoo1%ia(1:nz) +! tcoo2%ja(nz2+1:nz2+nz) = tcoo1%ja(1:nz) +! tcoo2%val(nz2+1:nz2+nz) = tcoo1%val(1:nz) +! call tcoo2%set_nzeros(nz+nz2) +! call tcoo2%fix(info) +! call tcoo1%free() +! nz = tcoo2%get_nzeros() +! minabs = minval(abs(tcoo2%val(1:nz))) +! call psb_min(ictxt,minabs) +! if (minabs == dzero) then +! if (me == 0) write(0,*) me, 'Min value for log correction is zero! ' +! minabs = done +! end if +! tcoo2%val(1:nz) = log(abs(tcoo2%val(1:nz))/(0.999*minabs)) +! if (any(tcoo2%val(1:nz)<0)) write(0,*) me, 'Warning: negative log output!' +! call ahat%mv_from(tcoo2) +! if (dump_ahat) then +! block +! character(len=40) :: fname +! integer(psb_ipk_) :: k, nr +! integer(psb_lpk_), allocatable :: ilv(:) +! nr = desc_a%get_local_rows() +! ilv = desc_a%get_global_indices(owned=.false.) +! write(fname,'(a,i3.3,a,i3.3,a)') 'aa-i',me,'-p',np,'.mtx' +! !!$ call a%print(fname=fname,head='Original matrix ',iv=ilv) +! write(fname,'(a,i3.3,a,i3.3,a)') 'ah2-inp-i',me,'-p',np,'.mtx' +! call ahat%print(fname=fname,head='ahat ',iv=ilv) +! end block +! end if +! +! if (dump) then +! write(aname,'(a,i3.3,a)') 'ahat-hfa-l-',me,'.mtx' +! call ahat%print(fname=aname,head='Test ',iv=ilv) +! end if +! +! if (dump) then +! block +! type(psb_ldspmat_type) :: amglob +! !!$ call psb_gather(amglob,a,desc_a,info) +! !!$ if (me==psb_root_) then +! !!$ write(aname,'(a,i3.3,a)') 'a-g-',amglob%get_nrows(),'.mtx' +! !!$ call amglob%print(fname=aname,head='Test ') +! !!$ end if +! call psb_gather(amglob,ahat,desc_a,info) +! if (me==psb_root_) then +! write(aname,'(a,i0,a)') 'ahat-hfa-g-',amglob%get_nrows(),'.mtx' +! call amglob%print(fname=aname,head='Test ') +! end if +! write(0,*) 'Done build_ahat' +! end block +! end if +! +! +! else +! +! if (debug) then +! ilv = [(i,i=1,desc_a%get_local_cols())] +! call desc_a%l2gip(ilv,info,owned=.false.) +! end if +! if (symmetrize_) then +! ! Is this faster than storing by CSR, going over the +! ! upper triangle, searching for lower triangle entry and +! ! then duplicating the output? Probably yes. +! if (debug) write(0,*) me,' Build_ahat: symmetrize :',nr,nc,nz +! call ahat%cp_from(tcoo1) +! call psb_htranspose(ahat,atnd,desc_a,info) +! if (debug) write(0,*) me,' Build_ahat: done transpose' +! +! if (dump) then +! write(aname,'(a,i3.3,a)') 'ainp-',me,'.mtx' +! call ahat%print(fname=aname,head='Test ',iv=ilv) +! write(aname,'(a,i3.3,a)') 'atnd-',me,'.mtx' +! call atnd%print(fname=aname,head='Test ',iv=ilv) +! end if +! call ahat%free() +! call atnd%mv_to(tcoo2) +! nz2 = tcoo2%get_nzeros() +! +! if (debug) then +! write(0,*) me,':',tcoo1%get_nrows(),tcoo1%get_ncols(),tcoo1%get_nzeros(),& +! & tcoo2%get_nrows(),tcoo2%get_ncols(),tcoo2%get_nzeros() +! flush(0) +! end if +! +! call tcoo3%allocate(nr, nc, max(2*nz,nz+nz2)) +! tcoo3%ia(1:nz) = tcoo1%ia(1:nz) +! tcoo3%ja(1:nz) = tcoo1%ja(1:nz) +! tcoo3%val(1:nz) = tcoo1%val(1:nz) +! tcoo3%ia(nz+1:nz+nz2) = tcoo2%ia(1:nz2) +! tcoo3%ja(nz+1:nz+nz2) = tcoo2%ja(1:nz2) +! tcoo3%val(nz+1:nz+nz2) = tcoo2%val(1:nz2) +! call tcoo3%set_nzeros(nz+nz2) +! call tcoo3%set_dupl(psb_dupl_add_) +! call tcoo3%fix(info) +! nz = tcoo3%get_nzeros() +! tcoo3%val(1:nz) = 0.5d0 * tcoo3%val(1:nz) +! call tcoo3%mv_to_coo(tcoo1,info) +! end if +! +! if (dump_ahat) then +! block +! character(len=40) :: fname +! integer(psb_ipk_) :: k, nr +! integer(psb_lpk_), allocatable :: ilv(:) +! nr = desc_a%get_local_rows() +! ilv = desc_a%get_global_indices(owned=.false.) +! write(fname,'(a,i3.3,a,i3.3,a)') 'aa-i',me,'-p',np,'.mtx' +! !!$ call a%print(fname=fname,head='Original matrix ',iv=ilv) +! write(fname,'(a,i3.3,a,i3.3,a)') 'ahat-inp-i',me,'-p',np,'.mtx' +! call ahat%print(fname=fname,head='Before building_ahat ',iv=ilv) +! end block +! end if +! +! +! ! +! ! At this point the matrix is symmetric, hence we will encounter +! ! all entries as appropriate. +! ! +! call tcoo1%cp_to_coo(tcoo2,info) +! nz = tcoo1%get_nzeros() +! do k = 1, nz +! ii = tcoo1%ia(k) +! jj = tcoo1%ja(k) +! aij = tcoo1%val(k) +! !if ((ii<= nr).and.(jj<=nc).and.(ii/=jj)) then +! ! This is already guaranteed by construction +! ! +! aii = diag(ii) +! ajj = diag(jj) +! wii = w(ii) +! wjj = w(jj) +! edgnrm = aii*(wii**2) + ajj*(wjj**2) +! if (edgnrm > eps) then +! tcoo2%val(k) = done - (2*done*aij*wii*wjj)/(aii*(wii**2) + ajj*(wjj**2)) +! else +! tcoo2%val(k)=eps +! end if +! if (debug) then +! block +! integer(psb_ipk_), parameter :: nr1=329, nc1=393 +! integer(psb_ipk_), parameter :: nr2=13, nc2=77 +! integer(psb_ipk_), parameter :: nr3=313, nc3=249 +! if ( ((ilv(ii)==nr1).and.(ilv(jj)==nc1)).or.((ilv(ii)==nc1).and.(ilv(jj)==nr1))& +! &.or.((ilv(ii)==nr2).and.(ilv(jj)==nc2)).or.((ilv(ii)==nc2).and.(ilv(jj)==nr2)) & +! &.or.((ilv(ii)==nr3).and.(ilv(jj)==nc3)).or.((ilv(ii)==nc3).and.(ilv(jj)==nr3)) & +! &) then +! write(0,*)me, 'Check on ahat:',ii,jj,ilv(ii),ilv(jj),aij,aii,ajj,wii,wjj,tcoo2%val(k) +! end if +! end block +! end if +! +! !else +! ! write(0,*) 'build_ahat: index error :',ii,jj,' : boundaries :',nr,nc +! !end if +! end do +! nz = tcoo2%get_nzeros() +! minabs = minval(abs(tcoo2%val(1:nz))) +! call psb_min(ictxt,minabs) +! if (minabs == dzero) then +! if (me == 0) write(0,*) me, 'Min value for log correction is zero! ' +! minabs = done +! end if +! tcoo2%val(1:nz) = log(abs(tcoo2%val(1:nz))/(0.999*minabs)) +! if (any(tcoo2%val(1:nz)<0)) write(0,*) me, 'Warning: negative log output!' +! call ahat%mv_from(tcoo2) +! +! if (dump) then +! write(aname,'(a,i3.3,a)') 'ahat-l-',me,'.mtx' +! call ahat%print(fname=aname,head='Test ',iv=ilv) +! end if +! +! if (dump) then +! block +! type(psb_ldspmat_type) :: amglob +! !!$ call psb_gather(amglob,a,desc_a,info) +! !!$ if (me==psb_root_) then +! !!$ write(aname,'(a,i3.3,a)') 'a-g-',amglob%get_nrows(),'.mtx' +! !!$ call amglob%print(fname=aname,head='Test ') +! !!$ end if +! call psb_gather(amglob,ahat,desc_a,info) +! if (me==psb_root_) then +! write(aname,'(a,i0,a)') 'ahat-g-',amglob%get_nrows(),'.mtx' +! call amglob%print(fname=aname,head='Test ') +! end if +! write(0,*) 'Done build_ahat' +! end block +! end if +! end if +! +! end subroutine build_ahat_old + + subroutine psb_sgtranspose(ain,aout,desc_a,info) + use psb_base_mod + implicit none + type(psb_lsspmat_type), intent(in) :: ain + type(psb_lsspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + + ! + ! BEWARE: This routine works under the assumption + ! that the same DESC_A works for both A and A^T, which + ! essentially means that A has a symmetric pattern. + ! + type(psb_lsspmat_type) :: atmp, ahalo, aglb + type(psb_ls_coo_sparse_mat) :: tmpcoo + type(psb_ls_csr_sparse_mat) :: tmpcsr + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: me, np + integer(psb_lpk_) :: i, j, k, nrow, ncol + integer(psb_lpk_), allocatable :: ilv(:) + character(len=80) :: aname + logical, parameter :: debug=.false., dump=.false., debug_sync=.false. + + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Start gtranspose ' + end if + call ain%cscnv(tmpcsr,info) + + if (debug) then + ilv = [(i,i=1,ncol)] + call desc_a%l2gip(ilv,info,owned=.false.) + write(aname,'(a,i3.3,a)') 'atmp-preh-',me,'.mtx' + call ain%print(fname=aname,head='atmp before haloTest ',iv=ilv) + end if + if (dump) then + call ain%cscnv(atmp,info) + call psb_gather(aglb,atmp,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'aglob-prehalo.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end if + + !call psb_loc_to_glob(tmpcsr%ja,desc_a,info) + call atmp%mv_from(tmpcsr) + + if (debug) then + write(aname,'(a,i3.3,a)') 'tmpcsr-',me,'.mtx' + call atmp%print(fname=aname,head='tmpcsr ',iv=ilv) + !call psb_set_debug_level(9999) + end if + + ! FIXME THIS NEEDS REWORKING + if (debug) write(0,*) me,' Gtranspose into sphalo :',atmp%get_nrows(),atmp%get_ncols() + call psb_sphalo(atmp,desc_a,ahalo,info,rowscale=.true.) +!!$ call psb_sphalo(atmp,desc_a,ahalo,info,& +!!$ & colcnv=.false.,rowscale=.true.) + if (debug) write(0,*) me,' Gtranspose from sphalo :',ahalo%get_nrows(),ahalo%get_ncols() +!!$ call psb_set_debug_level(0) + if (info == psb_success_) call psb_rwextd(ncol,atmp,info,b=ahalo) + + if (debug) then + write(aname,'(a,i3.3,a)') 'ahalo-',me,'.mtx' + call ahalo%print(fname=aname,head='ahalo after haloTest ',iv=ilv) + write(aname,'(a,i3.3,a)') 'atmp-h-',me,'.mtx' + call atmp%print(fname=aname,head='atmp after haloTest ',iv=ilv) + end if + + if (info == psb_success_) call ahalo%free() + + call atmp%cp_to(tmpcoo) + call tmpcoo%transp() + !call psb_glob_to_loc(tmpcoo%ia,desc_a,info,iact='I') + if (debug) write(0,*) 'Before cleanup:',tmpcoo%get_nzeros() + + j = 0 + do k=1, tmpcoo%get_nzeros() + if ((tmpcoo%ia(k) > 0).and.(tmpcoo%ja(k)>0)) then + j = j+1 + tmpcoo%ia(j) = tmpcoo%ia(k) + tmpcoo%ja(j) = tmpcoo%ja(k) + tmpcoo%val(j) = tmpcoo%val(k) + end if + end do + call tmpcoo%set_nzeros(j) + + if (debug) write(0,*) 'After cleanup:',tmpcoo%get_nzeros() + + call ahalo%mv_from(tmpcoo) + if (dump) then + call psb_gather(aglb,ahalo,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran-preclip.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end if + + + call ahalo%csclip(aout,info,imax=nrow) + + if (debug) write(0,*) 'After clip:',aout%get_nzeros() + + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'End gtranspose ' + end if + !call aout%cscnv(info,type='csr') + + if (dump) then + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call aout%print(fname=aname,head='atrans ',iv=ilv) + call psb_gather(aglb,aout,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end if + + end subroutine psb_sgtranspose + + subroutine psb_shtranspose(ain,aout,desc_a,info) + use psb_base_mod + implicit none + type(psb_lsspmat_type), intent(in) :: ain + type(psb_lsspmat_type), intent(out) :: aout + type(psb_desc_type) :: desc_a + integer(psb_ipk_), intent(out) :: info + + ! + ! BEWARE: This routine works under the assumption + ! that the same DESC_A works for both A and A^T, which + ! essentially means that A has a symmetric pattern. + ! + type(psb_lsspmat_type) :: atmp, ahalo, aglb + type(psb_ls_coo_sparse_mat) :: tmpcoo, tmpc1, tmpc2, tmpch + type(psb_ls_csr_sparse_mat) :: tmpcsr + integer(psb_ipk_) :: nz1, nz2, nzh, nz + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: me, np + integer(psb_lpk_) :: i, j, k, nrow, ncol, nlz + integer(psb_lpk_), allocatable :: ilv(:) + character(len=80) :: aname + logical, parameter :: debug=.false., dump=.false., debug_sync=.false. + + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Start htranspose ' + end if + call ain%cscnv(tmpcsr,info) + + if (debug) then + ilv = [(i,i=1,ncol)] + call desc_a%l2gip(ilv,info,owned=.false.) + write(aname,'(a,i3.3,a)') 'atmp-preh-',me,'.mtx' + call ain%print(fname=aname,head='atmp before haloTest ',iv=ilv) + end if + if (dump) then + call ain%cscnv(atmp,info) + call psb_gather(aglb,atmp,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'aglob-prehalo.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end if + + !call psb_loc_to_glob(tmpcsr%ja,desc_a,info) + call atmp%mv_from(tmpcsr) + + if (debug) then + write(aname,'(a,i3.3,a)') 'tmpcsr-',me,'.mtx' + call atmp%print(fname=aname,head='tmpcsr ',iv=ilv) + !call psb_set_debug_level(9999) + end if + + ! FIXME THIS NEEDS REWORKING + if (debug) write(0,*) me,' Htranspose into sphalo :',atmp%get_nrows(),atmp%get_ncols() + if (.true.) then + call psb_sphalo(atmp,desc_a,ahalo,info, outfmt='coo ') + call atmp%mv_to(tmpc1) + call ahalo%mv_to(tmpch) + nz1 = tmpc1%get_nzeros() + call psb_loc_to_glob(tmpc1%ia(1:nz1),desc_a,info,iact='I') + call psb_loc_to_glob(tmpc1%ja(1:nz1),desc_a,info,iact='I') + nzh = tmpch%get_nzeros() + call psb_loc_to_glob(tmpch%ia(1:nzh),desc_a,info,iact='I') + call psb_loc_to_glob(tmpch%ja(1:nzh),desc_a,info,iact='I') + nlz = nz1+nzh + call tmpcoo%allocate(ncol,ncol,nlz) + tmpcoo%ia(1:nz1) = tmpc1%ia(1:nz1) + tmpcoo%ja(1:nz1) = tmpc1%ja(1:nz1) + tmpcoo%val(1:nz1) = tmpc1%val(1:nz1) + tmpcoo%ia(nz1+1:nz1+nzh) = tmpch%ia(1:nzh) + tmpcoo%ja(nz1+1:nz1+nzh) = tmpch%ja(1:nzh) + tmpcoo%val(nz1+1:nz1+nzh) = tmpch%val(1:nzh) + call tmpcoo%set_nzeros(nlz) + call tmpcoo%transp() + nz = tmpcoo%get_nzeros() + call psb_glob_to_loc(tmpcoo%ia(1:nz),desc_a,info,iact='I') + call psb_glob_to_loc(tmpcoo%ja(1:nz),desc_a,info,iact='I') + if (.true.) then + call tmpcoo%clean_negidx(info) + else + j = 0 + do k=1, tmpcoo%get_nzeros() + if ((tmpcoo%ia(k) > 0).and.(tmpcoo%ja(k)>0)) then + j = j+1 + tmpcoo%ia(j) = tmpcoo%ia(k) + tmpcoo%ja(j) = tmpcoo%ja(k) + tmpcoo%val(j) = tmpcoo%val(k) + end if + end do + call tmpcoo%set_nzeros(j) + end if + call ahalo%mv_from(tmpcoo) + call ahalo%csclip(aout,info,imax=nrow) + + else + call psb_sphalo(atmp,desc_a,ahalo,info, rowscale=.true.) +!!$ call psb_sphalo(atmp,desc_a,ahalo,info,& +!!$ & colcnv=.false.,rowscale=.true.) + if (debug) write(0,*) me,' Htranspose from sphalo :',ahalo%get_nrows(),ahalo%get_ncols() +!!$ call psb_set_debug_level(0) + if (info == psb_success_) call psb_rwextd(ncol,atmp,info,b=ahalo) + + if (debug) then + write(aname,'(a,i3.3,a)') 'ahalo-',me,'.mtx' + call ahalo%print(fname=aname,head='ahalo after haloTest ',iv=ilv) + write(aname,'(a,i3.3,a)') 'atmp-h-',me,'.mtx' + call atmp%print(fname=aname,head='atmp after haloTest ',iv=ilv) + end if + + if (info == psb_success_) call ahalo%free() + + call atmp%cp_to(tmpcoo) + call tmpcoo%transp() + !call psb_glob_to_loc(tmpcoo%ia,desc_a,info,iact='I') + if (debug) write(0,*) 'Before cleanup:',tmpcoo%get_nzeros() + if (.true.) then + call tmpcoo%clean_negidx(info) + else + + j = 0 + do k=1, tmpcoo%get_nzeros() + if ((tmpcoo%ia(k) > 0).and.(tmpcoo%ja(k)>0)) then + j = j+1 + tmpcoo%ia(j) = tmpcoo%ia(k) + tmpcoo%ja(j) = tmpcoo%ja(k) + tmpcoo%val(j) = tmpcoo%val(k) + end if + end do + call tmpcoo%set_nzeros(j) + end if + + if (debug) write(0,*) 'After cleanup:',tmpcoo%get_nzeros() + + call ahalo%mv_from(tmpcoo) + if (dump) then + call psb_gather(aglb,ahalo,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran-preclip.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end if + + + call ahalo%csclip(aout,info,imax=nrow) + end if + + if (debug) write(0,*) 'After clip:',aout%get_nzeros() + + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'End htranspose ' + end if + !call aout%cscnv(info,type='csr') + + if (dump) then + write(aname,'(a,i3.3,a)') 'atran-',me,'.mtx' + call aout%print(fname=aname,head='atrans ',iv=ilv) + call psb_gather(aglb,aout,desc_a,info) + if (me==psb_root_) then + write(aname,'(a,i3.3,a)') 'atran.mtx' + call aglb%print(fname=aname,head='Test ') + end if + end if + + end subroutine psb_shtranspose + + subroutine sPMatchBox(nlver,nledge,verlocptr,verlocind,edgelocweight,& + & verdistance, mate, myrank, numprocs, ictxt,& + & msgindsent,msgactualsent,msgpercent,& + & ph0_time, ph1_time, ph2_time, ph1_card, ph2_card,info,display_inp) + use psb_base_mod + implicit none + type(psb_ctxt_type) :: ictxt + integer(psb_c_ipk_), value :: myrank, numprocs + integer(psb_c_lpk_), value :: nlver,nledge + integer(psb_c_lpk_) :: verlocptr(:),verlocind(:), verdistance(:) + integer(psb_c_lpk_) :: mate(:) + integer(psb_c_lpk_) :: msgindsent(*),msgactualsent(*) + real(c_double) :: ph0_time, ph1_time, ph2_time + integer(psb_c_lpk_) :: ph1_card(*),ph2_card(*) + real(c_float) :: edgelocweight(:) + real(c_double) :: msgpercent(*) + integer(psb_ipk_) :: info, me, np + integer(psb_c_mpk_) :: icomm, mrank, mnp + logical, optional :: display_inp + ! + logical, parameter :: debug=.false., debug_out=.false., debug_sync=.false., dump_input=.false. + logical :: display_ + integer(psb_lpk_) :: i,k + integer(psb_ipk_), save :: cnt=1 + + call psb_info(ictxt,me,np) + icomm = psb_get_mpi_comm(ictxt) + mrank = psb_get_mpi_rank(ictxt,me) + mnp = np + if (present(display_inp)) then + display_ = display_inp + else + display_ = .false. + end if + + verlocptr(:) = verlocptr(:) - 1 + verlocind(:) = verlocind(:) - 1 + verdistance(:) = verdistance(:) -1 + + if (dump_input) then + block + integer(psb_ipk_) :: iout=20,info,i,j,k,nr + character(len=80) :: fname + write(fname,'(a,i4.4,a,i4.4,a)') 'verlocptr-l',cnt,'-i',me,'.mtx' + open(iout,file=fname,iostat=info) + if (info == 0) then + write(iout,'(a)') '%verlocptr ' + write(iout,*) nlver + do i=1, nlver+1 + write(iout,*) verlocptr(i) + end do + close(iout) + else + write(psb_err_unit,*) 'Error: could not open ',fname,' for output' + end if + write(fname,'(a,i4.4,a,i4.4,a)') 'edges-l',cnt,'-i',me,'.mtx' + open(iout,file=fname,iostat=info) + if (info == 0) then + write(iout,'(a)') '%verlocind/edgeweight ' + write(iout,*) nlver + do i=1, nlver + do j=verlocptr(i)+1,verlocptr(i+1) + write(iout,*) i-1, verlocind(j),edgelocweight(j) + end do + end do + close(iout) + else + write(psb_err_unit,*) 'Error: could not open ',fname,' for output' + end if + if (me==0) then + write(fname,'(a,i4.4,a,i4.4,a)') 'verdistance-l',cnt,'-i',me,'.mtx' + open(iout,file=fname,iostat=info) + if (info == 0) then + write(iout,'(a)') '%verdistance' + write(iout,*) np + do i=1, np+1 + write(iout,*) verdistance(i) + end do + close(iout) + else + write(psb_err_unit,*) 'Error: could not open ',fname,' for output' + end if + end if + end block + cnt = cnt + 1 + end if + + if (debug.or.display_) then + do i=0,np-1 + if (me == i) then + write(6,*) 'Process: ',me,' : Input into matching: nlver ',nlver,' nledge ',nledge + write(6,*) 'Process: ',me,' : VERDISTANCE 0-base : ',verdistance(1:np+1) + write(6,*) 'Process: ',me,' : VERLOCPTR 0-base : ',verlocptr(1:nlver+1) + write(6,*) 'Process: ',me,' : VERLOCIND 0-base : ',verlocind(1:nledge) + write(6,*) 'Process: ',me,' : EDGELOCWEIGHT : ',edgelocweight(1:nledge) + write(6,*) 'Process: ',me,' : Initial MATE : ',mate(1:nlver) + flush(6) + end if + call psb_barrier(ictxt) + end do + end if + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*)' Calling MatchBoxP ' + end if + + call MatchBoxPC(nlver,nledge,verlocptr,verlocind,edgelocweight,& + & verdistance, mate, mrank, mnp, icomm,& + & msgindsent,msgactualsent,msgpercent,& + & ph0_time, ph1_time, ph2_time, ph1_card, ph2_card) + verlocptr(:) = verlocptr(:) + 1 + verlocind(:) = verlocind(:) + 1 + verdistance(:) = verdistance(:) + 1 + + if (debug_sync) then + call psb_barrier(ictxt) + if (me == 0) write(0,*)' Done MatchBoxP ' + end if + + if (debug_out) then + do k=0,np-1 + if (me == k) then + write(6,*) 'Process: ',me,' : from Matching (0-base): ',info + do i=1,nlver + write(6,*) '(',i,',',mate(i),')' + !mate(i) = mate(i) +1 + ! + end do + flush(6) + end if + call psb_barrier(ictxt) + end do + end if + where(mate>=0) mate = mate + 1 + + end subroutine sPMatchBox + +end module smatchboxp_mod diff --git a/amgprec/amg_s_parmatch_aggregator_mod.F90 b/amgprec/amg_s_parmatch_aggregator_mod.F90 new file mode 100644 index 00000000..81a83aff --- /dev/null +++ b/amgprec/amg_s_parmatch_aggregator_mod.F90 @@ -0,0 +1,713 @@ +! +! +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! +! (C) Copyright 2021 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! moved here from amg4psblas-extension +! +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! +! +! The aggregator object hosts the aggregation method for building +! the multilevel hierarchy. This variant is based on the hybrid method +! presented in +! +! +! sm - class(amg_T_base_smoother_type), allocatable +! The current level preconditioner (aka smoother). +! parms - type(amg_RTml_parms) +! The parameters defining the multilevel strategy. +! ac - The local part of the current-level matrix, built by +! coarsening the previous-level matrix. +! desc_ac - type(psb_desc_type). +! The communication descriptor associated to the matrix +! stored in ac. +! base_a - type(psb_Tspmat_type), pointer. +! Pointer (really a pointer!) to the local part of the current +! matrix (so we have a unified treatment of residuals). +! We need this to avoid passing explicitly the current matrix +! to the routine which applies the preconditioner. +! base_desc - type(psb_desc_type), pointer. +! Pointer to the communication descriptor associated to the +! matrix pointed by base_a. +! map - Stores the maps (restriction and prolongation) between the +! vector spaces associated to the index spaces of the previous +! and current levels. +! +! Methods: +! Most methods follow the encapsulation hierarchy: they take whatever action +! is appropriate for the current object, then call the corresponding method for +! the contained object. +! As an example: the descr() method prints out a description of the +! level. It starts by invoking the descr() method of the parms object, +! then calls the descr() method of the smoother object. +! +! descr - Prints a description of the object. +! default - Set default values +! dump - Dump to file object contents +! set - Sets various parameters; when a request is unknown +! it is passed to the smoother object for further processing. +! check - Sanity checks. +! sizeof - Total memory occupation in bytes +! get_nzeros - Number of nonzeros +! +! + +module amg_s_parmatch_aggregator_mod + use amg_s_base_aggregator_mod + use smatchboxp_mod + + type, extends(amg_s_base_aggregator_type) :: amg_s_parmatch_aggregator_type + integer(psb_ipk_) :: matching_alg + integer(psb_ipk_) :: n_sweeps ! When n_sweeps >1 we need an auxiliary descriptor + integer(psb_ipk_) :: orig_aggr_size + integer(psb_ipk_) :: jacobi_sweeps + real(psb_spk_), allocatable :: w(:), w_nxt(:) + type(psb_sspmat_type), allocatable :: prol, restr + type(psb_sspmat_type), allocatable :: ac, base_a, rwa + type(psb_desc_type), allocatable :: desc_ac, desc_ax, base_desc, rwdesc + integer(psb_ipk_) :: max_csize + integer(psb_ipk_) :: max_nlevels + logical :: reproducible_matching = .false. + logical :: need_symmetrize = .false. + logical :: unsmoothed_hierarchy = .true. + contains + procedure, pass(ag) :: bld_tprol => amg_s_parmatch_aggregator_build_tprol + procedure, pass(ag) :: mat_bld => amg_s_parmatch_aggregator_mat_bld + procedure, pass(ag) :: mat_asb => amg_s_parmatch_aggregator_mat_asb + procedure, pass(ag) :: inner_mat_asb => amg_s_parmatch_aggregator_inner_mat_asb + procedure, pass(ag) :: bld_map => amg_s_parmatch_aggregator_bld_map + procedure, pass(ag) :: csetc => s_parmatch_aggr_csetc + procedure, pass(ag) :: cseti => s_parmatch_aggr_cseti + procedure, pass(ag) :: default => s_parmatch_aggr_set_default + procedure, pass(ag) :: sizeof => s_parmatch_aggregator_sizeof + procedure, pass(ag) :: update_next => s_parmatch_aggregator_update_next + procedure, pass(ag) :: bld_wnxt => s_parmatch_bld_wnxt + procedure, pass(ag) :: bld_default_w => s_bld_default_w + procedure, pass(ag) :: set_c_default_w => s_set_prm_c_default_w + procedure, pass(ag) :: descr => s_parmatch_aggregator_descr + procedure, pass(ag) :: clone => s_parmatch_aggregator_clone + procedure, pass(ag) :: free => s_parmatch_aggregator_free + procedure, nopass :: fmt => s_parmatch_aggregator_fmt + procedure, nopass :: xt_desc => amg_s_parmatch_aggregator_xt_desc + end type amg_s_parmatch_aggregator_type + +!!$ interface +!!$ subroutine glob_transpose(ain,desc_r,desc_c,atrans,desc_rx,info) +!!$ import :: psb_desc_type, psb_ld_coo_sparse_mat, psb_ipk_ +!!$ implicit none +!!$ type(psb_ld_coo_sparse_mat), intent(in) :: ain +!!$ type(psb_ld_coo_sparse_mat), intent(out) :: atrans +!!$ type(psb_desc_type), intent(inout) :: desc_r, desc_c +!!$ type(psb_desc_type), intent(out) :: desc_rx +!!$ integer(psb_ipk_), intent(out) :: info +!!$ end subroutine glob_transpose +!!$ end interface + + interface + subroutine amg_s_parmatch_aggregator_build_tprol(ag,parms,ag_data,& + & a,desc_a,ilaggr,nlaggr,t_prol,info) + import :: amg_s_parmatch_aggregator_type, psb_desc_type, psb_sspmat_type,& + & psb_lsspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_sml_parms, amg_saggr_data + implicit none + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_sml_parms), intent(inout) :: parms + type(amg_saggr_data), intent(in) :: ag_data + type(psb_sspmat_type), intent(inout) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), allocatable, intent(out) :: ilaggr(:),nlaggr(:) + type(psb_lsspmat_type), intent(out) :: t_prol + integer(psb_ipk_), intent(out) :: info + end subroutine amg_s_parmatch_aggregator_build_tprol + end interface + + interface + subroutine amg_s_parmatch_aggregator_mat_bld(ag,parms,a,desc_a,ilaggr,nlaggr,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + import :: amg_s_parmatch_aggregator_type, psb_desc_type, psb_sspmat_type,& + & psb_lsspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_sml_parms, amg_saggr_data + implicit none + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_sml_parms), intent(inout) :: parms + type(psb_sspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(psb_lsspmat_type), intent(inout) :: t_prol + type(psb_sspmat_type), intent(out) :: op_prol,ac,op_restr + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_s_parmatch_aggregator_mat_bld + end interface + + interface + subroutine amg_s_parmatch_aggregator_mat_asb(ag,parms,a,desc_a,& + & ac,desc_ac, op_prol,op_restr,info) + import :: amg_s_parmatch_aggregator_type, psb_desc_type, psb_sspmat_type,& + & psb_lsspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_sml_parms, amg_saggr_data + implicit none + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_sml_parms), intent(inout) :: parms + type(psb_sspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + type(psb_sspmat_type), intent(inout) :: op_prol,ac,op_restr + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_s_parmatch_aggregator_mat_asb + end interface + + + interface + subroutine amg_s_parmatch_aggregator_inner_mat_asb(ag,parms,a,desc_a,& + & ac,desc_ac, op_prol,op_restr,info) + import :: amg_s_parmatch_aggregator_type, psb_desc_type, psb_sspmat_type,& + & psb_lsspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_sml_parms, amg_saggr_data + implicit none + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_sml_parms), intent(inout) :: parms + type(psb_sspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_sspmat_type), intent(inout) :: op_prol,op_restr + type(psb_sspmat_type), intent(inout) :: ac + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_s_parmatch_aggregator_inner_mat_asb + end interface + + + interface + subroutine amg_s_parmatch_spmm_bld(a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + import :: amg_s_parmatch_aggregator_type, psb_desc_type, psb_sspmat_type,& + & psb_lsspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_sml_parms, amg_saggr_data + implicit none + type(psb_sspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_sml_parms), intent(inout) :: parms + type(psb_lsspmat_type), intent(inout) :: t_prol + type(psb_sspmat_type), intent(inout) :: ac, op_prol, op_restr + type(psb_desc_type), intent(out) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_s_parmatch_spmm_bld + end interface + + interface + subroutine amg_s_parmatch_unsmth_bld(ag,a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + import :: amg_s_parmatch_aggregator_type, psb_desc_type, psb_sspmat_type,& + & psb_lsspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_sml_parms, amg_saggr_data + implicit none + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + type(psb_sspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_sml_parms), intent(inout) :: parms + type(psb_lsspmat_type), intent(inout) :: t_prol + type(psb_sspmat_type), intent(out) :: op_prol,ac, op_restr + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_s_parmatch_unsmth_bld + end interface + + interface + subroutine amg_s_parmatch_smth_bld(ag,a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + import :: amg_s_parmatch_aggregator_type, psb_desc_type, psb_sspmat_type,& + & psb_lsspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_sml_parms, amg_saggr_data + implicit none + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + type(psb_sspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_sml_parms), intent(inout) :: parms + type(psb_lsspmat_type), intent(inout) :: t_prol + type(psb_sspmat_type), intent(out) :: op_prol,ac, op_restr + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_s_parmatch_smth_bld + end interface + + interface + subroutine amg_s_parmatch_spmm_bld_ov(a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + import :: amg_s_parmatch_aggregator_type, psb_desc_type, psb_sspmat_type,& + & psb_lsspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_sml_parms, amg_saggr_data + implicit none + type(psb_sspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_sml_parms), intent(inout) :: parms + type(psb_lsspmat_type), intent(inout) :: t_prol + type(psb_sspmat_type), intent(out) :: op_prol,ac, op_restr + type(psb_desc_type), intent(out) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_s_parmatch_spmm_bld_ov + end interface + + interface + subroutine amg_s_parmatch_spmm_bld_inner(a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + import :: amg_s_parmatch_aggregator_type, psb_desc_type, psb_sspmat_type,& + & psb_lsspmat_type, psb_dpk_, psb_ipk_, psb_lpk_, amg_sml_parms, amg_saggr_data,& + & psb_s_csr_sparse_mat, psb_ls_csr_sparse_mat + implicit none + type(psb_s_csr_sparse_mat), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_sml_parms), intent(inout) :: parms + type(psb_lsspmat_type), intent(inout) :: t_prol + type(psb_sspmat_type), intent(out) :: op_prol,ac, op_restr + type(psb_desc_type), intent(out) :: desc_ac + integer(psb_ipk_), intent(out) :: info + end subroutine amg_s_parmatch_spmm_bld_inner + end interface + +!!$ interface +!!$ Subroutine amg_d_p_csr_spspmm(acsr,desc_a,bcsr,ccsr,desc_c,info,data) +!!$ import +!!$ Implicit None +!!$ type(psb_ld_csr_sparse_mat),intent(in) :: acsr +!!$ type(psb_ld_csr_sparse_mat),intent(inout) :: bcsr +!!$ type(psb_ld_csr_sparse_mat),intent(out) :: ccsr +!!$ type(psb_desc_type),intent(in) :: desc_a +!!$ type(psb_desc_type),intent(inout) :: desc_c +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_), intent(in), optional :: data +!!$ end Subroutine amg_d_p_csr_spspmm +!!$ end interface + + private :: is_legal_malg, is_legal_csize, is_legal_nsweeps, is_legal_nlevels + +contains + + subroutine s_bld_default_w(ag,nr) + use psb_realloc_mod + implicit none + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + integer(psb_ipk_), intent(in) :: nr + integer(psb_ipk_) :: info + call psb_realloc(nr,ag%w,info) + if (info /= psb_success_) return + ag%w = done + !call ag%set_c_default_w() + end subroutine s_bld_default_w + + subroutine s_set_prm_c_default_w(ag) + use psb_realloc_mod + use iso_c_binding + implicit none + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + integer(psb_ipk_) :: info + + !write(0,*) 'prm_c_deafult_w ' + call psb_safe_ab_cpy(ag%w,ag%w_nxt,info) + + end subroutine s_set_prm_c_default_w + + subroutine s_parmatch_bld_wnxt(ag,ilaggr,valaggr,nx) + use psb_realloc_mod + implicit none + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + integer(psb_lpk_), intent(in) :: ilaggr(:) + real(psb_spk_), intent(in) :: valaggr(:) + integer(psb_ipk_), intent(in) :: nx + + integer(psb_ipk_) :: info,i,j + + ! The vector was already fixed in the call to BCMatch. + !write(0,*) 'Executing bld_wnxt ',nx + call psb_realloc(nx,ag%w_nxt,info) + + end subroutine s_parmatch_bld_wnxt + + function s_parmatch_aggregator_fmt() result(val) + implicit none + character(len=32) :: val + + val = "Parallel Matching aggregation" + end function s_parmatch_aggregator_fmt + + function amg_s_parmatch_aggregator_xt_desc() result(val) + implicit none + logical :: val + + val = .true. + end function amg_s_parmatch_aggregator_xt_desc + + function s_parmatch_aggregator_sizeof(ag) result(val) + use psb_realloc_mod + implicit none + class(amg_s_parmatch_aggregator_type), intent(in) :: ag + integer(psb_epk_) :: val + + val = 4 + val = val + psb_size(ag%w) + psb_size(ag%w_nxt) + if (allocated(ag%ac)) val = val + ag%ac%sizeof() + if (allocated(ag%base_a)) val = val + ag%base_a%sizeof() + if (allocated(ag%prol)) val = val + ag%prol%sizeof() + if (allocated(ag%restr)) val = val + ag%restr%sizeof() + if (allocated(ag%desc_ac)) val = val + ag%desc_ac%sizeof() + if (allocated(ag%base_desc)) val = val + ag%base_desc%sizeof() + if (allocated(ag%desc_ax)) val = val + ag%desc_ax%sizeof() + + end function s_parmatch_aggregator_sizeof + + subroutine s_parmatch_aggregator_descr(ag,parms,iout,info) + implicit none + class(amg_s_parmatch_aggregator_type), intent(in) :: ag + type(amg_sml_parms), intent(in) :: parms + integer(psb_ipk_), intent(in) :: iout + integer(psb_ipk_), intent(out) :: info + + write(iout,*) 'Parallel Matching Aggregator' + write(iout,*) ' Number of matching sweeps: ',ag%n_sweeps + write(iout,*) ' Matching algorithm : MatchBoxP (PREIS)' + write(iout,*) 'Aggregator object type: ',ag%fmt() + call parms%mldescr(iout,info) + + return + end subroutine s_parmatch_aggregator_descr + + function is_legal_malg(alg) result(val) + logical :: val + integer(psb_ipk_) :: alg + + val = (0==alg) + end function is_legal_malg + + function is_legal_csize(csize) result(val) + logical :: val + integer(psb_ipk_) :: csize + + val = ((-1==csize).or.(csize >0)) + end function is_legal_csize + + function is_legal_nsweeps(nsw) result(val) + logical :: val + integer(psb_ipk_) :: nsw + + val = (1<=nsw) + end function is_legal_nsweeps + + function is_legal_nlevels(nlv) result(val) + logical :: val + integer(psb_ipk_) :: nlv + + val = (1<=nlv) + end function is_legal_nlevels + + + subroutine s_parmatch_aggregator_update_next(ag,agnext,info) + use psb_realloc_mod + implicit none + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + class(amg_s_base_aggregator_type), target, intent(inout) :: agnext + integer(psb_ipk_), intent(out) :: info + + ! + ! + select type(agnext) + class is (amg_s_parmatch_aggregator_type) + if (.not.is_legal_malg(agnext%matching_alg)) & + & agnext%matching_alg = ag%matching_alg + if (.not.is_legal_nsweeps(agnext%n_sweeps))& + & agnext%n_sweeps = ag%n_sweeps + if (.not.is_legal_csize(agnext%max_csize))& + & agnext%max_csize = ag%max_csize + if (.not.is_legal_nlevels(agnext%max_nlevels))& + & agnext%max_nlevels = ag%max_nlevels + ! Is this going to generate shallow copies/memory leaks/double frees? + ! To be investigated further. + call psb_safe_ab_cpy(ag%w_nxt,agnext%w,info) + call agnext%set_c_default_w() + if (ag%unsmoothed_hierarchy) then + agnext%unsmoothed_hierarchy = .true. + call move_alloc(ag%rwdesc,agnext%base_desc) + call move_alloc(ag%rwa,agnext%base_a) + end if + + class default + ! What should we do here? + end select + info = 0 + end subroutine s_parmatch_aggregator_update_next + + subroutine s_parmatch_aggr_csetc(ag,what,val,info,idx) + + Implicit None + + ! Arguments + class(amg_s_parmatch_aggregator_type), intent(inout) :: ag + character(len=*), intent(in) :: what + character(len=*), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: idx + integer(psb_ipk_) :: err_act, iwhat + character(len=20) :: name='s_parmatch_aggr_cseti' + info = psb_success_ + + ! For now we ignore IDX + + select case(psb_toupper(trim(what))) + case('PRMC_REPRODUCIBLE_MATCHING') + select case(psb_toupper(trim(val))) + case('F','FALSE') + ag%reproducible_matching = .false. + case('REPRODUCIBLE','TRUE','T') + ag%reproducible_matching =.true. + end select + case('PRMC_NEED_SYMMETRIZE') + select case(psb_toupper(trim(val))) + case('FALSE','F') + ag%need_symmetrize = .false. + case('SYMMETRIZE','TRUE','T') + ag%need_symmetrize =.true. + end select + case('PRMC_UNSMOOTHED_HIERARCHY') + select case(psb_toupper(trim(val))) + case('F','FALSE') + ag%unsmoothed_hierarchy = .false. + case('T','TRUE') + ag%unsmoothed_hierarchy =.true. + end select + case default + ! Do nothing + end select + return + end subroutine s_parmatch_aggr_csetc + + subroutine s_parmatch_aggr_cseti(ag,what,val,info,idx) + + Implicit None + + ! Arguments + class(amg_s_parmatch_aggregator_type), intent(inout) :: ag + character(len=*), intent(in) :: what + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_), intent(in), optional :: idx + integer(psb_ipk_) :: err_act, iwhat + character(len=20) :: name='s_parmatch_aggr_cseti' + info = psb_success_ + + ! For now we ignore IDX + + select case(psb_toupper(trim(what))) + case('PRMC_MATCH_ALG') + ag%matching_alg=val + case('PRMC_SWEEPS') + ag%n_sweeps=val + case('AGGR_SIZE') + ag%orig_aggr_size = val + ag%n_sweeps=max(1,ceiling(log(val*1.0)/log(2.0))) + case('PRMC_MAX_CSIZE') + ag%max_csize=val + case('PRMC_MAX_NLEVELS') + ag%max_nlevels=val + case('PRMC_W_SIZE') + call ag%bld_default_w(val) + case('PRMC_REPRODUCIBLE_MATCHING') + ag%reproducible_matching = (val == 1) + case('PRMC_NEED_SYMMETRIZE') + ag%need_symmetrize = (val == 1) + case('PRMC_UNSMOOTHED_HIERARCHY') + ag%unsmoothed_hierarchy = (val == 1) + case default + ! Do nothing + end select + return + end subroutine s_parmatch_aggr_cseti + + subroutine s_parmatch_aggr_set_default(ag) + + Implicit None + + ! Arguments + class(amg_s_parmatch_aggregator_type), intent(inout) :: ag + character(len=20) :: name='s_parmatch_aggr_set_default' + ag%matching_alg = 0 + ag%n_sweeps = 1 + ag%jacobi_sweeps = 0 + ag%max_nlevels = 36 + ag%max_csize = -1 + ! + ! Apparently BootCMatch works better + ! by keeping all entries + ! + ag%do_clean_zeros = .false. + + return + + end subroutine s_parmatch_aggr_set_default + + subroutine s_parmatch_aggregator_free(ag,info) + use iso_c_binding + implicit none + class(amg_s_parmatch_aggregator_type), intent(inout) :: ag + integer(psb_ipk_), intent(out) :: info + + info = 0 + if ((info == 0).and.allocated(ag%w)) deallocate(ag%w,stat=info) + if ((info == 0).and.allocated(ag%w_nxt)) deallocate(ag%w_nxt,stat=info) + if ((info == 0).and.allocated(ag%prol)) then + call ag%prol%free(); deallocate(ag%prol,stat=info) + end if + if ((info == 0).and.allocated(ag%restr)) then + call ag%restr%free(); deallocate(ag%restr,stat=info) + end if + if ((info == 0).and.allocated(ag%ac)) then + call ag%ac%free(); deallocate(ag%ac,stat=info) + end if + if ((info == 0).and.allocated(ag%base_a)) then + call ag%base_a%free(); deallocate(ag%base_a,stat=info) + end if + if ((info == 0).and.allocated(ag%rwa)) then + call ag%rwa%free(); deallocate(ag%rwa,stat=info) + end if + if ((info == 0).and.allocated(ag%desc_ac)) then + call ag%desc_ac%free(info); deallocate(ag%desc_ac,stat=info) + end if + if ((info == 0).and.allocated(ag%desc_ax)) then + call ag%desc_ax%free(info); deallocate(ag%desc_ax,stat=info) + end if + if ((info == 0).and.allocated(ag%base_desc)) then + call ag%base_desc%free(info); deallocate(ag%base_desc,stat=info) + end if + if ((info == 0).and.allocated(ag%rwdesc)) then + call ag%rwdesc%free(info); deallocate(ag%rwdesc,stat=info) + end if + + end subroutine s_parmatch_aggregator_free + + subroutine s_parmatch_aggregator_clone(ag,agnext,info) + implicit none + class(amg_s_parmatch_aggregator_type), intent(inout) :: ag + class(amg_s_base_aggregator_type), allocatable, intent(inout) :: agnext + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(agnext)) then + call agnext%free(info) + if (info == 0) deallocate(agnext,stat=info) + end if + if (info /= 0) return + allocate(agnext,source=ag,stat=info) + select type(agnext) + class is (amg_s_parmatch_aggregator_type) + call agnext%set_c_default_w() + class default + ! Should never ever get here + info = -1 + end select + end subroutine s_parmatch_aggregator_clone + + subroutine amg_s_parmatch_aggregator_bld_map(ag,desc_a,desc_ac,ilaggr,nlaggr,& + & op_restr,op_prol,map,info) + use psb_base_mod + implicit none + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + type(psb_desc_type), intent(in), target :: desc_a, desc_ac + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(psb_sspmat_type), intent(inout) :: op_prol, op_restr + type(psb_slinmap_type), intent(out) :: map + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_parmatch_aggregator_bld_map' + + call psb_erractionsave(err_act) + ! + ! Copy the prolongation/restriction matrices into the descriptor map. + ! op_restr => PR^T i.e. restriction operator + ! op_prol => PR i.e. prolongation operator + ! + ! For parmatch have an explicit copy of the descriptors + ! + if (allocated(ag%desc_ax)) then +!!$ write(0,*) 'Building linmap with ag%desc_ax ',ag%desc_ax%get_local_rows(),ag%desc_ax%get_local_cols(),& +!!$ & desc_ac%get_local_rows(),desc_ac%get_local_cols() + map = psb_linmap(psb_map_gen_linear_,ag%desc_ax,& + & desc_ac,op_restr,op_prol,ilaggr,nlaggr) + else + map = psb_linmap(psb_map_gen_linear_,desc_a,& + & desc_ac,op_restr,op_prol,ilaggr,nlaggr) + end if + if(info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='sp_Free') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + end subroutine amg_s_parmatch_aggregator_bld_map + +end module amg_s_parmatch_aggregator_mod diff --git a/amgprec/impl/aggregator/Makefile b/amgprec/impl/aggregator/Makefile index 16b2a2f8..257fb4c4 100644 --- a/amgprec/impl/aggregator/Makefile +++ b/amgprec/impl/aggregator/Makefile @@ -1,12 +1,13 @@ include ../../../Make.inc LIBDIR=../../../lib INCDIR=../../../include -MODDIR=../../../modules +MODDIR=../../../modules HERE=../.. FINCLUDES=$(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(PSBLAS_INCLUDES) +CXXINCLUDES=$(FMFLAG)$(HERE) $(FMFLAG)$(INCDIR) $(FMFLAG)/. -#CINCLUDES= -I${SUPERLU_INCDIR} -I${HSL_INCDIR} -I${SPRAL_INCDIR} -I/home/users/pasqua/Ambra/BootCMatch/include -lBCM -L/home/users/pasqua/Ambra/BootCMatch/lib -lm +#CINCLUDES= -I${SUPERLU_INCDIR} -I${HSL_INCDIR} -I${SPRAL_INCDIR} -I/home/users/pasqua/Ambra/BootCMatch/include -lBCM -L/home/users/pasqua/Ambra/BootCMatch/lib -lm OBJS= \ amg_s_dec_aggregator_mat_asb.o \ @@ -40,16 +41,37 @@ amg_z_symdec_aggregator_tprol.o \ amg_z_map_to_tprol.o amg_z_soc1_map_bld.o amg_z_soc2_map_bld.o\ amg_z_rap.o amg_z_ptap_bld.o \ amg_zaggrmat_minnrg_bld.o\ -amg_zaggrmat_nosmth_bld.o amg_zaggrmat_smth_bld.o +amg_zaggrmat_nosmth_bld.o amg_zaggrmat_smth_bld.o \ +amg_d_parmatch_aggregator_mat_bld.o \ +amg_d_parmatch_aggregator_mat_asb.o \ +amg_d_parmatch_aggregator_inner_mat_asb.o \ +amg_d_parmatch_aggregator_tprol.o \ +amg_d_parmatch_spmm_bld.o \ +amg_d_parmatch_spmm_bld_ov.o \ +amg_d_parmatch_unsmth_bld.o \ +amg_d_parmatch_smth_bld.o \ +amg_d_parmatch_spmm_bld_inner.o \ +amg_s_parmatch_aggregator_mat_bld.o \ +amg_s_parmatch_aggregator_mat_asb.o \ +amg_s_parmatch_aggregator_inner_mat_asb.o \ +amg_s_parmatch_aggregator_tprol.o \ +amg_s_parmatch_spmm_bld.o \ +amg_s_parmatch_spmm_bld_ov.o \ +amg_s_parmatch_unsmth_bld.o \ +amg_s_parmatch_smth_bld.o \ +amg_s_parmatch_spmm_bld_inner.o + +MPCOBJS=MatchBoxPC.o \ +algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.o LIBNAME=libamg_prec.a -lib: $(OBJS) +lib: $(OBJS) $(MPCOBJS) $(AR) $(HERE)/$(LIBNAME) $(OBJS) $(RANLIB) $(HERE)/$(LIBNAME) -mpobjs: +mpobjs: (make $(MPFOBJS) F90="$(MPF90)" F90COPT="$(F90COPT)") (make $(MPCOBJS) CC="$(MPCC)" CCOPT="$(CCOPT)") diff --git a/amgprec/impl/aggregator/MatchBoxPC.cpp b/amgprec/impl/aggregator/MatchBoxPC.cpp new file mode 100644 index 00000000..95aa4ef7 --- /dev/null +++ b/amgprec/impl/aggregator/MatchBoxPC.cpp @@ -0,0 +1,97 @@ +// *********************************************************************** +// +// MatchboxP: A C++ library for approximate weighted matching +// Mahantesh Halappanavar (hala@pnnl.gov) +// Pacific Northwest National Laboratory +// +// *********************************************************************** +// +// Copyright (2021) Battelle Memorial Institute +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// ************************************************************************ +#include +#include +#include + +#include "MatchBoxPC.h" +#ifdef __cplusplus +extern "C" { +#endif + + +void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt* verLocPtr, MilanLongInt* verLocInd, MilanReal* edgeLocWeight, + MilanLongInt* verDistance, + MilanLongInt* Mate, + MilanInt myRank, MilanInt numProcs, MilanInt icomm, + MilanLongInt* msgIndSent, MilanLongInt* msgActualSent, MilanReal* msgPercent, + MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time, + MilanLongInt* ph1_card, MilanLongInt* ph2_card ) { + MPI_Comm C_comm=MPI_Comm_f2c(icomm); +#ifdef DEBUG + fprintf(stderr,"MatchBoxPC: rank %d nlver %ld nledge %ld [ %ld %ld ]\n", + myRank,NLVer, NLEdge,verDistance[0],verDistance[1]); +#endif + dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(NLVer, NLEdge, + verLocPtr, verLocInd, edgeLocWeight, + verDistance, Mate, + myRank, numProcs, C_comm, + msgIndSent, msgActualSent, msgPercent, + ph0_time, ph1_time, ph2_time, + ph1_card, ph2_card ); +} + +void sMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt* verLocPtr, MilanLongInt* verLocInd, MilanFloat* edgeLocWeight, + MilanLongInt* verDistance, + MilanLongInt* Mate, + MilanInt myRank, MilanInt numProcs, MilanInt icomm, + MilanLongInt* msgIndSent, MilanLongInt* msgActualSent, MilanReal* msgPercent, + MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time, + MilanLongInt* ph1_card, MilanLongInt* ph2_card ) { + MPI_Comm C_comm=MPI_Comm_f2c(icomm); +#ifdef DEBUG + fprintf(stderr,"MatchBoxPC: rank %d nlver %ld nledge %ld [ %ld %ld ]\n", + myRank,NLVer, NLEdge,verDistance[0],verDistance[1]); +#endif + salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(NLVer, NLEdge, + verLocPtr, verLocInd, edgeLocWeight, + verDistance, Mate, + myRank, numProcs, C_comm, + msgIndSent, msgActualSent, msgPercent, + ph0_time, ph1_time, ph2_time, + ph1_card, ph2_card ); +} + +#ifdef __cplusplus +} +#endif diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h new file mode 100644 index 00000000..a353a486 --- /dev/null +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -0,0 +1,178 @@ +// *********************************************************************** +// +// MatchboxP: A C++ library for approximate weighted matching +// Mahantesh Halappanavar (hala@pnnl.gov) +// Pacific Northwest National Laboratory +// +// *********************************************************************** +// +// Copyright (2021) Battelle Memorial Institute +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// ************************************************************************ + +/* + Feature: Message Aggregation: + Request messages from the Initialization phase are aggregated and only + one message per processor is sent. + Data structures for message aggregation are similar to the data structures + for storing compressed matrices/graphs - Pointers + actual messages. + Assumption: processor indices are numbered from zero to P-1. + */ + +/* Special feature: Mate is proportional to the size of local number of verices */ + +#ifndef _matchboxpC_H_ +#define _matchboxpC_H_ +//Turn on a lot of debugging information with this switch: +//#define PRINT_DEBUG_INFO_ +#include +#include +#include +#include +#include +// #include "matchboxp.h" +#include "primitiveDataTypeDefinitions.h" +#include "dataStrStaticQueue.h" + +using namespace std; + +#ifdef __cplusplus +extern "C" { +#endif + +#define MilanMpiLongInt MPI_LONG_LONG + +#ifndef _primitiveDataType_Definition_ +#define _primitiveDataType_Definition_ + //Regular integer: + #ifndef INTEGER_H + #define INTEGER_H + typedef int32_t MilanInt; + #endif + + //Regular long integer: + #ifndef LONG_INT_H + #define LONG_INT_H + #ifdef BIT64 + typedef int64_t MilanLongInt; + typedef MPI_LONG MilanMpiLongInt; + #else + typedef int32_t MilanLongInt; + typedef MPI_INT MilanMpiLongInt; + #endif + #endif + + //Regular boolean + #ifndef BOOL_H + #define BOOL_H + typedef bool MilanBool; + #endif + + //Regular double and absolute value computation: + #ifndef REAL_H + #define REAL_H + typedef double MilanReal; + typedef MPI_DOUBLE MilanMpiReal; + inline MilanReal MilanAbs(MilanReal value) + { + return fabs(value); + } + #endif + + //Regular float and absolute value computation: + #ifndef FLOAT_H + #define FLOAT_H + typedef float MilanFloat; + typedef MPI_FLOAT MilanMpiFloat; + inline MilanFloat MilanAbsFloat(MilanFloat value) + { + return fabs(value); + } + #endif + + //// Define the limits: + #ifndef LIMITS_H + #define LIMITS_H + //Integer Maximum and Minimum: + // #define MilanIntMax INT_MAX + // #define MilanIntMin INT_MIN + #define MilanIntMax INT32_MAX + #define MilanIntMin INT32_MIN + + #ifdef BIT64 + #define MilanLongIntMax INT64_MAX + #define MilanLongIntMin -INT64_MAX + #else + #define MilanLongIntMax INT32_MAX + #define MilanLongIntMin -INT32_MAX + #endif + + #endif + + // +INFINITY + const double PLUS_INFINITY = numeric_limits::infinity(); + const double MINUS_INFINITY = -PLUS_INFINITY; + //#define MilanRealMax LDBL_MAX + #define MilanRealMax PLUS_INFINITY + #define MilanRealMin MINUS_INFINITY +#endif + +//Function of find the owner of a ghost vertex using binary search: +inline MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, + MilanInt myRank, MilanInt numProcs); + + void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC +( + MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt* verLocPtr, MilanLongInt* verLocInd, MilanReal* edgeLocWeight, + MilanLongInt* verDistance, + MilanLongInt* Mate, + MilanInt myRank, MilanInt numProcs, MPI_Comm comm, + MilanLongInt* msgIndSent, MilanLongInt* msgActualSent, MilanReal* msgPercent, + MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time, + MilanLongInt* ph1_card, MilanLongInt* ph2_card ); + + void salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC +( +MilanLongInt NLVer, MilanLongInt NLEdge, +MilanLongInt* verLocPtr, MilanLongInt* verLocInd, MilanFloat* edgeLocWeight, +MilanLongInt* verDistance, +MilanLongInt* Mate, +MilanInt myRank, MilanInt numProcs, MPI_Comm comm, +MilanLongInt* msgIndSent, MilanLongInt* msgActualSent, MilanReal* msgPercent, +MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time, +MilanLongInt* ph1_card, MilanLongInt* ph2_card ); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp new file mode 100644 index 00000000..c1fc07c6 --- /dev/null +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp @@ -0,0 +1,2574 @@ +#include "MatchBoxPC.h" +// *********************************************************************** +// +// MatchboxP: A C++ library for approximate weighted matching +// Mahantesh Halappanavar (hala@pnnl.gov) +// Pacific Northwest National Laboratory +// +// *********************************************************************** +// +// Copyright (2021) Battelle Memorial Institute +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// ************************************************************************ +////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////// DOMINATING EDGES MODEL /////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////// +/* Function : algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate() + * + * Date : New update: Feb 17, 2019, Richland, Washington. + * Date : Original development: May 17, 2009, E&CS Bldg. + * + * Purpose : Compute Approximate Maximum Weight Matching in Linear Time + * + * Args : inputMatrix - instance of Compressed-Col format of Matrix + * Mate - The Mate array + * + * Returns : By Value: (void) + * By Reference: Mate + * + * Comments : 1/2 Approx Algorithm. Picks the locally available heaviest edge. + * Assumption: The Mate Array is empty. + */ + +/* + NLVer = #of vertices, NLEdge = #of edges + CSR/CSC/Compressed format: verLocPtr = Pointer, verLocInd = Index, edgeLocWeight = edge weights (positive real numbers) + verDistance = A vector of size |P|+1 containing the cumulative number of vertices per process + Mate = A vector of size |V_p| (local subgraph) to store the output (matching) + MPI: myRank, numProcs, comm, + Statistics: msgIndSent, msgActualSent, msgPercent : Size: |P| number of processes in the comm-world + Statistics: ph0_time, ph1_time, ph2_time: Runtimes + Statistics: ph1_card, ph2_card : Size: |P| number of processes in the comm-world (number of matched edges in Phase 1 and Phase 2) + */ + +//MPI type map +template MPI_Datatype TypeMap(); +template<> inline MPI_Datatype TypeMap() { return MPI_LONG_LONG; } +template<> inline MPI_Datatype TypeMap() { return MPI_INT; } +template<> inline MPI_Datatype TypeMap() { return MPI_DOUBLE; } +template<> inline MPI_Datatype TypeMap() { return MPI_FLOAT; } + +// DOUBLE PRECISION VERSION +//WARNING: The vertex block on a given rank is contiguous +void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC( + MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt* verLocPtr, MilanLongInt* verLocInd, + MilanReal* edgeLocWeight, + MilanLongInt* verDistance, + MilanLongInt* Mate, + MilanInt myRank, MilanInt numProcs, MPI_Comm comm, + MilanLongInt* msgIndSent, MilanLongInt* msgActualSent, + MilanReal* msgPercent, + MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time, + MilanLongInt* ph1_card, MilanLongInt* ph2_card ) { +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<::iterator verLocPtr = inputSubGraph.getVerPtr_b(); + //vector::iterator verLocInd = inputSubGraph.getVerInd_b(); + //vector::iterator edgeLocWeight = inputSubGraph.getEdgeWt_b(); + +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< Ghost2LocalMap; //Map each ghost vertex to a local vertex + // index that starts with zero to |Vg| - 1 + map::iterator storedAlready; + vector Counter; //Store the edge count for each ghost vertex + MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe=0; //Number of Ghost vertices +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< EndIndex) ) { //Find a ghost + storedAlready = Ghost2LocalMap.find( insertMe ); + if ( storedAlready != Ghost2LocalMap.end() ) { //Has already been added + //cout<<"Process "<first<<" - "<second<second]++; //Increment the counter + numGhostEdges++; + } else { //Insert an entry for the ghost: + //cout<<"Process "<second<<" - "<first<<" : "<second]< verGhostPtr, verGhostInd, tempCounter; + //Mate array for ghost vertices: + vector GMate; //Proportional to the number of ghost vertices + try { + verGhostPtr.reserve(numGhostVertices+1); //Pointer Vector + tempCounter.reserve(numGhostVertices); //Pointer Vector + verGhostInd.reserve(numGhostEdges); //Index Vector + GMate.reserve(numGhostVertices); //Ghost Mate Vector + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + //Initialize the Vectors: + verGhostPtr.resize(numGhostVertices+1, 0); //Pointer Vector + tempCounter.resize(numGhostVertices, 0); //Temporary Counter + verGhostInd.resize(numGhostEdges, -1); //Index Vector + GMate.resize(numGhostVertices, -1); //Temporary Counter + verGhostPtr[0] = 0; //The first value +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) + cout< EndIndex) ) { //Find a ghost + insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert + verGhostInd[insertMe] = v+StartIndex; //Add the adjacency + tempCounter[Ghost2LocalMap[w]]++; //Increment the counter + } //End of if((w < StartIndex) || (w > EndIndex)) + } //End of for(k) + } //End of for (v) + tempCounter.clear(); //Do not need this any more +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< Message; // [ u, v, message_type ] + Message.resize(3,-1); + const MilanLongInt REQUEST = 1; + const MilanLongInt SUCCESS = 2; + const MilanLongInt FAILURE = 3; + const MilanLongInt SIZEINFO = 4; + MilanLongInt message_type = 0; + //Data structures for Message Bundling: + //Although up to two messages can be sent along any cross edge, + //only one message will be sent in the initialization phase - + //one of: REQUEST/FAILURE/SUCCESS + vector QLocalVtx, QGhostVtx, QMsgType; + vector QOwner; // Changed by Fabio to be an integer, addresses needs to be integers! + vector PCounter; + MilanLongInt NumMessagesBundled=0; + MilanInt ghostOwner=0; // Changed by Fabio to be an integer, addresses needs to be integers! + try { + QLocalVtx.reserve(numGhostEdges); //Local Vertex + QGhostVtx.reserve(numGhostEdges); //Ghost Vertex + QMsgType.reserve(numGhostEdges); //Message Type (Request/Failure) + QOwner.reserve(numGhostEdges); //Owner of the ghost: COmpute once and use later + PCounter.reserve( numProcs); //Store How many messages will be sent to each processor + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + PCounter.resize(numProcs, 0); //Only initialize the counter variable + +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< candidateMate; + try { + candidateMate.reserve(NLVer+numGhostVertices); //Dominating edge + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + //Initialize the Vectors: + candidateMate.resize(NLVer+numGhostVertices, -1); + //The Queue Data Structure for the Dominating Set: + staticQueue U(NLVer+numGhostVertices); //Max size is the number of vertices +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<EndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[verLocInd[k]]] >= 0 )// Already matched + continue; + } else { //A local vertex + if( Mate[verLocInd[k]-StartIndex] >= 0 ) // Already matched + continue; + } + + if( (edgeLocWeight[k] > heaviestEdgeWt) || + ((edgeLocWeight[k] == heaviestEdgeWt)&&(w < verLocInd[k])) ) { + heaviestEdgeWt = edgeLocWeight[k]; + w = verLocInd[k]; + } + } //End of for loop + candidateMate[v] = w; + + //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= 0 ) { + if ( (w < StartIndex) || (w > EndIndex) ) { //w is a ghost vertex + //Build the Message Packet: + //Message[0] = v+StartIndex; //LOCAL + //Message[1] = w; //GHOST + //Message[2] = REQUEST; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[w]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v) + } //End of if CandidateMate[w] = v + } //End of if a Ghost Vertex + else { // w is a local vertex + if ( candidateMate[w-StartIndex] == (v+StartIndex) ) { + Mate[v] = w; //v is local + Mate[w-StartIndex] = v+StartIndex; //w is local + //Q.push_back(u); + U.push_back(v+StartIndex); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<=0) + else { + adj11 = verLocPtr[v]; + adj12 = verLocPtr[v+1]; + for( k1 = adj11; k1 < adj12; k1++ ) { + w = verLocInd[k1]; + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + //Message[0] = v+StartIndex; //LOCAL + //Message[1] = w; //GHOST + //Message[2] = FAILURE; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= StartIndex) && (u <= EndIndex) ) { //Process Only the Local Vertices + //Get the Adjacency list for u + adj1 = verLocPtr[u-StartIndex]; //Pointer + adj2 = verLocPtr[u-StartIndex+1]; + for( k = adj1; k < adj2; k++ ) { + v = verLocInd[k]; + if ( (v >= StartIndex) && (v <= EndIndex) ) { //If Local Vertex: + if ( (vEndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[v]] >= 0 )// Already matched + continue; + } else { //A local vertex + if( Mate[v-StartIndex] >= 0 ) // Already matched + continue; + } //End of else + +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<EndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[verLocInd[k1]]] >= 0 )// Already matched + continue; + } else { //A local vertex + if( Mate[verLocInd[k1]-StartIndex] >= 0 ) // Already matched + continue; + } + if( (edgeLocWeight[k1] > heaviestEdgeWt) || + ((edgeLocWeight[k1] == heaviestEdgeWt)&&(w < verLocInd[k1])) ) { + heaviestEdgeWt = edgeLocWeight[k1]; + w = verLocInd[k1]; + } + } //End of for loop + candidateMate[v-StartIndex] = w; + //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= 0 ) { + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + //Message[0] = v; //LOCAL + //Message[1] = w; //GHOST + //Message[2] = REQUEST; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[w]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,w) + } //End of if CandidateMate[w] = v + } //End of if a Ghost Vertex + else { //w is a local vertex + if ( candidateMate[w-StartIndex] == v ) { + Mate[v-StartIndex] = w; //v is a local vertex + Mate[w-StartIndex] = v; //w is a local vertex + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<=0) + else { + adj11 = verLocPtr[v-StartIndex]; + adj12 = verLocPtr[v-StartIndex+1]; + for( k1 = adj11; k1 < adj12; k1++ ) { + w = verLocInd[k1]; + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + //Message[0] = v; //LOCAL + //Message[1] = w; //GHOST + //Message[2] = FAILURE; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= StartIndex) && (v <= EndIndex) ) //If Local Vertex: + else { //Neighbor is a ghost vertex + if ( candidateMate[NLVer+Ghost2LocalMap[v]] == u ) + candidateMate[NLVer+Ghost2LocalMap[v]] = -1; + if ( v != Mate[u-StartIndex] ) { //u is local + //Build the Message Packet: + //Message[0] = u; //LOCAL + //Message[1] = v; //GHOST + //Message[2] = SUCCESS; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= StartIndex) && (u <= EndIndex) ) //Process Only If a Local Vertex + } //End of while ( /*!Q.empty()*/ !U.empty() ) + ///////////////////////// END OF PROCESS MATCHED VERTICES ///////////////////////// +#ifdef DEBUG_HANG_ + if (myRank == 0) cout<<"\n("< PCumulative, PMessageBundle, PSizeInfoMessages; + MilanLongInt myIndex=0; + try { + PMessageBundle.reserve(NumMessagesBundled*3); //Three integers per message + PCumulative.reserve(numProcs+1); //Similar to Row Pointer vector in CSR data structure + PSizeInfoMessages.reserve(numProcs*3); //Buffer to hold the Size info message packets + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + PMessageBundle.resize(NumMessagesBundled*3, -1);//Initialize + PCumulative.resize(numProcs+1, 0); //Only initialize the counter variable + PSizeInfoMessages.resize(numProcs*3, 0); + + for (MilanInt i=0; i SRequest; //Requests that are used for each send message + vector SStatus; //Status of sent messages, used in MPI_Wait + MilanLongInt MessageIndex=0; //Pointer for current message + try { + SRequest.reserve(numProcs*2); //At most two messages per processor + SStatus.reserve(numProcs*2);//At most two messages per processor + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearchImmediateSend: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + MPI_Request myReq; //A sample request + SRequest.resize(numProcs*2,myReq); + MPI_Status myStat; //A sample status + SStatus.resize(numProcs*2,myStat); + //Send the Messages + for (MilanInt i=0; i 0 ) { //Send only if it is a nonempty packet + MPI_Isend(&PSizeInfoMessages[i*3+0], 3, TypeMap(), i, ComputeTag, comm, &SRequest[MessageIndex]); + msgActual++; + MessageIndex++; + //Now Send the message with the data packet: +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), i, BundleTag, comm, &SRequest[MessageIndex]); + MessageIndex++; + } //End of if size > 0 + } + //Free up temporary memory: + PCumulative.clear(); + QLocalVtx.clear(); + QGhostVtx.clear(); + QMsgType.clear(); + QOwner.clear(); + PCounter.clear(); +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), comm, &OneMessageSize); //Size of one message packet + //How many messages to send? + //Potentially three kinds of messages will be sent/received: + //Request, Success, Failure. + //But only two will be sent from a given processor. + //Substract the number of messages that have already been sent as bundled messages: + MilanLongInt numMessagesToSend = numGhostEdges*2 - NumMessagesBundled; + MilanInt BufferSize = (OneMessageSize+MPI_BSEND_OVERHEAD)*numMessagesToSend; + + MilanLongInt *Buffer=0; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Buffer = (MilanLongInt *) malloc(BufferSize); //Allocate memory + if ( Buffer == 0 ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; + cout<<"Not enough memory to allocate for send buffer on process "< ReceiveBuffer; + MilanLongInt bundleSize=0, bundleCounter=0; + try { + ReceiveBuffer.reserve(numGhostEdges*2*3); //Three integers per cross edge + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + while ( true ) { +#ifdef DEBUG_HANG_ + if (myRank == 0) cout<<"\n("<= StartIndex) && (u <= EndIndex) ) { //Process Only If a Local Vertex + //Get the Adjacency list for u + adj1 = verLocPtr[u-StartIndex]; //Pointer + adj2 = verLocPtr[u-StartIndex+1]; + for( k = adj1; k < adj2; k++ ) { + v = verLocInd[k]; + if ( (v >= StartIndex) && (v <= EndIndex) ) { //v is a Local Vertex: + if ( Mate[v-StartIndex] >= 0 ) // v is already matched + continue; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<EndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[verLocInd[k1]]] >= 0 )// Already matched + continue; + } + else { //A local vertex + if( Mate[verLocInd[k1]-StartIndex] >= 0 ) // Already matched + continue; + } + + if( (edgeLocWeight[k1] > heaviestEdgeWt) || + ((edgeLocWeight[k1] == heaviestEdgeWt)&&(w < verLocInd[k1])) ) { + heaviestEdgeWt = edgeLocWeight[k1]; + w = verLocInd[k1]; + } + } //End of for loop + candidateMate[v-StartIndex] = w; + //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= 0 ) { + if ( (w < StartIndex) || (w > EndIndex) ) { //w is a ghost + //Build the Message Packet: + Message[0] = v; //LOCAL + Message[1] = w; //GHOST + Message[2] = REQUEST; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; + if ( candidateMate[NLVer+Ghost2LocalMap[w]] == v ) { + Mate[v-StartIndex] = w; //v is local + GMate[Ghost2LocalMap[w]] = v; //w is ghost + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[w]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,w) + } //End of if CandidateMate[w] = v + } //End of if a Ghost Vertex + else { //w is a local vertex + if ( candidateMate[w-StartIndex] == v ) { + Mate[v-StartIndex] = w; //v is local + Mate[w-StartIndex] = v; //w is local + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<=0) + else { //no dominating edge found: w == -1 + adj11 = verLocPtr[v-StartIndex]; + adj12 = verLocPtr[v-StartIndex+1]; + for( k1 = adj11; k1 < adj12; k1++ ) { + w = verLocInd[k1]; + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + Message[0] = v; //LOCAL + Message[1] = w; //GHOST + Message[2] = FAILURE; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; + } //End of if(GHOST) + } //End of for loop + } // End of Else: w == -1 + //End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } //End of If (candidateMate[v-StartIndex] == u) + } //End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: + else { //Neighbor v is a ghost vertex + if ( candidateMate[NLVer+Ghost2LocalMap[v]] == u ) + candidateMate[NLVer+Ghost2LocalMap[v]] = -1; + if ( v != Mate[u-StartIndex] ) { //u is a local vertex + //Build the Message Packet: + Message[0] = u; //LOCAL + Message[1] = v; //GHOST + Message[2] = SUCCESS; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; +#ifdef DEBUG_GHOST_ + if ((uEndIndex)) { + cout<<"\n("<= StartIndex) && (u <= EndIndex) ) //Process Only If a Local Vertex + } //End of while ( /*!Q.empty()*/ !U.empty() ) + ///////////////////////// END OF PROCESS MATCHED VERTICES ///////////////////////// + + //// BREAK IF NO MESSAGES EXPECTED ///////// +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), MPI_ANY_SOURCE, ComputeTag, comm, &computeStatus); + if (error_codeC != MPI_SUCCESS ) { + MPI_Error_string(error_codeC, error_message, &message_length); + cout<<"\n*Error in call to MPI_Receive on Slave: "<(), Sender, BundleTag, comm, &computeStatus); + if (error_codeC != MPI_SUCCESS ) { + MPI_Error_string(error_codeC, error_message, &message_length); + cout<<"\n*Error in call to MPI_Receive on processor "<NLVer)) { + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[u]] = Counter[Ghost2LocalMap[u]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[u]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,u) + } //End of if ( candidateMate[v-StartIndex] == u )e + } //End of if ( Mate[v] == -1 ) + } //End of REQUEST + else { //CASE II: SUCCESS + if ( message_type == SUCCESS ) { +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[u]] = Counter[Ghost2LocalMap[u]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[u]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,u) +#ifdef DEBUG_GHOST_ + if ((v<0)||(vNLVer)) { + cout<<"\n("<EndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[verLocInd[k1]]] >= 0 )// Already matched + continue; + } + else { //A local vertex + if( Mate[verLocInd[k1]-StartIndex] >= 0 ) // Already matched + continue; + } + + if( (edgeLocWeight[k1] > heaviestEdgeWt) || + ((edgeLocWeight[k1] == heaviestEdgeWt)&&(w < verLocInd[k1])) ) { + heaviestEdgeWt = edgeLocWeight[k1]; + w = verLocInd[k1]; + } + } //End of for loop + candidateMate[v-StartIndex] = w; + //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= 0 ) { + if ( (w < StartIndex) || (w > EndIndex) ) { //w is a ghost + //Build the Message Packet: + Message[0] = v; //LOCAL + Message[1] = w; //GHOST + Message[2] = REQUEST; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; + if ( candidateMate[NLVer+Ghost2LocalMap[w]] == v ) { + Mate[v-StartIndex] = w; //v is local + GMate[Ghost2LocalMap[w]] = v; //w is ghost + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[w]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,w) + } //End of if CandidateMate[w] = v + } //End of if a Ghost Vertex + else { //w is a local vertex + if ( candidateMate[w-StartIndex] == v ) { + Mate[v-StartIndex] = w; //v is local + Mate[w-StartIndex] = v; //w is local + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<=0) + else { //No dominant edge found + adj11 = verLocPtr[v-StartIndex]; + adj12 = verLocPtr[v-StartIndex+1]; + for( k1 = adj11; k1 < adj12; k1++ ) { + w = verLocInd[k1]; + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + Message[0] = v; //LOCAL + Message[1] = w; //GHOST + Message[2] = FAILURE; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; + } //End of if(GHOST) + } //End of for loop + } // End of Else: w == -1 + //End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } //End of if ( candidateMate[v-StartIndex] == u ) + } //End of if ( Mate[v] == -1 ) + } //End of if ( message_type == SUCCESS ) + else { //CASE III: FAILURE +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[u]] = Counter[Ghost2LocalMap[u]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[u]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,u) + } //End of else: CASE III + } //End of else: CASE I + } //End of if (!MsgQ.empty()) + ///////////////////////// END OF PROCESS MESSAGES ///////////////////////////////// +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + MPI_Buffer_detach(&Buffer, &BufferSize); //Detach the Buffer + free(Buffer); //Free the memory that was allocated + } + finishTime = MPI_Wtime(); + *ph2_time = finishTime-startTime; //Time taken for Phase-2 + *ph2_card = myCard ; //Cardinality at the end of Phase-2 + +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0) { + *msgPercent = ((double)NumMessagesBundled/(double)(msgInd))*100.0; + } else { + *msgPercent = 0; + } +#ifdef DEBUG_HANG_ + if (myRank == 0) cout<<"\n("<::iterator verLocPtr = inputSubGraph.getVerPtr_b(); + //vector::iterator verLocInd = inputSubGraph.getVerInd_b(); + //vector::iterator edgeLocWeight = inputSubGraph.getEdgeWt_b(); + +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< Ghost2LocalMap; //Map each ghost vertex to a local vertex + // index that starts with zero to |Vg| - 1 + map::iterator storedAlready; + vector Counter; //Store the edge count for each ghost vertex + MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe=0; //Number of Ghost vertices +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< EndIndex) ) { //Find a ghost + storedAlready = Ghost2LocalMap.find( insertMe ); + if ( storedAlready != Ghost2LocalMap.end() ) { //Has already been added + //cout<<"Process "<first<<" - "<second<second]++; //Increment the counter + numGhostEdges++; + } else { //Insert an entry for the ghost: + //cout<<"Process "<second<<" - "<first<<" : "<second]< verGhostPtr, verGhostInd, tempCounter; + //Mate array for ghost vertices: + vector GMate; //Proportional to the number of ghost vertices + try { + verGhostPtr.reserve(numGhostVertices+1); //Pointer Vector + tempCounter.reserve(numGhostVertices); //Pointer Vector + verGhostInd.reserve(numGhostEdges); //Index Vector + GMate.reserve(numGhostVertices); //Ghost Mate Vector + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + //Initialize the Vectors: + verGhostPtr.resize(numGhostVertices+1, 0); //Pointer Vector + tempCounter.resize(numGhostVertices, 0); //Temporary Counter + verGhostInd.resize(numGhostEdges, -1); //Index Vector + GMate.resize(numGhostVertices, -1); //Temporary Counter + verGhostPtr[0] = 0; //The first value +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) + cout< EndIndex) ) { //Find a ghost + insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert + verGhostInd[insertMe] = v+StartIndex; //Add the adjacency + tempCounter[Ghost2LocalMap[w]]++; //Increment the counter + } //End of if((w < StartIndex) || (w > EndIndex)) + } //End of for(k) + } //End of for (v) + tempCounter.clear(); //Do not need this any more +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< Message; // [ u, v, message_type ] + Message.resize(3,-1); + const MilanLongInt REQUEST = 1; + const MilanLongInt SUCCESS = 2; + const MilanLongInt FAILURE = 3; + const MilanLongInt SIZEINFO = 4; + MilanLongInt message_type = 0; + //Data structures for Message Bundling: + //Although up to two messages can be sent along any cross edge, + //only one message will be sent in the initialization phase - + //one of: REQUEST/FAILURE/SUCCESS + vector QLocalVtx, QGhostVtx, QMsgType; + vector QOwner; // Changed by Fabio to be an integer, addresses needs to be integers! + vector PCounter; + MilanLongInt NumMessagesBundled=0; + MilanInt ghostOwner=0; // Changed by Fabio to be an integer, addresses needs to be integers! + try { + QLocalVtx.reserve(numGhostEdges); //Local Vertex + QGhostVtx.reserve(numGhostEdges); //Ghost Vertex + QMsgType.reserve(numGhostEdges); //Message Type (Request/Failure) + QOwner.reserve(numGhostEdges); //Owner of the ghost: COmpute once and use later + PCounter.reserve( numProcs); //Store How many messages will be sent to each processor + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + PCounter.resize(numProcs, 0); //Only initialize the counter variable + +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< candidateMate; + try { + candidateMate.reserve(NLVer+numGhostVertices); //Dominating edge + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + //Initialize the Vectors: + candidateMate.resize(NLVer+numGhostVertices, -1); + //The Queue Data Structure for the Dominating Set: + staticQueue U(NLVer+numGhostVertices); //Max size is the number of vertices +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<EndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[verLocInd[k]]] >= 0 )// Already matched + continue; + } else { //A local vertex + if( Mate[verLocInd[k]-StartIndex] >= 0 ) // Already matched + continue; + } + + if( (edgeLocWeight[k] > heaviestEdgeWt) || + ((edgeLocWeight[k] == heaviestEdgeWt)&&(w < verLocInd[k])) ) { + heaviestEdgeWt = edgeLocWeight[k]; + w = verLocInd[k]; + } + } //End of for loop + candidateMate[v] = w; + + //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= 0 ) { + if ( (w < StartIndex) || (w > EndIndex) ) { //w is a ghost vertex + //Build the Message Packet: + //Message[0] = v+StartIndex; //LOCAL + //Message[1] = w; //GHOST + //Message[2] = REQUEST; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[w]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v) + } //End of if CandidateMate[w] = v + } //End of if a Ghost Vertex + else { // w is a local vertex + if ( candidateMate[w-StartIndex] == (v+StartIndex) ) { + Mate[v] = w; //v is local + Mate[w-StartIndex] = v+StartIndex; //w is local + //Q.push_back(u); + U.push_back(v+StartIndex); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<=0) + else { + adj11 = verLocPtr[v]; + adj12 = verLocPtr[v+1]; + for( k1 = adj11; k1 < adj12; k1++ ) { + w = verLocInd[k1]; + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + //Message[0] = v+StartIndex; //LOCAL + //Message[1] = w; //GHOST + //Message[2] = FAILURE; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= StartIndex) && (u <= EndIndex) ) { //Process Only the Local Vertices + //Get the Adjacency list for u + adj1 = verLocPtr[u-StartIndex]; //Pointer + adj2 = verLocPtr[u-StartIndex+1]; + for( k = adj1; k < adj2; k++ ) { + v = verLocInd[k]; + if ( (v >= StartIndex) && (v <= EndIndex) ) { //If Local Vertex: + if ( (vEndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[v]] >= 0 )// Already matched + continue; + } else { //A local vertex + if( Mate[v-StartIndex] >= 0 ) // Already matched + continue; + } //End of else + +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<EndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[verLocInd[k1]]] >= 0 )// Already matched + continue; + } else { //A local vertex + if( Mate[verLocInd[k1]-StartIndex] >= 0 ) // Already matched + continue; + } + if( (edgeLocWeight[k1] > heaviestEdgeWt) || + ((edgeLocWeight[k1] == heaviestEdgeWt)&&(w < verLocInd[k1])) ) { + heaviestEdgeWt = edgeLocWeight[k1]; + w = verLocInd[k1]; + } + } //End of for loop + candidateMate[v-StartIndex] = w; + //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= 0 ) { + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + //Message[0] = v; //LOCAL + //Message[1] = w; //GHOST + //Message[2] = REQUEST; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[w]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,w) + } //End of if CandidateMate[w] = v + } //End of if a Ghost Vertex + else { //w is a local vertex + if ( candidateMate[w-StartIndex] == v ) { + Mate[v-StartIndex] = w; //v is a local vertex + Mate[w-StartIndex] = v; //w is a local vertex + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<=0) + else { + adj11 = verLocPtr[v-StartIndex]; + adj12 = verLocPtr[v-StartIndex+1]; + for( k1 = adj11; k1 < adj12; k1++ ) { + w = verLocInd[k1]; + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + //Message[0] = v; //LOCAL + //Message[1] = w; //GHOST + //Message[2] = FAILURE; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= StartIndex) && (v <= EndIndex) ) //If Local Vertex: + else { //Neighbor is a ghost vertex + if ( candidateMate[NLVer+Ghost2LocalMap[v]] == u ) + candidateMate[NLVer+Ghost2LocalMap[v]] = -1; + if ( v != Mate[u-StartIndex] ) { //u is local + //Build the Message Packet: + //Message[0] = u; //LOCAL + //Message[1] = v; //GHOST + //Message[2] = SUCCESS; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= StartIndex) && (u <= EndIndex) ) //Process Only If a Local Vertex + } //End of while ( /*!Q.empty()*/ !U.empty() ) + ///////////////////////// END OF PROCESS MATCHED VERTICES ///////////////////////// +#ifdef DEBUG_HANG_ + if (myRank == 0) cout<<"\n("< PCumulative, PMessageBundle, PSizeInfoMessages; + MilanLongInt myIndex=0; + try { + PMessageBundle.reserve(NumMessagesBundled*3); //Three integers per message + PCumulative.reserve(numProcs+1); //Similar to Row Pointer vector in CSR data structure + PSizeInfoMessages.reserve(numProcs*3); //Buffer to hold the Size info message packets + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + PMessageBundle.resize(NumMessagesBundled*3, -1);//Initialize + PCumulative.resize(numProcs+1, 0); //Only initialize the counter variable + PSizeInfoMessages.resize(numProcs*3, 0); + + for (MilanInt i=0; i SRequest; //Requests that are used for each send message + vector SStatus; //Status of sent messages, used in MPI_Wait + MilanLongInt MessageIndex=0; //Pointer for current message + try { + SRequest.reserve(numProcs*2); //At most two messages per processor + SStatus.reserve(numProcs*2);//At most two messages per processor + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearchImmediateSend: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + MPI_Request myReq; //A sample request + SRequest.resize(numProcs*2,myReq); + MPI_Status myStat; //A sample status + SStatus.resize(numProcs*2,myStat); + //Send the Messages + for (MilanInt i=0; i 0 ) { //Send only if it is a nonempty packet + MPI_Isend(&PSizeInfoMessages[i*3+0], 3, TypeMap(), i, ComputeTag, comm, &SRequest[MessageIndex]); + msgActual++; + MessageIndex++; + //Now Send the message with the data packet: +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), i, BundleTag, comm, &SRequest[MessageIndex]); + MessageIndex++; + } //End of if size > 0 + } + //Free up temporary memory: + PCumulative.clear(); + QLocalVtx.clear(); + QGhostVtx.clear(); + QMsgType.clear(); + QOwner.clear(); + PCounter.clear(); +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), comm, &OneMessageSize); //Size of one message packet + //How many messages to send? + //Potentially three kinds of messages will be sent/received: + //Request, Success, Failure. + //But only two will be sent from a given processor. + //Substract the number of messages that have already been sent as bundled messages: + MilanLongInt numMessagesToSend = numGhostEdges*2 - NumMessagesBundled; + MilanInt BufferSize = (OneMessageSize+MPI_BSEND_OVERHEAD)*numMessagesToSend; + + MilanLongInt *Buffer=0; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Buffer = (MilanLongInt *) malloc(BufferSize); //Allocate memory + if ( Buffer == 0 ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; + cout<<"Not enough memory to allocate for send buffer on process "< ReceiveBuffer; + MilanLongInt bundleSize=0, bundleCounter=0; + try { + ReceiveBuffer.reserve(numGhostEdges*2*3); //Three integers per cross edge + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + while ( true ) { +#ifdef DEBUG_HANG_ + if (myRank == 0) cout<<"\n("<= StartIndex) && (u <= EndIndex) ) { //Process Only If a Local Vertex + //Get the Adjacency list for u + adj1 = verLocPtr[u-StartIndex]; //Pointer + adj2 = verLocPtr[u-StartIndex+1]; + for( k = adj1; k < adj2; k++ ) { + v = verLocInd[k]; + if ( (v >= StartIndex) && (v <= EndIndex) ) { //v is a Local Vertex: + if ( Mate[v-StartIndex] >= 0 ) // v is already matched + continue; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<EndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[verLocInd[k1]]] >= 0 )// Already matched + continue; + } + else { //A local vertex + if( Mate[verLocInd[k1]-StartIndex] >= 0 ) // Already matched + continue; + } + + if( (edgeLocWeight[k1] > heaviestEdgeWt) || + ((edgeLocWeight[k1] == heaviestEdgeWt)&&(w < verLocInd[k1])) ) { + heaviestEdgeWt = edgeLocWeight[k1]; + w = verLocInd[k1]; + } + } //End of for loop + candidateMate[v-StartIndex] = w; + //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= 0 ) { + if ( (w < StartIndex) || (w > EndIndex) ) { //w is a ghost + //Build the Message Packet: + Message[0] = v; //LOCAL + Message[1] = w; //GHOST + Message[2] = REQUEST; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; + if ( candidateMate[NLVer+Ghost2LocalMap[w]] == v ) { + Mate[v-StartIndex] = w; //v is local + GMate[Ghost2LocalMap[w]] = v; //w is ghost + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[w]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,w) + } //End of if CandidateMate[w] = v + } //End of if a Ghost Vertex + else { //w is a local vertex + if ( candidateMate[w-StartIndex] == v ) { + Mate[v-StartIndex] = w; //v is local + Mate[w-StartIndex] = v; //w is local + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<=0) + else { //no dominating edge found: w == -1 + adj11 = verLocPtr[v-StartIndex]; + adj12 = verLocPtr[v-StartIndex+1]; + for( k1 = adj11; k1 < adj12; k1++ ) { + w = verLocInd[k1]; + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + Message[0] = v; //LOCAL + Message[1] = w; //GHOST + Message[2] = FAILURE; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; + } //End of if(GHOST) + } //End of for loop + } // End of Else: w == -1 + //End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } //End of If (candidateMate[v-StartIndex] == u) + } //End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: + else { //Neighbor v is a ghost vertex + if ( candidateMate[NLVer+Ghost2LocalMap[v]] == u ) + candidateMate[NLVer+Ghost2LocalMap[v]] = -1; + if ( v != Mate[u-StartIndex] ) { //u is a local vertex + //Build the Message Packet: + Message[0] = u; //LOCAL + Message[1] = v; //GHOST + Message[2] = SUCCESS; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; +#ifdef DEBUG_GHOST_ + if ((uEndIndex)) { + cout<<"\n("<= StartIndex) && (u <= EndIndex) ) //Process Only If a Local Vertex + } //End of while ( /*!Q.empty()*/ !U.empty() ) + ///////////////////////// END OF PROCESS MATCHED VERTICES ///////////////////////// + + //// BREAK IF NO MESSAGES EXPECTED ///////// +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), MPI_ANY_SOURCE, ComputeTag, comm, &computeStatus); + if (error_codeC != MPI_SUCCESS ) { + MPI_Error_string(error_codeC, error_message, &message_length); + cout<<"\n*Error in call to MPI_Receive on Slave: "<(), Sender, BundleTag, comm, &computeStatus); + if (error_codeC != MPI_SUCCESS ) { + MPI_Error_string(error_codeC, error_message, &message_length); + cout<<"\n*Error in call to MPI_Receive on processor "<NLVer)) { + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[u]] = Counter[Ghost2LocalMap[u]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[u]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,u) + } //End of if ( candidateMate[v-StartIndex] == u )e + } //End of if ( Mate[v] == -1 ) + } //End of REQUEST + else { //CASE II: SUCCESS + if ( message_type == SUCCESS ) { +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[u]] = Counter[Ghost2LocalMap[u]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[u]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,u) +#ifdef DEBUG_GHOST_ + if ((v<0)||(vNLVer)) { + cout<<"\n("<EndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[verLocInd[k1]]] >= 0 )// Already matched + continue; + } + else { //A local vertex + if( Mate[verLocInd[k1]-StartIndex] >= 0 ) // Already matched + continue; + } + + if( (edgeLocWeight[k1] > heaviestEdgeWt) || + ((edgeLocWeight[k1] == heaviestEdgeWt)&&(w < verLocInd[k1])) ) { + heaviestEdgeWt = edgeLocWeight[k1]; + w = verLocInd[k1]; + } + } //End of for loop + candidateMate[v-StartIndex] = w; + //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= 0 ) { + if ( (w < StartIndex) || (w > EndIndex) ) { //w is a ghost + //Build the Message Packet: + Message[0] = v; //LOCAL + Message[1] = w; //GHOST + Message[2] = REQUEST; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; + if ( candidateMate[NLVer+Ghost2LocalMap[w]] == v ) { + Mate[v-StartIndex] = w; //v is local + GMate[Ghost2LocalMap[w]] = v; //w is ghost + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[w]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,w) + } //End of if CandidateMate[w] = v + } //End of if a Ghost Vertex + else { //w is a local vertex + if ( candidateMate[w-StartIndex] == v ) { + Mate[v-StartIndex] = w; //v is local + Mate[w-StartIndex] = v; //w is local + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<=0) + else { //No dominant edge found + adj11 = verLocPtr[v-StartIndex]; + adj12 = verLocPtr[v-StartIndex+1]; + for( k1 = adj11; k1 < adj12; k1++ ) { + w = verLocInd[k1]; + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + Message[0] = v; //LOCAL + Message[1] = w; //GHOST + Message[2] = FAILURE; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; + } //End of if(GHOST) + } //End of for loop + } // End of Else: w == -1 + //End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } //End of if ( candidateMate[v-StartIndex] == u ) + } //End of if ( Mate[v] == -1 ) + } //End of if ( message_type == SUCCESS ) + else { //CASE III: FAILURE +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[u]] = Counter[Ghost2LocalMap[u]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[u]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,u) + } //End of else: CASE III + } //End of else: CASE I + } //End of if (!MsgQ.empty()) + ///////////////////////// END OF PROCESS MESSAGES ///////////////////////////////// +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + MPI_Buffer_detach(&Buffer, &BufferSize); //Detach the Buffer + free(Buffer); //Free the memory that was allocated + } + finishTime = MPI_Wtime(); + *ph2_time = finishTime-startTime; //Time taken for Phase-2 + *ph2_card = myCard ; //Cardinality at the end of Phase-2 + +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0) { + *msgPercent = ((double)NumMessagesBundled/(double)(msgInd))*100.0; + } else { + *msgPercent = 0; + } +#ifdef DEBUG_HANG_ + if (myRank == 0) cout<<"\n("< vtxIndex ) + End = Current - 1; + else //CASE 3: + Start = Current + 1; + } + } //End of While() + if ( Current == 0 ) + return (Current); + else { + if ( mVerDistance[Current] > vtxIndex ) + return (Current-1); + else + return (Current); + } //End of else + return (-1); //It should not reach here! +} //End of findOwnerOfGhost() diff --git a/amgprec/impl/aggregator/amg_d_parmatch_aggregator_inner_mat_asb.f90 b/amgprec/impl/aggregator/amg_d_parmatch_aggregator_inner_mat_asb.f90 new file mode 100644 index 00000000..f166f725 --- /dev/null +++ b/amgprec/impl/aggregator/amg_d_parmatch_aggregator_inner_mat_asb.f90 @@ -0,0 +1,231 @@ +! ! +! +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! +! (C) Copyright 2021 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! moved here from +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! MLD2P4 version 2.2 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_d_parmatch_aggregator_mat_asb.f90 +! +! Subroutine: amg_d_parmatch_aggregator_mat_asb +! Version: real +! +! +! From a given AC to final format, generating DESC_AC. +! This is quite involved, because in the context of aggregation based +! on parallel matching we are building the matrix hierarchy within BLD_TPROL +! as we go, especially if we have multiple sweeps, hence this code is called +! in two completely different contexts: +! 1. Within bld_tprol for the internal hierarchy +! 2. Outside, from amg_hierarchy_bld +! The solution we have found is for bld_tprol to copy its output +! into special components ag%ac ag%desc_ac etc so that: +! 1. if they are allocated, it means that bld_tprol has been already invoked, we are in +! amg_hierarchy_bld and we only need to copy them +! 2. If they are not allocated, we are within bld_tprol, and we need to actually +! perform the various needed steps. +! +! Arguments: +! ag - type(amg_d_parmatch_aggregator_type), input/output. +! The aggregator object +! parms - type(amg_dml_parms), input +! The aggregation parameters +! a - type(psb_dspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! ac - type(psb_dspmat_type), inout +! The coarse matrix +! desc_ac - type(psb_desc_type), output. +! The communication descriptor of the fine-level matrix. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! +! op_prol - type(psb_dspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_dspmat_type), input/output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +subroutine amg_d_parmatch_aggregator_inner_mat_asb(ag,parms,a,desc_a,& + & ac,desc_ac, op_prol,op_restr,info) + use psb_base_mod + use amg_base_prec_type + use amg_d_parmatch_aggregator_mod, amg_protect_name => amg_d_parmatch_aggregator_inner_mat_asb + implicit none + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_dml_parms), intent(inout) :: parms + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_dspmat_type), intent(inout) :: op_prol,op_restr + type(psb_dspmat_type), intent(inout) :: ac + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + ! + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me + type(psb_ld_coo_sparse_mat) :: acoo, bcoo + type(psb_ld_csr_sparse_mat) :: acsr1 + integer(psb_ipk_) :: nzl, inl + integer(psb_lpk_) :: ntaggr + integer(psb_ipk_) :: err_act, debug_level, debug_unit + character(len=20) :: name='d_parmatch_inner_mat_asb' + character(len=80) :: aname + logical, parameter :: debug=.false., dump_prol_restr=.false. + + + if (psb_get_errstatus().ne.0) return + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + info = psb_success_ + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + + if (debug) write(0,*) me,' ',trim(name),' Start:',& + & allocated(ag%ac),allocated(ag%desc_ac), allocated(ag%prol),allocated(ag%restr) + + + select case(parms%coarse_mat) + + case(amg_distr_mat_) + ! Do nothing, it has already been done in spmm_bld_ov. + + case(amg_repl_mat_) + ! + ! + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='no repl coarse_mat_ here') + goto 9999 + + case default + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid amg_coarse_mat_') + goto 9999 + end select + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +end subroutine amg_d_parmatch_aggregator_inner_mat_asb diff --git a/amgprec/impl/aggregator/amg_d_parmatch_aggregator_mat_asb.f90 b/amgprec/impl/aggregator/amg_d_parmatch_aggregator_mat_asb.f90 new file mode 100644 index 00000000..145f453e --- /dev/null +++ b/amgprec/impl/aggregator/amg_d_parmatch_aggregator_mat_asb.f90 @@ -0,0 +1,277 @@ +! ! +! +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! +! (C) Copyright 2021 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! moved here from +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_d_base_aggregator_mat_bld.f90 +! +! +! AMG4PSBLAS version 2.2 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_d_parmatch_aggregator_mat_asb.f90 +! +! Subroutine: amg_d_parmatch_aggregator_mat_asb +! Version: real +! +! +! From a given AC to final format, generating DESC_AC. +! This is quite involved, because in the context of aggregation based +! on parallel matching we are building the matrix hierarchy within BLD_TPROL +! as we go, especially if we have multiple sweeps, hence this code is called +! in two completely different contexts: +! 1. Within bld_tprol for the internal hierarchy +! 2. Outside, from amg_hierarchy_bld +! The solution we have found is for bld_tprol to copy its output +! into special components ag%ac ag%desc_ac etc so that: +! 1. if they are allocated, it means that bld_tprol has been already invoked, we are in +! amg_hierarchy_bld and we only need to copy them +! 2. If they are not allocated, we are within bld_tprol, and we need to actually +! perform the various needed steps. +! +! Arguments: +! ag - type(amg_d_parmatch_aggregator_type), input/output. +! The aggregator object +! parms - type(amg_dml_parms), input +! The aggregation parameters +! a - type(psb_dspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! ac - type(psb_dspmat_type), inout +! The coarse matrix +! desc_ac - type(psb_desc_type), output. +! The communication descriptor of the fine-level matrix. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! +! op_prol - type(psb_dspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_dspmat_type), input/output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +subroutine amg_d_parmatch_aggregator_mat_asb(ag,parms,a,desc_a,& + & ac,desc_ac, op_prol,op_restr,info) + use psb_base_mod + use amg_base_prec_type + use amg_d_parmatch_aggregator_mod, amg_protect_name => amg_d_parmatch_aggregator_mat_asb + implicit none + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_dml_parms), intent(inout) :: parms + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + type(psb_dspmat_type), intent(inout) :: op_prol,ac,op_restr + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + ! + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me + type(psb_ld_coo_sparse_mat) :: tmpcoo + type(psb_ldspmat_type) :: tmp_ac + integer(psb_ipk_) :: i_nr, i_nc, i_nl, nzl + integer(psb_lpk_) :: ntaggr + integer(psb_ipk_) :: err_act, debug_level, debug_unit + character(len=20) :: name='d_parmatch_mat_asb' + character(len=80) :: aname + logical, parameter :: debug=.false., dump_prol_restr=.false., dump_ac=.false. + + + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + info = psb_success_ + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + if (psb_get_errstatus().ne.0) then + write(0,*) me,' From:',trim(name),':',psb_get_errstatus() + return + end if + + + if (debug) write(0,*) me,' ',trim(name),' Start:',& + & allocated(ag%ac),allocated(ag%desc_ac), allocated(ag%prol),allocated(ag%restr) + + select case(parms%coarse_mat) + + case(amg_distr_mat_) + + call ac%cscnv(info,type='csr') + call op_prol%cscnv(info,type='csr') + call op_restr%cscnv(info,type='csr') + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done ac ' + + case(amg_repl_mat_) + ! + ! We are assuming here that an d matrix + ! can hold all entries + ! + if (desc_ac%get_global_rows() < huge(1_psb_ipk_) ) then + ntaggr = desc_ac%get_global_rows() + i_nr = ntaggr + else + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid amg_coarse_mat_') + goto 9999 + end if + + call op_prol%mv_to(tmpcoo) + nzl = tmpcoo%get_nzeros() + call psb_loc_to_glob(tmpcoo%ja(1:nzl),desc_ac,info,'I') + call op_prol%mv_from(tmpcoo) + + call op_restr%mv_to(tmpcoo) + nzl = tmpcoo%get_nzeros() + call psb_loc_to_glob(tmpcoo%ia(1:nzl),desc_ac,info,'I') + call op_restr%mv_from(tmpcoo) + + call op_prol%set_ncols(i_nr) + call op_restr%set_nrows(i_nr) + + call psb_gather(tmp_ac,ac,desc_ac,info,root=-ione,& + & dupl=psb_dupl_add_,keeploc=.false.) + call tmp_ac%mv_to(tmpcoo) + call ac%mv_from(tmpcoo) + + call psb_cdall(ictxt,desc_ac,info,mg=ntaggr,repl=.true.) + if (info == psb_success_) call psb_cdasb(desc_ac,info) + ! + ! Now that we have the descriptors and the restrictor, we should + ! update the W. But we don't, because REPL is only valid + ! at the coarsest level, so no need to carry over. + ! + + if (info /= psb_success_) goto 9999 + + case default + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid amg_coarse_mat_') + goto 9999 + end select + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +end subroutine amg_d_parmatch_aggregator_mat_asb diff --git a/amgprec/impl/aggregator/amg_d_parmatch_aggregator_mat_bld.f90 b/amgprec/impl/aggregator/amg_d_parmatch_aggregator_mat_bld.f90 new file mode 100644 index 00000000..3d5c8158 --- /dev/null +++ b/amgprec/impl/aggregator/amg_d_parmatch_aggregator_mat_bld.f90 @@ -0,0 +1,275 @@ +! ! +! +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! +! (C) Copyright 2021 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! moved here from +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_d_base_aggregator_mat_bld.f90 +! +! Subroutine: amg_d_base_aggregator_mat_bld +! Version: d +! +! This routine builds the matrix associated to the current level of the +! multilevel preconditioner from the matrix associated to the previous level, +! by using the user-specified aggregation technique (therefore, it also builds the +! prolongation and restriction operators mapping the current level to the +! previous one and vice versa). +! The current level is regarded as the coarse one, while the previous as +! the fine one. This is in agreement with the fact that the routine is called, +! by amg_mlprec_bld, only on levels >=2. +! The coarse-level matrix A_C is built from a fine-level matrix A +! by using the Galerkin approach, i.e. +! +! A_C = P_C^T A P_C, +! +! where P_C is a prolongator from the coarse level to the fine one. +! +! A mapping from the nodes of the adjacency graph of A to the nodes of the +! adjacency graph of A_C has been computed by the amg_aggrmap_bld subroutine. +! The prolongator P_C is built here from this mapping, according to the +! value of p%iprcparm(amg_aggr_kind_), specified by the user through +! amg_dprecinit and amg_zprecset. +! On output from this routine the entries of AC, op_prol, op_restr +! are still in "global numbering" mode; this is fixed in the calling routine +! amg_d_lev_aggrmat_bld. +! +! Currently four different prolongators are implemented, corresponding to +! four aggregation algorithms: +! 1. un-smoothed aggregation, +! 2. smoothed aggregation, +! 3. "bizarre" aggregation. +! 4. minimum energy +! 1. The non-smoothed aggregation uses as prolongator the piecewise constant +! interpolation operator corresponding to the fine-to-coarse level mapping built +! by p%aggr%bld_tprol. This is called tentative prolongator. +! 2. The smoothed aggregation uses as prolongator the operator obtained by applying +! a damped Jacobi smoother to the tentative prolongator. +! 3. The "bizarre" aggregation uses a prolongator proposed by the authors of AMG4PSBLAS. +! This prolongator still requires a deep analysis and testing and its use is +! not recommended. +! 4. Minimum energy aggregation +! +! For more details see +! M. Brezina and P. Vanek, A black-box iterative solver based on a two-level +! Schwarz method, Computing, 63 (1999), 233-263. +! P. D'Ambra, D. di Serafino and S. Filippone, On the development of PSBLAS-based +! parallel two-level Schwarz preconditioners, Appl. Num. Math., 57 (2007), +! 1181-1196. +! M. Sala, R. Tuminaro: A new Petrov-Galerkin smoothed aggregation preconditioner +! for nonsymmetric linear systems, SIAM J. Sci. Comput., 31(1):143-166 (2008) +! +! +! The main structure is: +! 1. Perform sanity checks; +! 2. Compute prolongator/restrictor/AC +! +! +! Arguments: +! ag - type(amg_d_base_aggregator_type), input/output. +! The aggregator object +! parms - type(amg_dml_parms), input +! The aggregation parameters +! a - type(psb_dspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! ac - type(psb_dspmat_type), output +! The coarse matrix on output +! +! op_prol - type(psb_dspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_dspmat_type), output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +subroutine amg_d_parmatch_aggregator_mat_bld(ag,parms,a,desc_a,ilaggr,nlaggr,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + use psb_base_mod + use amg_d_inner_mod + use amg_d_prec_type + use amg_d_parmatch_aggregator_mod, amg_protect_name => amg_d_parmatch_aggregator_mat_bld + implicit none + + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_dml_parms), intent(inout) :: parms + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(psb_ldspmat_type), intent(inout) :: t_prol + type(psb_dspmat_type), intent(out) :: op_prol,ac,op_restr + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + + ! Local variables + character(len=20) :: name + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: debug_level, debug_unit + type(psb_dspmat_type) :: atmp + + name='d_parmatch_mat_bld' + if (psb_get_errstatus().ne.0) return + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + info = psb_success_ + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + + ! + ! Build the coarse-level matrix from the fine-level one, starting from + ! the mapping defined by amg_aggrmap_bld and applying the aggregation + ! algorithm specified by + ! + + call clean_shortcuts(ag) + ! + ! When requesting smoothed aggregation we cannot use the + ! unsmoothed shortcuts + ! + select case (parms%aggr_prol) + case (amg_no_smooth_) + call amg_d_parmatch_unsmth_bld(ag,a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + + case(amg_smooth_prol_) + call amg_d_parmatch_smth_bld(ag,a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + +!!$ case(amg_biz_prol_) +!!$ call amg_daggrmat_biz_bld(a,desc_a,ilaggr,nlaggr, & +!!$ & parms,ac,desc_ac,op_prol,op_restr,info) + + case(amg_min_energy_) + call amg_daggrmat_minnrg_bld(a,desc_a,ilaggr,nlaggr, & + & parms,ac,desc_ac,op_prol,op_restr,t_prol,info) + + case default + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Invalid aggr kind') + goto 9999 + end select + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Inner aggrmat asb') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + subroutine clean_shortcuts(ag) + implicit none + class(amg_d_parmatch_aggregator_type), intent(inout) :: ag + integer(psb_ipk_) :: info + if (allocated(ag%prol)) then + call ag%prol%free() + deallocate(ag%prol) + end if + if (allocated(ag%restr)) then + call ag%restr%free() + deallocate(ag%restr) + end if + if (ag%unsmoothed_hierarchy) then + if (allocated(ag%ac)) call move_alloc(ag%ac, ag%rwa) + if (allocated(ag%desc_ac)) call move_alloc(ag%desc_ac,ag%rwdesc) + else + if (allocated(ag%ac)) then + call ag%ac%free() + deallocate(ag%ac) + end if + if (allocated(ag%desc_ac)) then + call ag%desc_ac%free(info) + deallocate(ag%desc_ac) + end if + end if + end subroutine clean_shortcuts + +end subroutine amg_d_parmatch_aggregator_mat_bld diff --git a/amgprec/impl/aggregator/amg_d_parmatch_aggregator_tprol.f90 b/amgprec/impl/aggregator/amg_d_parmatch_aggregator_tprol.f90 new file mode 100644 index 00000000..b004bb3a --- /dev/null +++ b/amgprec/impl/aggregator/amg_d_parmatch_aggregator_tprol.f90 @@ -0,0 +1,565 @@ +! ! +! +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! +! (C) Copyright 2021 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! moved here from +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! MLD2P4 version 2.2 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! File: amg_d_parmatch_aggregator_tprol.f90 +! +! Subroutine: amg_d_parmatch_aggregator_tprol +! Version: real +! +! + +subroutine amg_d_parmatch_aggregator_build_tprol(ag,parms,ag_data,& + & a,desc_a,ilaggr,nlaggr,t_prol,info) + use psb_base_mod + use amg_d_prec_type + use amg_d_inner_mod + use amg_d_parmatch_aggregator_mod, amg_protect_name => amg_d_parmatch_aggregator_build_tprol + use iso_c_binding + implicit none + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_dml_parms), intent(inout) :: parms + type(amg_daggr_data), intent(in) :: ag_data + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), allocatable, intent(out) :: ilaggr(:),nlaggr(:) + type(psb_ldspmat_type), intent(out) :: t_prol + integer(psb_ipk_), intent(out) :: info + + + ! Local variables + real(psb_dpk_), allocatable :: tmpw(:), tmpwnxt(:) + integer(psb_lpk_), allocatable :: ixaggr(:), nxaggr(:), tlaggr(:), ivr(:) + type(psb_dspmat_type) :: a_tmp + integer(c_int) :: match_algorithm, n_sweeps, max_csize, max_nlevels + character(len=40) :: name, ch_err + character(len=80) :: fname, prefix_ + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me + integer(psb_ipk_) :: err_act, ierr + integer(psb_ipk_) :: debug_level, debug_unit + integer(psb_ipk_) :: i, j, k, nr, nc + integer(psb_lpk_) :: isz, num_pcols, nrac, ncac, lname, nz, x_sweeps, csz + integer(psb_lpk_) :: psz, sizes(4) + type(psb_d_csr_sparse_mat), target :: csr_prol, csr_pvi, csr_prod_res, acsr + type(psb_ld_csr_sparse_mat), target :: lcsr_prol + type(psb_desc_type), allocatable :: desc_acv(:) + type(psb_ld_coo_sparse_mat) :: tmpcoo, transp_coo + type(psb_dspmat_type), allocatable :: acv(:) + type(psb_dspmat_type), allocatable :: prolv(:), restrv(:) + type(psb_ldspmat_type) :: tmp_prol, tmp_pg, tmp_restr + type(psb_desc_type) :: tmp_desc_ac, tmp_desc_ax, tmp_desc_p + integer(psb_ipk_), save :: idx_mboxp=-1, idx_spmmbld=-1, idx_sweeps_mult=-1 + logical, parameter :: dump=.false., do_timings=.true., debug=.false., & + & dump_prol_restr=.false. + + name='d_parmatch_tprol' + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + if (psb_get_errstatus().ne.0) then + write(0,*) me,trim(name),' Err_status :',psb_get_errstatus() + return + end if + if (debug) write(0,*) me,trim(name),' Start ' + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + info = psb_success_ + + if ((do_timings).and.(idx_mboxp==-1)) & + & idx_mboxp = psb_get_timer_idx("PMC_TPROL: MatchBoxP") + if ((do_timings).and.(idx_spmmbld==-1)) & + & idx_spmmbld = psb_get_timer_idx("PMC_TPROL: spmm_bld") + if ((do_timings).and.(idx_sweeps_mult==-1)) & + & idx_sweeps_mult = psb_get_timer_idx("PMC_TPROL: sweeps_mult") + + + call amg_check_def(parms%ml_cycle,'Multilevel cycle',& + & amg_mult_ml_,is_legal_ml_cycle) + call amg_check_def(parms%par_aggr_alg,'Aggregation',& + & amg_dec_aggr_,is_legal_ml_par_aggr_alg) + call amg_check_def(parms%aggr_ord,'Ordering',& + & amg_aggr_ord_nat_,is_legal_ml_aggr_ord) + call amg_check_def(parms%aggr_thresh,'Aggr_Thresh',dzero,is_legal_d_aggr_thrs) + match_algorithm = ag%matching_alg + n_sweeps = ag%n_sweeps + if (2**n_sweeps /= ag%orig_aggr_size) then + if (me == 0) then + write(debug_unit, *) 'Warning: AGGR_SIZE reset to value ',2**n_sweeps + end if + end if + if (ag%max_csize > 0) then + max_csize = ag%max_csize + else + max_csize = ag_data%min_coarse_size + end if + if (ag%max_nlevels > 0) then + max_nlevels = ag%max_nlevels + else + max_nlevels = ag_data%max_levs + end if + if (.true.) then + block + integer(psb_ipk_) :: ipv(2) + ipv(1) = max_csize + ipv(2) = n_sweeps + call psb_bcast(ictxt,ipv) + max_csize = ipv(1) + n_sweeps = ipv(2) + end block + else + call psb_bcast(ictxt,max_csize) + call psb_bcast(ictxt,n_sweeps) + end if + if (n_sweeps /= ag%n_sweeps) then + write(0,*) me,' Inconsistent N_SWEEPS ',n_sweeps,ag%n_sweeps + end if +!!$ if (me==0) write(0,*) 'Matching sweeps: ',n_sweeps + n_sweeps = max(1,n_sweeps) + if (debug) write(0,*) me,' Copies, with n_sweeps: ',n_sweeps,max_csize + if (ag%unsmoothed_hierarchy.and.allocated(ag%base_a)) then + call ag%base_a%cp_to(acsr) + if (ag%do_clean_zeros) call acsr%clean_zeros(info) + nr = acsr%get_nrows() + if (psb_size(ag%w) < nr) call ag%bld_default_w(nr) + isz = acsr%get_ncols() + + call psb_realloc(isz,ixaggr,info) + if (info == psb_success_) & + & allocate(acv(0:n_sweeps), desc_acv(0:n_sweeps),& + & prolv(n_sweeps), restrv(n_sweeps),stat=info) + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + + end if + + + call acv(0)%mv_from(acsr) + call ag%base_desc%clone(desc_acv(0),info) + + else + call a%cp_to(acsr) + if (ag%do_clean_zeros) call acsr%clean_zeros(info) + nr = acsr%get_nrows() + if (psb_size(ag%w) < nr) call ag%bld_default_w(nr) + isz = acsr%get_ncols() + + call psb_realloc(isz,ixaggr,info) + if (info == psb_success_) & + & allocate(acv(0:n_sweeps), desc_acv(0:n_sweeps),& + & prolv(n_sweeps), restrv(n_sweeps),stat=info) + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + + end if + + + call acv(0)%mv_from(acsr) + call desc_a%clone(desc_acv(0),info) + end if + + nrac = desc_acv(0)%get_local_rows() + ncac = desc_acv(0)%get_local_cols() + if (debug) write(0,*) me,' On input to level: ',nrac, ncac + if (allocated(ag%prol)) then + call ag%prol%free() + deallocate(ag%prol) + end if + if (allocated(ag%restr)) then + call ag%restr%free() + deallocate(ag%restr) + end if + + if (dump) then + block + type(psb_ldspmat_type) :: lac + ivr = desc_acv(0)%get_global_indices(owned=.false.) + prefix_ = "input_a" + lname = len_trim(prefix_) + fname = trim(prefix_) + write(fname(lname+1:lname+9),'(a,i3.3,a)') '_p',me, '.mtx' + call acv(0)%print(fname,head='Debug aggregates') + call lac%cp_from(acv(0)) + write(fname(lname+1:lname+13),'(a,i3.3,a)') '_p',me, '-glb.mtx' + call lac%print(fname,head='Debug aggregates',iv=ivr) + call lac%free() + end block + end if + + call psb_geall(tmpw,desc_acv(0),info) + + tmpw(1:nr) = ag%w(1:nr) + + call psb_geasb(tmpw,desc_acv(0),info) + + if (debug) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'N_sweeps ',n_sweeps,nr,desc_acv(0)%is_ok(),max_csize + end if + + ! + ! Prepare ag%ac, ag%desc_ac, ag%prol, ag%restr to enable + ! shortcuts in mat_bld and mat_asb + ! and ag%desc_ax which will be needed in backfix. + ! + x_sweeps = -1 + sweeps_loop: do i=1, n_sweeps + if (debug) then + call psb_barrier(ictxt) + if (me==0) write(0,*) me,trim(name),' Start sweeps_loop iteration:',i,' of ',n_sweeps + end if + + ! + ! Building prol and restr because this algorithm is not decoupled + ! On exit from matchbox_build_prol, prolv(i) is in global numbering + ! + ! + if (debug) write(0,*) me,' Into matchbox_build_prol ',info + if (do_timings) call psb_tic(idx_mboxp) + call dmatchboxp_build_prol(tmpw,acv(i-1),desc_acv(i-1),ixaggr,nxaggr,tmp_prol,info,& + & symmetrize=ag%need_symmetrize,reproducible=ag%reproducible_matching) + if (do_timings) call psb_toc(idx_mboxp) + if (debug) write(0,*) me,' Out from matchbox_build_prol ',info + if (psb_errstatus_fatal()) write(0,*)me,trim(name),'Error fatal on exit bld_tprol',info + + + if (debug) then + call psb_barrier(ictxt) +!!$ write(0,*) name,' Call spmm_bld sweep:',i,n_sweeps + if (me==0) write(0,*) me,trim(name),' Calling spmm_bld NSW>1:',i,& + & desc_acv(i-1)%get_local_rows(),desc_acv(i-1)%get_local_cols(),& + & desc_acv(i-1)%get_global_rows() + end if + if (i == n_sweeps) call tmp_prol%clone(tmp_pg,info) + if (do_timings) call psb_tic(idx_spmmbld) + ! + ! On entry, prolv(i) is in global numbering, + ! + call amg_d_parmatch_spmm_bld_ov(acv(i-1),desc_acv(i-1),ixaggr,nxaggr,parms,& + & acv(i),desc_acv(i), prolv(i),restrv(1),tmp_prol,info) + if (psb_errstatus_fatal()) write(0,*)me,trim(name),'Error fatal on exit from bld_ov(i)',info + if (debug) then + call psb_barrier(ictxt) +!!$ write(0,*) name,' Call spmm_bld sweep:',i,n_sweeps + if (me==0) write(0,*) me,trim(name),' Done spmm_bld:',i + end if + + if (do_timings) call psb_toc(idx_spmmbld) + ! Keep a copy of prolv(i) in global numbering for the time being, will + ! need it to build the final + ! if (i == n_sweeps) call prolv(i)%clone(tmp_prol,info) +!!$ write(0,*) name,' Call mat_asb sweep:',i,n_sweeps + call ag%inner_mat_asb(parms,acv(i-1),desc_acv(i-1),& + & acv(i),desc_acv(i),prolv(i),restrv(1),info) + +!!$ write(0,*) me,' From in_mat_asb:',& +!!$ & prolv(i)%get_nrows(),prolv(i)%get_ncols(),& +!!$ & restrv(1)%get_nrows(),restrv(1)%get_ncols(),& +!!$ & desc_acv(i)%get_local_rows(), desc_acv(i)%get_local_cols() + if (debug) then + call psb_barrier(ictxt) +!!$ write(0,*) name,' Call spmm_bld sweep:',i,n_sweeps + if (me==0) write(0,*) me,trim(name),' Done mat_asb:',i,sum(nxaggr),max_csize,info + csz = sum(nxaggr) + call psb_bcast(ictxt,csz) + if (csz /= sum(nxaggr)) write(0,*) me,trim(name),' Mismatch matasb',& + & csz,sum(nxaggr),max_csize + end if + if (psb_errstatus_fatal()) write(0,*)me,trim(name),'Error fatal on entry to tmpwnxt 2' + + + ! + ! Fix wnxt + ! + if (info == 0) call psb_geall(tmpwnxt,desc_acv(i),info) + if (info == 0) call psb_geasb(tmpwnxt,desc_acv(i),info,scratch=.true.) + if (info == 0) call psb_halo(tmpw,desc_acv(i-1),info) +!!$ write(0,*) trestr%get_nrows(),size(tmpwnxt),trestr%get_ncols(),size(tmpw) + + if (info == 0) call psb_csmm(done,restrv(1),tmpw,dzero,tmpwnxt,info) + + if (info /= psb_success_) then + write(0,*)me,trim(name),'Error from mat_asb/tmpw ',info + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='mat_asb 2') + goto 9999 + end if + + + if (i == 1) then + nrac = desc_acv(1)%get_local_rows() +!!$ write(0,*) 'Copying output w_nxt ',nrac + call psb_realloc(nrac,ag%w_nxt,info) + ag%w_nxt(1:nrac) = tmpwnxt(1:nrac) + ! + ! ILAGGR is fixed later on, but + ! get a copy in case of an early exit + ! + call psb_safe_ab_cpy(ixaggr,ilaggr,info) + end if + call psb_safe_ab_cpy(nxaggr,nlaggr,info) + call move_alloc(tmpwnxt,tmpw) + if (debug) then + if (csz /= sum(nlaggr)) write(0,*) me,trim(name),' Mismatch 2 matasb',& + & csz,sum(nlaggr),max_csize, info + end if + call acv(i-1)%free() + if ((sum(nlaggr) <= max_csize).or.(any(nlaggr==0))) then + !if (me==0) write(0,*) 'Early exit ',any(nlaggr==0),sum(nlaggr),max_csize,i + x_sweeps = i + exit sweeps_loop + end if + if (debug) then + call psb_barrier(ictxt) +!!$ write(0,*) name,' Call spmm_bld sweep:',i,n_sweeps + if (me==0) write(0,*) me,trim(name),' Done sweeps_loop iteration:',i,' of ',n_sweeps + end if + + end do sweeps_loop + + if (debug) then + call psb_barrier(ictxt) +!!$ write(0,*) name,' Call spmm_bld sweep:',i,n_sweeps + if (me==0) write(0,*) me,trim(name),' Done sweeps_loop:',x_sweeps + end if +!!$ write(0,*) me,name,' : End of aggregation sweeps ',& +!!$ & n_sweeps,' Final size:',max_csize,desc_acv(n_sweeps)%get_local_rows(),t_prol%get_ncols(),tmp_prol%get_ncols() + if (x_sweeps<=0) x_sweeps = n_sweeps + + if (do_timings) call psb_tic(idx_sweeps_mult) + ! + ! Ok, now we have all the prolongators, including the last one in global numbering. + ! Build the product of all prolongators. Need a tmp_desc_ax + ! which is correct but most of the time overdimensioned + ! + if (.not.allocated(ag%desc_ax)) allocate(ag%desc_ax) + ! + block + integer(psb_ipk_) :: i, nnz + integer(psb_lpk_) :: ncol, ncsave + if (.not.allocated(ag%ac)) allocate(ag%ac) + if (.not.allocated(ag%desc_ac)) allocate(ag%desc_ac) + call desc_acv(x_sweeps)%clone(ag%desc_ac,info) + call desc_acv(x_sweeps)%free(info) + call acv(x_sweeps)%move_alloc(ag%ac,info) +!!$ call acv(x_sweeps)%clone(ag%ac,info) + if (.not.allocated(ag%prol)) allocate(ag%prol) + if (.not.allocated(ag%restr)) allocate(ag%restr) + + !call desc_acv(x_sweeps)%clone(ag%desc_ac,info) + call psb_cd_reinit(ag%desc_ac,info) + ncsave = ag%desc_ac%get_global_rows() + ! + ! Note: prolv(i) is already in local numbering + ! because of the call to mat_asb in the loop above. + ! + call prolv(x_sweeps)%mv_to(csr_prol) + !call csr_prol%set_ncols(ag%desc_ac%get_local_cols()) + if (debug) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Enter prolongator product loop ',x_sweeps + end if + + do i=x_sweeps-1, 1, -1 + call prolv(i)%mv_to(csr_pvi) + if (psb_errstatus_fatal()) write(0,*) me,' Fatal error in prolongator loop 1' +!!$ write(0,*) me,' SPMM1 ',csr_pvi%get_nrows(),csr_pvi%get_ncols(),& +!!$ & csr_prol%get_nrows(),csr_prol%get_ncols() + call psb_par_spspmm(csr_pvi,desc_acv(i),csr_prol,csr_prod_res,ag%desc_ac,info) + if ((info /=0).or.psb_errstatus_fatal()) write(0,*) me,' Fatal error in prolongator loop 2',info + call csr_pvi%free() + call csr_prod_res%mv_to_fmt(csr_prol,info) + if ((info /=0).or.psb_errstatus_fatal()) write(0,*) me,' Fatal error in prolongator loop 3',info + call csr_prol%set_ncols(ag%desc_ac%get_local_cols()) + if ((info /=0).or.psb_errstatus_fatal()) write(0,*) me,' Fatal error in prolongator loop 4' + end do + call csr_prol%mv_to_lfmt(lcsr_prol,info) + nnz = lcsr_prol%get_nzeros() + call ag%desc_ac%l2gip(lcsr_prol%ja(1:nnz),info) + call lcsr_prol%set_ncols(ncsave) + if (debug) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Done prolongator product loop ',x_sweeps + end if + ! + ! Fix ILAGGR here by copying from CSR_PROL%JA + ! + block + integer(psb_ipk_) :: nr + nr = lcsr_prol%get_nrows() + if (nnz /= nr) then + write(0,*) me,name,' Issue with prolongator? ',nr,nnz + end if + call psb_realloc(nr,ilaggr,info) + ilaggr(1:nnz) = lcsr_prol%ja(1:nnz) + end block + call tmp_prol%mv_from(lcsr_prol) + call psb_cdasb(ag%desc_ac,info) + call ag%ac%set_ncols(ag%desc_ac%get_local_cols()) + end block + + call tmp_prol%move_alloc(t_prol,info) + call t_prol%set_ncols(ag%desc_ac%get_local_cols()) + call t_prol%set_nrows(desc_acv(0)%get_local_rows()) + + nrac = ag%desc_ac%get_local_rows() + ncac = ag%desc_ac%get_local_cols() + call psb_realloc(nrac,ag%w_nxt,info) + ag%w_nxt(1:nrac) = tmpw(1:nrac) + + + if (do_timings) call psb_toc(idx_sweeps_mult) + + if (debug) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Out of build loop ',x_sweeps,': Output size:',sum(nlaggr) + end if + + + !call psb_set_debug_level(0) + if (dump) then + block + ivr = desc_acv(x_sweeps)%get_global_indices(owned=.false.) + prefix_ = "final_ac" + lname = len_trim(prefix_) + fname = trim(prefix_) + write(fname(lname+1:lname+9),'(a,i3.3,a)') '_p',me, '.mtx' + call acv(x_sweeps)%print(fname,head='Debug aggregates') + write(fname(lname+1:lname+13),'(a,i3.3,a)') '_p',me, '-glb.mtx' + call acv(x_sweeps)%print(fname,head='Debug aggregates',iv=ivr) + prefix_ = "final_tp" + lname = len_trim(prefix_) + fname = trim(prefix_) + write(fname(lname+1:lname+9),'(a,i3.3,a)') '_p',me, '.mtx' + call t_prol%print(fname,head='Tentative prolongator') + end block + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='amg_bootCMatch_if') + goto 9999 + end if +!!$ write(0,*)me,' ',name,' Getting out with info ',info,& +!!$ & allocated(ilaggr),psb_size(ilaggr),allocated(nlaggr),psb_size(nlaggr) + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +contains + subroutine do_l1_jacobi(nsweeps,w,a,desc_a) + integer(psb_ipk_), intent(in) :: nsweeps + real(psb_dpk_), intent(inout) :: w(:) + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + + end subroutine do_l1_jacobi +end subroutine amg_d_parmatch_aggregator_build_tprol diff --git a/amgprec/impl/aggregator/amg_d_parmatch_smth_bld.f90 b/amgprec/impl/aggregator/amg_d_parmatch_smth_bld.f90 new file mode 100644 index 00000000..7dfa3389 --- /dev/null +++ b/amgprec/impl/aggregator/amg_d_parmatch_smth_bld.f90 @@ -0,0 +1,414 @@ +! +! +! AMG4PSBLAS version 2.2 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_daggrmat_smth_bld.F90 +! +! Subroutine: amg_daggrmat_smth_bld +! Version: real +! +! This routine builds a coarse-level matrix A_C from a fine-level matrix A +! by using the Galerkin approach, i.e. +! +! A_C = P_C^T A P_C, +! +! where P_C is a prolongator from the coarse level to the fine one. +! +! The prolongator P_C is built according to a smoothed aggregation algorithm, +! i.e. it is obtained by applying a damped Jacobi smoother to the piecewise +! constant interpolation operator P corresponding to the fine-to-coarse level +! mapping built by the amg_aggrmap_bld subroutine: +! +! P_C = (I - omega*D^(-1)A) * P, +! +! where D is the diagonal matrix with main diagonal equal to the main diagonal +! of A, and omega is a suitable smoothing parameter. An estimate of the spectral +! radius of D^(-1)A, to be used in the computation of omega, is provided, +! according to the value of p%parms%aggr_omega_alg, specified by the user +! through amg_dprecinit and amg_zprecset. +! +! The coarse-level matrix A_C is distributed among the parallel processes or +! replicated on each of them, according to the value of p%parms%coarse_mat, +! specified by the user through amg_dprecinit and amg_zprecset. +! On output from this routine the entries of AC, op_prol, op_restr +! are still in "global numbering" mode; this is fixed in the calling routine +! aggregator%mat_bld. +! +! +! Arguments: +! a - type(psb_dspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! p - type(amg_d_onelev_type), input/output. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! parms - type(amg_dml_parms), input +! Parameters controlling the choice of algorithm +! ac - type(psb_dspmat_type), output +! The coarse matrix on output +! +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! op_prol - type(psb_dspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_dspmat_type), output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +subroutine amg_d_parmatch_smth_bld(ag,a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + use psb_base_mod + use amg_base_prec_type + use amg_d_inner_mod + use amg_d_base_aggregator_mod + use amg_d_parmatch_aggregator_mod, amg_protect_name => amg_d_parmatch_smth_bld + implicit none + + ! Arguments + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_dml_parms), intent(inout) :: parms + type(psb_ldspmat_type), intent(inout) :: t_prol + type(psb_dspmat_type), intent(out) :: op_prol,ac,op_restr + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + + ! Local variables + integer(psb_lpk_) :: nrow, nglob, ncol, ntaggr, ip, & + & naggr, nzl,naggrm1,naggrp1, i, j, k, jd, icolF, nrw + integer(psb_ipk_) :: inaggr + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me + character(len=20) :: name + type(psb_ld_coo_sparse_mat) :: tmpcoo, ac_coo, lcoo_restr + type(psb_d_coo_sparse_mat) :: coo_prol, coo_restr + type(psb_d_csr_sparse_mat) :: acsrf, csr_prol, acsr, tcsr + real(psb_dpk_), allocatable :: adiag(:) + real(psb_dpk_), allocatable :: arwsum(:) + logical :: filter_mat + integer(psb_ipk_) :: debug_level, debug_unit, err_act + integer(psb_ipk_), parameter :: ncmax=16 + real(psb_dpk_) :: anorm, omega, tmp, dg, theta + logical, parameter :: debug_new=.false., dump_r=.false., dump_p=.false., debug=.false. + character(len=80) :: filename + logical, parameter :: do_timings=.false. + integer(psb_ipk_), save :: idx_spspmm=-1, idx_phase1=-1, idx_gtrans=-1, idx_phase2=-1, idx_refine=-1, idx_phase3=-1 + integer(psb_ipk_), save :: idx_cdasb=-1, idx_ptap=-1 + + name='amg_parmatch_smth_bld' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + !debug_level = 2 + ictxt = desc_a%get_context() + + call psb_info(ictxt, me, np) + + nglob = desc_a%get_global_rows() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + + theta = parms%aggr_thresh + !write(0,*) me,' ',trim(name),' Start ',idx_spspmm + if ((do_timings).and.(idx_spspmm==-1)) & + & idx_spspmm = psb_get_timer_idx("PMC_SMTH_BLD: par_spspmm") + if ((do_timings).and.(idx_phase1==-1)) & + & idx_phase1 = psb_get_timer_idx("PMC_SMTH_BLD: phase1 ") + if ((do_timings).and.(idx_phase2==-1)) & + & idx_phase2 = psb_get_timer_idx("PMC_SMTH_BLD: phase2 ") + if ((do_timings).and.(idx_phase3==-1)) & + & idx_phase3 = psb_get_timer_idx("PMC_SMTH_BLD: phase3 ") + if ((do_timings).and.(idx_gtrans==-1)) & + & idx_gtrans = psb_get_timer_idx("PMC_SMTH_BLD: gtrans ") + if ((do_timings).and.(idx_refine==-1)) & + & idx_refine = psb_get_timer_idx("PMC_SMTH_BLD: refine ") + if ((do_timings).and.(idx_cdasb==-1)) & + & idx_cdasb = psb_get_timer_idx("PMC_SMTH_BLD: cdasb ") + if ((do_timings).and.(idx_ptap==-1)) & + & idx_ptap = psb_get_timer_idx("PMC_SMTH_BLD: ptap_bld ") + + if (do_timings) call psb_tic(idx_phase1) + + naggr = nlaggr(me+1) + ntaggr = sum(nlaggr) + naggrm1 = sum(nlaggr(1:me)) + naggrp1 = sum(nlaggr(1:me+1)) + filter_mat = (parms%aggr_filter == amg_filter_mat_) + + ! + ! naggr: number of local aggregates + ! nrow: local rows. + ! + if (dump_p) then + block + integer(psb_lpk_), allocatable :: ivr(:), ivc(:) + integer(psb_lpk_) :: i + character(len=132) :: aname + type(psb_ldspmat_type) :: aglob + type(psb_dspmat_type) :: atmp +!!$ call a%cp_to(acsr) +!!$ call atmp%cp_from(acsr) + write(0,*) me,' ',trim(name),' Dumping inp_prol/restr' + write(aname,'(a,i0,a,i0,a)') 'tprol-',desc_a%get_global_rows(),'-p',me,'.mtx' + call t_prol%print(fname=aname,head='Test ') + end block + end if + + if (do_timings) call psb_tic(idx_refine) + ! Get the diagonal D + adiag = a%get_diag(info) + if (info == psb_success_) & + & call psb_realloc(ncol,adiag,info) + if (info == psb_success_) & + & call psb_halo(adiag,desc_a,info) + if (info == psb_success_) call a%cp_to(acsr) + + if(info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='sp_getdiag') + goto 9999 + end if + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ' Initial copies done.' + + call acsr%cp_to_fmt(acsrf,info) + + if (filter_mat) then + ! + ! Build the filtered matrix Af from A + ! + + do i=1, nrow + tmp = dzero + jd = -1 + do j=acsrf%irp(i),acsrf%irp(i+1)-1 + if (acsrf%ja(j) == i) jd = j + if (abs(acsrf%val(j)) < theta*sqrt(abs(adiag(i)*adiag(acsrf%ja(j))))) then + tmp=tmp+acsrf%val(j) + acsrf%val(j)=dzero + endif + + enddo + if (jd == -1) then + write(0,*) 'Wrong input: we need the diagonal!!!!', i + else + acsrf%val(jd)=acsrf%val(jd)-tmp + end if + enddo + ! Take out zeroed terms + call acsrf%clean_zeros(info) + end if + + + do i=1,size(adiag) + if (adiag(i) /= dzero) then + adiag(i) = done / adiag(i) + else + adiag(i) = done + end if + end do + if (do_timings) call psb_toc(idx_refine) + + if (parms%aggr_omega_alg == amg_eig_est_) then + + if (parms%aggr_eig == amg_max_norm_) then + allocate(arwsum(nrow)) + call acsr%arwsum(arwsum) + anorm = maxval(abs(adiag(1:nrow)*arwsum(1:nrow))) + call psb_amx(ictxt,anorm) + omega = 4.d0/(3.d0*anorm) + parms%aggr_omega_val = omega + + else + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid amg_aggr_eig_') + goto 9999 + end if + + else if (parms%aggr_omega_alg == amg_user_choice_) then + + omega = parms%aggr_omega_val + + else if (parms%aggr_omega_alg /= amg_user_choice_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid amg_aggr_omega_alg_') + goto 9999 + end if + + + call acsrf%scal(adiag,info) + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ' Filtering and scaling done.',info + if (info /= psb_success_) goto 9999 + + inaggr = naggr + + call t_prol%cp_to(tmpcoo) + + call psb_cdall(ictxt,desc_ac,info,nl=inaggr) + nzl = tmpcoo%get_nzeros() + call desc_ac%indxmap%g2lip_ins(tmpcoo%ja(1:nzl),info) + call tmpcoo%set_ncols(desc_ac%get_local_cols()) + call tmpcoo%mv_to_ifmt(tcsr,info) + ! + ! Build the smoothed prolongator using either A or Af + ! csr_prol = (I-w*D*A) Prol csr_prol = (I-w*D*Af) Prol + ! This is always done through the variable acsrf which + ! is a bit less readable, but saves space and one extra matrix copy + ! + call omega_smooth(omega,acsrf) + if (do_timings) call psb_toc(idx_phase1) + + if (do_timings) call psb_tic(idx_spspmm) + call psb_par_spspmm(acsrf,desc_a,tcsr,csr_prol,desc_ac,info) + call tcsr%free() + if (do_timings) call psb_toc(idx_spspmm) + if (do_timings) call psb_tic(idx_phase2) + + if(info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='spspmm 1') + goto 9999 + end if + ! + ! Now that we have the smoothed prolongator, we can + ! compute the triple product. + ! + if (do_timings) call psb_tic(idx_cdasb) + call psb_cdasb(desc_ac,info) + if (do_timings) call psb_toc(idx_cdasb) + call psb_cd_reinit(desc_ac,info) + + call csr_prol%mv_to_coo(coo_prol,info) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done SPSPMM 1' + + if (do_timings) call psb_tic(idx_ptap) + if (.not.allocated(ag%desc_ax)) allocate(ag%desc_ax) + call amg_ptap_bld(acsr,desc_a,nlaggr,parms,ac,& + & coo_prol,desc_ac,coo_restr,info,desc_ax=ag%desc_ax) + if (do_timings) call psb_toc(idx_ptap) + + call op_prol%mv_from(coo_prol) + call op_restr%mv_from(coo_restr) + + + if (debug) write(0,*) me,' ',trim(name),' After mv_from',psb_get_errstatus() + if (debug) write(0,*) me,' ',trim(name),' ',ac%get_fmt(),ac%get_nrows(),ac%get_ncols(),ac%get_nzeros(),naggr,ntaggr + ! write(0,*) me,' ',trim(name),' Final AC newstyle ',ac%get_fmt(),ac%get_nrows(),ac%get_ncols(),ac%get_nzeros() + + if (dump_r) then + block + integer(psb_lpk_), allocatable :: ivr(:), ivc(:) + integer(psb_lpk_) :: i + character(len=132) :: aname + type(psb_ldspmat_type) :: aglob + type(psb_dspmat_type) :: atmp + write(0,*) me,' ',trim(name),' Dumping prol/restr' + ivc = [(i,i=1,desc_a%get_local_cols())] + call desc_a%l2gip(ivc,info) + ivr = [(i,i=1,desc_ac%get_local_cols())] + call desc_ac%l2gip(ivr,info) + + write(aname,'(a,i0,a,i0,a)') 'restr-',desc_ac%get_global_rows(),'-p',me,'.mtx' + + call op_restr%print(fname=aname,head='Test ',ivc=ivc) +!!$ write(aname,'(a,i0,a,i0,a)') 'prol-',desc_ac%get_global_rows(),'-p',me,'.mtx' +!!$ call op_prol%print(fname=aname,head='Test ') +!!$ call psb_gather(aglob,atmp,desc_a,info) +!!$ if (me==psb_root_) then +!!$ write(aname,'(a,i0,a)') 'a-inp-g-',aglob%get_nrows(),'.mtx' +!!$ call aglob%print(fname=aname,head='Test ') +!!$ end if + + end block + end if + if (do_timings) call psb_toc(idx_phase2) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done smooth_aggregate ' + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + call psb_error_handler(err_act) + return + +contains + + subroutine omega_smooth(omega,acsr) + implicit none + real(psb_dpk_),intent(in) :: omega + type(psb_d_csr_sparse_mat), intent(inout) :: acsr + ! + integer(psb_ipk_) :: i,j + do i=1,acsr%get_nrows() + do j=acsr%irp(i),acsr%irp(i+1)-1 + if (acsr%ja(j) == i) then + acsr%val(j) = done - omega*acsr%val(j) + else + acsr%val(j) = - omega*acsr%val(j) + end if + end do + end do + end subroutine omega_smooth + +end subroutine amg_d_parmatch_smth_bld diff --git a/amgprec/impl/aggregator/amg_d_parmatch_spmm_bld.f90 b/amgprec/impl/aggregator/amg_d_parmatch_spmm_bld.f90 new file mode 100644 index 00000000..b051d935 --- /dev/null +++ b/amgprec/impl/aggregator/amg_d_parmatch_spmm_bld.f90 @@ -0,0 +1,194 @@ +! ! +! +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! +! (C) Copyright 2021 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! moved here from +! +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_daggrmat_nosmth_bld.F90 +! +! Subroutine: amg_daggrmat_nosmth_bld +! Version: real +! +! This routine builds a coarse-level matrix A_C from a fine-level matrix A +! by using the Galerkin approach, i.e. +! +! A_C = P_C^T A P_C, +! +! where P_C is the piecewise constant interpolation operator corresponding +! the fine-to-coarse level mapping built by amg_aggrmap_bld. +! +! The coarse-level matrix A_C is distributed among the parallel processes or +! replicated on each of them, according to the value of p%parms%coarse_mat +! specified by the user through amg_dprecinit and amg_zprecset. +! On output from this routine the entries of AC, op_prol, op_restr +! are still in "global numbering" mode; this is fixed in the calling routine +! +! For details see +! P. D'Ambra, D. di Serafino and S. Filippone, On the development of +! PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math., +! 57 (2007), 1181-1196. +! +! +! Arguments: +! a - type(psb_dspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! p - type(amg_d_onelev_type), input/output. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! parms - type(amg_dml_parms), input +! Parameters controlling the choice of algorithm +! ac - type(psb_dspmat_type), output +! The coarse matrix on output +! +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! op_prol - type(psb_dspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_dspmat_type), output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +! +subroutine amg_d_parmatch_spmm_bld(a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + use psb_base_mod + use amg_d_inner_mod + use amg_d_parmatch_aggregator_mod, amg_protect_name => amg_d_parmatch_spmm_bld + implicit none + + ! Arguments + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_dml_parms), intent(inout) :: parms + type(psb_ldspmat_type), intent(inout) :: t_prol + type(psb_dspmat_type), intent(inout) :: ac, op_prol, op_restr + type(psb_desc_type), intent(out) :: desc_ac + integer(psb_ipk_), intent(out) :: info + + ! Local variables + integer(psb_ipk_) :: err_act + + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np,me + character(len=20) :: name + type(psb_d_csr_sparse_mat) :: acsr + integer(psb_lpk_) :: nrow, nglob, ncol, ntaggr, nzl, ip, & + & naggr, nzt, naggrm1, naggrp1, i, k + integer(psb_ipk_) :: inaggr, nzlp + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false. + + name='amg_parmatch_spmm_bld' + if(psb_get_errstatus().ne.0) return + info=psb_success_ + call psb_erractionsave(err_act) + + + ictxt = desc_a%get_context() + call psb_info(ictxt, me, np) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + call a%cp_to(acsr) + + call amg_d_parmatch_spmm_bld_inner(acsr,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="SPMM_BLD_INNER") + goto 9999 + end if + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done spmm_bld ' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine amg_d_parmatch_spmm_bld diff --git a/amgprec/impl/aggregator/amg_d_parmatch_spmm_bld_inner.f90 b/amgprec/impl/aggregator/amg_d_parmatch_spmm_bld_inner.f90 new file mode 100644 index 00000000..e1166405 --- /dev/null +++ b/amgprec/impl/aggregator/amg_d_parmatch_spmm_bld_inner.f90 @@ -0,0 +1,210 @@ +! +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_daggrmat_nosmth_bld.F90 +! +! Subroutine: amg_daggrmat_nosmth_bld +! Version: real +! +! This routine builds a coarse-level matrix A_C from a fine-level matrix A +! by using the Galerkin approach, i.e. +! +! A_C = P_C^T A P_C, +! +! where P_C is the piecewise constant interpolation operator corresponding +! the fine-to-coarse level mapping built by amg_aggrmap_bld. +! +! The coarse-level matrix A_C is distributed among the parallel processes or +! replicated on each of them, according to the value of p%parms%coarse_mat +! specified by the user through amg_dprecinit and amg_zprecset. +! On output from this routine the entries of AC, op_prol, op_restr +! are still in "global numbering" mode; this is fixed in the calling routine +! +! For details see +! P. D'Ambra, D. di Serafino and S. Filippone, On the development of +! PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math., +! 57 (2007), 1181-1196. +! +! +! Arguments: +! a - type(psb_dspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! p - type(amg_d_onelev_type), input/output. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! parms - type(amg_dml_parms), input +! Parameters controlling the choice of algorithm +! ac - type(psb_dspmat_type), output +! The coarse matrix on output +! +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! op_prol - type(psb_dspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_dspmat_type), output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +! +subroutine amg_d_parmatch_spmm_bld_inner(a_csr,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + use psb_base_mod + use amg_d_inner_mod + use amg_d_parmatch_aggregator_mod, amg_protect_name => amg_d_parmatch_spmm_bld_inner + implicit none + + ! Arguments + type(psb_d_csr_sparse_mat), intent(inout) :: a_csr + type(psb_desc_type), intent(in) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_dml_parms), intent(inout) :: parms + type(psb_ldspmat_type), intent(inout) :: t_prol + type(psb_dspmat_type), intent(out) :: ac, op_prol, op_restr + type(psb_desc_type), intent(out) :: desc_ac + integer(psb_ipk_), intent(out) :: info + + ! Local variables + integer(psb_ipk_) :: err_act + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me, ndx + character(len=40) :: name + type(psb_ld_coo_sparse_mat) :: tmpcoo + type(psb_d_coo_sparse_mat) :: coo_prol, coo_restr + type(psb_d_csr_sparse_mat) :: ac_csr, csr_restr + type(psb_desc_type), target :: tmp_desc + type(psb_ldspmat_type) :: lac + integer(psb_ipk_) :: debug_level, debug_unit, naggr + integer(psb_lpk_) :: nrow, nglob, ncol, ntaggr, nrl, nzl, ip, & + & nzt, naggrm1, naggrp1, i, k + integer(psb_lpk_), allocatable :: ia(:),ja(:) + !integer(psb_lpk_) :: nrsave, ncsave, nzsave, nza, nrpsave, ncpsave, nzpsave + logical, parameter :: do_timings=.true., oldstyle=.false., debug=.false. + integer(psb_ipk_), save :: idx_spspmm=-1, idx_prolcnv=-1, idx_proltrans=-1, idx_asb=-1 + + name='amg_parmatch_spmm_bld_inner' + if(psb_get_errstatus().ne.0) return + info=psb_success_ + call psb_erractionsave(err_act) + + + ictxt = desc_a%get_context() + call psb_info(ictxt, me, np) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + nglob = desc_a%get_global_rows() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + + if ((do_timings).and.(idx_spspmm==-1)) & + & idx_spspmm = psb_get_timer_idx("SPMM_BLD: spspmm ") + if ((do_timings).and.(idx_prolcnv==-1)) & + & idx_prolcnv = psb_get_timer_idx("SPMM_BLD: prolcnv ") + if ((do_timings).and.(idx_proltrans==-1)) & + & idx_proltrans = psb_get_timer_idx("SPMM_BLD: proltrans") + if ((do_timings).and.(idx_asb==-1)) & + & idx_asb = psb_get_timer_idx("SPMM_BLD: asb ") + + if (do_timings) call psb_tic(idx_prolcnv) + naggr = nlaggr(me+1) + ntaggr = sum(nlaggr) + naggrm1 = sum(nlaggr(1:me)) + naggrp1 = sum(nlaggr(1:me+1)) + + ! + ! Here T_PROL should be arriving with GLOBAL indices on the cols + ! and LOCAL indices on the rows. + ! + if (debug) write(0,*) me,' ',trim(name),' Size check on entry New: ',& + & op_prol%get_fmt(),op_prol%get_nrows(),op_prol%get_ncols(),op_prol%get_nzeros(),& + & nrow,ntaggr,naggr + + call t_prol%cp_to(tmpcoo) + + call psb_cdall(ictxt,desc_ac,info,nl=naggr) + nzl = tmpcoo%get_nzeros() + if (debug) write(0,*) me,' ',trim(name),' coo_prol: ',& + & tmpcoo%ia(1:min(10,nzl)),' :',tmpcoo%ja(1:min(10,nzl)) + call desc_ac%indxmap%g2lip_ins(tmpcoo%ja(1:nzl),info) + call tmpcoo%set_ncols(desc_ac%get_local_cols()) + call tmpcoo%cp_to_icoo(coo_prol,info) + + call amg_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,& + & coo_prol,desc_ac,coo_restr,info) + + nzl = coo_prol%get_nzeros() + if (debug) write(0,*) me,' ',trim(name),' coo_prol: ',& + & coo_prol%ia(1:min(10,nzl)),' :',coo_prol%ja(1:min(10,nzl)) + + call op_prol%mv_from(coo_prol) + call op_restr%mv_from(coo_restr) + + if (debug) then + write(0,*) me,' ',trim(name),' Checkpoint at exit' + call psb_barrier(ictxt) + write(0,*) me,' ',trim(name),' Checkpoint through' + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_internal_error_,name,a_err='Build ac = op_restr x a3') + goto 9999 + end if + + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done smooth_aggregate ' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine amg_d_parmatch_spmm_bld_inner diff --git a/amgprec/impl/aggregator/amg_d_parmatch_spmm_bld_ov.f90 b/amgprec/impl/aggregator/amg_d_parmatch_spmm_bld_ov.f90 new file mode 100644 index 00000000..318012ff --- /dev/null +++ b/amgprec/impl/aggregator/amg_d_parmatch_spmm_bld_ov.f90 @@ -0,0 +1,180 @@ +! +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_daggrmat_nosmth_bld_ov.F90 +! +! Subroutine: amg_daggrmat_nosmth_bld_ov +! Version: real +! +! This routine builds a coarse-level matrix A_C from a fine-level matrix A +! by using the Galerkin approach, i.e. +! +! A_C = P_C^T A P_C, +! +! where P_C is the piecewise constant interpolation operator corresponding +! the fine-to-coarse level mapping built by amg_aggrmap_bld_ov. +! +! The coarse-level matrix A_C is distributed among the parallel processes or +! replicated on each of them, according to the value of p%parms%coarse_mat +! specified by the user through amg_dprecinit and amg_zprecset. +! On output from this routine the entries of AC, op_prol, op_restr +! are still in "global numbering" mode; this is fixed in the calling routine +! +! For details see +! P. D'Ambra, D. di Serafino and S. Filippone, On the development of +! PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math., +! 57 (2007), 1181-1196. +! +! +! Arguments: +! a - type(psb_dspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! p - type(amg_d_onelev_type), input/output. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! parms - type(amg_dml_parms), input +! Parameters controlling the choice of algorithm +! ac - type(psb_dspmat_type), output +! The coarse matrix on output +! +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! op_prol - type(psb_dspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_dspmat_type), output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +! +subroutine amg_d_parmatch_spmm_bld_ov(a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + use psb_base_mod + use amg_d_inner_mod + use amg_d_parmatch_aggregator_mod, amg_protect_name => amg_d_parmatch_spmm_bld_ov + implicit none + + ! Arguments + type(psb_dspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_dml_parms), intent(inout) :: parms + type(psb_ldspmat_type), intent(inout) :: t_prol + type(psb_dspmat_type), intent(inout) :: ac, op_prol, op_restr + type(psb_desc_type), intent(out) :: desc_ac + integer(psb_ipk_), intent(out) :: info + + ! Local variables + integer(psb_ipk_) :: err_act + + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me + character(len=20) :: name + type(psb_d_csr_sparse_mat) :: acsr + type(psb_ld_coo_sparse_mat) :: coo_prol, coo_restr + integer(psb_lpk_) :: nrow, nglob, ncol, ntaggr, nzl, ip, & + & naggr, nzt, naggrm1, naggrp1, i, k + integer(psb_ipk_) :: inaggr, nzlp + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false., new_version=.true. + + name='amg_parmatch_spmm_bld_ov' + if(psb_get_errstatus().ne.0) return + info=psb_success_ + call psb_erractionsave(err_act) + + + ictxt = desc_a%get_context() + call psb_info(ictxt, me, np) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + call a%mv_to(acsr) + + call amg_d_parmatch_spmm_bld_inner(acsr,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + if (psb_errstatus_fatal()) write(0,*)me,trim(name),'Error fatal on exit from bld_inner',info + +!!$ else +!!$ naggr = nlaggr(me+1) +!!$ ntaggr = sum(nlaggr) +!!$ naggrm1 = sum(nlaggr(1:me)) +!!$ naggrp1 = sum(nlaggr(1:me+1)) +!!$ call op_prol%mv_to(coo_prol) +!!$ inaggr = naggr +!!$ call psb_cdall(ictxt,desc_ac,info,nl=inaggr) +!!$ nzlp = coo_prol%get_nzeros() +!!$ call desc_ac%indxmap%g2lip_ins(coo_prol%ja(1:nzlp),info) +!!$ call coo_prol%set_ncols(desc_ac%get_local_cols()) +!!$ call amg_spmm_bld_inner(acsr,desc_a,nlaggr,parms,ac,& +!!$ & coo_prol,desc_ac,coo_restr,info) +!!$ call psb_cdasb(desc_ac,info) +!!$ !call desc_ac%free(info) +!!$ !write(0,*) me, 'Size of nlaggr',size(nlaggr),nlaggr(1) +!!$ call op_prol%mv_from(coo_prol) +!!$ call op_restr%mv_from(coo_restr) +!!$ +!!$ end if + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="SPMM_BLD_INNER") + goto 9999 + end if + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done spmm_bld ' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine amg_d_parmatch_spmm_bld_ov diff --git a/amgprec/impl/aggregator/amg_d_parmatch_unsmth_bld.f90 b/amgprec/impl/aggregator/amg_d_parmatch_unsmth_bld.f90 new file mode 100644 index 00000000..1072a8cb --- /dev/null +++ b/amgprec/impl/aggregator/amg_d_parmatch_unsmth_bld.f90 @@ -0,0 +1,251 @@ +! +! +! AMG4PSBLAS version 2.2 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_d_parmatch_unsmth_bld.F90 +! +! Subroutine: amg_d_parmatch_unsmth_bld +! Version: real +! +! This routine builds a coarse-level matrix A_C from a fine-level matrix A +! by using the Galerkin approach, i.e. +! +! A_C = P_C^T A P_C, +! +! where P_C is a prolongator from the coarse level to the fine one. +! +! The prolongator P_C is built according to a smoothed aggregation algorithm, +! i.e. it is obtained by applying a damped Jacobi smoother to the piecewise +! constant interpolation operator P corresponding to the fine-to-coarse level +! mapping built by the amg_aggrmap_bld subroutine: +! +! P_C = (I - omega*D^(-1)A) * P, +! +! where D is the diagonal matrix with main diagonal equal to the main diagonal +! of A, and omega is a suitable smoothing parameter. An estimate of the spectral +! radius of D^(-1)A, to be used in the computation of omega, is provided, +! according to the value of p%parms%aggr_omega_alg, specified by the user +! through amg_dprecinit and amg_zprecset. +! +! The coarse-level matrix A_C is distributed among the parallel processes or +! replicated on each of them, according to the value of p%parms%coarse_mat, +! specified by the user through amg_dprecinit and amg_zprecset. +! On output from this routine the entries of AC, op_prol, op_restr +! are still in "global numbering" mode; this is fixed in the calling routine +! aggregator%mat_bld. +! +! +! Arguments: +! a - type(psb_dspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! p - type(amg_d_onelev_type), input/output. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! parms - type(amg_dml_parms), input +! Parameters controlling the choice of algorithm +! ac - type(psb_dspmat_type), output +! The coarse matrix on output +! +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! op_prol - type(psb_dspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_dspmat_type), output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +subroutine amg_d_parmatch_unsmth_bld(ag,a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + use psb_base_mod + use amg_base_prec_type + use amg_d_inner_mod + use amg_d_base_aggregator_mod + use amg_d_parmatch_aggregator_mod, amg_protect_name => amg_d_parmatch_unsmth_bld + implicit none + + ! Arguments + class(amg_d_parmatch_aggregator_type), target, intent(inout) :: ag + type(psb_dspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_dml_parms), intent(inout) :: parms + type(psb_dspmat_type), intent(inout) :: op_prol,ac,op_restr + type(psb_ldspmat_type), intent(inout) :: t_prol + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + + ! Local variables + integer(psb_lpk_) :: nrow, nglob, ncol, ntaggr, ip, & + & naggr, nzl,naggrm1,naggrp1, i, j, k, jd, icolF, nrw + integer(psb_ipk_) :: inaggr + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me + character(len=20) :: name + type(psb_ld_coo_sparse_mat) :: lcoo_prol + type(psb_d_coo_sparse_mat) :: coo_prol, coo_restr + type(psb_d_csr_sparse_mat) :: acsr + type(psb_d_csr_sparse_mat) :: csr_prol, acsr3, csr_restr, ac_csr + real(psb_dpk_), allocatable :: adiag(:) + real(psb_dpk_), allocatable :: arwsum(:) + logical :: filter_mat + integer(psb_ipk_) :: debug_level, debug_unit, err_act + integer(psb_ipk_), parameter :: ncmax=16 + real(psb_dpk_) :: anorm, omega, tmp, dg, theta + logical, parameter :: debug_new=.false., dump_r=.false., dump_p=.false., debug=.false. + logical, parameter :: do_timings=.false. + integer(psb_ipk_), save :: idx_spspmm=-1 + character(len=80) :: filename + + name='amg_parmatch_unsmth_bld' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt = desc_a%get_context() + + call psb_info(ictxt, me, np) + + nglob = desc_a%get_global_rows() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + + theta = parms%aggr_thresh + !write(0,*) me,' ',trim(name),' Start ' + + if ((do_timings).and.(idx_spspmm==-1)) & + & idx_spspmm = psb_get_timer_idx("PMC_UNSMTH_BLD: par_spspmm") + + ! + naggr = nlaggr(me+1) + ntaggr = sum(nlaggr) + naggrm1 = sum(nlaggr(1:me)) + naggrp1 = sum(nlaggr(1:me+1)) + !write(0,*) me,' ',trim(name),' input sizes',nlaggr(:),':',naggr + + call a%cp_to(acsr) + call t_prol%mv_to(lcoo_prol) + + inaggr = naggr + call psb_cdall(ictxt,desc_ac,info,nl=inaggr) + nzl = lcoo_prol%get_nzeros() + call desc_ac%indxmap%g2lip_ins(lcoo_prol%ja(1:nzl),info) + call lcoo_prol%set_ncols(desc_ac%get_local_cols()) + call lcoo_prol%cp_to_icoo(coo_prol,info) + + if (debug) call check_coo(me,trim(name)//' Check 1 on coo_prol:',coo_prol) + + call psb_cdasb(desc_ac,info) + call psb_cd_reinit(desc_ac,info) + if (.not.allocated(ag%desc_ax)) allocate(ag%desc_ax) + + call amg_ptap_bld(acsr,desc_a,nlaggr,parms,ac,& + & coo_prol,desc_ac,coo_restr,info,desc_ax=ag%desc_ax) + + call op_restr%cp_from(coo_restr) + call op_prol%mv_from(coo_prol) + + if (debug) write(0,*) me,' ',trim(name),' After mv_from',psb_get_errstatus() + if (debug) write(0,*) me,' ',trim(name),' ',ac%get_fmt(),ac%get_nrows(),ac%get_ncols(),ac%get_nzeros(),naggr,ntaggr + ! write(0,*) me,' ',trim(name),' Final AC newstyle ',ac%get_fmt(),ac%get_nrows(),ac%get_ncols(),ac%get_nzeros() + + if (debug) then + write(0,*) me,' ',trim(name),' Checkpoint at exit' + call psb_barrier(ictxt) + write(0,*) me,' ',trim(name),' Checkpoint through' + block + character(len=128) :: fname, prefix_ + integer :: lname + prefix_ = "unsmth_bld_" + lname = len_trim(prefix_) + fname = trim(prefix_) + write(fname(lname+1:lname+10),'(a,i3.3,a)') '_p_',me, '.mtx' + call op_prol%print(fname,head='Debug aggregates') + write(fname(lname+1:lname+10),'(a,i3.3,a)') '_r_',me, '.mtx' + call op_restr%print(fname,head='Debug aggregates') + write(fname(lname+1:lname+11),'(a,i3.3,a)') '_ac_',me, '.mtx' + call ac%print(fname,head='Debug aggregates') + end block + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_internal_error_,name,a_err='Build ac = coo_restr x am3') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + call psb_error_handler(err_act) + return + +contains + subroutine check_coo(me,string,coo) + implicit none + integer(psb_ipk_) :: me + type(psb_d_coo_sparse_mat) :: coo + character(len=*) :: string + integer(psb_lpk_) :: nr,nc,nz + nr = coo%get_nrows() + nc = coo%get_ncols() + nz = coo%get_nzeros() + write(0,*) me,string,nr,nc,& + & minval(coo%ia(1:nz)),maxval(coo%ia(1:nz)),& + & minval(coo%ja(1:nz)),maxval(coo%ja(1:nz)) + end subroutine check_coo + +end subroutine amg_d_parmatch_unsmth_bld diff --git a/amgprec/impl/aggregator/amg_s_parmatch_aggregator_inner_mat_asb.f90 b/amgprec/impl/aggregator/amg_s_parmatch_aggregator_inner_mat_asb.f90 new file mode 100644 index 00000000..aeccc1f7 --- /dev/null +++ b/amgprec/impl/aggregator/amg_s_parmatch_aggregator_inner_mat_asb.f90 @@ -0,0 +1,231 @@ +! ! +! +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! +! (C) Copyright 2021 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! moved here from +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! MLD2P4 version 2.2 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_s_parmatch_aggregator_mat_asb.f90 +! +! Subroutine: amg_s_parmatch_aggregator_mat_asb +! Version: real +! +! +! From a given AC to final format, generating DESC_AC. +! This is quite involved, because in the context of aggregation based +! on parallel matching we are building the matrix hierarchy within BLD_TPROL +! as we go, especially if we have multiple sweeps, hence this code is called +! in two completely different contexts: +! 1. Within bld_tprol for the internal hierarchy +! 2. Outside, from amg_hierarchy_bld +! The solution we have found is for bld_tprol to copy its output +! into special components ag%ac ag%desc_ac etc so that: +! 1. if they are allocated, it means that bld_tprol has been already invoked, we are in +! amg_hierarchy_bld and we only need to copy them +! 2. If they are not allocated, we are within bld_tprol, and we need to actually +! perform the various needed steps. +! +! Arguments: +! ag - type(amg_s_parmatch_aggregator_type), input/output. +! The aggregator object +! parms - type(amg_sml_parms), input +! The aggregation parameters +! a - type(psb_sspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! ac - type(psb_sspmat_type), inout +! The coarse matrix +! desc_ac - type(psb_desc_type), output. +! The communication descriptor of the fine-level matrix. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! +! op_prol - type(psb_sspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_sspmat_type), input/output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +subroutine amg_s_parmatch_aggregator_inner_mat_asb(ag,parms,a,desc_a,& + & ac,desc_ac, op_prol,op_restr,info) + use psb_base_mod + use amg_base_prec_type + use amg_s_parmatch_aggregator_mod, amg_protect_name => amg_s_parmatch_aggregator_inner_mat_asb + implicit none + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_sml_parms), intent(inout) :: parms + type(psb_sspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + type(psb_sspmat_type), intent(inout) :: op_prol,op_restr + type(psb_sspmat_type), intent(inout) :: ac + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + ! + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me + type(psb_ls_coo_sparse_mat) :: acoo, bcoo + type(psb_ls_csr_sparse_mat) :: acsr1 + integer(psb_ipk_) :: nzl, inl + integer(psb_lpk_) :: ntaggr + integer(psb_ipk_) :: err_act, debug_level, debug_unit + character(len=20) :: name='d_parmatch_inner_mat_asb' + character(len=80) :: aname + logical, parameter :: debug=.false., dump_prol_restr=.false. + + + if (psb_get_errstatus().ne.0) return + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + info = psb_success_ + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + + if (debug) write(0,*) me,' ',trim(name),' Start:',& + & allocated(ag%ac),allocated(ag%desc_ac), allocated(ag%prol),allocated(ag%restr) + + + select case(parms%coarse_mat) + + case(amg_distr_mat_) + ! Do nothing, it has already been done in spmm_bld_ov. + + case(amg_repl_mat_) + ! + ! + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='no repl coarse_mat_ here') + goto 9999 + + case default + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid amg_coarse_mat_') + goto 9999 + end select + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +end subroutine amg_s_parmatch_aggregator_inner_mat_asb diff --git a/amgprec/impl/aggregator/amg_s_parmatch_aggregator_mat_asb.f90 b/amgprec/impl/aggregator/amg_s_parmatch_aggregator_mat_asb.f90 new file mode 100644 index 00000000..fac367f2 --- /dev/null +++ b/amgprec/impl/aggregator/amg_s_parmatch_aggregator_mat_asb.f90 @@ -0,0 +1,277 @@ +! ! +! +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! +! (C) Copyright 2021 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! moved here from +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_s_base_aggregator_mat_bld.f90 +! +! +! AMG4PSBLAS version 2.2 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_s_parmatch_aggregator_mat_asb.f90 +! +! Subroutine: amg_s_parmatch_aggregator_mat_asb +! Version: real +! +! +! From a given AC to final format, generating DESC_AC. +! This is quite involved, because in the context of aggregation based +! on parallel matching we are building the matrix hierarchy within BLD_TPROL +! as we go, especially if we have multiple sweeps, hence this code is called +! in two completely different contexts: +! 1. Within bld_tprol for the internal hierarchy +! 2. Outside, from amg_hierarchy_bld +! The solution we have found is for bld_tprol to copy its output +! into special components ag%ac ag%desc_ac etc so that: +! 1. if they are allocated, it means that bld_tprol has been already invoked, we are in +! amg_hierarchy_bld and we only need to copy them +! 2. If they are not allocated, we are within bld_tprol, and we need to actually +! perform the various needed steps. +! +! Arguments: +! ag - type(amg_s_parmatch_aggregator_type), input/output. +! The aggregator object +! parms - type(amg_sml_parms), input +! The aggregation parameters +! a - type(psb_sspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! ac - type(psb_sspmat_type), inout +! The coarse matrix +! desc_ac - type(psb_desc_type), output. +! The communication descriptor of the fine-level matrix. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! +! op_prol - type(psb_sspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_sspmat_type), input/output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +subroutine amg_s_parmatch_aggregator_mat_asb(ag,parms,a,desc_a,& + & ac,desc_ac, op_prol,op_restr,info) + use psb_base_mod + use amg_base_prec_type + use amg_s_parmatch_aggregator_mod, amg_protect_name => amg_s_parmatch_aggregator_mat_asb + implicit none + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_sml_parms), intent(inout) :: parms + type(psb_sspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + type(psb_sspmat_type), intent(inout) :: op_prol,ac,op_restr + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + ! + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me + type(psb_ls_coo_sparse_mat) :: tmpcoo + type(psb_lsspmat_type) :: tmp_ac + integer(psb_ipk_) :: i_nr, i_nc, i_nl, nzl + integer(psb_lpk_) :: ntaggr + integer(psb_ipk_) :: err_act, debug_level, debug_unit + character(len=20) :: name='d_parmatch_mat_asb' + character(len=80) :: aname + logical, parameter :: debug=.false., dump_prol_restr=.false., dump_ac=.false. + + + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + info = psb_success_ + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + if (psb_get_errstatus().ne.0) then + write(0,*) me,' From:',trim(name),':',psb_get_errstatus() + return + end if + + + if (debug) write(0,*) me,' ',trim(name),' Start:',& + & allocated(ag%ac),allocated(ag%desc_ac), allocated(ag%prol),allocated(ag%restr) + + select case(parms%coarse_mat) + + case(amg_distr_mat_) + + call ac%cscnv(info,type='csr') + call op_prol%cscnv(info,type='csr') + call op_restr%cscnv(info,type='csr') + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done ac ' + + case(amg_repl_mat_) + ! + ! We are assuming here that an d matrix + ! can hold all entries + ! + if (desc_ac%get_global_rows() < huge(1_psb_ipk_) ) then + ntaggr = desc_ac%get_global_rows() + i_nr = ntaggr + else + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid amg_coarse_mat_') + goto 9999 + end if + + call op_prol%mv_to(tmpcoo) + nzl = tmpcoo%get_nzeros() + call psb_loc_to_glob(tmpcoo%ja(1:nzl),desc_ac,info,'I') + call op_prol%mv_from(tmpcoo) + + call op_restr%mv_to(tmpcoo) + nzl = tmpcoo%get_nzeros() + call psb_loc_to_glob(tmpcoo%ia(1:nzl),desc_ac,info,'I') + call op_restr%mv_from(tmpcoo) + + call op_prol%set_ncols(i_nr) + call op_restr%set_nrows(i_nr) + + call psb_gather(tmp_ac,ac,desc_ac,info,root=-ione,& + & dupl=psb_dupl_add_,keeploc=.false.) + call tmp_ac%mv_to(tmpcoo) + call ac%mv_from(tmpcoo) + + call psb_cdall(ictxt,desc_ac,info,mg=ntaggr,repl=.true.) + if (info == psb_success_) call psb_cdasb(desc_ac,info) + ! + ! Now that we have the descriptors and the restrictor, we should + ! update the W. But we don't, because REPL is only valid + ! at the coarsest level, so no need to carry over. + ! + + if (info /= psb_success_) goto 9999 + + case default + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid amg_coarse_mat_') + goto 9999 + end select + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + +end subroutine amg_s_parmatch_aggregator_mat_asb diff --git a/amgprec/impl/aggregator/amg_s_parmatch_aggregator_mat_bld.f90 b/amgprec/impl/aggregator/amg_s_parmatch_aggregator_mat_bld.f90 new file mode 100644 index 00000000..2f072b7d --- /dev/null +++ b/amgprec/impl/aggregator/amg_s_parmatch_aggregator_mat_bld.f90 @@ -0,0 +1,275 @@ +! ! +! +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! +! (C) Copyright 2021 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! moved here from +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_s_base_aggregator_mat_bld.f90 +! +! Subroutine: amg_s_base_aggregator_mat_bld +! Version: s +! +! This routine builds the matrix associated to the current level of the +! multilevel preconditioner from the matrix associated to the previous level, +! by using the user-specified aggregation technique (therefore, it also builds the +! prolongation and restriction operators mapping the current level to the +! previous one and vice versa). +! The current level is regarded as the coarse one, while the previous as +! the fine one. This is in agreement with the fact that the routine is called, +! by amg_mlprec_bld, only on levels >=2. +! The coarse-level matrix A_C is built from a fine-level matrix A +! by using the Galerkin approach, i.e. +! +! A_C = P_C^T A P_C, +! +! where P_C is a prolongator from the coarse level to the fine one. +! +! A mapping from the nodes of the adjacency graph of A to the nodes of the +! adjacency graph of A_C has been computed by the amg_aggrmap_bld subroutine. +! The prolongator P_C is built here from this mapping, according to the +! value of p%iprcparm(amg_aggr_kind_), specified by the user through +! amg_sprecinit and amg_zprecset. +! On output from this routine the entries of AC, op_prol, op_restr +! are still in "global numbering" mode; this is fixed in the calling routine +! amg_s_lev_aggrmat_bld. +! +! Currently four different prolongators are implemented, corresponding to +! four aggregation algorithms: +! 1. un-smoothed aggregation, +! 2. smoothed aggregation, +! 3. "bizarre" aggregation. +! 4. minimum energy +! 1. The non-smoothed aggregation uses as prolongator the piecewise constant +! interpolation operator corresponding to the fine-to-coarse level mapping built +! by p%aggr%bld_tprol. This is called tentative prolongator. +! 2. The smoothed aggregation uses as prolongator the operator obtained by applying +! a damped Jacobi smoother to the tentative prolongator. +! 3. The "bizarre" aggregation uses a prolongator proposed by the authors of AMG4PSBLAS. +! This prolongator still requires a deep analysis and testing and its use is +! not recommended. +! 4. Minimum energy aggregation +! +! For more details see +! M. Brezina and P. Vanek, A black-box iterative solver based on a two-level +! Schwarz method, Computing, 63 (1999), 233-263. +! P. D'Ambra, D. di Serafino and S. Filippone, On the development of PSBLAS-based +! parallel two-level Schwarz preconditioners, Appl. Num. Math., 57 (2007), +! 1181-1196. +! M. Sala, R. Tuminaro: A new Petrov-Galerkin smoothed aggregation preconditioner +! for nonsymmetric linear systems, SIAM J. Sci. Comput., 31(1):143-166 (2008) +! +! +! The main structure is: +! 1. Perform sanity checks; +! 2. Compute prolongator/restrictor/AC +! +! +! Arguments: +! ag - type(amg_s_base_aggregator_type), input/output. +! The aggregator object +! parms - type(amg_sml_parms), input +! The aggregation parameters +! a - type(psb_sspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! ac - type(psb_sspmat_type), output +! The coarse matrix on output +! +! op_prol - type(psb_sspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_sspmat_type), output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +subroutine amg_s_parmatch_aggregator_mat_bld(ag,parms,a,desc_a,ilaggr,nlaggr,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + use psb_base_mod + use amg_s_inner_mod + use amg_s_prec_type + use amg_s_parmatch_aggregator_mod, amg_protect_name => amg_s_parmatch_aggregator_mat_bld + implicit none + + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_sml_parms), intent(inout) :: parms + type(psb_sspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(psb_lsspmat_type), intent(inout) :: t_prol + type(psb_sspmat_type), intent(out) :: op_prol,ac,op_restr + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + + ! Local variables + character(len=20) :: name + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me + integer(psb_ipk_) :: err_act + integer(psb_ipk_) :: debug_level, debug_unit + type(psb_sspmat_type) :: atmp + + name='d_parmatch_mat_bld' + if (psb_get_errstatus().ne.0) return + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + info = psb_success_ + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + + + ! + ! Build the coarse-level matrix from the fine-level one, starting from + ! the mapping defined by amg_aggrmap_bld and applying the aggregation + ! algorithm specified by + ! + + call clean_shortcuts(ag) + ! + ! When requesting smoothed aggregation we cannot use the + ! unsmoothed shortcuts + ! + select case (parms%aggr_prol) + case (amg_no_smooth_) + call amg_s_parmatch_unsmth_bld(ag,a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + + case(amg_smooth_prol_) + call amg_s_parmatch_smth_bld(ag,a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + +!!$ case(amg_biz_prol_) +!!$ call amg_saggrmat_biz_bld(a,desc_a,ilaggr,nlaggr, & +!!$ & parms,ac,desc_ac,op_prol,op_restr,info) + + case(amg_min_energy_) + call amg_saggrmat_minnrg_bld(a,desc_a,ilaggr,nlaggr, & + & parms,ac,desc_ac,op_prol,op_restr,t_prol,info) + + case default + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='Invalid aggr kind') + goto 9999 + end select + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='Inner aggrmat asb') + goto 9999 + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +contains + subroutine clean_shortcuts(ag) + implicit none + class(amg_s_parmatch_aggregator_type), intent(inout) :: ag + integer(psb_ipk_) :: info + if (allocated(ag%prol)) then + call ag%prol%free() + deallocate(ag%prol) + end if + if (allocated(ag%restr)) then + call ag%restr%free() + deallocate(ag%restr) + end if + if (ag%unsmoothed_hierarchy) then + if (allocated(ag%ac)) call move_alloc(ag%ac, ag%rwa) + if (allocated(ag%desc_ac)) call move_alloc(ag%desc_ac,ag%rwdesc) + else + if (allocated(ag%ac)) then + call ag%ac%free() + deallocate(ag%ac) + end if + if (allocated(ag%desc_ac)) then + call ag%desc_ac%free(info) + deallocate(ag%desc_ac) + end if + end if + end subroutine clean_shortcuts + +end subroutine amg_s_parmatch_aggregator_mat_bld diff --git a/amgprec/impl/aggregator/amg_s_parmatch_aggregator_tprol.f90 b/amgprec/impl/aggregator/amg_s_parmatch_aggregator_tprol.f90 new file mode 100644 index 00000000..e8d8fecc --- /dev/null +++ b/amgprec/impl/aggregator/amg_s_parmatch_aggregator_tprol.f90 @@ -0,0 +1,565 @@ +! ! +! +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! +! (C) Copyright 2021 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! moved here from +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! MLD2P4 version 2.2 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! File: amg_s_parmatch_aggregator_tprol.f90 +! +! Subroutine: amg_s_parmatch_aggregator_tprol +! Version: real +! +! + +subroutine amg_s_parmatch_aggregator_build_tprol(ag,parms,ag_data,& + & a,desc_a,ilaggr,nlaggr,t_prol,info) + use psb_base_mod + use amg_s_prec_type + use amg_s_inner_mod + use amg_s_parmatch_aggregator_mod, amg_protect_name => amg_s_parmatch_aggregator_build_tprol + use iso_c_binding + implicit none + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + type(amg_sml_parms), intent(inout) :: parms + type(amg_saggr_data), intent(in) :: ag_data + type(psb_sspmat_type), intent(inout) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), allocatable, intent(out) :: ilaggr(:),nlaggr(:) + type(psb_lsspmat_type), intent(out) :: t_prol + integer(psb_ipk_), intent(out) :: info + + + ! Local variables + real(psb_spk_), allocatable :: tmpw(:), tmpwnxt(:) + integer(psb_lpk_), allocatable :: ixaggr(:), nxaggr(:), tlaggr(:), ivr(:) + type(psb_sspmat_type) :: a_tmp + integer(c_int) :: match_algorithm, n_sweeps, max_csize, max_nlevels + character(len=40) :: name, ch_err + character(len=80) :: fname, prefix_ + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me + integer(psb_ipk_) :: err_act, ierr + integer(psb_ipk_) :: debug_level, debug_unit + integer(psb_ipk_) :: i, j, k, nr, nc + integer(psb_lpk_) :: isz, num_pcols, nrac, ncac, lname, nz, x_sweeps, csz + integer(psb_lpk_) :: psz, sizes(4) + type(psb_s_csr_sparse_mat), target :: csr_prol, csr_pvi, csr_prod_res, acsr + type(psb_ls_csr_sparse_mat), target :: lcsr_prol + type(psb_desc_type), allocatable :: desc_acv(:) + type(psb_ls_coo_sparse_mat) :: tmpcoo, transp_coo + type(psb_sspmat_type), allocatable :: acv(:) + type(psb_sspmat_type), allocatable :: prolv(:), restrv(:) + type(psb_lsspmat_type) :: tmp_prol, tmp_pg, tmp_restr + type(psb_desc_type) :: tmp_desc_ac, tmp_desc_ax, tmp_desc_p + integer(psb_ipk_), save :: idx_mboxp=-1, idx_spmmbld=-1, idx_sweeps_mult=-1 + logical, parameter :: dump=.false., do_timings=.true., debug=.false., & + & dump_prol_restr=.false. + + name='s_parmatch_tprol' + ictxt = desc_a%get_context() + call psb_info(ictxt,me,np) + if (psb_get_errstatus().ne.0) then + write(0,*) me,trim(name),' Err_status :',psb_get_errstatus() + return + end if + if (debug) write(0,*) me,trim(name),' Start ' + call psb_erractionsave(err_act) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + info = psb_success_ + + if ((do_timings).and.(idx_mboxp==-1)) & + & idx_mboxp = psb_get_timer_idx("PMC_TPROL: MatchBoxP") + if ((do_timings).and.(idx_spmmbld==-1)) & + & idx_spmmbld = psb_get_timer_idx("PMC_TPROL: spmm_bld") + if ((do_timings).and.(idx_sweeps_mult==-1)) & + & idx_sweeps_mult = psb_get_timer_idx("PMC_TPROL: sweeps_mult") + + + call amg_check_def(parms%ml_cycle,'Multilevel cycle',& + & amg_mult_ml_,is_legal_ml_cycle) + call amg_check_def(parms%par_aggr_alg,'Aggregation',& + & amg_dec_aggr_,is_legal_ml_par_aggr_alg) + call amg_check_def(parms%aggr_ord,'Ordering',& + & amg_aggr_ord_nat_,is_legal_ml_aggr_ord) + call amg_check_def(parms%aggr_thresh,'Aggr_Thresh',szero,is_legal_s_aggr_thrs) + match_algorithm = ag%matching_alg + n_sweeps = ag%n_sweeps + if (2**n_sweeps /= ag%orig_aggr_size) then + if (me == 0) then + write(debug_unit, *) 'Warning: AGGR_SIZE reset to value ',2**n_sweeps + end if + end if + if (ag%max_csize > 0) then + max_csize = ag%max_csize + else + max_csize = ag_data%min_coarse_size + end if + if (ag%max_nlevels > 0) then + max_nlevels = ag%max_nlevels + else + max_nlevels = ag_data%max_levs + end if + if (.true.) then + block + integer(psb_ipk_) :: ipv(2) + ipv(1) = max_csize + ipv(2) = n_sweeps + call psb_bcast(ictxt,ipv) + max_csize = ipv(1) + n_sweeps = ipv(2) + end block + else + call psb_bcast(ictxt,max_csize) + call psb_bcast(ictxt,n_sweeps) + end if + if (n_sweeps /= ag%n_sweeps) then + write(0,*) me,' Inconsistent N_SWEEPS ',n_sweeps,ag%n_sweeps + end if +!!$ if (me==0) write(0,*) 'Matching sweeps: ',n_sweeps + n_sweeps = max(1,n_sweeps) + if (debug) write(0,*) me,' Copies, with n_sweeps: ',n_sweeps,max_csize + if (ag%unsmoothed_hierarchy.and.allocated(ag%base_a)) then + call ag%base_a%cp_to(acsr) + if (ag%do_clean_zeros) call acsr%clean_zeros(info) + nr = acsr%get_nrows() + if (psb_size(ag%w) < nr) call ag%bld_default_w(nr) + isz = acsr%get_ncols() + + call psb_realloc(isz,ixaggr,info) + if (info == psb_success_) & + & allocate(acv(0:n_sweeps), desc_acv(0:n_sweeps),& + & prolv(n_sweeps), restrv(n_sweeps),stat=info) + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + + end if + + + call acv(0)%mv_from(acsr) + call ag%base_desc%clone(desc_acv(0),info) + + else + call a%cp_to(acsr) + if (ag%do_clean_zeros) call acsr%clean_zeros(info) + nr = acsr%get_nrows() + if (psb_size(ag%w) < nr) call ag%bld_default_w(nr) + isz = acsr%get_ncols() + + call psb_realloc(isz,ixaggr,info) + if (info == psb_success_) & + & allocate(acv(0:n_sweeps), desc_acv(0:n_sweeps),& + & prolv(n_sweeps), restrv(n_sweeps),stat=info) + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='psb_realloc' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + + end if + + + call acv(0)%mv_from(acsr) + call desc_a%clone(desc_acv(0),info) + end if + + nrac = desc_acv(0)%get_local_rows() + ncac = desc_acv(0)%get_local_cols() + if (debug) write(0,*) me,' On input to level: ',nrac, ncac + if (allocated(ag%prol)) then + call ag%prol%free() + deallocate(ag%prol) + end if + if (allocated(ag%restr)) then + call ag%restr%free() + deallocate(ag%restr) + end if + + if (dump) then + block + type(psb_lsspmat_type) :: lac + ivr = desc_acv(0)%get_global_indices(owned=.false.) + prefix_ = "input_a" + lname = len_trim(prefix_) + fname = trim(prefix_) + write(fname(lname+1:lname+9),'(a,i3.3,a)') '_p',me, '.mtx' + call acv(0)%print(fname,head='Debug aggregates') + call lac%cp_from(acv(0)) + write(fname(lname+1:lname+13),'(a,i3.3,a)') '_p',me, '-glb.mtx' + call lac%print(fname,head='Debug aggregates',iv=ivr) + call lac%free() + end block + end if + + call psb_geall(tmpw,desc_acv(0),info) + + tmpw(1:nr) = ag%w(1:nr) + + call psb_geasb(tmpw,desc_acv(0),info) + + if (debug) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'N_sweeps ',n_sweeps,nr,desc_acv(0)%is_ok(),max_csize + end if + + ! + ! Prepare ag%ac, ag%desc_ac, ag%prol, ag%restr to enable + ! shortcuts in mat_bld and mat_asb + ! and ag%desc_ax which will be needed in backfix. + ! + x_sweeps = -1 + sweeps_loop: do i=1, n_sweeps + if (debug) then + call psb_barrier(ictxt) + if (me==0) write(0,*) me,trim(name),' Start sweeps_loop iteration:',i,' of ',n_sweeps + end if + + ! + ! Building prol and restr because this algorithm is not decoupled + ! On exit from matchbox_build_prol, prolv(i) is in global numbering + ! + ! + if (debug) write(0,*) me,' Into matchbox_build_prol ',info + if (do_timings) call psb_tic(idx_mboxp) + call smatchboxp_build_prol(tmpw,acv(i-1),desc_acv(i-1),ixaggr,nxaggr,tmp_prol,info,& + & symmetrize=ag%need_symmetrize,reproducible=ag%reproducible_matching) + if (do_timings) call psb_toc(idx_mboxp) + if (debug) write(0,*) me,' Out from matchbox_build_prol ',info + if (psb_errstatus_fatal()) write(0,*)me,trim(name),'Error fatal on exit bld_tprol',info + + + if (debug) then + call psb_barrier(ictxt) +!!$ write(0,*) name,' Call spmm_bld sweep:',i,n_sweeps + if (me==0) write(0,*) me,trim(name),' Calling spmm_bld NSW>1:',i,& + & desc_acv(i-1)%get_local_rows(),desc_acv(i-1)%get_local_cols(),& + & desc_acv(i-1)%get_global_rows() + end if + if (i == n_sweeps) call tmp_prol%clone(tmp_pg,info) + if (do_timings) call psb_tic(idx_spmmbld) + ! + ! On entry, prolv(i) is in global numbering, + ! + call amg_s_parmatch_spmm_bld_ov(acv(i-1),desc_acv(i-1),ixaggr,nxaggr,parms,& + & acv(i),desc_acv(i), prolv(i),restrv(1),tmp_prol,info) + if (psb_errstatus_fatal()) write(0,*)me,trim(name),'Error fatal on exit from bld_ov(i)',info + if (debug) then + call psb_barrier(ictxt) +!!$ write(0,*) name,' Call spmm_bld sweep:',i,n_sweeps + if (me==0) write(0,*) me,trim(name),' Done spmm_bld:',i + end if + + if (do_timings) call psb_toc(idx_spmmbld) + ! Keep a copy of prolv(i) in global numbering for the time being, will + ! need it to build the final + ! if (i == n_sweeps) call prolv(i)%clone(tmp_prol,info) +!!$ write(0,*) name,' Call mat_asb sweep:',i,n_sweeps + call ag%inner_mat_asb(parms,acv(i-1),desc_acv(i-1),& + & acv(i),desc_acv(i),prolv(i),restrv(1),info) + +!!$ write(0,*) me,' From in_mat_asb:',& +!!$ & prolv(i)%get_nrows(),prolv(i)%get_ncols(),& +!!$ & restrv(1)%get_nrows(),restrv(1)%get_ncols(),& +!!$ & desc_acv(i)%get_local_rows(), desc_acv(i)%get_local_cols() + if (debug) then + call psb_barrier(ictxt) +!!$ write(0,*) name,' Call spmm_bld sweep:',i,n_sweeps + if (me==0) write(0,*) me,trim(name),' Done mat_asb:',i,sum(nxaggr),max_csize,info + csz = sum(nxaggr) + call psb_bcast(ictxt,csz) + if (csz /= sum(nxaggr)) write(0,*) me,trim(name),' Mismatch matasb',& + & csz,sum(nxaggr),max_csize + end if + if (psb_errstatus_fatal()) write(0,*)me,trim(name),'Error fatal on entry to tmpwnxt 2' + + + ! + ! Fix wnxt + ! + if (info == 0) call psb_geall(tmpwnxt,desc_acv(i),info) + if (info == 0) call psb_geasb(tmpwnxt,desc_acv(i),info,scratch=.true.) + if (info == 0) call psb_halo(tmpw,desc_acv(i-1),info) +!!$ write(0,*) trestr%get_nrows(),size(tmpwnxt),trestr%get_ncols(),size(tmpw) + + if (info == 0) call psb_csmm(sone,restrv(1),tmpw,szero,tmpwnxt,info) + + if (info /= psb_success_) then + write(0,*)me,trim(name),'Error from mat_asb/tmpw ',info + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err='mat_asb 2') + goto 9999 + end if + + + if (i == 1) then + nrac = desc_acv(1)%get_local_rows() +!!$ write(0,*) 'Copying output w_nxt ',nrac + call psb_realloc(nrac,ag%w_nxt,info) + ag%w_nxt(1:nrac) = tmpwnxt(1:nrac) + ! + ! ILAGGR is fixed later on, but + ! get a copy in case of an early exit + ! + call psb_safe_ab_cpy(ixaggr,ilaggr,info) + end if + call psb_safe_ab_cpy(nxaggr,nlaggr,info) + call move_alloc(tmpwnxt,tmpw) + if (debug) then + if (csz /= sum(nlaggr)) write(0,*) me,trim(name),' Mismatch 2 matasb',& + & csz,sum(nlaggr),max_csize, info + end if + call acv(i-1)%free() + if ((sum(nlaggr) <= max_csize).or.(any(nlaggr==0))) then + !if (me==0) write(0,*) 'Early exit ',any(nlaggr==0),sum(nlaggr),max_csize,i + x_sweeps = i + exit sweeps_loop + end if + if (debug) then + call psb_barrier(ictxt) +!!$ write(0,*) name,' Call spmm_bld sweep:',i,n_sweeps + if (me==0) write(0,*) me,trim(name),' Done sweeps_loop iteration:',i,' of ',n_sweeps + end if + + end do sweeps_loop + + if (debug) then + call psb_barrier(ictxt) +!!$ write(0,*) name,' Call spmm_bld sweep:',i,n_sweeps + if (me==0) write(0,*) me,trim(name),' Done sweeps_loop:',x_sweeps + end if +!!$ write(0,*) me,name,' : End of aggregation sweeps ',& +!!$ & n_sweeps,' Final size:',max_csize,desc_acv(n_sweeps)%get_local_rows(),t_prol%get_ncols(),tmp_prol%get_ncols() + if (x_sweeps<=0) x_sweeps = n_sweeps + + if (do_timings) call psb_tic(idx_sweeps_mult) + ! + ! Ok, now we have all the prolongators, including the last one in global numbering. + ! Build the product of all prolongators. Need a tmp_desc_ax + ! which is correct but most of the time overdimensioned + ! + if (.not.allocated(ag%desc_ax)) allocate(ag%desc_ax) + ! + block + integer(psb_ipk_) :: i, nnz + integer(psb_lpk_) :: ncol, ncsave + if (.not.allocated(ag%ac)) allocate(ag%ac) + if (.not.allocated(ag%desc_ac)) allocate(ag%desc_ac) + call desc_acv(x_sweeps)%clone(ag%desc_ac,info) + call desc_acv(x_sweeps)%free(info) + call acv(x_sweeps)%move_alloc(ag%ac,info) +!!$ call acv(x_sweeps)%clone(ag%ac,info) + if (.not.allocated(ag%prol)) allocate(ag%prol) + if (.not.allocated(ag%restr)) allocate(ag%restr) + + !call desc_acv(x_sweeps)%clone(ag%desc_ac,info) + call psb_cd_reinit(ag%desc_ac,info) + ncsave = ag%desc_ac%get_global_rows() + ! + ! Note: prolv(i) is already in local numbering + ! because of the call to mat_asb in the loop above. + ! + call prolv(x_sweeps)%mv_to(csr_prol) + !call csr_prol%set_ncols(ag%desc_ac%get_local_cols()) + if (debug) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Enter prolongator product loop ',x_sweeps + end if + + do i=x_sweeps-1, 1, -1 + call prolv(i)%mv_to(csr_pvi) + if (psb_errstatus_fatal()) write(0,*) me,' Fatal error in prolongator loop 1' +!!$ write(0,*) me,' SPMM1 ',csr_pvi%get_nrows(),csr_pvi%get_ncols(),& +!!$ & csr_prol%get_nrows(),csr_prol%get_ncols() + call psb_par_spspmm(csr_pvi,desc_acv(i),csr_prol,csr_prod_res,ag%desc_ac,info) + if ((info /=0).or.psb_errstatus_fatal()) write(0,*) me,' Fatal error in prolongator loop 2',info + call csr_pvi%free() + call csr_prod_res%mv_to_fmt(csr_prol,info) + if ((info /=0).or.psb_errstatus_fatal()) write(0,*) me,' Fatal error in prolongator loop 3',info + call csr_prol%set_ncols(ag%desc_ac%get_local_cols()) + if ((info /=0).or.psb_errstatus_fatal()) write(0,*) me,' Fatal error in prolongator loop 4' + end do + call csr_prol%mv_to_lfmt(lcsr_prol,info) + nnz = lcsr_prol%get_nzeros() + call ag%desc_ac%l2gip(lcsr_prol%ja(1:nnz),info) + call lcsr_prol%set_ncols(ncsave) + if (debug) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Done prolongator product loop ',x_sweeps + end if + ! + ! Fix ILAGGR here by copying from CSR_PROL%JA + ! + block + integer(psb_ipk_) :: nr + nr = lcsr_prol%get_nrows() + if (nnz /= nr) then + write(0,*) me,name,' Issue with prolongator? ',nr,nnz + end if + call psb_realloc(nr,ilaggr,info) + ilaggr(1:nnz) = lcsr_prol%ja(1:nnz) + end block + call tmp_prol%mv_from(lcsr_prol) + call psb_cdasb(ag%desc_ac,info) + call ag%ac%set_ncols(ag%desc_ac%get_local_cols()) + end block + + call tmp_prol%move_alloc(t_prol,info) + call t_prol%set_ncols(ag%desc_ac%get_local_cols()) + call t_prol%set_nrows(desc_acv(0)%get_local_rows()) + + nrac = ag%desc_ac%get_local_rows() + ncac = ag%desc_ac%get_local_cols() + call psb_realloc(nrac,ag%w_nxt,info) + ag%w_nxt(1:nrac) = tmpw(1:nrac) + + + if (do_timings) call psb_toc(idx_sweeps_mult) + + if (debug) then + call psb_barrier(ictxt) + if (me == 0) write(0,*) 'Out of build loop ',x_sweeps,': Output size:',sum(nlaggr) + end if + + + !call psb_set_debug_level(0) + if (dump) then + block + ivr = desc_acv(x_sweeps)%get_global_indices(owned=.false.) + prefix_ = "final_ac" + lname = len_trim(prefix_) + fname = trim(prefix_) + write(fname(lname+1:lname+9),'(a,i3.3,a)') '_p',me, '.mtx' + call acv(x_sweeps)%print(fname,head='Debug aggregates') + write(fname(lname+1:lname+13),'(a,i3.3,a)') '_p',me, '-glb.mtx' + call acv(x_sweeps)%print(fname,head='Debug aggregates',iv=ivr) + prefix_ = "final_tp" + lname = len_trim(prefix_) + fname = trim(prefix_) + write(fname(lname+1:lname+9),'(a,i3.3,a)') '_p',me, '.mtx' + call t_prol%print(fname,head='Tentative prolongator') + end block + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='amg_bootCMatch_if') + goto 9999 + end if +!!$ write(0,*)me,' ',name,' Getting out with info ',info,& +!!$ & allocated(ilaggr),psb_size(ilaggr),allocated(nlaggr),psb_size(nlaggr) + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return +contains + subroutine do_l1_jacobi(nsweeps,w,a,desc_a) + integer(psb_ipk_), intent(in) :: nsweeps + real(psb_dpk_), intent(inout) :: w(:) + type(psb_sspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + + end subroutine do_l1_jacobi +end subroutine amg_s_parmatch_aggregator_build_tprol diff --git a/amgprec/impl/aggregator/amg_s_parmatch_smth_bld.f90 b/amgprec/impl/aggregator/amg_s_parmatch_smth_bld.f90 new file mode 100644 index 00000000..aeeb3fbc --- /dev/null +++ b/amgprec/impl/aggregator/amg_s_parmatch_smth_bld.f90 @@ -0,0 +1,414 @@ +! +! +! AMG4PSBLAS version 2.2 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_saggrmat_smth_bld.F90 +! +! Subroutine: amg_saggrmat_smth_bld +! Version: real +! +! This routine builds a coarse-level matrix A_C from a fine-level matrix A +! by using the Galerkin approach, i.e. +! +! A_C = P_C^T A P_C, +! +! where P_C is a prolongator from the coarse level to the fine one. +! +! The prolongator P_C is built according to a smoothed aggregation algorithm, +! i.e. it is obtained by applying a damped Jacobi smoother to the piecewise +! constant interpolation operator P corresponding to the fine-to-coarse level +! mapping built by the amg_aggrmap_bld subroutine: +! +! P_C = (I - omega*D^(-1)A) * P, +! +! where D is the diagonal matrix with main diagonal equal to the main diagonal +! of A, and omega is a suitable smoothing parameter. An estimate of the spectral +! radius of D^(-1)A, to be used in the computation of omega, is provided, +! according to the value of p%parms%aggr_omega_alg, specified by the user +! through amg_sprecinit and amg_zprecset. +! +! The coarse-level matrix A_C is distributed among the parallel processes or +! replicated on each of them, according to the value of p%parms%coarse_mat, +! specified by the user through amg_sprecinit and amg_zprecset. +! On output from this routine the entries of AC, op_prol, op_restr +! are still in "global numbering" mode; this is fixed in the calling routine +! aggregator%mat_bld. +! +! +! Arguments: +! a - type(psb_sspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! p - type(amg_s_onelev_type), input/output. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! parms - type(amg_sml_parms), input +! Parameters controlling the choice of algorithm +! ac - type(psb_sspmat_type), output +! The coarse matrix on output +! +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! op_prol - type(psb_sspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_sspmat_type), output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +subroutine amg_s_parmatch_smth_bld(ag,a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + use psb_base_mod + use amg_base_prec_type + use amg_s_inner_mod + use amg_s_base_aggregator_mod + use amg_s_parmatch_aggregator_mod, amg_protect_name => amg_s_parmatch_smth_bld + implicit none + + ! Arguments + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + type(psb_sspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_sml_parms), intent(inout) :: parms + type(psb_lsspmat_type), intent(inout) :: t_prol + type(psb_sspmat_type), intent(out) :: op_prol,ac,op_restr + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + + ! Local variables + integer(psb_lpk_) :: nrow, nglob, ncol, ntaggr, ip, & + & naggr, nzl,naggrm1,naggrp1, i, j, k, jd, icolF, nrw + integer(psb_ipk_) :: inaggr + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me + character(len=20) :: name + type(psb_ls_coo_sparse_mat) :: tmpcoo, ac_coo, lcoo_restr + type(psb_s_coo_sparse_mat) :: coo_prol, coo_restr + type(psb_s_csr_sparse_mat) :: acsrf, csr_prol, acsr, tcsr + real(psb_spk_), allocatable :: adiag(:) + real(psb_spk_), allocatable :: arwsum(:) + logical :: filter_mat + integer(psb_ipk_) :: debug_level, debug_unit, err_act + integer(psb_ipk_), parameter :: ncmax=16 + real(psb_spk_) :: anorm, omega, tmp, dg, theta + logical, parameter :: debug_new=.false., dump_r=.false., dump_p=.false., debug=.false. + character(len=80) :: filename + logical, parameter :: do_timings=.false. + integer(psb_ipk_), save :: idx_spspmm=-1, idx_phase1=-1, idx_gtrans=-1, idx_phase2=-1, idx_refine=-1, idx_phase3=-1 + integer(psb_ipk_), save :: idx_cdasb=-1, idx_ptap=-1 + + name='amg_parmatch_smth_bld' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + !debug_level = 2 + ictxt = desc_a%get_context() + + call psb_info(ictxt, me, np) + + nglob = desc_a%get_global_rows() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + + theta = parms%aggr_thresh + !write(0,*) me,' ',trim(name),' Start ',idx_spspmm + if ((do_timings).and.(idx_spspmm==-1)) & + & idx_spspmm = psb_get_timer_idx("PMC_SMTH_BLD: par_spspmm") + if ((do_timings).and.(idx_phase1==-1)) & + & idx_phase1 = psb_get_timer_idx("PMC_SMTH_BLD: phase1 ") + if ((do_timings).and.(idx_phase2==-1)) & + & idx_phase2 = psb_get_timer_idx("PMC_SMTH_BLD: phase2 ") + if ((do_timings).and.(idx_phase3==-1)) & + & idx_phase3 = psb_get_timer_idx("PMC_SMTH_BLD: phase3 ") + if ((do_timings).and.(idx_gtrans==-1)) & + & idx_gtrans = psb_get_timer_idx("PMC_SMTH_BLD: gtrans ") + if ((do_timings).and.(idx_refine==-1)) & + & idx_refine = psb_get_timer_idx("PMC_SMTH_BLD: refine ") + if ((do_timings).and.(idx_cdasb==-1)) & + & idx_cdasb = psb_get_timer_idx("PMC_SMTH_BLD: cdasb ") + if ((do_timings).and.(idx_ptap==-1)) & + & idx_ptap = psb_get_timer_idx("PMC_SMTH_BLD: ptap_bld ") + + if (do_timings) call psb_tic(idx_phase1) + + naggr = nlaggr(me+1) + ntaggr = sum(nlaggr) + naggrm1 = sum(nlaggr(1:me)) + naggrp1 = sum(nlaggr(1:me+1)) + filter_mat = (parms%aggr_filter == amg_filter_mat_) + + ! + ! naggr: number of local aggregates + ! nrow: local rows. + ! + if (dump_p) then + block + integer(psb_lpk_), allocatable :: ivr(:), ivc(:) + integer(psb_lpk_) :: i + character(len=132) :: aname + type(psb_lsspmat_type) :: aglob + type(psb_sspmat_type) :: atmp +!!$ call a%cp_to(acsr) +!!$ call atmp%cp_from(acsr) + write(0,*) me,' ',trim(name),' Dumping inp_prol/restr' + write(aname,'(a,i0,a,i0,a)') 'tprol-',desc_a%get_global_rows(),'-p',me,'.mtx' + call t_prol%print(fname=aname,head='Test ') + end block + end if + + if (do_timings) call psb_tic(idx_refine) + ! Get the diagonal D + adiag = a%get_diag(info) + if (info == psb_success_) & + & call psb_realloc(ncol,adiag,info) + if (info == psb_success_) & + & call psb_halo(adiag,desc_a,info) + if (info == psb_success_) call a%cp_to(acsr) + + if(info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='sp_getdiag') + goto 9999 + end if + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ' Initial copies done.' + + call acsr%cp_to_fmt(acsrf,info) + + if (filter_mat) then + ! + ! Build the filtered matrix Af from A + ! + + do i=1, nrow + tmp = dzero + jd = -1 + do j=acsrf%irp(i),acsrf%irp(i+1)-1 + if (acsrf%ja(j) == i) jd = j + if (abs(acsrf%val(j)) < theta*sqrt(abs(adiag(i)*adiag(acsrf%ja(j))))) then + tmp=tmp+acsrf%val(j) + acsrf%val(j)=dzero + endif + + enddo + if (jd == -1) then + write(0,*) 'Wrong input: we need the diagonal!!!!', i + else + acsrf%val(jd)=acsrf%val(jd)-tmp + end if + enddo + ! Take out zeroed terms + call acsrf%clean_zeros(info) + end if + + + do i=1,size(adiag) + if (adiag(i) /= dzero) then + adiag(i) = done / adiag(i) + else + adiag(i) = done + end if + end do + if (do_timings) call psb_toc(idx_refine) + + if (parms%aggr_omega_alg == amg_eig_est_) then + + if (parms%aggr_eig == amg_max_norm_) then + allocate(arwsum(nrow)) + call acsr%arwsum(arwsum) + anorm = maxval(abs(adiag(1:nrow)*arwsum(1:nrow))) + call psb_amx(ictxt,anorm) + omega = 4.d0/(3.d0*anorm) + parms%aggr_omega_val = omega + + else + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid amg_aggr_eig_') + goto 9999 + end if + + else if (parms%aggr_omega_alg == amg_user_choice_) then + + omega = parms%aggr_omega_val + + else if (parms%aggr_omega_alg /= amg_user_choice_) then + info = psb_err_internal_error_ + call psb_errpush(info,name,a_err='invalid amg_aggr_omega_alg_') + goto 9999 + end if + + + call acsrf%scal(adiag,info) + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & ' Filtering and scaling done.',info + if (info /= psb_success_) goto 9999 + + inaggr = naggr + + call t_prol%cp_to(tmpcoo) + + call psb_cdall(ictxt,desc_ac,info,nl=inaggr) + nzl = tmpcoo%get_nzeros() + call desc_ac%indxmap%g2lip_ins(tmpcoo%ja(1:nzl),info) + call tmpcoo%set_ncols(desc_ac%get_local_cols()) + call tmpcoo%mv_to_ifmt(tcsr,info) + ! + ! Build the smoothed prolongator using either A or Af + ! csr_prol = (I-w*D*A) Prol csr_prol = (I-w*D*Af) Prol + ! This is always done through the variable acsrf which + ! is a bit less readable, but saves space and one extra matrix copy + ! + call omega_smooth(omega,acsrf) + if (do_timings) call psb_toc(idx_phase1) + + if (do_timings) call psb_tic(idx_spspmm) + call psb_par_spspmm(acsrf,desc_a,tcsr,csr_prol,desc_ac,info) + call tcsr%free() + if (do_timings) call psb_toc(idx_spspmm) + if (do_timings) call psb_tic(idx_phase2) + + if(info /= psb_success_) then + call psb_errpush(psb_err_from_subroutine_,name,a_err='spspmm 1') + goto 9999 + end if + ! + ! Now that we have the smoothed prolongator, we can + ! compute the triple product. + ! + if (do_timings) call psb_tic(idx_cdasb) + call psb_cdasb(desc_ac,info) + if (do_timings) call psb_toc(idx_cdasb) + call psb_cd_reinit(desc_ac,info) + + call csr_prol%mv_to_coo(coo_prol,info) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done SPSPMM 1' + + if (do_timings) call psb_tic(idx_ptap) + if (.not.allocated(ag%desc_ax)) allocate(ag%desc_ax) + call amg_ptap_bld(acsr,desc_a,nlaggr,parms,ac,& + & coo_prol,desc_ac,coo_restr,info,desc_ax=ag%desc_ax) + if (do_timings) call psb_toc(idx_ptap) + + call op_prol%mv_from(coo_prol) + call op_restr%mv_from(coo_restr) + + + if (debug) write(0,*) me,' ',trim(name),' After mv_from',psb_get_errstatus() + if (debug) write(0,*) me,' ',trim(name),' ',ac%get_fmt(),ac%get_nrows(),ac%get_ncols(),ac%get_nzeros(),naggr,ntaggr + ! write(0,*) me,' ',trim(name),' Final AC newstyle ',ac%get_fmt(),ac%get_nrows(),ac%get_ncols(),ac%get_nzeros() + + if (dump_r) then + block + integer(psb_lpk_), allocatable :: ivr(:), ivc(:) + integer(psb_lpk_) :: i + character(len=132) :: aname + type(psb_lsspmat_type) :: aglob + type(psb_sspmat_type) :: atmp + write(0,*) me,' ',trim(name),' Dumping prol/restr' + ivc = [(i,i=1,desc_a%get_local_cols())] + call desc_a%l2gip(ivc,info) + ivr = [(i,i=1,desc_ac%get_local_cols())] + call desc_ac%l2gip(ivr,info) + + write(aname,'(a,i0,a,i0,a)') 'restr-',desc_ac%get_global_rows(),'-p',me,'.mtx' + + call op_restr%print(fname=aname,head='Test ',ivc=ivc) +!!$ write(aname,'(a,i0,a,i0,a)') 'prol-',desc_ac%get_global_rows(),'-p',me,'.mtx' +!!$ call op_prol%print(fname=aname,head='Test ') +!!$ call psb_gather(aglob,atmp,desc_a,info) +!!$ if (me==psb_root_) then +!!$ write(aname,'(a,i0,a)') 'a-inp-g-',aglob%get_nrows(),'.mtx' +!!$ call aglob%print(fname=aname,head='Test ') +!!$ end if + + end block + end if + if (do_timings) call psb_toc(idx_phase2) + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done smooth_aggregate ' + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + call psb_error_handler(err_act) + return + +contains + + subroutine omega_smooth(omega,acsr) + implicit none + real(psb_spk_),intent(in) :: omega + type(psb_s_csr_sparse_mat), intent(inout) :: acsr + ! + integer(psb_ipk_) :: i,j + do i=1,acsr%get_nrows() + do j=acsr%irp(i),acsr%irp(i+1)-1 + if (acsr%ja(j) == i) then + acsr%val(j) = done - omega*acsr%val(j) + else + acsr%val(j) = - omega*acsr%val(j) + end if + end do + end do + end subroutine omega_smooth + +end subroutine amg_s_parmatch_smth_bld diff --git a/amgprec/impl/aggregator/amg_s_parmatch_spmm_bld.f90 b/amgprec/impl/aggregator/amg_s_parmatch_spmm_bld.f90 new file mode 100644 index 00000000..2ae4589a --- /dev/null +++ b/amgprec/impl/aggregator/amg_s_parmatch_spmm_bld.f90 @@ -0,0 +1,194 @@ +! ! +! +! AMG4PSBLAS version 1.0 +! Algebraic Multigrid Package +! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! +! (C) Copyright 2021 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Fabio Durastante +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! moved here from +! +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_saggrmat_nosmth_bld.F90 +! +! Subroutine: amg_saggrmat_nosmth_bld +! Version: real +! +! This routine builds a coarse-level matrix A_C from a fine-level matrix A +! by using the Galerkin approach, i.e. +! +! A_C = P_C^T A P_C, +! +! where P_C is the piecewise constant interpolation operator corresponding +! the fine-to-coarse level mapping built by amg_aggrmap_bld. +! +! The coarse-level matrix A_C is distributed among the parallel processes or +! replicated on each of them, according to the value of p%parms%coarse_mat +! specified by the user through amg_sprecinit and amg_zprecset. +! On output from this routine the entries of AC, op_prol, op_restr +! are still in "global numbering" mode; this is fixed in the calling routine +! +! For details see +! P. D'Ambra, D. di Serafino and S. Filippone, On the development of +! PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math., +! 57 (2007), 1181-1196. +! +! +! Arguments: +! a - type(psb_sspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! p - type(amg_s_onelev_type), input/output. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! parms - type(amg_sml_parms), input +! Parameters controlling the choice of algorithm +! ac - type(psb_sspmat_type), output +! The coarse matrix on output +! +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! op_prol - type(psb_sspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_sspmat_type), output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +! +subroutine amg_s_parmatch_spmm_bld(a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + use psb_base_mod + use amg_s_inner_mod + use amg_s_parmatch_aggregator_mod, amg_protect_name => amg_s_parmatch_spmm_bld + implicit none + + ! Arguments + type(psb_sspmat_type), intent(in) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_sml_parms), intent(inout) :: parms + type(psb_lsspmat_type), intent(inout) :: t_prol + type(psb_sspmat_type), intent(inout) :: ac, op_prol, op_restr + type(psb_desc_type), intent(out) :: desc_ac + integer(psb_ipk_), intent(out) :: info + + ! Local variables + integer(psb_ipk_) :: err_act + + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np,me + character(len=20) :: name + type(psb_s_csr_sparse_mat) :: acsr + integer(psb_lpk_) :: nrow, nglob, ncol, ntaggr, nzl, ip, & + & naggr, nzt, naggrm1, naggrp1, i, k + integer(psb_ipk_) :: inaggr, nzlp + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false. + + name='amg_parmatch_spmm_bld' + if(psb_get_errstatus().ne.0) return + info=psb_success_ + call psb_erractionsave(err_act) + + + ictxt = desc_a%get_context() + call psb_info(ictxt, me, np) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + call a%cp_to(acsr) + + call amg_s_parmatch_spmm_bld_inner(acsr,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="SPMM_BLD_INNER") + goto 9999 + end if + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done spmm_bld ' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine amg_s_parmatch_spmm_bld diff --git a/amgprec/impl/aggregator/amg_s_parmatch_spmm_bld_inner.f90 b/amgprec/impl/aggregator/amg_s_parmatch_spmm_bld_inner.f90 new file mode 100644 index 00000000..854dd552 --- /dev/null +++ b/amgprec/impl/aggregator/amg_s_parmatch_spmm_bld_inner.f90 @@ -0,0 +1,210 @@ +! +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_saggrmat_nosmth_bld.F90 +! +! Subroutine: amg_saggrmat_nosmth_bld +! Version: real +! +! This routine builds a coarse-level matrix A_C from a fine-level matrix A +! by using the Galerkin approach, i.e. +! +! A_C = P_C^T A P_C, +! +! where P_C is the piecewise constant interpolation operator corresponding +! the fine-to-coarse level mapping built by amg_aggrmap_bld. +! +! The coarse-level matrix A_C is distributed among the parallel processes or +! replicated on each of them, according to the value of p%parms%coarse_mat +! specified by the user through amg_sprecinit and amg_zprecset. +! On output from this routine the entries of AC, op_prol, op_restr +! are still in "global numbering" mode; this is fixed in the calling routine +! +! For details see +! P. D'Ambra, D. di Serafino and S. Filippone, On the development of +! PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math., +! 57 (2007), 1181-1196. +! +! +! Arguments: +! a - type(psb_sspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! p - type(amg_s_onelev_type), input/output. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! parms - type(amg_sml_parms), input +! Parameters controlling the choice of algorithm +! ac - type(psb_sspmat_type), output +! The coarse matrix on output +! +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! op_prol - type(psb_sspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_sspmat_type), output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +! +subroutine amg_s_parmatch_spmm_bld_inner(a_csr,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + use psb_base_mod + use amg_s_inner_mod + use amg_s_parmatch_aggregator_mod, amg_protect_name => amg_s_parmatch_spmm_bld_inner + implicit none + + ! Arguments + type(psb_s_csr_sparse_mat), intent(inout) :: a_csr + type(psb_desc_type), intent(in) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_sml_parms), intent(inout) :: parms + type(psb_lsspmat_type), intent(inout) :: t_prol + type(psb_sspmat_type), intent(out) :: ac, op_prol, op_restr + type(psb_desc_type), intent(out) :: desc_ac + integer(psb_ipk_), intent(out) :: info + + ! Local variables + integer(psb_ipk_) :: err_act + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me, ndx + character(len=40) :: name + type(psb_ls_coo_sparse_mat) :: tmpcoo + type(psb_s_coo_sparse_mat) :: coo_prol, coo_restr + type(psb_s_csr_sparse_mat) :: ac_csr, csr_restr + type(psb_desc_type), target :: tmp_desc + type(psb_lsspmat_type) :: lac + integer(psb_ipk_) :: debug_level, debug_unit, naggr + integer(psb_lpk_) :: nrow, nglob, ncol, ntaggr, nrl, nzl, ip, & + & nzt, naggrm1, naggrp1, i, k + integer(psb_lpk_), allocatable :: ia(:),ja(:) + !integer(psb_lpk_) :: nrsave, ncsave, nzsave, nza, nrpsave, ncpsave, nzpsave + logical, parameter :: do_timings=.true., oldstyle=.false., debug=.false. + integer(psb_ipk_), save :: idx_spspmm=-1, idx_prolcnv=-1, idx_proltrans=-1, idx_asb=-1 + + name='amg_parmatch_spmm_bld_inner' + if(psb_get_errstatus().ne.0) return + info=psb_success_ + call psb_erractionsave(err_act) + + + ictxt = desc_a%get_context() + call psb_info(ictxt, me, np) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + nglob = desc_a%get_global_rows() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + + if ((do_timings).and.(idx_spspmm==-1)) & + & idx_spspmm = psb_get_timer_idx("SPMM_BLD: spspmm ") + if ((do_timings).and.(idx_prolcnv==-1)) & + & idx_prolcnv = psb_get_timer_idx("SPMM_BLD: prolcnv ") + if ((do_timings).and.(idx_proltrans==-1)) & + & idx_proltrans = psb_get_timer_idx("SPMM_BLD: proltrans") + if ((do_timings).and.(idx_asb==-1)) & + & idx_asb = psb_get_timer_idx("SPMM_BLD: asb ") + + if (do_timings) call psb_tic(idx_prolcnv) + naggr = nlaggr(me+1) + ntaggr = sum(nlaggr) + naggrm1 = sum(nlaggr(1:me)) + naggrp1 = sum(nlaggr(1:me+1)) + + ! + ! Here T_PROL should be arriving with GLOBAL indices on the cols + ! and LOCAL indices on the rows. + ! + if (debug) write(0,*) me,' ',trim(name),' Size check on entry New: ',& + & op_prol%get_fmt(),op_prol%get_nrows(),op_prol%get_ncols(),op_prol%get_nzeros(),& + & nrow,ntaggr,naggr + + call t_prol%cp_to(tmpcoo) + + call psb_cdall(ictxt,desc_ac,info,nl=naggr) + nzl = tmpcoo%get_nzeros() + if (debug) write(0,*) me,' ',trim(name),' coo_prol: ',& + & tmpcoo%ia(1:min(10,nzl)),' :',tmpcoo%ja(1:min(10,nzl)) + call desc_ac%indxmap%g2lip_ins(tmpcoo%ja(1:nzl),info) + call tmpcoo%set_ncols(desc_ac%get_local_cols()) + call tmpcoo%cp_to_icoo(coo_prol,info) + + call amg_ptap_bld(a_csr,desc_a,nlaggr,parms,ac,& + & coo_prol,desc_ac,coo_restr,info) + + nzl = coo_prol%get_nzeros() + if (debug) write(0,*) me,' ',trim(name),' coo_prol: ',& + & coo_prol%ia(1:min(10,nzl)),' :',coo_prol%ja(1:min(10,nzl)) + + call op_prol%mv_from(coo_prol) + call op_restr%mv_from(coo_restr) + + if (debug) then + write(0,*) me,' ',trim(name),' Checkpoint at exit' + call psb_barrier(ictxt) + write(0,*) me,' ',trim(name),' Checkpoint through' + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_internal_error_,name,a_err='Build ac = op_restr x a3') + goto 9999 + end if + + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done smooth_aggregate ' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine amg_s_parmatch_spmm_bld_inner diff --git a/amgprec/impl/aggregator/amg_s_parmatch_spmm_bld_ov.f90 b/amgprec/impl/aggregator/amg_s_parmatch_spmm_bld_ov.f90 new file mode 100644 index 00000000..dfac2619 --- /dev/null +++ b/amgprec/impl/aggregator/amg_s_parmatch_spmm_bld_ov.f90 @@ -0,0 +1,180 @@ +! +! +! AMG4PSBLAS Extensions +! +! (C) Copyright 2019 +! +! Salvatore Filippone Cranfield University +! Pasqua D'Ambra IAC-CNR, Naples, IT +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_saggrmat_nosmth_bld_ov.F90 +! +! Subroutine: amg_saggrmat_nosmth_bld_ov +! Version: real +! +! This routine builds a coarse-level matrix A_C from a fine-level matrix A +! by using the Galerkin approach, i.e. +! +! A_C = P_C^T A P_C, +! +! where P_C is the piecewise constant interpolation operator corresponding +! the fine-to-coarse level mapping built by amg_aggrmap_bld_ov. +! +! The coarse-level matrix A_C is distributed among the parallel processes or +! replicated on each of them, according to the value of p%parms%coarse_mat +! specified by the user through amg_sprecinit and amg_zprecset. +! On output from this routine the entries of AC, op_prol, op_restr +! are still in "global numbering" mode; this is fixed in the calling routine +! +! For details see +! P. D'Ambra, D. di Serafino and S. Filippone, On the development of +! PSBLAS-based parallel two-level Schwarz preconditioners, Appl. Num. Math., +! 57 (2007), 1181-1196. +! +! +! Arguments: +! a - type(psb_sspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! p - type(amg_s_onelev_type), input/output. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! parms - type(amg_sml_parms), input +! Parameters controlling the choice of algorithm +! ac - type(psb_sspmat_type), output +! The coarse matrix on output +! +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! op_prol - type(psb_sspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_sspmat_type), output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +! +subroutine amg_s_parmatch_spmm_bld_ov(a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + use psb_base_mod + use amg_s_inner_mod + use amg_s_parmatch_aggregator_mod, amg_protect_name => amg_s_parmatch_spmm_bld_ov + implicit none + + ! Arguments + type(psb_sspmat_type), intent(inout) :: a + type(psb_desc_type), intent(in) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_sml_parms), intent(inout) :: parms + type(psb_lsspmat_type), intent(inout) :: t_prol + type(psb_sspmat_type), intent(inout) :: ac, op_prol, op_restr + type(psb_desc_type), intent(out) :: desc_ac + integer(psb_ipk_), intent(out) :: info + + ! Local variables + integer(psb_ipk_) :: err_act + + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me + character(len=20) :: name + type(psb_s_csr_sparse_mat) :: acsr + type(psb_ls_coo_sparse_mat) :: coo_prol, coo_restr + integer(psb_lpk_) :: nrow, nglob, ncol, ntaggr, nzl, ip, & + & naggr, nzt, naggrm1, naggrp1, i, k + integer(psb_ipk_) :: inaggr, nzlp + integer(psb_ipk_) :: debug_level, debug_unit + logical, parameter :: debug=.false., new_version=.true. + + name='amg_parmatch_spmm_bld_ov' + if(psb_get_errstatus().ne.0) return + info=psb_success_ + call psb_erractionsave(err_act) + + + ictxt = desc_a%get_context() + call psb_info(ictxt, me, np) + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + call a%mv_to(acsr) + + call amg_s_parmatch_spmm_bld_inner(acsr,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + if (psb_errstatus_fatal()) write(0,*)me,trim(name),'Error fatal on exit from bld_inner',info + +!!$ else +!!$ naggr = nlaggr(me+1) +!!$ ntaggr = sum(nlaggr) +!!$ naggrm1 = sum(nlaggr(1:me)) +!!$ naggrp1 = sum(nlaggr(1:me+1)) +!!$ call op_prol%mv_to(coo_prol) +!!$ inaggr = naggr +!!$ call psb_cdall(ictxt,desc_ac,info,nl=inaggr) +!!$ nzlp = coo_prol%get_nzeros() +!!$ call desc_ac%indxmap%g2lip_ins(coo_prol%ja(1:nzlp),info) +!!$ call coo_prol%set_ncols(desc_ac%get_local_cols()) +!!$ call amg_spmm_bld_inner(acsr,desc_a,nlaggr,parms,ac,& +!!$ & coo_prol,desc_ac,coo_restr,info) +!!$ call psb_cdasb(desc_ac,info) +!!$ !call desc_ac%free(info) +!!$ !write(0,*) me, 'Size of nlaggr',size(nlaggr),nlaggr(1) +!!$ call op_prol%mv_from(coo_prol) +!!$ call op_restr%mv_from(coo_restr) +!!$ +!!$ end if + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + call psb_errpush(info,name,a_err="SPMM_BLD_INNER") + goto 9999 + end if + + if (debug_level >= psb_debug_outer_) & + & write(debug_unit,*) me,' ',trim(name),& + & 'Done spmm_bld ' + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + + return + +end subroutine amg_s_parmatch_spmm_bld_ov diff --git a/amgprec/impl/aggregator/amg_s_parmatch_unsmth_bld.f90 b/amgprec/impl/aggregator/amg_s_parmatch_unsmth_bld.f90 new file mode 100644 index 00000000..7fcba845 --- /dev/null +++ b/amgprec/impl/aggregator/amg_s_parmatch_unsmth_bld.f90 @@ -0,0 +1,251 @@ +! +! +! AMG4PSBLAS version 2.2 +! MultiLevel Domain Decomposition Parallel Preconditioners Package +! based on PSBLAS (Parallel Sparse BLAS version 3.5) +! +! (C) Copyright 2008-2018 +! +! Salvatore Filippone +! Pasqua D'Ambra +! Daniela di Serafino +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the AMG4PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AMG4PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: amg_s_parmatch_unsmth_bld.F90 +! +! Subroutine: amg_s_parmatch_unsmth_bld +! Version: real +! +! This routine builds a coarse-level matrix A_C from a fine-level matrix A +! by using the Galerkin approach, i.e. +! +! A_C = P_C^T A P_C, +! +! where P_C is a prolongator from the coarse level to the fine one. +! +! The prolongator P_C is built according to a smoothed aggregation algorithm, +! i.e. it is obtained by applying a damped Jacobi smoother to the piecewise +! constant interpolation operator P corresponding to the fine-to-coarse level +! mapping built by the amg_aggrmap_bld subroutine: +! +! P_C = (I - omega*D^(-1)A) * P, +! +! where D is the diagonal matrix with main diagonal equal to the main diagonal +! of A, and omega is a suitable smoothing parameter. An estimate of the spectral +! radius of D^(-1)A, to be used in the computation of omega, is provided, +! according to the value of p%parms%aggr_omega_alg, specified by the user +! through amg_sprecinit and amg_zprecset. +! +! The coarse-level matrix A_C is distributed among the parallel processes or +! replicated on each of them, according to the value of p%parms%coarse_mat, +! specified by the user through amg_sprecinit and amg_zprecset. +! On output from this routine the entries of AC, op_prol, op_restr +! are still in "global numbering" mode; this is fixed in the calling routine +! aggregator%mat_bld. +! +! +! Arguments: +! a - type(psb_sspmat_type), input. +! The sparse matrix structure containing the local part of +! the fine-level matrix. +! desc_a - type(psb_desc_type), input. +! The communication descriptor of the fine-level matrix. +! p - type(amg_s_onelev_type), input/output. +! The 'one-level' data structure that will contain the local +! part of the matrix to be built as well as the information +! concerning the prolongator and its transpose. +! parms - type(amg_sml_parms), input +! Parameters controlling the choice of algorithm +! ac - type(psb_sspmat_type), output +! The coarse matrix on output +! +! ilaggr - integer, dimension(:), input +! The mapping between the row indices of the coarse-level +! matrix and the row indices of the fine-level matrix. +! ilaggr(i)=j means that node i in the adjacency graph +! of the fine-level matrix is mapped onto node j in the +! adjacency graph of the coarse-level matrix. Note that the indices +! are assumed to be shifted so as to make sure the ranges on +! the various processes do not overlap. +! nlaggr - integer, dimension(:) input +! nlaggr(i) contains the aggregates held by process i. +! op_prol - type(psb_sspmat_type), input/output +! The tentative prolongator on input, the computed prolongator on output +! +! op_restr - type(psb_sspmat_type), output +! The restrictor operator; normally, it is the transpose of the prolongator. +! +! info - integer, output. +! Error code. +! +subroutine amg_s_parmatch_unsmth_bld(ag,a,desc_a,ilaggr,nlaggr,parms,& + & ac,desc_ac,op_prol,op_restr,t_prol,info) + use psb_base_mod + use amg_base_prec_type + use amg_s_inner_mod + use amg_s_base_aggregator_mod + use amg_s_parmatch_aggregator_mod, amg_protect_name => amg_s_parmatch_unsmth_bld + implicit none + + ! Arguments + class(amg_s_parmatch_aggregator_type), target, intent(inout) :: ag + type(psb_sspmat_type), intent(in) :: a + type(psb_desc_type), intent(inout) :: desc_a + integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) + type(amg_sml_parms), intent(inout) :: parms + type(psb_sspmat_type), intent(inout) :: op_prol,ac,op_restr + type(psb_lsspmat_type), intent(inout) :: t_prol + type(psb_desc_type), intent(inout) :: desc_ac + integer(psb_ipk_), intent(out) :: info + + ! Local variables + integer(psb_lpk_) :: nrow, nglob, ncol, ntaggr, ip, & + & naggr, nzl,naggrm1,naggrp1, i, j, k, jd, icolF, nrw + integer(psb_ipk_) :: inaggr + type(psb_ctxt_type) :: ictxt + integer(psb_ipk_) :: np, me + character(len=20) :: name + type(psb_ls_coo_sparse_mat) :: lcoo_prol + type(psb_s_coo_sparse_mat) :: coo_prol, coo_restr + type(psb_s_csr_sparse_mat) :: acsr + type(psb_s_csr_sparse_mat) :: csr_prol, acsr3, csr_restr, ac_csr + real(psb_spk_), allocatable :: adiag(:) + real(psb_spk_), allocatable :: arwsum(:) + logical :: filter_mat + integer(psb_ipk_) :: debug_level, debug_unit, err_act + integer(psb_ipk_), parameter :: ncmax=16 + real(psb_spk_) :: anorm, omega, tmp, dg, theta + logical, parameter :: debug_new=.false., dump_r=.false., dump_p=.false., debug=.false. + logical, parameter :: do_timings=.false. + integer(psb_ipk_), save :: idx_spspmm=-1 + character(len=80) :: filename + + name='amg_parmatch_unsmth_bld' + info=psb_success_ + call psb_erractionsave(err_act) + if (psb_errstatus_fatal()) then + info = psb_err_internal_error_; goto 9999 + end if + debug_unit = psb_get_debug_unit() + debug_level = psb_get_debug_level() + + ictxt = desc_a%get_context() + + call psb_info(ictxt, me, np) + + nglob = desc_a%get_global_rows() + nrow = desc_a%get_local_rows() + ncol = desc_a%get_local_cols() + + theta = parms%aggr_thresh + !write(0,*) me,' ',trim(name),' Start ' + + if ((do_timings).and.(idx_spspmm==-1)) & + & idx_spspmm = psb_get_timer_idx("PMC_UNSMTH_BLD: par_spspmm") + + ! + naggr = nlaggr(me+1) + ntaggr = sum(nlaggr) + naggrm1 = sum(nlaggr(1:me)) + naggrp1 = sum(nlaggr(1:me+1)) + !write(0,*) me,' ',trim(name),' input sizes',nlaggr(:),':',naggr + + call a%cp_to(acsr) + call t_prol%mv_to(lcoo_prol) + + inaggr = naggr + call psb_cdall(ictxt,desc_ac,info,nl=inaggr) + nzl = lcoo_prol%get_nzeros() + call desc_ac%indxmap%g2lip_ins(lcoo_prol%ja(1:nzl),info) + call lcoo_prol%set_ncols(desc_ac%get_local_cols()) + call lcoo_prol%cp_to_icoo(coo_prol,info) + + if (debug) call check_coo(me,trim(name)//' Check 1 on coo_prol:',coo_prol) + + call psb_cdasb(desc_ac,info) + call psb_cd_reinit(desc_ac,info) + if (.not.allocated(ag%desc_ax)) allocate(ag%desc_ax) + + call amg_ptap_bld(acsr,desc_a,nlaggr,parms,ac,& + & coo_prol,desc_ac,coo_restr,info,desc_ax=ag%desc_ax) + + call op_restr%cp_from(coo_restr) + call op_prol%mv_from(coo_prol) + + if (debug) write(0,*) me,' ',trim(name),' After mv_from',psb_get_errstatus() + if (debug) write(0,*) me,' ',trim(name),' ',ac%get_fmt(),ac%get_nrows(),ac%get_ncols(),ac%get_nzeros(),naggr,ntaggr + ! write(0,*) me,' ',trim(name),' Final AC newstyle ',ac%get_fmt(),ac%get_nrows(),ac%get_ncols(),ac%get_nzeros() + + if (debug) then + write(0,*) me,' ',trim(name),' Checkpoint at exit' + call psb_barrier(ictxt) + write(0,*) me,' ',trim(name),' Checkpoint through' + block + character(len=128) :: fname, prefix_ + integer :: lname + prefix_ = "unsmth_bld_" + lname = len_trim(prefix_) + fname = trim(prefix_) + write(fname(lname+1:lname+10),'(a,i3.3,a)') '_p_',me, '.mtx' + call op_prol%print(fname,head='Debug aggregates') + write(fname(lname+1:lname+10),'(a,i3.3,a)') '_r_',me, '.mtx' + call op_restr%print(fname,head='Debug aggregates') + write(fname(lname+1:lname+11),'(a,i3.3,a)') '_ac_',me, '.mtx' + call ac%print(fname,head='Debug aggregates') + end block + end if + + if (info /= psb_success_) then + call psb_errpush(psb_err_internal_error_,name,a_err='Build ac = coo_restr x am3') + goto 9999 + end if + + + call psb_erractionrestore(err_act) + return + +9999 continue + call psb_errpush(info,name) + call psb_error_handler(err_act) + return + +contains + subroutine check_coo(me,string,coo) + implicit none + integer(psb_ipk_) :: me + type(psb_s_coo_sparse_mat) :: coo + character(len=*) :: string + integer(psb_lpk_) :: nr,nc,nz + nr = coo%get_nrows() + nc = coo%get_ncols() + nz = coo%get_nzeros() + write(0,*) me,string,nr,nc,& + & minval(coo%ia(1:nz)),maxval(coo%ia(1:nz)),& + & minval(coo%ja(1:nz)),maxval(coo%ja(1:nz)) + end subroutine check_coo + +end subroutine amg_s_parmatch_unsmth_bld diff --git a/amgprec/impl/aggregator/dataStrStaticQueue.h b/amgprec/impl/aggregator/dataStrStaticQueue.h new file mode 100755 index 00000000..eecbffeb --- /dev/null +++ b/amgprec/impl/aggregator/dataStrStaticQueue.h @@ -0,0 +1,199 @@ +// *********************************************************************** +// +// MatchboxP: A C++ library for approximate weighted matching +// Mahantesh Halappanavar (hala@pnnl.gov) +// Pacific Northwest National Laboratory +// +// *********************************************************************** +// +// Copyright (2021) Battelle Memorial Institute +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// ************************************************************************ + +#ifndef _static_Queue_ +#define _static_Queue_ + +#include "primitiveDataTypeDefinitions.h" +#include "preProcessorDirectives.h" + +using namespace std; + +/* ------------------------------------------------------------------------- */ +/* STATIC QUEUE/STACK CLASS */ +/* +Objective: + * Provide a Static Queue/Stack Implementation: +Rationale: + * Since dynamic memory allocation can be expensive, we want to provide Queue + implementation that initializes data only once. +Assumption: + * The maximum size of the number of elements that can ever be in the Queue is + know apriori. + * Supports elements only of type Integer (regular or long) + * Will add one extra element to the vector ( maxSize + 1 ) to wrap around +Functions Provided: + - Default Constructor : O(C) + - Constructor with given size : O(N) (will have N+1 capacity, for wrap around) + - push_back(i) : O(C) + - front() : O(C) + - pop_front() : O(C) : !!! Modified, will return the element !!! + - empty() : O(C) + - clear() : O(C) //O(N): in regular case + - back() : O(C) + - pop_back() : O(C) : !!! Modified, will return the element !!! + - size(): O(C) +*/ +class staticQueue +{ + private: + vector squeue; + MilanLongInt squeueHead; + MilanLongInt squeueTail; + MilanLongInt NumNodes; + + //Prevent Assignment and Pass by Value: + staticQueue(const staticQueue& src); + staticQueue& operator=(const staticQueue& rhs); + + public: + //Constructors and Destructors + staticQueue() { squeueHead = 0; squeueTail = 0; NumNodes = 0; } //Default Constructor + staticQueue(MilanLongInt maxSize) //MaximumSize + { + squeueHead = 0; //Head of the static Stack + squeueTail = 0; //Tail of the Statuc Stack + NumNodes = maxSize; + try + { + squeue.reserve(NumNodes+1); //The number of nodes plus one to swap around + } + catch ( length_error ) + { + cerr<<"Within Function: staticQueue(MilanLongInt maxSize) \n"; + cerr<<"Error: Not enough memory to allocate for Queue \n"; + exit(1); + } + squeue.resize( NumNodes+1, -1 ); //Initialize the stack with -1 + } + ~staticQueue() {}; //The destructor + + //Access: + MilanLongInt front() { return squeue[squeueHead]; } //Non destructive + MilanLongInt back() + { + if ( squeueTail == 0 ) //make it wrap around + return squeue[NumNodes]; + else + return squeue[squeueTail-1]; + } + MilanLongInt getHead() { return squeueHead; } + MilanLongInt getTail() { return squeueTail; } + + //Manipulation: + void push_back(MilanLongInt newElement) + { + //Q.push_back(i); + squeue[squeueTail] = newElement; + squeueTail = (squeueTail+1)%(NumNodes+1); + } + MilanLongInt pop_front() // !!! Modified, will return the element !!! + { + //Q.pop_front(); + MilanLongInt U = squeue[squeueHead]; + squeueHead = (squeueHead+1)%(NumNodes+1); + return U; + } + MilanLongInt pop_back() //!!! Modified, will return the element !!! + { + //S.pop_back(); + if ( squeueTail == 0 ) //make it wrap around + squeueTail = NumNodes; + else + squeueTail = (squeueTail-1); //Remove the last element + return squeue[squeueTail]; //Needs to be here. Because, the tail always points to the + //counter after the last existing element. + } + void clear() + { + //Q.clear(); //Empty the Queue + squeueHead = 0; //Head of the static Queue + squeueTail = 0; //Tail of the Statuc Queue + } + + //Query: + MilanBool empty() + { + //Q.empty(); + if ( squeueHead == squeueTail ) + return true; + else + return false; + } //end of empty() + MilanLongInt size() + { + //Q.size(); + MilanLongInt size = 0; + if ( squeueHead == squeueTail ) + return size; + else + if ( squeueHead < squeueTail ) + return ( squeueTail - squeueHead ); + else + return ( NumNodes + 1 - squeueHead + squeueTail ); + } //End of size() + void display() + { + //Q.display(); + MilanLongInt i=0; + cout<<"Queue: "< +#include +#include +#include + +//System/C +#include +#include //Exception Handling +#include +#include + +#include +#include + + + +#include //Defines the LDBL_MAX and LDBL_MIN in gcc +#include +#include +#include +#include + +//STL +#include +#include +#include +#include + +//MPI: +#include "mpi.h" + + + +#endif diff --git a/amgprec/impl/aggregator/primitiveDataTypeDefinitions.h b/amgprec/impl/aggregator/primitiveDataTypeDefinitions.h new file mode 100755 index 00000000..fd75f265 --- /dev/null +++ b/amgprec/impl/aggregator/primitiveDataTypeDefinitions.h @@ -0,0 +1,156 @@ +// *********************************************************************** +// +// MatchboxP: A C++ library for approximate weighted matching +// Mahantesh Halappanavar (hala@pnnl.gov) +// Pacific Northwest National Laboratory +// +// *********************************************************************** +// +// Copyright (2021) Battelle Memorial Institute +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// ************************************************************************ + +#ifndef _primitiveDataType_Definition_ +#define _primitiveDataType_Definition_ + +#include "preProcessorDirectives.h" + +using namespace std; + +//Comment out these if you do not need 64 bits. +//#ifndef BIT64 +// #define BIT64 +//#endif + +//Regular integer: +#ifndef INTEGER_H +#define INTEGER_H + typedef int MilanInt; +// typedef MPI_INT MilanMpiInt; +#endif + +//Regular long Integer: +#ifndef LONG_INT_H +#define LONG_INT_H + #ifdef BIT64 + typedef int64_t MilanLongInt; +// typedef MPI_LONG MilanMpiLongInt; + #else + typedef int MilanLongInt; +// typedef MPI_INT MilanMpiLongInt; + #endif +#endif + +//Regular boolean +#ifndef BOOL_H +#define BOOL_H + typedef bool MilanBool; +#endif + +//Regular double and the Absolute Function: +#ifndef REAL_H +#define REAL_H + typedef double MilanReal; + //typedef MPI_DOUBLE MilanMpiReal; + inline MilanReal MilanAbs(MilanReal value) + { + return fabs(value); + } +#endif + +//Regular double and the Absolute Function: +#ifndef FLOAT_H +#define FLOAT_H + typedef float MilanFloat; + //typedef MPI_FLOAT MilanMpiFloat; + inline MilanFloat MilanAbsFloat(MilanFloat value) + { + return fabs(value); + } +#endif + +//// Define the limits: +#ifndef LIMITS_H +#define LIMITS_H + +//Integer Maximum and Minimum: +#define MilanIntMax INT_MAX +#define MilanIntMin INT_MIN + +#ifdef BIT64 + #define MilanLongIntMax LONG_MAX + #define MilanLongIntMin -LONG_MAX +#else + #define MilanLongIntMax INT_MAX + #define MilanLongIntMin -INT_MAX +#endif + +//Double Maximum and Minimum: +//Note: You can alternative use INFINITY defined in math.h +//It has been my experience that this is not very portable. +//Therefore I have adopted for LDBL_MAX and LDBL_MIN as +/- infinity. + +//Largest positive number: LDBL_MAX = +infinity +//Smallest positive number: LDBL_MIN +//Smallest negative number: -LDBL_MAX = -infinity +//Largest negative number: -LDBL_MIN (just next to zero on the other side?) + +// +INFINITY +const double PLUS_INFINITY = numeric_limits::infinity(); +const float FPLUS_INFINITY = numeric_limits::infinity(); +//if(numeric_limits::has_infinity) +// PLUS_INFINITY=numeric_limits::infinity(); +//else cerr<<"infinity for float isn�t supported"; + +const double MINUS_INFINITY = -PLUS_INFINITY; +const float FMINUS_INFINITY = -FPLUS_INFINITY; + + +//#define MilanRealMax LDBL_MAX +#define MilanRealMax PLUS_INFINITY +#define MilanFloatMax FPLUS_INFINITY + +// -INFINITY +//Instead of assigning smallest possible positive number, assign smallest negative number +//although we only consider postive weights, just for correctness of understand. +//#define MilanRealMin -LDBL_MAX +//#define MilanRealMin LDBL_MIN +#define MilanRealMin MINUS_INFINITY +#define MilanFloatMin FMINUS_INFINITY + +//const double PLUS_INFINITY = LDBL_MAX; //deprecated + + + +#endif + +#endif diff --git a/tests/fileread/amg_cf_sample.f90 b/tests/fileread/amg_cf_sample.f90 index 6a5d3008..bf134e9b 100644 --- a/tests/fileread/amg_cf_sample.f90 +++ b/tests/fileread/amg_cf_sample.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! based on PSBLAS (Parallel Sparse BLAS version 3.5) ! ! (C) Copyright 2021 ! diff --git a/tests/fileread/amg_df_sample.f90 b/tests/fileread/amg_df_sample.f90 index 31b7f395..334f7834 100644 --- a/tests/fileread/amg_df_sample.f90 +++ b/tests/fileread/amg_df_sample.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! based on PSBLAS (Parallel Sparse BLAS version 3.5) ! ! (C) Copyright 2021 ! diff --git a/tests/fileread/amg_sf_sample.f90 b/tests/fileread/amg_sf_sample.f90 index a82190c3..b45395cc 100644 --- a/tests/fileread/amg_sf_sample.f90 +++ b/tests/fileread/amg_sf_sample.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! based on PSBLAS (Parallel Sparse BLAS version 3.5) ! ! (C) Copyright 2021 ! diff --git a/tests/fileread/amg_zf_sample.f90 b/tests/fileread/amg_zf_sample.f90 index 14a9793f..e352cefc 100644 --- a/tests/fileread/amg_zf_sample.f90 +++ b/tests/fileread/amg_zf_sample.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! based on PSBLAS (Parallel Sparse BLAS version 3.5) ! ! (C) Copyright 2021 ! diff --git a/tests/pdegen/amg_d_pde2d.f90 b/tests/pdegen/amg_d_pde2d.f90 index d43066aa..b2783439 100644 --- a/tests/pdegen/amg_d_pde2d.f90 +++ b/tests/pdegen/amg_d_pde2d.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! based on PSBLAS (Parallel Sparse BLAS version 3.5) ! ! (C) Copyright 2021 ! diff --git a/tests/pdegen/amg_d_pde3d.f90 b/tests/pdegen/amg_d_pde3d.f90 index f5d54e5f..a221dcf6 100644 --- a/tests/pdegen/amg_d_pde3d.f90 +++ b/tests/pdegen/amg_d_pde3d.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! based on PSBLAS (Parallel Sparse BLAS version 3.5) ! ! (C) Copyright 2021 ! diff --git a/tests/pdegen/amg_s_pde2d.f90 b/tests/pdegen/amg_s_pde2d.f90 index 8201c6fe..09fceca1 100644 --- a/tests/pdegen/amg_s_pde2d.f90 +++ b/tests/pdegen/amg_s_pde2d.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! based on PSBLAS (Parallel Sparse BLAS version 3.5) ! ! (C) Copyright 2021 ! diff --git a/tests/pdegen/amg_s_pde3d.f90 b/tests/pdegen/amg_s_pde3d.f90 index c414eb8c..f1be9b80 100644 --- a/tests/pdegen/amg_s_pde3d.f90 +++ b/tests/pdegen/amg_s_pde3d.f90 @@ -2,7 +2,7 @@ ! ! AMG4PSBLAS version 1.0 ! Algebraic Multigrid Package -! based on PSBLAS (Parallel Sparse BLAS version 3.7) +! based on PSBLAS (Parallel Sparse BLAS version 3.5) ! ! (C) Copyright 2021 !