From a17f503486f6c1c00aea684132c74f89cf227ebb Mon Sep 17 00:00:00 2001 From: Cirdans-Home Date: Fri, 26 Apr 2024 10:54:43 +0000 Subject: [PATCH 1/8] First hardcoded implementation of l1 smooth aggregation --- .../impl/aggregator/amg_daggrmat_smth_bld.f90 | 37 +++++++++++++++---- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/amgprec/impl/aggregator/amg_daggrmat_smth_bld.f90 b/amgprec/impl/aggregator/amg_daggrmat_smth_bld.f90 index d365bf27..2b9b1ea7 100644 --- a/amgprec/impl/aggregator/amg_daggrmat_smth_bld.f90 +++ b/amgprec/impl/aggregator/amg_daggrmat_smth_bld.f90 @@ -112,11 +112,11 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& implicit none ! Arguments - type(psb_dspmat_type), intent(in) :: a + type(psb_dspmat_type), intent(in) :: a type(psb_desc_type), intent(inout) :: desc_a integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) - type(amg_dml_parms), intent(inout) :: parms - type(psb_dspmat_type), intent(inout) :: op_prol,ac,op_restr + type(amg_dml_parms), intent(inout) :: parms + type(psb_dspmat_type), intent(inout) :: op_prol,ac,op_restr type(psb_ldspmat_type), intent(inout) :: t_prol type(psb_desc_type), intent(inout) :: desc_ac integer(psb_ipk_), intent(out) :: info @@ -132,7 +132,7 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& type(psb_d_coo_sparse_mat) :: coo_prol, coo_restr type(psb_d_csr_sparse_mat) :: acsr1, acsrf, csr_prol, acsr real(psb_dpk_), allocatable :: adiag(:) - real(psb_dpk_), allocatable :: arwsum(:) + real(psb_dpk_), allocatable :: arwsum(:),l1rwsum(:) integer(psb_ipk_) :: ierr(5) logical :: filter_mat integer(psb_ipk_) :: debug_level, debug_unit, err_act @@ -141,6 +141,7 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& logical, parameter :: debug_new=.false. character(len=80) :: filename logical, parameter :: do_timings=.false. + logical, parameter :: do_l1correction=.true. integer(psb_ipk_), save :: idx_spspmm=-1, idx_phase1=-1, idx_gtrans=-1, idx_phase2=-1, idx_refine=-1 integer(psb_ipk_), save :: idx_phase3=-1, idx_cdasb=-1, idx_ptap=-1 @@ -200,6 +201,21 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& if (info == psb_success_) & & call psb_halo(adiag,desc_a,info) if (info == psb_success_) call a%cp_to(acsr) + ! Get the l1-diagonal of D + if (do_l1correction) then + allocate(l1rwsum(nrow)) + call acsr%arwsum(l1rwsum) + if (info == psb_success_) & + & call psb_realloc(ncol,l1rwsum,info) + if (info == psb_success_) & + & call psb_halo(l1rwsum,desc_a,info) + ! \tilde{D}_{i,i} = \sum_{j \ne i} |a_{i,j}| + !$OMP parallel do private(i) schedule(static) + do i=1,size(adiag) + adiag(i) = adiag(i) + l1rwsum(i) - abs(adiag(i)) + end do + !$OMP end parallel do + end if if(info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='sp_getdiag') @@ -230,7 +246,7 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& enddo if (jd == -1) then - write(0,*) 'Wrong input: we need the diagonal!!!!', i + if (.not.do_l1correction) write(0,*) 'Wrong input: we need the diagonal!!!!', i else acsrf%val(jd)=acsrf%val(jd)-tmp end if @@ -240,7 +256,6 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call acsrf%clean_zeros(info) end if - !$OMP parallel do private(i) schedule(static) do i=1,size(adiag) if (adiag(i) /= dzero) then @@ -249,7 +264,8 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& adiag(i) = done end if end do - !$OMP end parallel do + !$OMP end parallel do + if (parms%aggr_omega_alg == amg_eig_est_) then if (parms%aggr_eig == amg_max_norm_) then @@ -259,7 +275,9 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call psb_amx(ctxt,anorm) omega = 4.d0/(3.d0*anorm) parms%aggr_omega_val = omega - + else if (do_l1correction) then + ! For l1-Jacobi this can be estimated with 1 + parms%aggr_omega_val = done else info = psb_err_internal_error_ call psb_errpush(info,name,a_err='invalid amg_aggr_eig_') @@ -323,6 +341,9 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& & write(debug_unit,*) me,' ',trim(name),& & 'Done smooth_aggregate ' call psb_erractionrestore(err_act) + + if (allocated(l1rwsum)) deallocate(l1rwsum) + if (allocated(arwsum)) deallocate(arwsum) return 9999 continue From 5790aa0cbd5aa5235bbae6742f30f738ff77b687 Mon Sep 17 00:00:00 2001 From: Cirdans-Home Date: Fri, 26 Apr 2024 10:55:47 +0000 Subject: [PATCH 2/8] Revert "First hardcoded implementation of l1 smooth aggregation" This reverts commit a17f503486f6c1c00aea684132c74f89cf227ebb. --- .../impl/aggregator/amg_daggrmat_smth_bld.f90 | 37 ++++--------------- 1 file changed, 8 insertions(+), 29 deletions(-) diff --git a/amgprec/impl/aggregator/amg_daggrmat_smth_bld.f90 b/amgprec/impl/aggregator/amg_daggrmat_smth_bld.f90 index 2b9b1ea7..d365bf27 100644 --- a/amgprec/impl/aggregator/amg_daggrmat_smth_bld.f90 +++ b/amgprec/impl/aggregator/amg_daggrmat_smth_bld.f90 @@ -112,11 +112,11 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& implicit none ! Arguments - type(psb_dspmat_type), intent(in) :: a + type(psb_dspmat_type), intent(in) :: a type(psb_desc_type), intent(inout) :: desc_a integer(psb_lpk_), intent(inout) :: ilaggr(:), nlaggr(:) - type(amg_dml_parms), intent(inout) :: parms - type(psb_dspmat_type), intent(inout) :: op_prol,ac,op_restr + type(amg_dml_parms), intent(inout) :: parms + type(psb_dspmat_type), intent(inout) :: op_prol,ac,op_restr type(psb_ldspmat_type), intent(inout) :: t_prol type(psb_desc_type), intent(inout) :: desc_ac integer(psb_ipk_), intent(out) :: info @@ -132,7 +132,7 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& type(psb_d_coo_sparse_mat) :: coo_prol, coo_restr type(psb_d_csr_sparse_mat) :: acsr1, acsrf, csr_prol, acsr real(psb_dpk_), allocatable :: adiag(:) - real(psb_dpk_), allocatable :: arwsum(:),l1rwsum(:) + real(psb_dpk_), allocatable :: arwsum(:) integer(psb_ipk_) :: ierr(5) logical :: filter_mat integer(psb_ipk_) :: debug_level, debug_unit, err_act @@ -141,7 +141,6 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& logical, parameter :: debug_new=.false. character(len=80) :: filename logical, parameter :: do_timings=.false. - logical, parameter :: do_l1correction=.true. integer(psb_ipk_), save :: idx_spspmm=-1, idx_phase1=-1, idx_gtrans=-1, idx_phase2=-1, idx_refine=-1 integer(psb_ipk_), save :: idx_phase3=-1, idx_cdasb=-1, idx_ptap=-1 @@ -201,21 +200,6 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& if (info == psb_success_) & & call psb_halo(adiag,desc_a,info) if (info == psb_success_) call a%cp_to(acsr) - ! Get the l1-diagonal of D - if (do_l1correction) then - allocate(l1rwsum(nrow)) - call acsr%arwsum(l1rwsum) - if (info == psb_success_) & - & call psb_realloc(ncol,l1rwsum,info) - if (info == psb_success_) & - & call psb_halo(l1rwsum,desc_a,info) - ! \tilde{D}_{i,i} = \sum_{j \ne i} |a_{i,j}| - !$OMP parallel do private(i) schedule(static) - do i=1,size(adiag) - adiag(i) = adiag(i) + l1rwsum(i) - abs(adiag(i)) - end do - !$OMP end parallel do - end if if(info /= psb_success_) then call psb_errpush(psb_err_from_subroutine_,name,a_err='sp_getdiag') @@ -246,7 +230,7 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& enddo if (jd == -1) then - if (.not.do_l1correction) write(0,*) 'Wrong input: we need the diagonal!!!!', i + write(0,*) 'Wrong input: we need the diagonal!!!!', i else acsrf%val(jd)=acsrf%val(jd)-tmp end if @@ -256,6 +240,7 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call acsrf%clean_zeros(info) end if + !$OMP parallel do private(i) schedule(static) do i=1,size(adiag) if (adiag(i) /= dzero) then @@ -264,8 +249,7 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& adiag(i) = done end if end do - !$OMP end parallel do - + !$OMP end parallel do if (parms%aggr_omega_alg == amg_eig_est_) then if (parms%aggr_eig == amg_max_norm_) then @@ -275,9 +259,7 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& call psb_amx(ctxt,anorm) omega = 4.d0/(3.d0*anorm) parms%aggr_omega_val = omega - else if (do_l1correction) then - ! For l1-Jacobi this can be estimated with 1 - parms%aggr_omega_val = done + else info = psb_err_internal_error_ call psb_errpush(info,name,a_err='invalid amg_aggr_eig_') @@ -341,9 +323,6 @@ subroutine amg_daggrmat_smth_bld(a,desc_a,ilaggr,nlaggr,parms,& & write(debug_unit,*) me,' ',trim(name),& & 'Done smooth_aggregate ' call psb_erractionrestore(err_act) - - if (allocated(l1rwsum)) deallocate(l1rwsum) - if (allocated(arwsum)) deallocate(arwsum) return 9999 continue From 3e3b3431316558de0fa69d4e7b0af15574363744 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 10 May 2024 15:02:36 +0200 Subject: [PATCH 3/8] Fix potential overflow issue in SOC_MAP_BLD --- amgprec/impl/aggregator/amg_c_soc1_map_bld.F90 | 3 ++- amgprec/impl/aggregator/amg_c_soc2_map_bld.F90 | 3 ++- amgprec/impl/aggregator/amg_d_soc1_map_bld.F90 | 3 ++- amgprec/impl/aggregator/amg_d_soc2_map_bld.F90 | 3 ++- amgprec/impl/aggregator/amg_s_soc1_map_bld.F90 | 3 ++- amgprec/impl/aggregator/amg_s_soc2_map_bld.F90 | 3 ++- amgprec/impl/aggregator/amg_z_soc1_map_bld.F90 | 3 ++- amgprec/impl/aggregator/amg_z_soc2_map_bld.F90 | 3 ++- 8 files changed, 16 insertions(+), 8 deletions(-) diff --git a/amgprec/impl/aggregator/amg_c_soc1_map_bld.F90 b/amgprec/impl/aggregator/amg_c_soc1_map_bld.F90 index 53892ebc..24720675 100644 --- a/amgprec/impl/aggregator/amg_c_soc1_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_c_soc1_map_bld.F90 @@ -275,7 +275,8 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0) if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) diff --git a/amgprec/impl/aggregator/amg_c_soc2_map_bld.F90 b/amgprec/impl/aggregator/amg_c_soc2_map_bld.F90 index b250e434..57ed8893 100644 --- a/amgprec/impl/aggregator/amg_c_soc2_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_c_soc2_map_bld.F90 @@ -309,7 +309,8 @@ subroutine amg_c_soc2_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in ! if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) diff --git a/amgprec/impl/aggregator/amg_d_soc1_map_bld.F90 b/amgprec/impl/aggregator/amg_d_soc1_map_bld.F90 index fba80c10..200d630c 100644 --- a/amgprec/impl/aggregator/amg_d_soc1_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_d_soc1_map_bld.F90 @@ -275,7 +275,8 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0) if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) diff --git a/amgprec/impl/aggregator/amg_d_soc2_map_bld.F90 b/amgprec/impl/aggregator/amg_d_soc2_map_bld.F90 index 345cd1ad..e2b7ea0c 100644 --- a/amgprec/impl/aggregator/amg_d_soc2_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_d_soc2_map_bld.F90 @@ -309,7 +309,8 @@ subroutine amg_d_soc2_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in ! if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) diff --git a/amgprec/impl/aggregator/amg_s_soc1_map_bld.F90 b/amgprec/impl/aggregator/amg_s_soc1_map_bld.F90 index 857c6ff3..0f8bb7dd 100644 --- a/amgprec/impl/aggregator/amg_s_soc1_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_s_soc1_map_bld.F90 @@ -275,7 +275,8 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0) if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) diff --git a/amgprec/impl/aggregator/amg_s_soc2_map_bld.F90 b/amgprec/impl/aggregator/amg_s_soc2_map_bld.F90 index ef7f5707..99047468 100644 --- a/amgprec/impl/aggregator/amg_s_soc2_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_s_soc2_map_bld.F90 @@ -309,7 +309,8 @@ subroutine amg_s_soc2_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in ! if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) diff --git a/amgprec/impl/aggregator/amg_z_soc1_map_bld.F90 b/amgprec/impl/aggregator/amg_z_soc1_map_bld.F90 index 50fe70a2..7961921a 100644 --- a/amgprec/impl/aggregator/amg_z_soc1_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_z_soc1_map_bld.F90 @@ -275,7 +275,8 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0) if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) diff --git a/amgprec/impl/aggregator/amg_z_soc2_map_bld.F90 b/amgprec/impl/aggregator/amg_z_soc2_map_bld.F90 index c6ac226e..35d02fd0 100644 --- a/amgprec/impl/aggregator/amg_z_soc2_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_z_soc2_map_bld.F90 @@ -309,7 +309,8 @@ subroutine amg_z_soc2_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in ! if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) From ab5eaac5ed5b6bd9dc4db7dc1d8cd2f0552fc95e Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Thu, 30 May 2024 08:12:48 -0400 Subject: [PATCH 4/8] Cosmetic changes --- ...mEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index b086edad..b758dc69 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -126,8 +126,10 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( fflush(stdout); #endif - MilanLongInt StartIndex = verDistance[myRank]; // The starting vertex owned by the current rank - MilanLongInt EndIndex = verDistance[myRank + 1] - 1; // The ending vertex owned by the current rank + // The starting vertex owned by the current rank + MilanLongInt StartIndex = verDistance[myRank]; + // The ending vertex owned by the current rank + MilanLongInt EndIndex = verDistance[myRank + 1] - 1; MPI_Status computeStatus; @@ -145,7 +147,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( // only one message will be sent in the initialization phase - // one of: REQUEST/FAILURE/SUCCESS vector QLocalVtx, QGhostVtx, QMsgType; - vector QOwner; // Changed by Fabio to be an integer, addresses needs to be integers! + // Changed by Fabio to be an integer, addresses needs to be integers! + vector QOwner; MilanLongInt *PCounter = new MilanLongInt[numProcs]; for (int i = 0; i < numProcs; i++) @@ -153,7 +156,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( MilanLongInt NumMessagesBundled = 0; // TODO when the last computational section will be refactored this could be eliminated - MilanInt ghostOwner = 0; // Changed by Fabio to be an integer, addresses needs to be integers! + // Changed by Fabio to be an integer, addresses needs to be integers! + MilanInt ghostOwner = 0; MilanLongInt *candidateMate = nullptr; #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")NV: " << NLVer << " Edges: " << NLEdge; @@ -168,9 +172,12 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( MilanLongInt myCard = 0; // Build the Ghost Vertex Set: Vg - map Ghost2LocalMap; // Map each ghost vertex to a local vertex - vector Counter; // Store the edge count for each ghost vertex - MilanLongInt numGhostVertices = 0, numGhostEdges = 0; // Number of Ghost vertices + // Map each ghost vertex to a local vertex + map Ghost2LocalMap; + // Store the edge count for each ghost vertex + vector Counter; + // Number of Ghost vertices + MilanLongInt numGhostVertices = 0, numGhostEdges = 0; #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")About to compute Ghost Vertices..."; From 897c5229a624024fb5307f547456c07e581b0fd3 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Thu, 30 May 2024 08:13:08 -0400 Subject: [PATCH 5/8] Improve behaviour of OpenMP matching --- .../impl/aggregator/processExposedVertex.cpp | 51 ++++++++++--------- amgprec/impl/aggregator/queueTransfer.cpp | 3 +- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/amgprec/impl/aggregator/processExposedVertex.cpp b/amgprec/impl/aggregator/processExposedVertex.cpp index c7ac4703..81ce23eb 100644 --- a/amgprec/impl/aggregator/processExposedVertex.cpp +++ b/amgprec/impl/aggregator/processExposedVertex.cpp @@ -113,32 +113,35 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, } // End of if(w >=0) else { - // This piece of code is executed a really small amount of times - adj11 = verLocPtr[v]; - adj12 = verLocPtr[v + 1]; - for (k1 = adj11; k1 < adj12; k1++) { - w = verLocInd[k1]; - if ((w < StartIndex) || (w > EndIndex)) { // A ghost - +#pragma omp critical(adjuse) + { + // This piece of code is executed a really small number of times + adj11 = verLocPtr[v]; + adj12 = verLocPtr[v + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) { // A ghost + #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a failure message: "; - cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); - fflush(stdout); + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + fflush(stdout); #endif - (*msgInd)++; - (*NumMessagesBundled)++; - ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - // assert(ghostOwner != -1); - // assert(ghostOwner != myRank); - PCounter[ghostOwner]++; - - privateQLocalVtx.push_back(v + StartIndex); - privateQGhostVtx.push_back(w); - privateQMsgType.push_back(FAILURE); - privateQOwner.push_back(ghostOwner); - - } // End of if(GHOST) - } // End of for loop + (*msgInd)++; + (*NumMessagesBundled)++; + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); + PCounter[ghostOwner]++; + + privateQLocalVtx.push_back(v + StartIndex); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(FAILURE); + privateQOwner.push_back(ghostOwner); + + } // End of if(GHOST) + } // End of for loop + } } // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) diff --git a/amgprec/impl/aggregator/queueTransfer.cpp b/amgprec/impl/aggregator/queueTransfer.cpp index e51095da..64a60157 100644 --- a/amgprec/impl/aggregator/queueTransfer.cpp +++ b/amgprec/impl/aggregator/queueTransfer.cpp @@ -17,8 +17,6 @@ void queuesTransfer(vector &U, U.insert(U.end(), privateU.begin(), privateU.end()); } - privateU.clear(); - #pragma omp critical(sendMessageTransfer) { @@ -28,6 +26,7 @@ void queuesTransfer(vector &U, QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end()); } + privateU.clear(); privateQLocalVtx.clear(); privateQGhostVtx.clear(); privateQMsgType.clear(); From fb802c62cd51624ee16130cf8d7c42b76a8c7c95 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 30 May 2024 17:25:25 +0200 Subject: [PATCH 6/8] Merge PSBCXXDEFINES into AMGCXXDEFINES --- Make.inc.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Make.inc.in b/Make.inc.in index 3638f486..9ac10ee7 100644 --- a/Make.inc.in +++ b/Make.inc.in @@ -75,7 +75,7 @@ CDEFINES=$(AMGCDEFINES) AMGFDEFINES=@AMGFDEFINES@ $(PSBFDEFINES) FDEFINES=$(AMGFDEFINES) -CXXDEFINES=@AMGCXXDEFINES@ +CXXDEFINES=@AMGCXXDEFINES@ $(PSBCXXDEFINES) @COMPILERULES@ From 13eee99ea33edb44c1ddcb2ce47932864547a5b5 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 30 May 2024 17:26:34 +0200 Subject: [PATCH 7/8] Use ifdef OPENMP --- amgprec/impl/aggregator/MatchBoxPC.cpp | 2 +- amgprec/impl/aggregator/MatchBoxPC.h | 6 +++--- ...stEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 2 +- amgprec/impl/aggregator/clean.cpp | 2 +- amgprec/impl/aggregator/computeCandidateMate.cpp | 2 +- amgprec/impl/aggregator/extractUChunk.cpp | 2 +- amgprec/impl/aggregator/findOwnerOfGhost.cpp | 2 +- amgprec/impl/aggregator/initialize.cpp | 2 +- amgprec/impl/aggregator/isAlreadyMatched.cpp | 2 +- amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp | 2 +- amgprec/impl/aggregator/processCrossEdge.cpp | 2 +- amgprec/impl/aggregator/processExposedVertex.cpp | 2 +- .../aggregator/processMatchedVerticesAndSendMessages.cpp | 2 +- amgprec/impl/aggregator/processMessages.cpp | 2 +- amgprec/impl/aggregator/queueTransfer.cpp | 2 +- amgprec/impl/aggregator/sendBundledMessages.cpp | 2 +- 16 files changed, 18 insertions(+), 18 deletions(-) diff --git a/amgprec/impl/aggregator/MatchBoxPC.cpp b/amgprec/impl/aggregator/MatchBoxPC.cpp index 37a879be..146e8aa1 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.cpp +++ b/amgprec/impl/aggregator/MatchBoxPC.cpp @@ -73,7 +73,7 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, #endif // Rimosso per tornare al vecchio matching #define OMP -#ifdef OMP +#ifdef OPENMP fprintf(stderr,"Warning: using buggy OpenMP matching!\n"); dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(NLVer, NLEdge, verLocPtr, verLocInd, edgeLocWeight, diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h index 35cab21d..24fd3134 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.h +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -59,7 +59,7 @@ #include #include #include -#ifdef OMP +#ifdef OPENMP // OpenMP is included and used if and only if the OpenMP version of the matching // is required #include "omp.h" @@ -178,7 +178,7 @@ extern "C" #define MilanRealMin MINUS_INFINITY #endif -#ifdef OMP +#ifdef OPENMP /* These functions are only used in the experimental OMP implementation, if that is disabled there is no reason to actually compile or reference them. */ @@ -431,7 +431,7 @@ is disabled there is no reason to actually compile or reference them. */ #endif -#ifndef OMP +#ifndef OPENMP //Function of find the owner of a ghost vertex using binary search: inline MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, MilanInt myRank, MilanInt numProcs); diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index b086edad..7e332cef 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP // *********************************************************************** // // MatchboxP: A C++ library for approximate weighted matching diff --git a/amgprec/impl/aggregator/clean.cpp b/amgprec/impl/aggregator/clean.cpp index 018469e4..479dcce3 100644 --- a/amgprec/impl/aggregator/clean.cpp +++ b/amgprec/impl/aggregator/clean.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP // TODO comment void clean(MilanLongInt NLVer, diff --git a/amgprec/impl/aggregator/computeCandidateMate.cpp b/amgprec/impl/aggregator/computeCandidateMate.cpp index 39ce8db1..f70b8866 100644 --- a/amgprec/impl/aggregator/computeCandidateMate.cpp +++ b/amgprec/impl/aggregator/computeCandidateMate.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP /** * Execute the research fr the Candidate Mate without controlling if the vertices are already matched. * Returns the vertices with the highest weight diff --git a/amgprec/impl/aggregator/extractUChunk.cpp b/amgprec/impl/aggregator/extractUChunk.cpp index 0986dfb6..4e50a4f3 100644 --- a/amgprec/impl/aggregator/extractUChunk.cpp +++ b/amgprec/impl/aggregator/extractUChunk.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void extractUChunk( vector &UChunkBeingProcessed, vector &U, diff --git a/amgprec/impl/aggregator/findOwnerOfGhost.cpp b/amgprec/impl/aggregator/findOwnerOfGhost.cpp index 81c18822..2723a7a3 100644 --- a/amgprec/impl/aggregator/findOwnerOfGhost.cpp +++ b/amgprec/impl/aggregator/findOwnerOfGhost.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP /// Find the owner of a ghost node: MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, MilanInt myRank, MilanInt numProcs) diff --git a/amgprec/impl/aggregator/initialize.cpp b/amgprec/impl/aggregator/initialize.cpp index 3f0f1a10..2c8f052d 100644 --- a/amgprec/impl/aggregator/initialize.cpp +++ b/amgprec/impl/aggregator/initialize.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, MilanLongInt StartIndex, MilanLongInt EndIndex, MilanLongInt *numGhostEdges, diff --git a/amgprec/impl/aggregator/isAlreadyMatched.cpp b/amgprec/impl/aggregator/isAlreadyMatched.cpp index de5f2f18..16d47a14 100644 --- a/amgprec/impl/aggregator/isAlreadyMatched.cpp +++ b/amgprec/impl/aggregator/isAlreadyMatched.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP /** * //TODO documentation * @param k diff --git a/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp b/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp index f5429bf4..79f253eb 100644 --- a/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp +++ b/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer, MilanLongInt *verLocPtr, MilanLongInt *verLocInd, diff --git a/amgprec/impl/aggregator/processCrossEdge.cpp b/amgprec/impl/aggregator/processCrossEdge.cpp index d7c72d42..45cddb44 100644 --- a/amgprec/impl/aggregator/processCrossEdge.cpp +++ b/amgprec/impl/aggregator/processCrossEdge.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void PROCESS_CROSS_EDGE(MilanLongInt *edge, MilanLongInt *S) { diff --git a/amgprec/impl/aggregator/processExposedVertex.cpp b/amgprec/impl/aggregator/processExposedVertex.cpp index c7ac4703..ba07425a 100644 --- a/amgprec/impl/aggregator/processExposedVertex.cpp +++ b/amgprec/impl/aggregator/processExposedVertex.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, MilanLongInt *candidateMate, MilanLongInt *verLocInd, diff --git a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp index 4a9cfcba..e02dd9c7 100644 --- a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp +++ b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP //#define DEBUG_HANG_ void processMatchedVerticesAndSendMessages( MilanLongInt NLVer, diff --git a/amgprec/impl/aggregator/processMessages.cpp b/amgprec/impl/aggregator/processMessages.cpp index 6ac3f541..dc09cde1 100644 --- a/amgprec/impl/aggregator/processMessages.cpp +++ b/amgprec/impl/aggregator/processMessages.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP //#define DEBUG_HANG_ void processMessages( diff --git a/amgprec/impl/aggregator/queueTransfer.cpp b/amgprec/impl/aggregator/queueTransfer.cpp index e51095da..51989f34 100644 --- a/amgprec/impl/aggregator/queueTransfer.cpp +++ b/amgprec/impl/aggregator/queueTransfer.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void queuesTransfer(vector &U, vector &privateU, vector &QLocalVtx, diff --git a/amgprec/impl/aggregator/sendBundledMessages.cpp b/amgprec/impl/aggregator/sendBundledMessages.cpp index 919dc7e9..3349ce86 100644 --- a/amgprec/impl/aggregator/sendBundledMessages.cpp +++ b/amgprec/impl/aggregator/sendBundledMessages.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void sendBundledMessages(MilanLongInt *numGhostEdges, MilanInt *BufferSize, MilanLongInt *Buffer, From 67594f8b07c5d7547af166c5040f779fc8b37b1c Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 30 May 2024 17:35:15 +0200 Subject: [PATCH 8/8] Fixes for OpenMP --- amgprec/impl/aggregator/MatchBoxPC.cpp | 2 +- amgprec/impl/aggregator/processMatchedVertices.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/amgprec/impl/aggregator/MatchBoxPC.cpp b/amgprec/impl/aggregator/MatchBoxPC.cpp index 146e8aa1..a43fb2f5 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.cpp +++ b/amgprec/impl/aggregator/MatchBoxPC.cpp @@ -74,7 +74,7 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, // Rimosso per tornare al vecchio matching #define OMP #ifdef OPENMP - fprintf(stderr,"Warning: using buggy OpenMP matching!\n"); + //fprintf(stderr,"Warning: using buggy OpenMP matching!\n"); dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(NLVer, NLEdge, verLocPtr, verLocInd, edgeLocWeight, verDistance, Mate, diff --git a/amgprec/impl/aggregator/processMatchedVertices.cpp b/amgprec/impl/aggregator/processMatchedVertices.cpp index d88199a6..77ec34bb 100644 --- a/amgprec/impl/aggregator/processMatchedVertices.cpp +++ b/amgprec/impl/aggregator/processMatchedVertices.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void processMatchedVertices( MilanLongInt NLVer, vector &UChunkBeingProcessed,