From 02a83575a2580c8c5cac4a63a4587be840bb910e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 5 Jun 2024 13:11:41 +0200 Subject: [PATCH 1/5] Reorganize MatchBox (prepare for S OpenMP) --- amgprec/impl/aggregator/MatchBoxPC.cpp | 3 +- amgprec/impl/aggregator/MatchBoxPC.h | 499 +++++++++--------- ...mEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 10 +- amgprec/impl/aggregator/clean.cpp | 2 - .../impl/aggregator/computeCandidateMate.cpp | 6 +- amgprec/impl/aggregator/extractUChunk.cpp | 2 - amgprec/impl/aggregator/findOwnerOfGhost.cpp | 2 - amgprec/impl/aggregator/initialize.cpp | 2 - amgprec/impl/aggregator/isAlreadyMatched.cpp | 2 - .../parallelComputeCandidateMateB.cpp | 7 +- amgprec/impl/aggregator/processCrossEdge.cpp | 2 - .../impl/aggregator/processExposedVertex.cpp | 6 +- .../aggregator/processMatchedVertices.cpp | 6 +- .../processMatchedVerticesAndSendMessages.cpp | 6 +- amgprec/impl/aggregator/processMessages.cpp | 6 +- amgprec/impl/aggregator/queueTransfer.cpp | 2 - .../impl/aggregator/sendBundledMessages.cpp | 2 - 17 files changed, 265 insertions(+), 300 deletions(-) diff --git a/amgprec/impl/aggregator/MatchBoxPC.cpp b/amgprec/impl/aggregator/MatchBoxPC.cpp index aa2658ea..65a910b1 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.cpp +++ b/amgprec/impl/aggregator/MatchBoxPC.cpp @@ -72,7 +72,8 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, double tmr = MPI_Wtime(); #endif -#if defined(OPENMP) +#if 1 + // defined(OPENMP) //fprintf(stderr,"Warning: using buggy OpenMP matching!\n"); dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(NLVer, NLEdge, verLocPtr, verLocInd, edgeLocWeight, diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h index 24fd3134..4214993e 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.h +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -178,264 +178,257 @@ extern "C" #define MilanRealMin MINUS_INFINITY #endif -#ifdef OPENMP /* These functions are only used in the experimental OMP 
implementation, if that is disabled there is no reason to actually compile or reference them. */ - // Function of find the owner of a ghost vertex using binary search: - MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, - MilanInt myRank, MilanInt numProcs); - - MilanLongInt firstComputeCandidateMate(MilanLongInt adj1, - MilanLongInt adj2, - MilanLongInt *verLocInd, - MilanReal *edgeLocWeight); - - void queuesTransfer(vector &U, - vector &privateU, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - vector &privateQLocalVtx, - vector &privateQGhostVtx, - vector &privateQMsgType, - vector &privateQOwner); - - bool isAlreadyMatched(MilanLongInt node, - MilanLongInt StartIndex, - MilanLongInt EndIndex, - vector &GMate, - MilanLongInt *Mate, - map &Ghost2LocalMap); - - MilanLongInt computeCandidateMate(MilanLongInt adj1, - MilanLongInt adj2, - MilanReal *edgeLocWeight, - MilanLongInt k, - MilanLongInt *verLocInd, - MilanLongInt StartIndex, - MilanLongInt EndIndex, - vector &GMate, - MilanLongInt *Mate, - map &Ghost2LocalMap); - - void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, - MilanLongInt StartIndex, MilanLongInt EndIndex, - MilanLongInt *numGhostEdgesPtr, - MilanLongInt *numGhostVerticesPtr, - MilanLongInt *S, - MilanLongInt *verLocInd, - MilanLongInt *verLocPtr, - map &Ghost2LocalMap, - vector &Counter, - vector &verGhostPtr, - vector &verGhostInd, - vector &tempCounter, - vector &GMate, - vector &Message, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - MilanLongInt *&candidateMate, - vector &U, - vector &privateU, - vector &privateQLocalVtx, - vector &privateQGhostVtx, - vector &privateQMsgType, - vector &privateQOwner); - - void clean(MilanLongInt NLVer, - MilanInt myRank, - MilanLongInt MessageIndex, - vector &SRequest, - vector &SStatus, - MilanInt BufferSize, - MilanLongInt *Buffer, - MilanLongInt msgActual, - MilanLongInt *msgActualSent, - MilanLongInt msgInd, 
- MilanLongInt *msgIndSent, - MilanLongInt NumMessagesBundled, - MilanReal *msgPercent); - - void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer, - MilanLongInt *verLocPtr, - MilanLongInt *verLocInd, - MilanInt myRank, - MilanReal *edgeLocWeight, - MilanLongInt *candidateMate); - - void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, - MilanLongInt *candidateMate, - MilanLongInt *verLocInd, - MilanLongInt *verLocPtr, - MilanLongInt StartIndex, - MilanLongInt EndIndex, - MilanLongInt *Mate, - vector &GMate, - map &Ghost2LocalMap, - MilanReal *edgeLocWeight, - MilanLongInt *myCardPtr, - MilanLongInt *msgIndPtr, - MilanLongInt *NumMessagesBundledPtr, - MilanLongInt *SPtr, - MilanLongInt *verDistance, - MilanLongInt *PCounter, - vector &Counter, - MilanInt myRank, - MilanInt numProcs, - vector &U, - vector &privateU, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - vector &privateQLocalVtx, - vector &privateQGhostVtx, - vector &privateQMsgType, - vector &privateQOwner); - - void PROCESS_CROSS_EDGE(MilanLongInt *edge, - MilanLongInt *SPtr); - - void processMatchedVertices( - MilanLongInt NLVer, - vector &UChunkBeingProcessed, - vector &U, - vector &privateU, - MilanLongInt StartIndex, - MilanLongInt EndIndex, - MilanLongInt *myCardPtr, - MilanLongInt *msgIndPtr, - MilanLongInt *NumMessagesBundledPtr, - MilanLongInt *SPtr, - MilanLongInt *verLocPtr, - MilanLongInt *verLocInd, - MilanLongInt *verDistance, - MilanLongInt *PCounter, - vector &Counter, - MilanInt myRank, - MilanInt numProcs, - MilanLongInt *candidateMate, - vector &GMate, - MilanLongInt *Mate, - map &Ghost2LocalMap, - MilanReal *edgeLocWeight, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - vector &privateQLocalVtx, - vector &privateQGhostVtx, - vector &privateQMsgType, - vector &privateQOwner); - - void processMatchedVerticesAndSendMessages( - MilanLongInt NLVer, - vector &UChunkBeingProcessed, - vector &U, - vector &privateU, - 
MilanLongInt StartIndex, - MilanLongInt EndIndex, - MilanLongInt *myCardPtr, - MilanLongInt *msgIndPtr, - MilanLongInt *NumMessagesBundledPtr, - MilanLongInt *SPtr, - MilanLongInt *verLocPtr, - MilanLongInt *verLocInd, - MilanLongInt *verDistance, - MilanLongInt *PCounter, - vector &Counter, - MilanInt myRank, - MilanInt numProcs, - MilanLongInt *candidateMate, - vector &GMate, - MilanLongInt *Mate, - map &Ghost2LocalMap, - MilanReal *edgeLocWeight, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - vector &privateQLocalVtx, - vector &privateQGhostVtx, - vector &privateQMsgType, - vector &privateQOwner, - MPI_Comm comm, - MilanLongInt *msgActual, - vector &Message); - - void sendBundledMessages(MilanLongInt *numGhostEdgesPtr, - MilanInt *BufferSizePtr, - MilanLongInt *Buffer, - vector &PCumulative, - vector &PMessageBundle, - vector &PSizeInfoMessages, - MilanLongInt *PCounter, - MilanLongInt NumMessagesBundled, - MilanLongInt *msgActualPtr, - MilanLongInt *MessageIndexPtr, - MilanInt numProcs, - MilanInt myRank, - MPI_Comm comm, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - vector &SRequest, - vector &SStatus); - - void processMessages( - MilanLongInt NLVer, - MilanLongInt *Mate, - MilanLongInt *candidateMate, - map &Ghost2LocalMap, - vector &GMate, - vector &Counter, - MilanLongInt StartIndex, - MilanLongInt EndIndex, - MilanLongInt *myCardPtr, - MilanLongInt *msgIndPtr, - MilanLongInt *msgActualPtr, - MilanReal *edgeLocWeight, - MilanLongInt *verDistance, - MilanLongInt *verLocPtr, - MilanLongInt k, - MilanLongInt *verLocInd, - MilanInt numProcs, - MilanInt myRank, - MPI_Comm comm, - vector &Message, - MilanLongInt numGhostEdges, - MilanLongInt u, - MilanLongInt v, - MilanLongInt *SPtr, - vector &U); - - void extractUChunk( - vector &UChunkBeingProcessed, - vector &U, - vector &privateU); - - void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( - MilanLongInt NLVer, MilanLongInt 
NLEdge, - MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanReal *edgeLocWeight, - MilanLongInt *verDistance, - MilanLongInt *Mate, - MilanInt myRank, MilanInt numProcs, MPI_Comm comm, - MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent, - MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time, - MilanLongInt *ph1_card, MilanLongInt *ph2_card); -#endif + // Function of find the owner of a ghost vertex using binary search: + MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, + MilanInt myRank, MilanInt numProcs); + + MilanLongInt firstComputeCandidateMateD(MilanLongInt adj1, + MilanLongInt adj2, + MilanLongInt *verLocInd, + MilanReal *edgeLocWeight); + + void queuesTransfer(vector &U, + vector &privateU, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); + + bool isAlreadyMatched(MilanLongInt node, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap); + + MilanLongInt computeCandidateMateD(MilanLongInt adj1, + MilanLongInt adj2, + MilanReal *edgeLocWeight, + MilanLongInt k, + MilanLongInt *verLocInd, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap); + + void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt StartIndex, MilanLongInt EndIndex, + MilanLongInt *numGhostEdgesPtr, + MilanLongInt *numGhostVerticesPtr, + MilanLongInt *S, + MilanLongInt *verLocInd, + MilanLongInt *verLocPtr, + map &Ghost2LocalMap, + vector &Counter, + vector &verGhostPtr, + vector &verGhostInd, + vector &tempCounter, + vector &GMate, + vector &Message, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + MilanLongInt *&candidateMate, + vector &U, + vector &privateU, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + 
vector &privateQMsgType, + vector &privateQOwner); + + void clean(MilanLongInt NLVer, + MilanInt myRank, + MilanLongInt MessageIndex, + vector &SRequest, + vector &SStatus, + MilanInt BufferSize, + MilanLongInt *Buffer, + MilanLongInt msgActual, + MilanLongInt *msgActualSent, + MilanLongInt msgInd, + MilanLongInt *msgIndSent, + MilanLongInt NumMessagesBundled, + MilanReal *msgPercent); + + void PARALLEL_COMPUTE_CANDIDATE_MATE_BD(MilanLongInt NLVer, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanInt myRank, + MilanReal *edgeLocWeight, + MilanLongInt *candidateMate); + + void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, + MilanLongInt *candidateMate, + MilanLongInt *verLocInd, + MilanLongInt *verLocPtr, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *Mate, + vector &GMate, + map &Ghost2LocalMap, + MilanReal *edgeLocWeight, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *NumMessagesBundledPtr, + MilanLongInt *SPtr, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + vector &U, + vector &privateU, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); + + void PROCESS_CROSS_EDGE(MilanLongInt *edge, + MilanLongInt *SPtr); + + void processMatchedVerticesD( + MilanLongInt NLVer, + vector &UChunkBeingProcessed, + vector &U, + vector &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *NumMessagesBundledPtr, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanReal *edgeLocWeight, + vector &QLocalVtx, + 
vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); + + void processMatchedVerticesAndSendMessagesD( + MilanLongInt NLVer, + vector &UChunkBeingProcessed, + vector &U, + vector &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *NumMessagesBundledPtr, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanReal *edgeLocWeight, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner, + MPI_Comm comm, + MilanLongInt *msgActual, + vector &Message); + + void sendBundledMessages(MilanLongInt *numGhostEdgesPtr, + MilanInt *BufferSizePtr, + MilanLongInt *Buffer, + vector &PCumulative, + vector &PMessageBundle, + vector &PSizeInfoMessages, + MilanLongInt *PCounter, + MilanLongInt NumMessagesBundled, + MilanLongInt *msgActualPtr, + MilanLongInt *MessageIndexPtr, + MilanInt numProcs, + MilanInt myRank, + MPI_Comm comm, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &SRequest, + vector &SStatus); + + void processMessagesD( + MilanLongInt NLVer, + MilanLongInt *Mate, + MilanLongInt *candidateMate, + map &Ghost2LocalMap, + vector &GMate, + vector &Counter, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *msgActualPtr, + MilanReal *edgeLocWeight, + MilanLongInt *verDistance, + MilanLongInt *verLocPtr, + MilanLongInt k, + MilanLongInt *verLocInd, + MilanInt numProcs, + MilanInt myRank, + MPI_Comm comm, + vector 
&Message, + MilanLongInt numGhostEdges, + MilanLongInt u, + MilanLongInt v, + MilanLongInt *SPtr, + vector &U); + + void extractUChunk( + vector &UChunkBeingProcessed, + vector &U, + vector &privateU); + + void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( + MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanReal *edgeLocWeight, + MilanLongInt *verDistance, + MilanLongInt *Mate, + MilanInt myRank, MilanInt numProcs, MPI_Comm comm, + MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent, + MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time, + MilanLongInt *ph1_card, MilanLongInt *ph2_card); -#ifndef OPENMP - //Function of find the owner of a ghost vertex using binary search: - inline MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, - MilanInt myRank, MilanInt numProcs); -#endif void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC( MilanLongInt NLVer, MilanLongInt NLEdge, diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index 668ed626..c9199ea5 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP // *********************************************************************** // // MatchboxP: A C++ library for approximate weighted matching @@ -244,7 +243,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( * PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel. 
*/ - PARALLEL_COMPUTE_CANDIDATE_MATE_B(NLVer, + PARALLEL_COMPUTE_CANDIDATE_MATE_BD(NLVer, verLocPtr, verLocInd, myRank, @@ -321,7 +320,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( vector UChunkBeingProcessed; UChunkBeingProcessed.reserve(UCHUNK); - processMatchedVertices(NLVer, + processMatchedVerticesD(NLVer, UChunkBeingProcessed, U, privateU, @@ -430,7 +429,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( /////////////////////////// PROCESS MATCHED VERTICES ////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// - processMatchedVerticesAndSendMessages(NLVer, + processMatchedVerticesAndSendMessagesD(NLVer, UChunkBeingProcessed, U, privateU, @@ -491,7 +490,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( /////////////////////////// PROCESS MESSAGES ////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// - processMessages(NLVer, + processMessagesD(NLVer, Mate, candidateMate, Ghost2LocalMap, @@ -559,4 +558,3 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( #endif #endif -#endif diff --git a/amgprec/impl/aggregator/clean.cpp b/amgprec/impl/aggregator/clean.cpp index 479dcce3..5a3bed01 100644 --- a/amgprec/impl/aggregator/clean.cpp +++ b/amgprec/impl/aggregator/clean.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP // TODO comment void clean(MilanLongInt NLVer, @@ -89,4 +88,3 @@ void clean(MilanLongInt NLVer, } } } -#endif diff --git a/amgprec/impl/aggregator/computeCandidateMate.cpp b/amgprec/impl/aggregator/computeCandidateMate.cpp index f70b8866..26bcbb4d 100644 --- a/amgprec/impl/aggregator/computeCandidateMate.cpp +++ b/amgprec/impl/aggregator/computeCandidateMate.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP /** * Execute the research fr the Candidate Mate without controlling if the vertices are already matched. 
* Returns the vertices with the highest weight @@ -9,7 +8,7 @@ * @param edgeLocWeight * @return */ -MilanLongInt firstComputeCandidateMate(MilanLongInt adj1, +MilanLongInt firstComputeCandidateMateD(MilanLongInt adj1, MilanLongInt adj2, MilanLongInt *verLocInd, MilanReal *edgeLocWeight) @@ -42,7 +41,7 @@ MilanLongInt firstComputeCandidateMate(MilanLongInt adj1, * @param Ghost2LocalMap * @return */ -MilanLongInt computeCandidateMate(MilanLongInt adj1, +MilanLongInt computeCandidateMateD(MilanLongInt adj1, MilanLongInt adj2, MilanReal *edgeLocWeight, MilanLongInt k, @@ -71,4 +70,3 @@ MilanLongInt computeCandidateMate(MilanLongInt adj1, return w; } -#endif diff --git a/amgprec/impl/aggregator/extractUChunk.cpp b/amgprec/impl/aggregator/extractUChunk.cpp index 4e50a4f3..9f5bdfe2 100644 --- a/amgprec/impl/aggregator/extractUChunk.cpp +++ b/amgprec/impl/aggregator/extractUChunk.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP void extractUChunk( vector &UChunkBeingProcessed, vector &U, @@ -29,4 +28,3 @@ void extractUChunk( } // End of critical U // End of critical U } -#endif diff --git a/amgprec/impl/aggregator/findOwnerOfGhost.cpp b/amgprec/impl/aggregator/findOwnerOfGhost.cpp index 2723a7a3..779a5e7f 100644 --- a/amgprec/impl/aggregator/findOwnerOfGhost.cpp +++ b/amgprec/impl/aggregator/findOwnerOfGhost.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP /// Find the owner of a ghost node: MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, MilanInt myRank, MilanInt numProcs) @@ -27,4 +26,3 @@ MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, return Current; } // End of findOwnerOfGhost() -#endif diff --git a/amgprec/impl/aggregator/initialize.cpp b/amgprec/impl/aggregator/initialize.cpp index 2c8f052d..baac9e8c 100644 --- a/amgprec/impl/aggregator/initialize.cpp +++ b/amgprec/impl/aggregator/initialize.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP void initialize(MilanLongInt NLVer, 
MilanLongInt NLEdge, MilanLongInt StartIndex, MilanLongInt EndIndex, MilanLongInt *numGhostEdges, @@ -302,4 +301,3 @@ void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, } // End of single region } // End of parallel region } -#endif diff --git a/amgprec/impl/aggregator/isAlreadyMatched.cpp b/amgprec/impl/aggregator/isAlreadyMatched.cpp index 16d47a14..62cdca4b 100644 --- a/amgprec/impl/aggregator/isAlreadyMatched.cpp +++ b/amgprec/impl/aggregator/isAlreadyMatched.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP /** * //TODO documentation * @param k @@ -44,4 +43,3 @@ bool isAlreadyMatched(MilanLongInt node, return val >= 0; // Already matched } -#endif diff --git a/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp b/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp index 79f253eb..7576f900 100644 --- a/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp +++ b/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp @@ -1,6 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OPENMP -void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer, +void PARALLEL_COMPUTE_CANDIDATE_MATE_BD(MilanLongInt NLVer, MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanInt myRank, @@ -20,9 +19,9 @@ void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer, fflush(stdout); #endif // Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) - candidateMate[v] = firstComputeCandidateMate(verLocPtr[v], verLocPtr[v + 1], verLocInd, edgeLocWeight); + candidateMate[v] = firstComputeCandidateMateD(verLocPtr[v], verLocPtr[v + 1], + verLocInd, edgeLocWeight); // End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) } } } -#endif diff --git a/amgprec/impl/aggregator/processCrossEdge.cpp b/amgprec/impl/aggregator/processCrossEdge.cpp index 45cddb44..d9d557a6 100644 --- a/amgprec/impl/aggregator/processCrossEdge.cpp +++ b/amgprec/impl/aggregator/processCrossEdge.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP void PROCESS_CROSS_EDGE(MilanLongInt *edge, MilanLongInt *S) { @@ -22,4 
+21,3 @@ void PROCESS_CROSS_EDGE(MilanLongInt *edge, // End: PARALLEL_PROCESS_CROSS_EDGE_B } -#endif diff --git a/amgprec/impl/aggregator/processExposedVertex.cpp b/amgprec/impl/aggregator/processExposedVertex.cpp index c53af9bb..f91109ca 100644 --- a/amgprec/impl/aggregator/processExposedVertex.cpp +++ b/amgprec/impl/aggregator/processExposedVertex.cpp @@ -1,5 +1,4 @@ -#include "MatchBoxPC.h" -#ifdef OPENMP +#include "MatchBoxPC.h" void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, MilanLongInt *candidateMate, MilanLongInt *verLocInd, @@ -66,7 +65,7 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, #pragma omp critical(Matching) { if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) { - w = computeCandidateMate(verLocPtr[v], verLocPtr[v + 1], edgeLocWeight, 0, + w = computeCandidateMateD(verLocPtr[v], verLocPtr[v + 1], edgeLocWeight, 0, verLocInd, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap); candidateMate[v] = w; @@ -181,4 +180,3 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, } // End of parallel region } -#endif diff --git a/amgprec/impl/aggregator/processMatchedVertices.cpp b/amgprec/impl/aggregator/processMatchedVertices.cpp index 457e0de8..62db3efc 100644 --- a/amgprec/impl/aggregator/processMatchedVertices.cpp +++ b/amgprec/impl/aggregator/processMatchedVertices.cpp @@ -1,6 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OPENMP -void processMatchedVertices( +void processMatchedVerticesD( MilanLongInt NLVer, vector &UChunkBeingProcessed, vector &U, @@ -98,7 +97,7 @@ void processMatchedVertices( if (candidateMate[v - StartIndex] == u) { // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - w = computeCandidateMate(verLocPtr[v - StartIndex], + w = computeCandidateMateD(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], edgeLocWeight, 0, verLocInd, StartIndex, EndIndex, @@ -290,4 +289,3 @@ void processMatchedVertices( #endif } // End of parallel region } -#endif diff --git 
a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp index e75fa8db..09ed7ab4 100644 --- a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp +++ b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp @@ -1,7 +1,6 @@ #include "MatchBoxPC.h" -#ifdef OPENMP //#define DEBUG_HANG_ -void processMatchedVerticesAndSendMessages( +void processMatchedVerticesAndSendMessagesD( MilanLongInt NLVer, vector &UChunkBeingProcessed, vector &U, @@ -103,7 +102,7 @@ void processMatchedVerticesAndSendMessages( if (candidateMate[v - StartIndex] == u) { // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - w = computeCandidateMate(verLocPtr[v - StartIndex], + w = computeCandidateMateD(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], edgeLocWeight, 0, verLocInd, StartIndex, EndIndex, @@ -293,4 +292,3 @@ void processMatchedVerticesAndSendMessages( cout << myRank<<" Done sending messages"< &U, vector &privateU, vector &QLocalVtx, @@ -31,4 +30,3 @@ void queuesTransfer(vector &U, privateQOwner.clear(); } -#endif diff --git a/amgprec/impl/aggregator/sendBundledMessages.cpp b/amgprec/impl/aggregator/sendBundledMessages.cpp index 3349ce86..54736f7a 100644 --- a/amgprec/impl/aggregator/sendBundledMessages.cpp +++ b/amgprec/impl/aggregator/sendBundledMessages.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP void sendBundledMessages(MilanLongInt *numGhostEdges, MilanInt *BufferSize, MilanLongInt *Buffer, @@ -207,4 +206,3 @@ void sendBundledMessages(MilanLongInt *numGhostEdges, } } } -#endif From 677e4fe6bc0e9efa8b8cef949f73304e55c66268 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 5 Jun 2024 15:13:18 +0200 Subject: [PATCH 2/5] Modify MatchBox names with D in preparation for S version --- amgprec/impl/aggregator/MatchBoxPC.h | 2 +- ...goDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 2 +- amgprec/impl/aggregator/processExposedVertex.cpp | 2 +- 3 files 
changed, 3 insertions(+), 3 deletions(-) diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h index 4214993e..8e12c49c 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.h +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -266,7 +266,7 @@ is disabled there is no reason to actually compile or reference them. */ MilanReal *edgeLocWeight, MilanLongInt *candidateMate); - void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, + void PARALLEL_PROCESS_EXPOSED_VERTEX_BD(MilanLongInt NLVer, MilanLongInt *candidateMate, MilanLongInt *verLocInd, MilanLongInt *verLocPtr, diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index c9199ea5..cc9d1d52 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -268,7 +268,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( * TODO: Test when it's actually more efficient to execute this code * in parallel. */ - PARALLEL_PROCESS_EXPOSED_VERTEX_B(NLVer, + PARALLEL_PROCESS_EXPOSED_VERTEX_BD(NLVer, candidateMate, verLocInd, verLocPtr, diff --git a/amgprec/impl/aggregator/processExposedVertex.cpp b/amgprec/impl/aggregator/processExposedVertex.cpp index f91109ca..d0dfe96b 100644 --- a/amgprec/impl/aggregator/processExposedVertex.cpp +++ b/amgprec/impl/aggregator/processExposedVertex.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, +void PARALLEL_PROCESS_EXPOSED_VERTEX_BD(MilanLongInt NLVer, MilanLongInt *candidateMate, MilanLongInt *verLocInd, MilanLongInt *verLocPtr, From 803d311d1c84fa00bc6f00bf81af169e3665c426 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 6 Jun 2024 17:12:31 +0200 Subject: [PATCH 3/5] S versions. 
Take out parallel in a few places --- amgprec/impl/aggregator/MatchBoxPC.cpp | 40 +- amgprec/impl/aggregator/MatchBoxPC.h | 159 ++++++ ...DomEdgesLinearSearchMesgBndlSmallMateC.cpp | 20 +- ...mEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 483 ++++++++++++++++++ .../impl/aggregator/computeCandidateMate.cpp | 64 +++ .../parallelComputeCandidateMateB.cpp | 27 + .../impl/aggregator/processExposedVertex.cpp | 188 ++++++- .../aggregator/processMatchedVertices.cpp | 293 +++++++++++ .../processMatchedVerticesAndSendMessages.cpp | 295 +++++++++++ amgprec/impl/aggregator/processMessages.cpp | 320 +++++++++++- .../impl/aggregator/sendBundledMessages.cpp | 36 +- 11 files changed, 1879 insertions(+), 46 deletions(-) diff --git a/amgprec/impl/aggregator/MatchBoxPC.cpp b/amgprec/impl/aggregator/MatchBoxPC.cpp index 65a910b1..c49ce8d4 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.cpp +++ b/amgprec/impl/aggregator/MatchBoxPC.cpp @@ -68,9 +68,9 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, #define TIME_TRACKER - #ifdef TIME_TRACKER - double tmr = MPI_Wtime(); - #endif +#ifdef TIME_TRACKER + double tmr = MPI_Wtime(); +#endif #if 1 // defined(OPENMP) @@ -93,11 +93,11 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, #endif - #ifdef TIME_TRACKER - tmr = MPI_Wtime() - tmr; - fprintf(stderr, "Elaboration time: %f for %ld nodes\n", tmr, NLVer); - #endif - +#ifdef TIME_TRACKER + tmr = MPI_Wtime() - tmr; + fprintf(stderr, "Elaboration time: %f for %ld nodes\n", tmr, NLVer); +#endif + #endif } @@ -115,13 +115,25 @@ void sMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, fprintf(stderr,"MatchBoxPC: rank %d nlver %ld nledge %ld [ %ld %ld ]\n", myRank,NLVer, NLEdge,verDistance[0],verDistance[1]); #endif +#if 1 + // defined(OPENMP) + //fprintf(stderr,"Warning: using buggy OpenMP matching!\n"); + salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(NLVer, NLEdge, + verLocPtr, verLocInd, edgeLocWeight, + verDistance, Mate, + myRank, numProcs, C_comm, + 
msgIndSent, msgActualSent, msgPercent, + ph0_time, ph1_time, ph2_time, + ph1_card, ph2_card ); +#else salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(NLVer, NLEdge, - verLocPtr, verLocInd, edgeLocWeight, - verDistance, Mate, - myRank, numProcs, C_comm, - msgIndSent, msgActualSent, msgPercent, - ph0_time, ph1_time, ph2_time, - ph1_card, ph2_card ); + verLocPtr, verLocInd, edgeLocWeight, + verDistance, Mate, + myRank, numProcs, C_comm, + msgIndSent, msgActualSent, msgPercent, + ph0_time, ph1_time, ph2_time, + ph1_card, ph2_card ); +#endif #endif } diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h index 8e12c49c..f1a7245b 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.h +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -189,6 +189,7 @@ is disabled there is no reason to actually compile or reference them. */ MilanLongInt adj2, MilanLongInt *verLocInd, MilanReal *edgeLocWeight); + void queuesTransfer(vector &U, vector &privateU, @@ -417,6 +418,153 @@ is disabled there is no reason to actually compile or reference them. 
*/ vector &UChunkBeingProcessed, vector &U, vector &privateU); + + MilanLongInt firstComputeCandidateMateS(MilanLongInt adj1, + MilanLongInt adj2, + MilanLongInt *verLocInd, + MilanFloat *edgeLocWeight); + + MilanLongInt computeCandidateMateS(MilanLongInt adj1, + MilanLongInt adj2, + MilanFloat *edgeLocWeight, + MilanLongInt k, + MilanLongInt *verLocInd, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap); + + void PARALLEL_COMPUTE_CANDIDATE_MATE_BS(MilanLongInt NLVer, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanInt myRank, + MilanFloat *edgeLocWeight, + MilanLongInt *candidateMate); + + void PARALLEL_PROCESS_EXPOSED_VERTEX_BS(MilanLongInt NLVer, + MilanLongInt *candidateMate, + MilanLongInt *verLocInd, + MilanLongInt *verLocPtr, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *Mate, + vector &GMate, + map &Ghost2LocalMap, + MilanFloat *edgeLocWeight, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *NumMessagesBundledPtr, + MilanLongInt *SPtr, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + vector &U, + vector &privateU, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); + void processMatchedVerticesS( + MilanLongInt NLVer, + vector &UChunkBeingProcessed, + vector &U, + vector &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *NumMessagesBundledPtr, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanFloat *edgeLocWeight, + vector 
&QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); + + void processMatchedVerticesAndSendMessagesS( + MilanLongInt NLVer, + vector &UChunkBeingProcessed, + vector &U, + vector &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *NumMessagesBundledPtr, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanFloat *edgeLocWeight, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner, + MPI_Comm comm, + MilanLongInt *msgActual, + vector &Message); + + void processMessagesS( + MilanLongInt NLVer, + MilanLongInt *Mate, + MilanLongInt *candidateMate, + map &Ghost2LocalMap, + vector &GMate, + vector &Counter, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *msgActualPtr, + MilanFloat *edgeLocWeight, + MilanLongInt *verDistance, + MilanLongInt *verLocPtr, + MilanLongInt k, + MilanLongInt *verLocInd, + MilanInt numProcs, + MilanInt myRank, + MPI_Comm comm, + vector &Message, + MilanLongInt numGhostEdges, + MilanLongInt u, + MilanLongInt v, + MilanLongInt *SPtr, + vector &U); + void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( MilanLongInt NLVer, MilanLongInt NLEdge, @@ -428,6 +576,17 @@ is disabled there is no reason to actually compile or reference them. 
*/ MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time, MilanLongInt *ph1_card, MilanLongInt *ph2_card); + + void salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( + MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanFloat *edgeLocWeight, + MilanLongInt *verDistance, + MilanLongInt *Mate, + MilanInt myRank, MilanInt numProcs, MPI_Comm comm, + MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent, + MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time, + MilanLongInt *ph1_card, MilanLongInt *ph2_card); + void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC( diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp index f03f726f..6ae18ebb 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp @@ -1303,16 +1303,16 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC( // SINGLE PRECISION VERSION void salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC( - MilanLongInt NLVer, MilanLongInt NLEdge, - MilanLongInt* verLocPtr, MilanLongInt* verLocInd, - MilanFloat* edgeLocWeight, - MilanLongInt* verDistance, - MilanLongInt* Mate, - MilanInt myRank, MilanInt numProcs, MPI_Comm comm, - MilanLongInt* msgIndSent, MilanLongInt* msgActualSent, - MilanReal* msgPercent, - MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time, - MilanLongInt* ph1_card, MilanLongInt* ph2_card ) { + MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt* verLocPtr, MilanLongInt* verLocInd, + MilanFloat* edgeLocWeight, + MilanLongInt* verDistance, + MilanLongInt* Mate, + MilanInt myRank, MilanInt numProcs, MPI_Comm comm, + MilanLongInt* msgIndSent, MilanLongInt* msgActualSent, + MilanReal* msgPercent, + 
MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time, + MilanLongInt* ph1_card, MilanLongInt* ph2_card ) { #if !defined(SERIAL_MPI) #ifdef PRINT_DEBUG_INFO_ cout<<"\n("< Message; // [ u, v, message_type ] + Message.resize(3, -1); + // Data structures for Message Bundling: + // Although up to two messages can be sent along any cross edge, + // only one message will be sent in the initialization phase - + // one of: REQUEST/FAILURE/SUCCESS + vector QLocalVtx, QGhostVtx, QMsgType; + // Changed by Fabio to be an integer, addresses needs to be integers! + vector QOwner; + + MilanLongInt *PCounter = new MilanLongInt[numProcs]; + for (int i = 0; i < numProcs; i++) + PCounter[i] = 0; + + MilanLongInt NumMessagesBundled = 0; + // TODO when the last computational section will be refactored this could be eliminated + // Changed by Fabio to be an integer, addresses needs to be integers! + MilanInt ghostOwner = 0; + MilanLongInt *candidateMate = nullptr; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")NV: " << NLVer << " Edges: " << NLEdge; + fflush(stdout); + cout << "\n(" << myRank << ")StartIndex: " << StartIndex << " EndIndex: " << EndIndex; + fflush(stdout); +#endif + // Other Variables: + MilanLongInt u = -1, v = -1, w = -1, i = 0; + MilanLongInt k = -1, adj1 = -1, adj2 = -1; + MilanLongInt k1 = -1, adj11 = -1, adj12 = -1; + MilanLongInt myCard = 0; + + // Build the Ghost Vertex Set: Vg + // Map each ghost vertex to a local vertex + map Ghost2LocalMap; + // Store the edge count for each ghost vertex + vector Counter; + // Number of Ghost vertices + MilanLongInt numGhostVertices = 0, numGhostEdges = 0; + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")About to compute Ghost Vertices..."; + fflush(stdout); +#endif +#ifdef DEBUG_HANG_ + if (myRank == 0) + cout << "\n(" << myRank << ")About to compute Ghost Vertices..."; + fflush(stdout); +#endif + + // Define Adjacency Lists for Ghost Vertices: + // cout<<"Building Ghost data structures ... 
\n\n"; + vector verGhostPtr, verGhostInd, tempCounter; + // Mate array for ghost vertices: + vector GMate; // Proportional to the number of ghost vertices + MilanLongInt S; + MilanLongInt privateMyCard = 0; + vector PCumulative, PMessageBundle, PSizeInfoMessages; + vector SRequest; // Requests that are used for each send message + vector SStatus; // Status of sent messages, used in MPI_Wait + MilanLongInt MessageIndex = 0; // Pointer for current message + MilanInt BufferSize; + MilanLongInt *Buffer; + + vector privateQLocalVtx, privateQGhostVtx, privateQMsgType; + vector privateQOwner; + vector U, privateU; + + + initialize(NLVer, NLEdge, StartIndex, + EndIndex, &numGhostEdges, + &numGhostVertices, &S, + verLocInd, verLocPtr, + Ghost2LocalMap, Counter, + verGhostPtr, verGhostInd, + tempCounter, GMate, + Message, QLocalVtx, + QGhostVtx, QMsgType, QOwner, + candidateMate, U, + privateU, + privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner); + + finishTime = MPI_Wtime(); + *ph0_time = finishTime - startTime; // Time taken for Phase-0: Initialization +#ifdef DEBUG_HANG_ + cout << myRank << " Finished initialization" << endl; + fflush(stdout); +#endif + + startTime = MPI_Wtime(); + + ///////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////// INITIALIZATION ///////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////////// + // Compute the Initial Matching Set: + + /* + * OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B has been splitted from + * PARALLEL_PROCESS_EXPOSED_VERTEX_B in order to better parallelize + * the two. + * PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel. 
+ */ + + PARALLEL_COMPUTE_CANDIDATE_MATE_BS(NLVer, + verLocPtr, + verLocInd, + myRank, + edgeLocWeight, + candidateMate); + +#ifdef DEBUG_HANG_ + cout << myRank << " Finished Exposed Vertex" << endl; + fflush(stdout); +#if 0 + cout << myRank << " candidateMate after parallelCompute " < UChunkBeingProcessed; + UChunkBeingProcessed.reserve(UCHUNK); + + processMatchedVerticesS(NLVer, + UChunkBeingProcessed, + U, + privateU, + StartIndex, + EndIndex, + &myCard, + &msgInd, + &NumMessagesBundled, + &S, + verLocPtr, + verLocInd, + verDistance, + PCounter, + Counter, + myRank, + numProcs, + candidateMate, + GMate, + Mate, + Ghost2LocalMap, + edgeLocWeight, + QLocalVtx, + QGhostVtx, + QMsgType, + QOwner, + privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner); + + +#ifdef DEBUG_HANG_ + cout << myRank << " Finished Process Vertices" << endl; + fflush(stdout); +#if 0 + cout << myRank << " Mate after Matched Vertices " < &GMate, + map &Ghost2LocalMap, + MilanFloat *edgeLocWeight, + MilanLongInt *myCard, + MilanLongInt *msgInd, + MilanLongInt *NumMessagesBundled, + MilanLongInt *S, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + vector &U, + vector &privateU, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner) +{ + + MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0; + MilanInt ghostOwner = 0, option, igw; + + //#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \ + firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) \ + default(shared) num_threads(NUM_THREAD) + + { + //#pragma omp for reduction(+ \ + : PCounter[:numProcs], myCard \ + [:1], msgInd \ + [:1], NumMessagesBundled \ + [:1]) \ + schedule(static) + for (v = 0; v < NLVer; v++) { + option = -1; + // 
Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + k = candidateMate[v]; + candidateMate[v] = verLocInd[k]; + w = candidateMate[v]; + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Processing: " << v + StartIndex << endl; + fflush(stdout); +#endif + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")" << v + StartIndex << " Points to: " << w; + fflush(stdout); +#endif + // If found a dominating edge: + if (w >= 0) { +#pragma omp critical(Matching) + { + if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) { + w = computeCandidateMateS(verLocPtr[v], verLocPtr[v + 1], edgeLocWeight, 0, + verLocInd, StartIndex, EndIndex, + GMate, Mate, Ghost2LocalMap); + candidateMate[v] = w; + } + if (w >= 0) { + (*myCard)++; + if ((w < StartIndex) || (w > EndIndex)) { // w is a ghost vertex + option = 2; + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) { + option = 1; + Mate[v] = w; + GMate[Ghost2LocalMap[w]] = v + StartIndex; // w is a Ghost + } // End of if CandidateMate[w] = v + } // End of if a Ghost Vertex + else { // w is a local vertex + if (candidateMate[w - StartIndex] == (v + StartIndex)) { + option = 3; + Mate[v] = w; // v is local + Mate[w - StartIndex] = v + StartIndex; // w is local +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ") "; + fflush(stdout); +#endif + } // End of if ( candidateMate[w-StartIndex] == (v+StartIndex) ) + } // End of Else + } // End of second if + } + + } // End of if(w >=0) + else { + //#pragma omp critical(adjuse) + { + // This piece of code is executed a really small number of times + adj11 = verLocPtr[v]; + adj12 = verLocPtr[v + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) { // A ghost + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, 
verDistance, myRank, numProcs); + fflush(stdout); +#endif + (*msgInd)++; + (*NumMessagesBundled)++; + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); +#pragma omp atomic + PCounter[ghostOwner]++; + + privateQLocalVtx.push_back(v + StartIndex); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(FAILURE); + privateQOwner.push_back(ghostOwner); + + } // End of if(GHOST) + } // End of for loop + } + } + // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + + switch (option) + { + case -1: + break; + case 1: + privateU.push_back(v + StartIndex); + privateU.push_back(w); + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ")"; + fflush(stdout); +#endif + + // Decrement the counter: + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S); + case 2: +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a request message (291):"; + cout << "\n(" << myRank << ")Local is: " << v + StartIndex << " Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; + fflush(stdout); +#endif + (*msgInd)++; + (*NumMessagesBundled)++; + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); +#pragma omp atomic + PCounter[ghostOwner]++; + + privateQLocalVtx.push_back(v + StartIndex); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(REQUEST); + privateQOwner.push_back(ghostOwner); + break; + case 3: + default: + privateU.push_back(v + StartIndex); + privateU.push_back(w); + break; + } + + } // End of for ( v=0; v < NLVer; v++ ) + + queuesTransfer(U, privateU, QLocalVtx, + QGhostVtx, + QMsgType, QOwner, privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner); + + } // End of parallel region +} diff --git a/amgprec/impl/aggregator/processMatchedVertices.cpp b/amgprec/impl/aggregator/processMatchedVertices.cpp 
index 62db3efc..2b2160e9 100644 --- a/amgprec/impl/aggregator/processMatchedVertices.cpp +++ b/amgprec/impl/aggregator/processMatchedVertices.cpp @@ -289,3 +289,296 @@ void processMatchedVerticesD( #endif } // End of parallel region } + + + +void processMatchedVerticesS( + MilanLongInt NLVer, + vector &UChunkBeingProcessed, + vector &U, + vector &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCard, + MilanLongInt *msgInd, + MilanLongInt *NumMessagesBundled, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanFloat *edgeLocWeight, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner) +{ + + MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner; + int option; + MilanLongInt mateVal; + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << "=========================************===============================" << endl; + fflush(stdout); + fflush(stdout); +#endif + +#ifdef COUNT_LOCAL_VERTEX + MilanLongInt localVertices = 0; +#endif + //#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ + firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, \ + privateQMsgType, privateQOwner, UChunkBeingProcessed) \ + default(shared) num_threads(NUM_THREAD) \ + reduction(+ \ + : msgInd[:1], PCounter \ + [:numProcs], myCard \ + [:1], NumMessagesBundled \ + [:1]) + { + + while (!U.empty()) { + + extractUChunk(UChunkBeingProcessed, U, privateU); + + for (MilanLongInt u : UChunkBeingProcessed) { +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")u: " << u; + fflush(stdout); +#endif + if ((u >= 
StartIndex) && (u <= EndIndex)) { // Process Only the Local Vertices + +#ifdef COUNT_LOCAL_VERTEX + localVertices++; +#endif + + // Get the Adjacency list for u + adj1 = verLocPtr[u - StartIndex]; // Pointer + adj2 = verLocPtr[u - StartIndex + 1]; + for (k = adj1; k < adj2; k++) { + option = -1; + v = verLocInd[k]; + + if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex: + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; + fflush(stdout); +#endif +#pragma omp atomic read + mateVal = Mate[v - StartIndex]; + // If the current vertex is pointing to a matched vertex and is not matched + if (mateVal < 0) { +#pragma omp critical + { +#pragma omp atomic read + mateVal = Mate[v - StartIndex]; + // If the current vertex is pointing to a matched vertex and is not matched + if (mateVal < 0) { + + if (candidateMate[v - StartIndex] == u) { + // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + w = computeCandidateMateS(verLocPtr[v - StartIndex], + verLocPtr[v - StartIndex + 1], + edgeLocWeight, 0, + verLocInd, StartIndex, EndIndex, + GMate, Mate, Ghost2LocalMap); + candidateMate[v - StartIndex] = w; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")" << v << " Points to: " << w; + fflush(stdout); +#endif + // If found a dominating edge: + if (w >= 0) { + if ((w < StartIndex) || (w > EndIndex)) { // A ghost +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a request message:"; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); +#endif + option = 2; + + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) { + option = 1; + Mate[v - StartIndex] = w; // v is a local vertex + GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex + + } // End of if CandidateMate[w] = v + } // End of if a Ghost Vertex + else { // w is a local vertex + if (candidateMate[w - StartIndex] == v) { + option = 3; + Mate[v - 
StartIndex] = w; // v is a local vertex + Mate[w - StartIndex] = v; // w is a local vertex + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; + fflush(stdout); +#endif + } // End of if(CandidateMate(w) = v + } // End of Else + } // End of if(w >=0) + else + option = 4; // End of Else: w == -1 + // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } // End of If (candidateMate[v-StartIndex] == u + } + } // End of task + } // mateval < 0 + } // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: + else { // Neighbor is a ghost vertex + +#pragma omp critical + { + if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) + candidateMate[NLVer + Ghost2LocalMap[v]] = -1; + if (v != Mate[u - StartIndex]) + option = 5; // u is local + } // End of critical + } // End of Else //A Ghost Vertex + + switch (option) + { + case -1: + // No things to do + break; + case 1: + // Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v + privateU.push_back(v); + privateU.push_back(w); + + (*myCard)++; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; + fflush(stdout); +#endif + // Decrement the counter: + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr); + case 2: + + // Found a dominating edge, it is a ghost + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); +#pragma omp atomic + PCounter[ghostOwner]++; + (*NumMessagesBundled)++; + (*msgInd)++; + + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(REQUEST); + privateQOwner.push_back(ghostOwner); + break; + case 3: + privateU.push_back(v); + privateU.push_back(w); + + (*myCard)++; + break; + case 4: + // Could not find a dominating vertex + adj11 = verLocPtr[v - StartIndex]; + adj12 = verLocPtr[v - StartIndex + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < 
StartIndex) || (w > EndIndex)) { // A ghost + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + fflush(stdout); +#endif + + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); +#pragma omp atomic + PCounter[ghostOwner]++; + (*NumMessagesBundled)++; + (*msgInd)++; + + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(FAILURE); + privateQOwner.push_back(ghostOwner); + + } // End of if(GHOST) + } // End of for loop + break; + case 5: + default: + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a success message: "; + cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n"; + fflush(stdout); +#endif + + ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); + + (*NumMessagesBundled)++; + PCounter[ghostOwner]++; + (*msgInd)++; + + privateQLocalVtx.push_back(u); + privateQGhostVtx.push_back(v); + privateQMsgType.push_back(SUCCESS); + privateQOwner.push_back(ghostOwner); + + break; + } // End of switch + } // End of inner for + } + } // End of outer for + + queuesTransfer(U, privateU, QLocalVtx, + QGhostVtx, + QMsgType, QOwner, privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner); + +#pragma omp critical(U) + { + U.insert(U.end(), privateU.begin(), privateU.end()); + } + +#pragma omp critical(sendMessageTransfer) + { + QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end()); + QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end()); + QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end()); + QOwner.insert(QOwner.end(), 
privateQOwner.begin(), privateQOwner.end()); + } + + privateU.clear(); + privateQLocalVtx.clear(); + privateQGhostVtx.clear(); + privateQMsgType.clear(); + privateQOwner.clear(); + + } // End of while ( !U.empty() ) + +#ifdef COUNT_LOCAL_VERTEX + printf("Count local vertexes: %ld for thread %d of processor %d\n", + localVertices, + omp_get_thread_num(), + myRank); + +#endif + } // End of parallel region +} diff --git a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp index 09ed7ab4..40d333a7 100644 --- a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp +++ b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp @@ -292,3 +292,298 @@ void processMatchedVerticesAndSendMessagesD( cout << myRank<<" Done sending messages"< &UChunkBeingProcessed, + vector &U, + vector &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCard, + MilanLongInt *msgInd, + MilanLongInt *NumMessagesBundled, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanFloat *edgeLocWeight, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner, + MPI_Comm comm, + MilanLongInt *msgActual, + vector &Message) +{ + + MilanLongInt initialSize = QLocalVtx.size(); + MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner; + int option; + MilanLongInt mateVal; + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << "=========================************===============================" << endl; + fflush(stdout); + fflush(stdout); +#endif + +#ifdef COUNT_LOCAL_VERTEX + MilanLongInt 
localVertices = 0; +#endif + //#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ + firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx,\ + privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \ + num_threads(NUM_THREAD) \ + reduction(+ \ + : msgInd[:1], PCounter \ + [:numProcs], myCard \ + [:1], NumMessagesBundled \ + [:1], msgActual \ + [:1]) + { + + while (!U.empty()) { + + extractUChunk(UChunkBeingProcessed, U, privateU); + + for (MilanLongInt u : UChunkBeingProcessed) { +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")u: " << u; + fflush(stdout); +#endif + if ((u >= StartIndex) && (u <= EndIndex)) { // Process Only the Local Vertices + +#ifdef COUNT_LOCAL_VERTEX + localVertices++; +#endif + // Get the Adjacency list for u + adj1 = verLocPtr[u - StartIndex]; // Pointer + adj2 = verLocPtr[u - StartIndex + 1]; + for (k = adj1; k < adj2; k++) { + option = -1; + v = verLocInd[k]; + + if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex: + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; + fflush(stdout); +#endif +#pragma omp atomic read + mateVal = Mate[v - StartIndex]; + // If the current vertex is pointing to a matched vertex and is not matched + if (mateVal < 0) { +#pragma omp critical + { +#pragma omp atomic read + mateVal = Mate[v - StartIndex]; + // If the current vertex is pointing to a matched vertex and is not matched + if (mateVal < 0) { + + if (candidateMate[v - StartIndex] == u) { + // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + w = computeCandidateMateS(verLocPtr[v - StartIndex], + verLocPtr[v - StartIndex + 1], + edgeLocWeight, 0, + verLocInd, StartIndex, EndIndex, + GMate, Mate, Ghost2LocalMap); + candidateMate[v - StartIndex] = w; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")" << v << " Points to: " << w; + fflush(stdout); +#endif + // 
If found a dominating edge: + if (w >= 0) { + + if ((w < StartIndex) || (w > EndIndex)) { // A ghost +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a request message:"; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); +#endif + option = 2; + + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) { + option = 1; + Mate[v - StartIndex] = w; // v is a local vertex + GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex + + } // End of if CandidateMate[w] = v + } // End of if a Ghost Vertex + else { // w is a local vertex + if (candidateMate[w - StartIndex] == v) { + option = 3; + Mate[v - StartIndex] = w; // v is a local vertex + Mate[w - StartIndex] = v; // w is a local vertex +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; + fflush(stdout); +#endif + } // End of if(CandidateMate(w) = v + } // End of Else + } // End of if(w >=0) + else + option = 4; // End of Else: w == -1 + // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } // End of If (candidateMate[v-StartIndex] == u + } + } // End of task + } // mateval < 0 + } // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: + else { // Neighbor is a ghost vertex + +#pragma omp critical + { + if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) + candidateMate[NLVer + Ghost2LocalMap[v]] = -1; + if (v != Mate[u - StartIndex]) + option = 5; // u is local + } // End of critical + } // End of Else //A Ghost Vertex + + switch (option) + { + case -1: + // No things to do + break; + case 1: + // Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v + privateU.push_back(v); + privateU.push_back(w); + (*myCard)++; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; + fflush(stdout); +#endif + // Decrement the counter: + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr); + case 2: + + // Found a dominating edge, 
it is a ghost + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + + // Build the Message Packet: + // Message[0] = v; // LOCAL + // Message[1] = w; // GHOST + // Message[2] = REQUEST; // TYPE + // Send a Request (Asynchronous) + // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); + + (*msgActual)++; + (*msgInd)++; + + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(REQUEST); + privateQOwner.push_back(ghostOwner); + break; + case 3: + privateU.push_back(v); + privateU.push_back(w); + (*myCard)++; + break; + case 4: + // Could not find a dominating vertex + adj11 = verLocPtr[v - StartIndex]; + adj12 = verLocPtr[v - StartIndex + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) { // A ghost +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + fflush(stdout); +#endif + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // Build the Message Packet: + // Message[0] = v; // LOCAL + // Message[1] = w; // GHOST + // Message[2] = FAILURE; // TYPE + // Send a Request (Asynchronous) + // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); + + (*msgActual)++; + (*msgInd)++; + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(FAILURE); + privateQOwner.push_back(ghostOwner); + + } // End of if(GHOST) + } // End of for loop + break; + case 5: + default: + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a success message: "; + cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n"; + fflush(stdout); +#endif + + ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); + // Build the Message Packet: + // Message[0] = u; // LOCAL + // 
Message[1] = v; // GHOST + // Message[2] = SUCCESS; // TYPE + // Send a Request (Asynchronous) + // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); + (*msgActual)++; + (*msgInd)++; + privateQLocalVtx.push_back(u); + privateQGhostVtx.push_back(v); + privateQMsgType.push_back(SUCCESS); + privateQOwner.push_back(ghostOwner); + + break; + } // End of switch + } // End of inner for + } + } // End of outer for + + queuesTransfer(U, privateU, QLocalVtx, + QGhostVtx, + QMsgType, QOwner, privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner); + + } // End of while ( !U.empty() ) + +#ifdef COUNT_LOCAL_VERTEX + printf("Count local vertexes: %ld for thread %d of processor %d\n", + localVertices, mp_get_thread_num(), myRank); +#endif + } // End of parallel region + + // Send the messages +#ifdef DEBUG_HANG_ + cout << myRank<<" Sending: "<(), ghostOwner, ComputeTag, comm); + //cout << myRank<<" Sending to "<(), ghostOwner, ComputeTag, comm); + } +#ifdef DEBUG_HANG_ + cout << myRank<<" Done sending messages"<= 0) { + if ((w < StartIndex) || (w > EndIndex)) { + // w is a ghost + // Build the Message Packet: + Message[0] = v; // LOCAL + Message[1] = w; // GHOST + Message[2] = REQUEST; // TYPE + // Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a request message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; + fflush(stdout); +#endif + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + //assert(ghostOwner != -1); + //assert(ghostOwner != myRank); + //cout << myRank<<" Sending to "<(), ghostOwner, ComputeTag, comm); + (*msgInd)++; + (*msgActual)++; + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) { + Mate[v - StartIndex] = w; // v is local + GMate[Ghost2LocalMap[w]] = v; // w is ghost + U.push_back(v); + U.push_back(w); + (*myCard)++; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << 
")MATCH: (" << v << "," << w << ") " << endl; + fflush(stdout); +#endif + + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S); + } // End of if CandidateMate[w] = v + } // End of if a Ghost Vertex + else { // w is a local vertex + if (candidateMate[w - StartIndex] == v) { + Mate[v - StartIndex] = w; // v is local + Mate[w - StartIndex] = v; // w is local + // Q.push_back(u); + U.push_back(v); + U.push_back(w); + (*myCard)++; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl; + fflush(stdout); +#endif + } // End of if(CandidateMate(w) = v + } // End of Else + } // End of if(w >=0) + else { // No dominant edge found + adj11 = verLocPtr[v - StartIndex]; + adj12 = verLocPtr[v - StartIndex + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) { + // A ghost + // Build the Message Packet: + Message[0] = v; // LOCAL + Message[1] = w; // GHOST + Message[2] = FAILURE; // TYPE + // Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; + fflush(stdout); +#endif + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + //assert(ghostOwner != -1); + //assert(ghostOwner != myRank); + //cout << myRank<<" Sending to "<(), ghostOwner, ComputeTag, comm); + (*msgInd)++; + (*msgActual)++; + } // End of if(GHOST) + } // End of for loop + } // End of Else: w == -1 + // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } // End of if ( candidateMate[v-StartIndex] == u ) + } // End of if ( Mate[v] == -1 ) + } // End of if ( message_type == SUCCESS ) + else { + // CASE III: FAILURE +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Message type is FAILURE" << endl; + fflush(stdout); +#endif + GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a 
ghost) process this anymore + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); // Decrease the counter + } // End of else: CASE III + } // End of else: CASE I + } + + return; +} + + +void processMessagesS( + MilanLongInt NLVer, + MilanLongInt *Mate, + MilanLongInt *candidateMate, + map &Ghost2LocalMap, + vector &GMate, + vector &Counter, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCard, + MilanLongInt *msgInd, + MilanLongInt *msgActual, + MilanFloat *edgeLocWeight, + MilanLongInt *verDistance, + MilanLongInt *verLocPtr, + MilanLongInt k, + MilanLongInt *verLocInd, + MilanInt numProcs, + MilanInt myRank, + MPI_Comm comm, + vector &Message, + MilanLongInt numGhostEdges, + MilanLongInt u, + MilanLongInt v, + MilanLongInt *S, + vector &U) +{ + + //#define PRINT_DEBUG_INFO_ + + MilanInt Sender; + MPI_Status computeStatus; + MilanLongInt bundleSize, w; + MilanLongInt adj11, adj12, k1; + MilanLongInt ghostOwner; + int error_codeC; + error_codeC = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN); + char error_message[MPI_MAX_ERROR_STRING]; + int message_length; + MilanLongInt message_type = 0; + + // Buffer to receive bundled messages + // Maximum messages that can be received from any processor is + // twice the edge cut: REQUEST; REQUEST+(FAILURE/SUCCESS) + vector ReceiveBuffer; + try + { + ReceiveBuffer.reserve(numGhostEdges * 2 * 3); // Three integers per cross edge + } + catch (length_error) + { + cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; + cout << "Not enough memory to allocate the internal variables \n"; + exit(1); + } + +#ifdef PRINT_DEBUG_INFO_ + cout + << "\n(" << myRank << "=========================************===============================" << endl; + fflush(stdout); + fflush(stdout); +#endif +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")About to begin Message processing phase ... 
*S=" << *S << endl; + fflush(stdout); +#endif +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << "=========================************===============================" << endl; + fflush(stdout); + fflush(stdout); +#endif + // BLOCKING RECEIVE: +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << " Waiting for blocking receive..." << endl; + fflush(stdout); + fflush(stdout); +#endif + + //cout << myRank<<" Receiving ..."; + error_codeC = MPI_Recv(&Message[0], 3, TypeMap(), MPI_ANY_SOURCE, ComputeTag, comm, &computeStatus); + if (error_codeC != MPI_SUCCESS) + { + MPI_Error_string(error_codeC, error_message, &message_length); + cout << "\n*Error in call to MPI_Receive on Slave: " << error_message << "\n"; + fflush(stdout); + } + Sender = computeStatus.MPI_SOURCE; + //cout << " ...from "<(), Sender, BundleTag, comm, &computeStatus); + if (error_codeC != MPI_SUCCESS) { + MPI_Error_string(error_codeC, error_message, &message_length); + cout << "\n*Error in call to MPI_Receive on processor " << myRank << " Error: " << error_message << "\n"; + fflush(stdout); + } +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Message Bundle After: " << endl; + for (int i = 0; i < bundleSize; i++) + cout << ReceiveBuffer[i] << ","; + cout << endl; + fflush(stdout); +#endif + } else { // Just a single message: +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Received regular message from Process " << Sender << " u= " << Message[0] << " v= " << Message[1] << endl; + fflush(stdout); +#endif + // Add the current message to Queue: + bundleSize = 3; //#of integers in the message + // Build the Message Buffer: + if (!ReceiveBuffer.empty()) + ReceiveBuffer.clear(); // Empty it out first + ReceiveBuffer.resize(bundleSize, -1); // Initialize + + ReceiveBuffer[0] = Message[0]; // u + ReceiveBuffer[1] = Message[1]; // v + ReceiveBuffer[2] = Message[2]; // message_type + } + +#ifdef DEBUG_GHOST_ + if ((v < StartIndex) || (v > EndIndex)) { + cout << "\n(" << myRank << ") From 
ReceiveBuffer: This should not happen: u= " << u << " v= " << v << " Type= " << message_type << " StartIndex " << StartIndex << " EndIndex " << EndIndex << endl; + fflush(stdout); + } +#endif +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Processing message: u= " << u << " v= " << v << " Type= " << message_type << endl; + fflush(stdout); +#endif + + // Most of the time bundleSize == 3, thus, it's not worth parallelizing this loop + for (MilanLongInt bundleCounter = 3; bundleCounter < bundleSize + 3; bundleCounter += 3) { + u = ReceiveBuffer[bundleCounter - 3]; // GHOST + v = ReceiveBuffer[bundleCounter - 2]; // LOCAL + message_type = ReceiveBuffer[bundleCounter - 1]; // TYPE + + // CASE I: REQUEST + if (message_type == REQUEST) { +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Message type is REQUEST" << endl; + fflush(stdout); +#endif +#ifdef DEBUG_GHOST_ + if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) { + cout << "\n(" << myRank << ") case 1 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl; + fflush(stdout); + } + +#endif + + if (Mate[v - StartIndex] == -1) { + // Process only if not already matched (v is local) + candidateMate[NLVer + Ghost2LocalMap[u]] = v; // Set CandidateMate for the ghost + if (candidateMate[v - StartIndex] == u) { + GMate[Ghost2LocalMap[u]] = v; // u is ghost + Mate[v - StartIndex] = u; // v is local + U.push_back(v); + U.push_back(u); + (*myCard)++; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << u << ") " << endl; + fflush(stdout); +#endif + + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); + } // End of if ( candidateMate[v-StartIndex] == u ) + } // End of if ( Mate[v] == -1 ) + } // End of REQUEST + else { // CASE II: SUCCESS + if (message_type == SUCCESS) { +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Message type is SUCCESS" << endl; + fflush(stdout); +#endif + GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a
Dummy Mate to make sure that we do not (u is a ghost) process it again + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); +#ifdef DEBUG_GHOST_ + if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) { + cout << "\n(" << myRank << ") case 2 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl; + fflush(stdout); + } +#endif + if (Mate[v - StartIndex] == -1) { + // Process only if not already matched ( v is local) + if (candidateMate[v - StartIndex] == u) { + // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + w = computeCandidateMateS(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], + edgeLocWeight, k,verLocInd, StartIndex, EndIndex, + GMate, Mate, Ghost2LocalMap); candidateMate[v - StartIndex] = w; #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")" << v << " Points to: " << w << endl; diff --git a/amgprec/impl/aggregator/sendBundledMessages.cpp b/amgprec/impl/aggregator/sendBundledMessages.cpp index 54736f7a..debabf7e 100644 --- a/amgprec/impl/aggregator/sendBundledMessages.cpp +++ b/amgprec/impl/aggregator/sendBundledMessages.cpp @@ -1,23 +1,23 @@ #include "MatchBoxPC.h" void sendBundledMessages(MilanLongInt *numGhostEdges, - MilanInt *BufferSize, - MilanLongInt *Buffer, - vector &PCumulative, - vector &PMessageBundle, - vector &PSizeInfoMessages, - MilanLongInt *PCounter, - MilanLongInt NumMessagesBundled, - MilanLongInt *msgActual, - MilanLongInt *msgInd, - MilanInt numProcs, - MilanInt myRank, - MPI_Comm comm, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - vector &SRequest, - vector &SStatus) + MilanInt *BufferSize, + MilanLongInt *Buffer, + vector &PCumulative, + vector &PMessageBundle, + vector &PSizeInfoMessages, + MilanLongInt *PCounter, + MilanLongInt NumMessagesBundled, + MilanLongInt *msgActual, + MilanLongInt *msgInd, + MilanInt numProcs, + MilanInt myRank, + MPI_Comm comm, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector 
&SRequest, + vector &SStatus) { MilanLongInt myIndex = 0, numMessagesToSend; From 818ead5878c2c0070ceeaa68dbd9db6a132eef1d Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 11 Jun 2024 09:52:11 +0200 Subject: [PATCH 4/5] Try changes for matching --- amgprec/amg_s_matchboxp_mod.f90 | 2 -- amgprec/impl/aggregator/processMatchedVertices.cpp | 2 +- .../impl/aggregator/processMatchedVerticesAndSendMessages.cpp | 4 ++-- amgprec/impl/level/amg_d_base_onelev_memory_use.f90 | 3 +-- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/amgprec/amg_s_matchboxp_mod.f90 b/amgprec/amg_s_matchboxp_mod.f90 index a7f41c24..04194836 100644 --- a/amgprec/amg_s_matchboxp_mod.f90 +++ b/amgprec/amg_s_matchboxp_mod.f90 @@ -272,9 +272,7 @@ contains write(0,*) 'Impossible: mate(k) > nc' cycle else - if (ilaggr(k) == ilaggr_neginit) then - wk = w(k) widx = w(idx) wmax = max(abs(wk),abs(widx)) diff --git a/amgprec/impl/aggregator/processMatchedVertices.cpp b/amgprec/impl/aggregator/processMatchedVertices.cpp index 2b2160e9..531c9d32 100644 --- a/amgprec/impl/aggregator/processMatchedVertices.cpp +++ b/amgprec/impl/aggregator/processMatchedVertices.cpp @@ -338,7 +338,7 @@ void processMatchedVerticesS( #ifdef COUNT_LOCAL_VERTEX MilanLongInt localVertices = 0; #endif - //#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ +#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, \ privateQMsgType, privateQOwner, UChunkBeingProcessed) \ default(shared) num_threads(NUM_THREAD) \ diff --git a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp index 40d333a7..1631348d 100644 --- a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp +++ b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp @@ -26,7 +26,7 @@ void 
processMatchedVerticesAndSendMessagesD( vector &QLocalVtx, vector &QGhostVtx, vector &QMsgType, - vector &QOwner, + vector &QOwner, vector &privateQLocalVtx, vector &privateQGhostVtx, vector &privateQMsgType, @@ -345,7 +345,7 @@ void processMatchedVerticesAndSendMessagesS( #ifdef COUNT_LOCAL_VERTEX MilanLongInt localVertices = 0; #endif - //#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ +#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx,\ privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \ num_threads(NUM_THREAD) \ diff --git a/amgprec/impl/level/amg_d_base_onelev_memory_use.f90 b/amgprec/impl/level/amg_d_base_onelev_memory_use.f90 index 25534fd0..da56851c 100644 --- a/amgprec/impl/level/amg_d_base_onelev_memory_use.f90 +++ b/amgprec/impl/level/amg_d_base_onelev_memory_use.f90 @@ -98,8 +98,7 @@ subroutine amg_d_base_onelev_memory_use(lv,il,nl,ilmin,info,iout,verbosity,prefi prefix_ = "" end if - write(iout_,*) trim(prefix_) - + if (me == 0) write(iout_,*) trim(prefix_) if (global_) then allocate(sz(6)) From 6362db0cc5ac59680046b5e2100c45712d84afec Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 3 Jul 2024 11:00:13 +0200 Subject: [PATCH 5/5] Try improve OpenMP version of matchbox --- amgprec/amg_d_matchboxp_mod.f90 | 1 + ...mEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 2 +- .../impl/aggregator/processExposedVertex.cpp | 22 ++++++++++--------- .../processMatchedVerticesAndSendMessages.cpp | 19 ++++++++-------- 4 files changed, 24 insertions(+), 20 deletions(-) diff --git a/amgprec/amg_d_matchboxp_mod.f90 b/amgprec/amg_d_matchboxp_mod.f90 index e19ce617..5e0151ec 100644 --- a/amgprec/amg_d_matchboxp_mod.f90 +++ b/amgprec/amg_d_matchboxp_mod.f90 @@ -724,6 +724,7 @@ contains & vnl, mate, iam, np,ictxt,& & msgis,msgas,msgprc,ph0t,ph1t,ph2t,ph1crd,ph2crd,info,display_inp) if 
(do_timings) call psb_toc(idx_cmboxp) + if (iam==0) write(0,*) iam,' buildmatching from PMatchBox:', info,ph0t,ph1t,ph2t if (debug) write(0,*) iam,' buildmatching from PMatchBox:', info if (debug_sync) then call psb_max(ictxt,info) diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index 742f2b85..3b478cd7 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -489,7 +489,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( /////////////////////////////////////////////////////////////////////////////////// /////////////////////////// PROCESS MESSAGES ////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// - + //startTime = MPI_Wtime(); processMessagesD(NLVer, Mate, candidateMate, diff --git a/amgprec/impl/aggregator/processExposedVertex.cpp b/amgprec/impl/aggregator/processExposedVertex.cpp index cf0fb826..cb3cea65 100644 --- a/amgprec/impl/aggregator/processExposedVertex.cpp +++ b/amgprec/impl/aggregator/processExposedVertex.cpp @@ -30,15 +30,16 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_BD(MilanLongInt NLVer, vector &privateQOwner) { - MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0; + MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0; MilanInt ghostOwner = 0, option, igw; - //#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \ - firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) \ - default(shared) num_threads(NUM_THREAD) +#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \ + firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, \ + privateQGhostVtx, 
privateQMsgType, privateQOwner) \ + default(shared) num_threads(NUM_THREAD) { - //#pragma omp for reduction(+ \ + #pragma omp for reduction(+ \ : PCounter[:numProcs], myCard \ [:1], msgInd \ [:1], NumMessagesBundled \ @@ -216,17 +217,18 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_BS(MilanLongInt NLVer, MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0; MilanInt ghostOwner = 0, option, igw; - //#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \ - firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) \ +#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \ + firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, \ + privateQGhostVtx, privateQMsgType, privateQOwner) \ default(shared) num_threads(NUM_THREAD) { - //#pragma omp for reduction(+ \ +#pragma omp for reduction(+ \ : PCounter[:numProcs], myCard \ [:1], msgInd \ [:1], NumMessagesBundled \ - [:1]) \ - schedule(static) + [:1]) \ + schedule(static) for (v = 0; v < NLVer; v++) { option = -1; // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) diff --git a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp index 1631348d..d094afaa 100644 --- a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp +++ b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp @@ -51,15 +51,16 @@ void processMatchedVerticesAndSendMessagesD( MilanLongInt localVertices = 0; #endif //#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ - firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx,\ - privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \ - num_threads(NUM_THREAD) \ - reduction(+ \ - : msgInd[:1], PCounter \ - [:numProcs], myCard \ - [:1], NumMessagesBundled \ - [:1], msgActual \ - [:1]) + firstprivate(Message, 
privateU, StartIndex, EndIndex, privateQLocalVtx, \ + privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) \ + default(shared) \ + num_threads(NUM_THREAD) \ + reduction(+ \ + : msgInd[:1], PCounter \ + [:numProcs], myCard \ + [:1], NumMessagesBundled \ + [:1], msgActual \ + [:1]) { while (!U.empty()) {