From 500403dbdac33e4a9af4a6a125bbbbc561d79e49 Mon Sep 17 00:00:00 2001 From: StefanoPetrilli Date: Sat, 23 Jul 2022 11:13:21 -0500 Subject: [PATCH] Replaced some staticQueues with vectors for performance reasons --- amgprec/impl/aggregator/MatchBoxPC.h | 36 +++++----- ...mEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 65 ++++++++++--------- amgprec/impl/aggregator/initialize.cpp | 17 ++--- .../impl/aggregator/processExposedVertex.cpp | 15 +++-- .../aggregator/processMatchedVertices.cpp | 8 +-- .../processMatchedVerticesAndSendMessages.cpp | 40 ++++-------- amgprec/impl/aggregator/queueTransfer.cpp | 38 ++++++----- 7 files changed, 110 insertions(+), 109 deletions(-) diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h index 01cc0589..1066f8ef 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.h +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -189,10 +189,10 @@ extern "C" vector &QGhostVtx, vector &QMsgType, vector &QOwner, - staticQueue &privateQLocalVtx, - staticQueue &privateQGhostVtx, - staticQueue &privateQMsgType, - staticQueue &privateQOwner); + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); bool isAlreadyMatched(MilanLongInt node, MilanLongInt StartIndex, @@ -233,10 +233,10 @@ extern "C" MilanLongInt *&candidateMate, staticQueue &U, staticQueue &privateU, - staticQueue &privateQLocalVtx, - staticQueue &privateQGhostVtx, - staticQueue &privateQMsgType, - staticQueue &privateQOwner); + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); void clean(MilanLongInt NLVer, MilanInt myRank, @@ -284,10 +284,10 @@ extern "C" vector &QGhostVtx, vector &QMsgType, vector &QOwner, - staticQueue &privateQLocalVtx, - staticQueue &privateQGhostVtx, - staticQueue &privateQMsgType, - staticQueue &privateQOwner); + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); void PROCESS_CROSS_EDGE(MilanLongInt *edge, MilanLongInt *SPtr); @@ -319,10 +319,10 @@ extern "C" vector &QGhostVtx, vector &QMsgType, vector &QOwner, - staticQueue &privateQLocalVtx, - staticQueue &privateQGhostVtx, - staticQueue &privateQMsgType, - staticQueue &privateQOwner); + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); void processMatchedVerticesAndSendMessages( MilanLongInt NLVer, @@ -351,6 +351,10 @@ extern "C" vector &QGhostVtx, vector &QMsgType, vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner, MPI_Comm comm, MilanLongInt *msgActual, vector &Message); diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index c1210ea7..4297391a 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -182,7 +182,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( vector GMate; // Proportional to the number of ghost vertices MilanLongInt S; MilanLongInt privateMyCard = 0; - staticQueue U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner; + staticQueue U, privateU; vector PCumulative, PMessageBundle, PSizeInfoMessages; vector SRequest; // Requests that are used for each send message vector SStatus; // Status of sent messages, used in MPI_Wait @@ -190,6 +190,9 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( MilanInt BufferSize; MilanLongInt *Buffer; + vector privateQLocalVtx, privateQGhostVtx, privateQMsgType; + vector privateQOwner; + initialize(NLVer, NLEdge, StartIndex, EndIndex, &numGhostEdges, &numGhostVertices, &S, @@ -370,34 +373,38 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( /////////////////////////////////////////////////////////////////////////////////// processMatchedVerticesAndSendMessages(NLVer, - UChunkBeingProcessed, - U, - privateU, - StartIndex, - EndIndex, - &myCard, - &msgInd, - &NumMessagesBundled, - &S, - verLocPtr, - verLocInd, - verDistance, - PCounter, - Counter, - myRank, - numProcs, - candidateMate, - GMate, - Mate, - Ghost2LocalMap, - edgeLocWeight, - QLocalVtx, - QGhostVtx, - QMsgType, - QOwner, - comm, - &msgActual, - Message); + UChunkBeingProcessed, + U, + privateU, + StartIndex, + EndIndex, + &myCard, + &msgInd, + &NumMessagesBundled, + &S, + verLocPtr, + verLocInd, + verDistance, + PCounter, + Counter, + myRank, + numProcs, + candidateMate, + GMate, + Mate, + Ghost2LocalMap, + edgeLocWeight, + QLocalVtx, + QGhostVtx, + QMsgType, + QOwner, + privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner, + comm, + &msgActual, + Message); ///////////////////////// END OF PROCESS MATCHED VERTICES ///////////////////////// diff --git a/amgprec/impl/aggregator/initialize.cpp b/amgprec/impl/aggregator/initialize.cpp index 477f5f6d..47f424fd 100644 --- a/amgprec/impl/aggregator/initialize.cpp +++ b/amgprec/impl/aggregator/initialize.cpp @@ -21,10 +21,10 @@ void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, MilanLongInt *&candidateMate, staticQueue &U, staticQueue &privateU, - staticQueue &privateQLocalVtx, - staticQueue &privateQGhostVtx, - staticQueue &privateQMsgType, - staticQueue &privateQOwner) + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner) { MilanLongInt insertMe = 0; @@ -295,10 +295,11 @@ void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, // Initialize the privte data structure new (&privateU) staticQueue(NLVer + (*numGhostVertices)); // TODO how can I put a meaningfull size? - new (&privateQLocalVtx) staticQueue(size); - new (&privateQGhostVtx) staticQueue(size); - new (&privateQMsgType) staticQueue(size); - new (&privateQOwner) staticQueue(size); + + privateQLocalVtx.reserve(*numGhostVertices); + privateQGhostVtx.reserve(*numGhostVertices); + privateQMsgType.reserve(*numGhostVertices); + privateQOwner.reserve(*numGhostVertices); } // end of task } // End of single region diff --git a/amgprec/impl/aggregator/processExposedVertex.cpp b/amgprec/impl/aggregator/processExposedVertex.cpp index 91035372..c330e724 100644 --- a/amgprec/impl/aggregator/processExposedVertex.cpp +++ b/amgprec/impl/aggregator/processExposedVertex.cpp @@ -25,10 +25,10 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, vector &QGhostVtx, vector &QMsgType, vector &QOwner, - staticQueue &privateQLocalVtx, - staticQueue &privateQGhostVtx, - staticQueue &privateQMsgType, - staticQueue &privateQOwner) + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner) { MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0; @@ -39,8 +39,11 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, num_threads(NUM_THREAD) { -#pragma omp for reduction(+ \ - : PCounter[:numProcs], myCard[:1], msgInd[:1], NumMessagesBundled[:1]) schedule(static) +#pragma omp for reduction(+ \ + : PCounter[:numProcs], myCard \ + [:1], msgInd \ + [:1], NumMessagesBundled \ + [:1]) schedule(static) for (v = 0; v < NLVer; v++) { option = -1; diff --git a/amgprec/impl/aggregator/processMatchedVertices.cpp b/amgprec/impl/aggregator/processMatchedVertices.cpp index 1e7b2641..510c9877 100644 --- a/amgprec/impl/aggregator/processMatchedVertices.cpp +++ b/amgprec/impl/aggregator/processMatchedVertices.cpp @@ -27,10 +27,10 @@ void processMatchedVertices( vector &QGhostVtx, vector &QMsgType, vector &QOwner, - staticQueue &privateQLocalVtx, - staticQueue &privateQGhostVtx, - staticQueue &privateQMsgType, - staticQueue &privateQOwner) + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner) { MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner; diff --git a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp index 9d4077a7..debfc5ca 100644 --- a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp +++ b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp @@ -27,6 +27,10 @@ void processMatchedVerticesAndSendMessages( vector &QGhostVtx, vector &QMsgType, vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner, MPI_Comm comm, MilanLongInt *msgActual, vector &Message) @@ -37,13 +41,6 @@ void processMatchedVerticesAndSendMessages( int option; MilanLongInt mateVal; - // TODO reserve!!! - vector privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner; - privateQLocalVtx.reserve(100000); - privateQGhostVtx.reserve(100000); - privateQMsgType.reserve(100000); - privateQOwner.reserve(100000); - #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << "=========================************===============================" << endl; fflush(stdout); @@ -53,7 +50,7 @@ void processMatchedVerticesAndSendMessages( #ifdef COUNT_LOCAL_VERTEX MilanLongInt localVertices = 0; #endif -#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ +#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \ num_threads(NUM_THREAD) \ reduction(+ \ @@ -286,25 +283,12 @@ void processMatchedVerticesAndSendMessages( } } // End of outer for -#pragma omp critical(U) - { - while (!privateU.empty()) - U.push_back(privateU.pop_back()); - } - -#pragma omp critical(sendMessageTransfer) - { - - QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end()); - QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end()); - QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end()); - QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end()); - - privateQLocalVtx.clear(); - privateQGhostVtx.clear(); - privateQMsgType.clear(); - privateQOwner.clear(); - } + queuesTransfer(U, privateU, QLocalVtx, + QGhostVtx, + QMsgType, QOwner, privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner); } // End of while ( !U.empty() ) @@ -317,7 +301,7 @@ void processMatchedVerticesAndSendMessages( #endif } // End of parallel region - //Send the messages + // Send the messages for (int i = initialSize; i < QOwner.size(); i++) { diff --git a/amgprec/impl/aggregator/queueTransfer.cpp b/amgprec/impl/aggregator/queueTransfer.cpp index cbae1fc2..0439a08c 100644 --- a/amgprec/impl/aggregator/queueTransfer.cpp +++ b/amgprec/impl/aggregator/queueTransfer.cpp @@ -1,32 +1,34 @@ #include "MatchBoxPC.h" void queuesTransfer(staticQueue &U, - staticQueue &privateU, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - staticQueue &privateQLocalVtx, - staticQueue &privateQGhostVtx, - staticQueue &privateQMsgType, - staticQueue &privateQOwner) + staticQueue &privateU, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner) { - #pragma omp critical(U) { while (!privateU.empty()) U.push_back(privateU.pop_back()); } -#pragma omp critical(privateMsg) +#pragma omp critical(sendMessageTransfer) { - while (!privateQLocalVtx.empty()) - { - QLocalVtx.push_back(privateQLocalVtx.pop_back()); - QGhostVtx.push_back(privateQGhostVtx.pop_back()); - QMsgType.push_back(privateQMsgType.pop_back()); - QOwner.push_back(privateQOwner.pop_back()); - } + + QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end()); + QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end()); + QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end()); + QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end()); } + + privateQLocalVtx.clear(); + privateQGhostVtx.clear(); + privateQMsgType.clear(); + privateQOwner.clear(); } \ No newline at end of file