From 066c1a5e62de13c322d1101834fa4a492e7af72b Mon Sep 17 00:00:00 2001 From: StefanoPetrilli Date: Sat, 23 Jul 2022 09:27:35 -0500 Subject: [PATCH] optimization processMatchedVerticesAndSendMessages.cpp --- amgprec/impl/aggregator/MatchBoxPC.h | 4 - ...mEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 4 - .../processMatchedVerticesAndSendMessages.cpp | 100 ++++++++---------- 3 files changed, 42 insertions(+), 66 deletions(-) diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h index 8fcc495b..01cc0589 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.h +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -351,10 +351,6 @@ extern "C" vector &QGhostVtx, vector &QMsgType, vector &QOwner, - staticQueue &privateQLocalVtx, - staticQueue &privateQGhostVtx, - staticQueue &privateQMsgType, - staticQueue &privateQOwner, MPI_Comm comm, MilanLongInt *msgActual, vector &Message); diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index 612ac95f..c1210ea7 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -395,10 +395,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( QGhostVtx, QMsgType, QOwner, - privateQLocalVtx, - privateQGhostVtx, - privateQMsgType, - privateQOwner, comm, &msgActual, Message); diff --git a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp index 49235870..9d4077a7 100644 --- a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp +++ b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp @@ -27,20 +27,22 @@ void processMatchedVerticesAndSendMessages( vector &QGhostVtx, vector &QMsgType, vector &QOwner, - staticQueue &privateQLocalVtx, - staticQueue &privateQGhostVtx, - staticQueue &privateQMsgType, - staticQueue &privateQOwner, MPI_Comm comm, MilanLongInt *msgActual, vector &Message) { + MilanLongInt initialSize = QLocalVtx.size(); MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner; int option; MilanLongInt mateVal; - vector privatemessagesToSend, messagesToSend; + // TODO reserve!!! + vector privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner; + privateQLocalVtx.reserve(100000); + privateQGhostVtx.reserve(100000); + privateQMsgType.reserve(100000); + privateQOwner.reserve(100000); #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << "=========================************===============================" << endl; @@ -51,7 +53,7 @@ void processMatchedVerticesAndSendMessages( #ifdef COUNT_LOCAL_VERTEX MilanLongInt localVertices = 0; #endif -#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option, privatemessagesToSend) \ +#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \ num_threads(NUM_THREAD) \ reduction(+ \ @@ -195,21 +197,12 @@ void processMatchedVerticesAndSendMessages( // Found a dominating edge, it is a ghost ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - // assert(ghostOwner != -1); - // assert(ghostOwner != myRank); // Build the Message Packet: - Message[0] = v; // LOCAL - Message[1] = w; // GHOST - Message[2] = REQUEST; // TYPE - // Send a Request (Asynchronous) - - // printf("Send case 2: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); - // fflush(stdout); - privatemessagesToSend.push_back(v); - privatemessagesToSend.push_back(w); - privatemessagesToSend.push_back(REQUEST); - privatemessagesToSend.push_back(ghostOwner); + // Message[0] = v; // LOCAL + // Message[1] = w; // GHOST + // Message[2] = REQUEST; // TYPE + // Send a Request (Asynchronous) // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); (*msgActual)++; @@ -242,21 +235,12 @@ void processMatchedVerticesAndSendMessages( #endif ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - // assert(ghostOwner != -1); - // assert(ghostOwner != myRank); // Build the Message Packet: - Message[0] = v; // LOCAL - Message[1] = w; // GHOST - Message[2] = FAILURE; // TYPE - // Send a Request (Asynchronous) - - // printf("Send case 4: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); - // fflush(stdout); - privatemessagesToSend.push_back(v); - privatemessagesToSend.push_back(w); - privatemessagesToSend.push_back(FAILURE); - privatemessagesToSend.push_back(ghostOwner); + // Message[0] = v; // LOCAL + // Message[1] = w; // GHOST + // Message[2] = FAILURE; // TYPE + // Send a Request (Asynchronous) // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); (*msgActual)++; @@ -280,21 +264,12 @@ void processMatchedVerticesAndSendMessages( #endif ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); - // assert(ghostOwner != -1); - // assert(ghostOwner != myRank); // Build the Message Packet: - Message[0] = u; // LOCAL - Message[1] = v; // GHOST - Message[2] = SUCCESS; // TYPE - + // Message[0] = u; // LOCAL + // Message[1] = v; // GHOST + // Message[2] = SUCCESS; // TYPE // Send a Request (Asynchronous) - // printf("Send case 5: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); - // fflush(stdout);) - privatemessagesToSend.push_back(u); - privatemessagesToSend.push_back(v); - privatemessagesToSend.push_back(SUCCESS); - privatemessagesToSend.push_back(ghostOwner); // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); (*msgActual)++; @@ -311,19 +286,25 @@ void processMatchedVerticesAndSendMessages( } } // End of outer for +#pragma omp critical(U) + { + while (!privateU.empty()) + U.push_back(privateU.pop_back()); + } + #pragma omp critical(sendMessageTransfer) { - messagesToSend.insert(messagesToSend.end(), privatemessagesToSend.begin(), privatemessagesToSend.end()); - privatemessagesToSend.clear(); - } + QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end()); + QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end()); + QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end()); + QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end()); - queuesTransfer(U, privateU, QLocalVtx, - QGhostVtx, - QMsgType, QOwner, privateQLocalVtx, - privateQGhostVtx, - privateQMsgType, - privateQOwner); + privateQLocalVtx.clear(); + privateQGhostVtx.clear(); + privateQMsgType.clear(); + privateQOwner.clear(); + } } // End of while ( !U.empty() ) @@ -336,12 +317,15 @@ void processMatchedVerticesAndSendMessages( #endif } // End of parallel region - for (int i = 0; i < messagesToSend.size(); i += 4) + //Send the messages + for (int i = initialSize; i < QOwner.size(); i++) { - Message[0] = messagesToSend[i]; - Message[1] = messagesToSend[i + 1]; - Message[2] = messagesToSend[i + 2]; - ghostOwner = messagesToSend[i + 3]; + + Message[0] = QLocalVtx[i]; + Message[1] = QGhostVtx[i]; + Message[2] = QMsgType[i]; + ghostOwner = QOwner[i]; + MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); } }