From 561cadee0fd721d475f625c3441ead31790c2ede Mon Sep 17 00:00:00 2001 From: StefanoPetrilli Date: Fri, 15 Jul 2022 07:27:30 -0500 Subject: [PATCH] parallelQueues working --- amgprec/impl/aggregator/MatchBoxPC.h | 2 +- ...mEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 63 +++++++++---------- .../aggregator/processMatchedVertices.cpp | 63 +++++++------------ amgprec/impl/aggregator/queueTransfer.cpp | 1 + 4 files changed, 54 insertions(+), 75 deletions(-) diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h index a8f22f49..d1e26fbc 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.h +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -66,7 +66,7 @@ using namespace std; #define NUM_THREAD 4 -#define UCHUNK 1000 +#define UCHUNK 100000 const MilanLongInt REQUEST = 1; const MilanLongInt SUCCESS = 2; diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index 6e24393b..d5ac4394 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -71,8 +71,6 @@ Statistics: ph1_card, ph2_card : Size: |P| number of processes in the comm-world (number of matched edges in Phase 1 and Phase 2) */ -#define UCHUNK 1000 - #ifdef SERIAL_MPI #else @@ -121,7 +119,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( fflush(stdout); #endif - MilanLongInt StartIndex = verDistance[myRank]; // The starting vertex owned by the current rank + MilanLongInt StartIndex = verDistance[myRank]; // The starting vertex owned by the current rank MilanLongInt EndIndex = verDistance[myRank + 1] - 1; // The ending vertex owned by the current rank MPI_Status computeStatus; @@ -147,7 +145,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( PCounter[i] = 0; MilanLongInt NumMessagesBundled = 0; - //TODO when the last computational section will be refactored this could be eliminated + // TODO when the last computational section will be refactored this could be eliminated MilanInt ghostOwner = 0; // Changed by Fabio to be an integer, addresses needs to be integers! MilanLongInt *candidateMate = nullptr; #ifdef PRINT_DEBUG_INFO_ @@ -282,6 +280,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( vector UChunkBeingProcessed; UChunkBeingProcessed.reserve(UCHUNK); + processMatchedVertices(NLVer, UChunkBeingProcessed, U, @@ -329,8 +328,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( &MessageIndex, numProcs, myRank, - //ComputeTag, - //BundleTag, comm, QLocalVtx, QGhostVtx, @@ -598,35 +595,35 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( &S, U); - ///////////////////////// END OF PROCESS MESSAGES ///////////////////////////////// + ///////////////////////// END OF PROCESS MESSAGES ///////////////////////////////// #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Finished Message processing phase: S= " << S; - fflush(stdout); - cout << "\n(" << myRank << ")** SENT : ACTUAL= " << msgActual; - fflush(stdout); - cout << "\n(" << myRank << ")** SENT : INDIVIDUAL= " << msgInd << endl; - fflush(stdout); + cout << "\n(" << myRank << ")Finished Message processing phase: S= " << S; + fflush(stdout); + cout << "\n(" << myRank << ")** SENT : ACTUAL= " << msgActual; + fflush(stdout); + cout << "\n(" << myRank << ")** SENT : INDIVIDUAL= " << msgInd << endl; + fflush(stdout); #endif -} // End of while (true) - -clean(NLVer, - myRank, - MessageIndex, - SRequest, - SStatus, - BufferSize, - Buffer, - msgActual, - msgActualSent, - msgInd, - msgIndSent, - NumMessagesBundled, - msgPercent, - MateLock); - -finishTime = MPI_Wtime(); -*ph2_time = finishTime - startTime; // Time taken for Phase-2 -*ph2_card = myCard; // Cardinality at the end of Phase-2 + } // End of while (true) + + clean(NLVer, + myRank, + MessageIndex, + SRequest, + SStatus, + BufferSize, + Buffer, + msgActual, + msgActualSent, + msgInd, + msgIndSent, + NumMessagesBundled, + msgPercent, + MateLock); + + finishTime = MPI_Wtime(); + *ph2_time = finishTime - startTime; // Time taken for Phase-2 + *ph2_card = myCard; // Cardinality at the end of Phase-2 } // End of algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate #endif diff --git a/amgprec/impl/aggregator/processMatchedVertices.cpp b/amgprec/impl/aggregator/processMatchedVertices.cpp index c1ae6d13..d766bc42 100644 --- a/amgprec/impl/aggregator/processMatchedVertices.cpp +++ b/amgprec/impl/aggregator/processMatchedVertices.cpp @@ -1,6 +1,6 @@ #include "MatchBoxPC.h" -//#define privateQueues +//#define error void processMatchedVertices( MilanLongInt NLVer, @@ -38,8 +38,13 @@ void processMatchedVertices( MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner; MilanLongInt myCard = *myCardPtr, msgInd = *msgIndPtr, NumMessagesBundled = *NumMessagesBundledPtr, S = *SPtr, privateMyCard = 0; - // TODO check if private queues arrive empty -#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner) firstprivate(privateMyCard, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(4) + // TODO check that the queues arrives empty + assert(privateQGhostVtx.empty()); + assert(privateQLocalVtx.empty()); + assert(privateQMsgType.empty()); + assert(privateQOwner.empty()); + +#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner) firstprivate(privateMyCard, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(NUM_THREAD) { #ifdef PRINT_DEBUG_INFO_ @@ -140,25 +145,18 @@ void processMatchedVertices( cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); #endif + msgInd++; + NumMessagesBundled++; ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); assert(ghostOwner != -1); assert(ghostOwner != myRank); + PCounter[ghostOwner]++; -#ifdef privateQueues privateQLocalVtx.push_back(v); privateQGhostVtx.push_back(w); privateQMsgType.push_back(REQUEST); privateQOwner.push_back(ghostOwner); -#endif -#ifndef privateQueues - QLocalVtx.push_back(v); - QGhostVtx.push_back(w); - QMsgType.push_back(REQUEST); - QOwner.push_back(ghostOwner); -#endif - PCounter[ghostOwner]++; - NumMessagesBundled++; - msgInd++; + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) { Mate[v - StartIndex] = w; // v is a local vertex @@ -214,28 +212,18 @@ void processMatchedVertices( cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); fflush(stdout); #endif - - // ghostOwner = inputSubGraph.findOwner(w); + msgInd++; + NumMessagesBundled++; ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); assert(ghostOwner != -1); assert(ghostOwner != myRank); + PCounter[ghostOwner]++; -#ifdef privateQueues privateQLocalVtx.push_back(v); privateQGhostVtx.push_back(w); privateQMsgType.push_back(FAILURE); privateQOwner.push_back(ghostOwner); -#endif -#ifndef privateQueues - QLocalVtx.push_back(v); - QGhostVtx.push_back(w); - QMsgType.push_back(FAILURE); - QOwner.push_back(ghostOwner); -#endif - PCounter[ghostOwner]++; - NumMessagesBundled++; - msgInd++; } // End of if(GHOST) } // End of for loop } // End of Else: w == -1 @@ -270,26 +258,18 @@ void processMatchedVertices( fflush(stdout); #endif + msgInd++; + NumMessagesBundled++; ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); assert(ghostOwner != -1); assert(ghostOwner != myRank); + PCounter[ghostOwner]++; -#ifdef privateQueues privateQLocalVtx.push_back(u); privateQGhostVtx.push_back(v); privateQMsgType.push_back(SUCCESS); privateQOwner.push_back(ghostOwner); -#endif -#ifndef privateQueues - QLocalVtx.push_back(u); - QGhostVtx.push_back(v); - QMsgType.push_back(SUCCESS); - QOwner.push_back(ghostOwner); -#endif - PCounter[ghostOwner]++; - NumMessagesBundled++; - msgInd++; } // End of If( v != Mate[u] ) // omp_unset_lock(&MateLock[u - StartIndex]); @@ -306,14 +286,15 @@ void processMatchedVertices( if (privateU.size() < UCHUNK && !U.empty()) continue; -#ifdef privateQueues + printf("Executed \n"); +#ifdef error #pragma omp critical(U) { while (!privateU.empty()) U.push_back(privateU.pop_back()); } #endif -#ifndef privateQueues +#ifndef error queuesTransfer(U, privateU, QLocalVtx, QGhostVtx, QMsgType, QOwner, privateQLocalVtx, @@ -322,7 +303,7 @@ void processMatchedVertices( privateQOwner); #endif } - } // End of while ( /*!Q.empty()*/ !U.empty() ) + } // End of while ( !U.empty() ) queuesTransfer(U, privateU, QLocalVtx, QGhostVtx, diff --git a/amgprec/impl/aggregator/queueTransfer.cpp b/amgprec/impl/aggregator/queueTransfer.cpp index ed2829c6..cbae1fc2 100644 --- a/amgprec/impl/aggregator/queueTransfer.cpp +++ b/amgprec/impl/aggregator/queueTransfer.cpp @@ -12,6 +12,7 @@ void queuesTransfer(staticQueue &U, staticQueue &privateQOwner) { + #pragma omp critical(U) { while (!privateU.empty())