From ebe9b451775dd0df0ac0d9c0ffa28db64e87da87 Mon Sep 17 00:00:00 2001 From: Salvatore Filippone Date: Fri, 10 Feb 2023 07:50:58 -0500 Subject: [PATCH] Modify MATCHBOXP to fix OpenMP. Performance to be reviewed --- ...mEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 175 ++++-- .../impl/aggregator/computeCandidateMate.cpp | 41 +- .../parallelComputeCandidateMateB.cpp | 3 +- .../impl/aggregator/processExposedVertex.cpp | 156 +++--- .../aggregator/processMatchedVertices.cpp | 437 +++++++-------- .../processMatchedVerticesAndSendMessages.cpp | 517 +++++++++--------- amgprec/impl/aggregator/processMessages.cpp | 366 ++++++------- amgprec/impl/aggregator/queueTransfer.cpp | 3 +- .../impl/aggregator/sendBundledMessages.cpp | 241 ++++---- 9 files changed, 978 insertions(+), 961 deletions(-) diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index bb2dd5a7..49b366a6 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -70,7 +70,7 @@ Statistics: ph0_time, ph1_time, ph2_time: Runtimes Statistics: ph1_card, ph2_card : Size: |P| number of processes in the comm-world (number of matched edges in Phase 1 and Phase 2) */ - +//#define DEBUG_HANG_ #ifdef SERIAL_MPI #else @@ -110,17 +110,24 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( #endif #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ") verDistance [" << verDistance[0] << "," << verDistance[1] << "," << verDistance[2] << "," << verDistance[3] << "]"; + cout << "\n(" << myRank << ") verDistance [" ; + for (int i = 0; i < numProcs; i++) + cout << verDistance[i] << "," << verDistance[i+1]; + cout << "]\n"; fflush(stdout); #endif #ifdef DEBUG_HANG_ - if (myRank == 0) - cout << "\n(" << myRank << ") verDistance [" << verDistance[0] << "," << verDistance[1] << "," << verDistance[2] << "," << verDistance[3] << "]"; + if (myRank == 0) { + cout << "\n(" << myRank << ") verDistance [" ; + for (int i = 0; i < numProcs; i++) + cout << verDistance[i] << "," ; + cout << verDistance[numProcs]<< "]\n"; + } fflush(stdout); #endif MilanLongInt StartIndex = verDistance[myRank]; // The starting vertex owned by the current rank - MilanLongInt EndIndex = verDistance[myRank + 1] - 1; // The ending vertex owned by the current rank + MilanLongInt EndIndex = verDistance[myRank + 1] - 1; // The ending vertex owned by the current rank MPI_Status computeStatus; @@ -211,7 +218,11 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( finishTime = MPI_Wtime(); *ph0_time = finishTime - startTime; // Time taken for Phase-0: Initialization - +#ifdef DEBUG_HANG_ + cout << myRank << " Finished initialization" << endl; + fflush(stdout); +#endif + startTime = MPI_Wtime(); ///////////////////////////////////////////////////////////////////////////////////////// @@ -233,6 +244,17 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( edgeLocWeight, candidateMate); +#ifdef DEBUG_HANG_ + cout << myRank << " Finished Exposed Vertex" << endl; + fflush(stdout); +#if 0 + cout << myRank << " candidateMate after parallelCompute " < &privateQGhostVtx, vector &privateQMsgType, vector &privateQOwner) -{ +{ MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0; - MilanInt ghostOwner = 0, option; + MilanInt ghostOwner = 0, option, igw; -#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \ - firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) \ - num_threads(NUM_THREAD) +#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \ + firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) \ + default(shared) num_threads(NUM_THREAD) { #pragma omp for reduction(+ \ : PCounter[:numProcs], myCard \ [:1], msgInd \ [:1], NumMessagesBundled \ - [:1]) schedule(static) - for (v = 0; v < NLVer; v++) - { + [:1]) \ + schedule(static) + for (v = 0; v < NLVer; v++) { option = -1; // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) k = candidateMate[v]; @@ -67,91 +67,81 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, #pragma omp critical(processExposed) { - if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) - { - w = computeCandidateMate(verLocPtr[v], - verLocPtr[v + 1], - edgeLocWeight, 0, - verLocInd, - StartIndex, - EndIndex, - GMate, - Mate, - Ghost2LocalMap); - candidateMate[v] = w; + if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) { + w = computeCandidateMate(verLocPtr[v], + verLocPtr[v + 1], + edgeLocWeight, 0, + verLocInd, + StartIndex, + EndIndex, + GMate, + Mate, + Ghost2LocalMap); + candidateMate[v] = w; } - - if (w >= 0) - { - (*myCard)++; - if ((w < StartIndex) || (w > EndIndex)) - { // w is a ghost vertex - option = 2; - - if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) - { - option = 1; - Mate[v] = w; - GMate[Ghost2LocalMap[w]] = v + StartIndex; // w is a Ghost - - } // End of if CandidateMate[w] = v - - } // End of if a Ghost Vertex - else - { // w is a local vertex - - if (candidateMate[w - StartIndex] == (v + StartIndex)) - { - option = 3; - Mate[v] = w; // v is local - Mate[w - StartIndex] = v + StartIndex; // w is local - + + if (w >= 0) { + (*myCard)++; + if ((w < StartIndex) || (w > EndIndex)) { // w is a ghost vertex + option = 2; + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) { + option = 1; + Mate[v] = w; + GMate[Ghost2LocalMap[w]] = v + StartIndex; // w is a Ghost + + } // End of if CandidateMate[w] = v + + } // End of if a Ghost Vertex + else { // w is a local vertex + + if (candidateMate[w - StartIndex] == (v + StartIndex)) { + option = 3; + Mate[v] = w; // v is local + Mate[w - StartIndex] = v + StartIndex; // w is local + #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ") "; - fflush(stdout); + cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ") "; + fflush(stdout); #endif - - } // End of if ( candidateMate[w-StartIndex] == (v+StartIndex) ) - } // End of Else - + + } // End of if ( candidateMate[w-StartIndex] == (v+StartIndex) ) + } // End of Else + } // End of second if - + } // End critical processExposed - + } // End of if(w >=0) - else - { - // This piece of code is executed a really small amount of times - adj11 = verLocPtr[v]; - adj12 = verLocPtr[v + 1]; - for (k1 = adj11; k1 < adj12; k1++) - { - w = verLocInd[k1]; - if ((w < StartIndex) || (w > EndIndex)) - { // A ghost + else { + // This piece of code is executed a really small amount of times + adj11 = verLocPtr[v]; + adj12 = verLocPtr[v + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) { // A ghost #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a failure message: "; - cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); - fflush(stdout); + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + fflush(stdout); #endif - (*msgInd)++; - (*NumMessagesBundled)++; - ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - // assert(ghostOwner != -1); - // assert(ghostOwner != myRank); - PCounter[ghostOwner]++; - - privateQLocalVtx.push_back(v + StartIndex); - privateQGhostVtx.push_back(w); - privateQMsgType.push_back(FAILURE); - privateQOwner.push_back(ghostOwner); - - } // End of if(GHOST) - } // End of for loop + (*msgInd)++; + (*NumMessagesBundled)++; + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); + PCounter[ghostOwner]++; + + privateQLocalVtx.push_back(v + StartIndex); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(FAILURE); + privateQOwner.push_back(ghostOwner); + + } // End of if(GHOST) + } // End of for loop } // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - + switch (option) { case -1: @@ -202,4 +192,4 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, privateQOwner); } // End of parallel region -} \ No newline at end of file +} diff --git a/amgprec/impl/aggregator/processMatchedVertices.cpp b/amgprec/impl/aggregator/processMatchedVertices.cpp index e96dcc1d..d9363c39 100644 --- a/amgprec/impl/aggregator/processMatchedVertices.cpp +++ b/amgprec/impl/aggregator/processMatchedVertices.cpp @@ -46,264 +46,249 @@ void processMatchedVertices( #ifdef COUNT_LOCAL_VERTEX MilanLongInt localVertices = 0; #endif -#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ - firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \ - num_threads(NUM_THREAD) \ - reduction(+ \ - : msgInd[:1], PCounter \ - [:numProcs], myCard \ - [:1], NumMessagesBundled \ + //#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ + firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, \ + privateQMsgType, privateQOwner, UChunkBeingProcessed) \ + default(shared) num_threads(NUM_THREAD) \ + reduction(+ \ + : msgInd[:1], PCounter \ + [:numProcs], myCard \ + [:1], NumMessagesBundled \ [:1]) { - while (!U.empty()) - { - - extractUChunk(UChunkBeingProcessed, U, privateU); - - for (MilanLongInt u : UChunkBeingProcessed) - { + while (!U.empty()) { + + extractUChunk(UChunkBeingProcessed, U, privateU); + + for (MilanLongInt u : UChunkBeingProcessed) { #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")u: " << u; - fflush(stdout); + cout << "\n(" << myRank << ")u: " << u; + fflush(stdout); #endif - if ((u >= StartIndex) && (u <= EndIndex)) - { // Process Only the Local Vertices - + if ((u >= StartIndex) && (u <= EndIndex)) { // Process Only the Local Vertices + #ifdef COUNT_LOCAL_VERTEX - localVertices++; + localVertices++; #endif - - // Get the Adjacency list for u - adj1 = verLocPtr[u - StartIndex]; // Pointer - adj2 = verLocPtr[u - StartIndex + 1]; - for (k = adj1; k < adj2; k++) - { - option = -1; - v = verLocInd[k]; - - if ((v >= StartIndex) && (v <= EndIndex)) - { // If Local Vertex: - + + // Get the Adjacency list for u + adj1 = verLocPtr[u - StartIndex]; // Pointer + adj2 = verLocPtr[u - StartIndex + 1]; + for (k = adj1; k < adj2; k++) { + option = -1; + v = verLocInd[k]; + + if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex: + #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; - fflush(stdout); + cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; + fflush(stdout); #endif #pragma omp atomic read - mateVal = Mate[v - StartIndex]; - // If the current vertex is pointing to a matched vertex and is not matched - if (mateVal < 0) - { + mateVal = Mate[v - StartIndex]; + // If the current vertex is pointing to a matched vertex and is not matched + if (mateVal < 0) { #pragma omp critical - { - if (candidateMate[v - StartIndex] == u) - { - // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - w = computeCandidateMate(verLocPtr[v - StartIndex], - verLocPtr[v - StartIndex + 1], - edgeLocWeight, 0, - verLocInd, - StartIndex, - EndIndex, - GMate, - Mate, - Ghost2LocalMap); - - candidateMate[v - StartIndex] = w; - + { + if (candidateMate[v - StartIndex] == u) { + // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + w = computeCandidateMate(verLocPtr[v - StartIndex], + verLocPtr[v - StartIndex + 1], + edgeLocWeight, 0, + verLocInd, + StartIndex, + EndIndex, + GMate, + Mate, + Ghost2LocalMap); + + candidateMate[v - StartIndex] = w; + #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")" << v << " Points to: " << w; - fflush(stdout); + cout << "\n(" << myRank << ")" << v << " Points to: " << w; + fflush(stdout); #endif - // If found a dominating edge: - if (w >= 0) - { - - if ((w < StartIndex) || (w > EndIndex)) - { // A ghost + // If found a dominating edge: + if (w >= 0) { + if ((w < StartIndex) || (w > EndIndex)) { // A ghost #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a request message:"; - cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + cout << "\n(" << myRank << ")Sending a request message:"; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); #endif - option = 2; - - if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) - { - option = 1; - Mate[v - StartIndex] = w; // v is a local vertex - GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex - - } // End of if CandidateMate[w] = v - } // End of if a Ghost Vertex - else - { // w is a local vertex - if (candidateMate[w - StartIndex] == v) - { - option = 3; - Mate[v - StartIndex] = w; // v is a local vertex - Mate[w - StartIndex] = v; // w is a local vertex - + option = 2; + + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) { + option = 1; + Mate[v - StartIndex] = w; // v is a local vertex + GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex + + } // End of if CandidateMate[w] = v + } // End of if a Ghost Vertex + else { // w is a local vertex + if (candidateMate[w - StartIndex] == v) { + option = 3; + Mate[v - StartIndex] = w; // v is a local vertex + Mate[w - StartIndex] = v; // w is a local vertex + #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; - fflush(stdout); + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; + fflush(stdout); #endif - } // End of if(CandidateMate(w) = v - } // End of Else - } // End of if(w >=0) - else - option = 4; // End of Else: w == -1 - // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - } // End of If (candidateMate[v-StartIndex] == u - } // End of task - } // mateval < 0 - } // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: - else - { // Neighbor is a ghost vertex - + } // End of if(CandidateMate(w) = v + } // End of Else + } // End of if(w >=0) + else + option = 4; // End of Else: w == -1 + // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } // End of If (candidateMate[v-StartIndex] == u + } // End of task + } // mateval < 0 + } // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: + else { // Neighbor is a ghost vertex + #pragma omp critical - { - if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) - candidateMate[NLVer + Ghost2LocalMap[v]] = -1; - if (v != Mate[u - StartIndex]) - option = 5; // u is local - } // End of critical - } // End of Else //A Ghost Vertex - - switch (option) - { - case -1: - // No things to do - break; - case 1: - // Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v - privateU.push_back(v); - privateU.push_back(w); - - (*myCard)++; + { + if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) + candidateMate[NLVer + Ghost2LocalMap[v]] = -1; + if (v != Mate[u - StartIndex]) + option = 5; // u is local + } // End of critical + } // End of Else //A Ghost Vertex + + switch (option) + { + case -1: + // No things to do + break; + case 1: + // Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v + privateU.push_back(v); + privateU.push_back(w); + + (*myCard)++; #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; - fflush(stdout); + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; + fflush(stdout); #endif - // Decrement the counter: - PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr); - case 2: - - // Found a dominating edge, it is a ghost - ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - // assert(ghostOwner != -1); - // assert(ghostOwner != myRank); - PCounter[ghostOwner]++; - (*NumMessagesBundled)++; - (*msgInd)++; - - privateQLocalVtx.push_back(v); - privateQGhostVtx.push_back(w); - privateQMsgType.push_back(REQUEST); - privateQOwner.push_back(ghostOwner); - break; - case 3: - privateU.push_back(v); - privateU.push_back(w); - - (*myCard)++; - break; - case 4: - // Could not find a dominating vertex - adj11 = verLocPtr[v - StartIndex]; - adj12 = verLocPtr[v - StartIndex + 1]; - for (k1 = adj11; k1 < adj12; k1++) - { - w = verLocInd[k1]; - if ((w < StartIndex) || (w > EndIndex)) - { // A ghost - + // Decrement the counter: + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr); + case 2: + + // Found a dominating edge, it is a ghost + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); + PCounter[ghostOwner]++; + (*NumMessagesBundled)++; + (*msgInd)++; + + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(REQUEST); + privateQOwner.push_back(ghostOwner); + break; + case 3: + privateU.push_back(v); + privateU.push_back(w); + + (*myCard)++; + break; + case 4: + // Could not find a dominating vertex + adj11 = verLocPtr[v - StartIndex]; + adj12 = verLocPtr[v - StartIndex + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) { // A ghost + #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a failure message: "; - cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); - fflush(stdout); + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + fflush(stdout); #endif - - ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - // assert(ghostOwner != -1); - // assert(ghostOwner != myRank); - - PCounter[ghostOwner]++; - (*NumMessagesBundled)++; - (*msgInd)++; - - privateQLocalVtx.push_back(v); - privateQGhostVtx.push_back(w); - privateQMsgType.push_back(FAILURE); - privateQOwner.push_back(ghostOwner); - - } // End of if(GHOST) - } // End of for loop - break; - case 5: - default: - + + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); + + PCounter[ghostOwner]++; + (*NumMessagesBundled)++; + (*msgInd)++; + + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(FAILURE); + privateQOwner.push_back(ghostOwner); + + } // End of if(GHOST) + } // End of for loop + break; + case 5: + default: + #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a success message: "; - cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n"; - fflush(stdout); + cout << "\n(" << myRank << ")Sending a success message: "; + cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n"; + fflush(stdout); #endif - - ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); - // assert(ghostOwner != -1); - // assert(ghostOwner != myRank); - - (*NumMessagesBundled)++; - PCounter[ghostOwner]++; - (*msgInd)++; - - privateQLocalVtx.push_back(u); - privateQGhostVtx.push_back(v); - privateQMsgType.push_back(SUCCESS); - privateQOwner.push_back(ghostOwner); - - break; - } // End of switch - - } // End of inner for - } - } // End of outer for - - queuesTransfer(U, privateU, QLocalVtx, - QGhostVtx, - QMsgType, QOwner, privateQLocalVtx, - privateQGhostVtx, - privateQMsgType, - privateQOwner); - + + ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); + + (*NumMessagesBundled)++; + PCounter[ghostOwner]++; + (*msgInd)++; + + privateQLocalVtx.push_back(u); + privateQGhostVtx.push_back(v); + privateQMsgType.push_back(SUCCESS); + privateQOwner.push_back(ghostOwner); + + break; + } // End of switch + + } // End of inner for + } + } // End of outer for + + queuesTransfer(U, privateU, QLocalVtx, + QGhostVtx, + QMsgType, QOwner, privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner); + #pragma omp critical(U) - { - U.insert(U.end(), privateU.begin(), privateU.end()); - } - - privateU.clear(); - + { + U.insert(U.end(), privateU.begin(), privateU.end()); + } + + privateU.clear(); + #pragma omp critical(sendMessageTransfer) - { - - QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end()); - QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end()); - QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end()); - QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end()); - } - - privateQLocalVtx.clear(); - privateQGhostVtx.clear(); - privateQMsgType.clear(); - privateQOwner.clear(); - + { + + QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end()); + QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end()); + QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end()); + QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end()); + } + + privateQLocalVtx.clear(); + privateQGhostVtx.clear(); + privateQMsgType.clear(); + privateQOwner.clear(); + } // End of while ( !U.empty() ) - + #ifdef COUNT_LOCAL_VERTEX printf("Count local vertexes: %ld for thread %d of processor %d\n", localVertices, omp_get_thread_num(), myRank); - + #endif } // End of parallel region } diff --git a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp index 3322a05b..469d7a16 100644 --- a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp +++ b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp @@ -1,39 +1,39 @@ #include "MatchBoxPC.h" - +//#define DEBUG_HANG_ void processMatchedVerticesAndSendMessages( - MilanLongInt NLVer, - vector &UChunkBeingProcessed, - vector &U, - vector &privateU, - MilanLongInt StartIndex, - MilanLongInt EndIndex, - MilanLongInt *myCard, - MilanLongInt *msgInd, - MilanLongInt *NumMessagesBundled, - MilanLongInt *SPtr, - MilanLongInt *verLocPtr, - MilanLongInt *verLocInd, - MilanLongInt *verDistance, - MilanLongInt *PCounter, - vector &Counter, - MilanInt myRank, - MilanInt numProcs, - MilanLongInt *candidateMate, - vector &GMate, - MilanLongInt *Mate, - map &Ghost2LocalMap, - MilanReal *edgeLocWeight, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - vector &privateQLocalVtx, - vector &privateQGhostVtx, - vector &privateQMsgType, - vector &privateQOwner, - MPI_Comm comm, - MilanLongInt *msgActual, - vector &Message) + MilanLongInt NLVer, + vector &UChunkBeingProcessed, + vector &U, + vector &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCard, + MilanLongInt *msgInd, + MilanLongInt *NumMessagesBundled, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanReal *edgeLocWeight, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner, + MPI_Comm comm, + MilanLongInt *msgActual, + vector &Message) { MilanLongInt initialSize = QLocalVtx.size(); @@ -50,266 +50,259 @@ void processMatchedVerticesAndSendMessages( #ifdef COUNT_LOCAL_VERTEX MilanLongInt localVertices = 0; #endif -#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ - firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \ - num_threads(NUM_THREAD) \ - reduction(+ \ - : msgInd[:1], PCounter \ - [:numProcs], myCard \ - [:1], NumMessagesBundled \ - [:1], msgActual \ + //#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ + firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx,\ + privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \ + num_threads(NUM_THREAD) \ + reduction(+ \ + : msgInd[:1], PCounter \ + [:numProcs], myCard \ + [:1], NumMessagesBundled \ + [:1], msgActual \ [:1]) { - while (!U.empty()) - { - - extractUChunk(UChunkBeingProcessed, U, privateU); - - for (MilanLongInt u : UChunkBeingProcessed) - { + while (!U.empty()) { + + extractUChunk(UChunkBeingProcessed, U, privateU); + + for (MilanLongInt u : UChunkBeingProcessed) { #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")u: " << u; - fflush(stdout); + cout << "\n(" << myRank << ")u: " << u; + fflush(stdout); #endif - if ((u >= StartIndex) && (u <= EndIndex)) - { // Process Only the Local Vertices - + if ((u >= StartIndex) && (u <= EndIndex)) { // Process Only the Local Vertices + #ifdef COUNT_LOCAL_VERTEX - localVertices++; + localVertices++; #endif - - // Get the Adjacency list for u - adj1 = verLocPtr[u - StartIndex]; // Pointer - adj2 = verLocPtr[u - StartIndex + 1]; - for (k = adj1; k < adj2; k++) - { - option = -1; - v = verLocInd[k]; - - if ((v >= StartIndex) && (v <= EndIndex)) - { // If Local Vertex: - + + // Get the Adjacency list for u + adj1 = verLocPtr[u - StartIndex]; // Pointer + adj2 = verLocPtr[u - StartIndex + 1]; + for (k = adj1; k < adj2; k++) { + option = -1; + v = verLocInd[k]; + + if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex: + #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; - fflush(stdout); + cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; + fflush(stdout); #endif #pragma omp atomic read - mateVal = Mate[v - StartIndex]; - // If the current vertex is pointing to a matched vertex and is not matched - if (mateVal < 0) - { + mateVal = Mate[v - StartIndex]; + // If the current vertex is pointing to a matched vertex and is not matched + if (mateVal < 0) { #pragma omp critical - { - if (candidateMate[v - StartIndex] == u) - { - // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - w = computeCandidateMate(verLocPtr[v - StartIndex], - verLocPtr[v - StartIndex + 1], - edgeLocWeight, 0, - verLocInd, - StartIndex, - EndIndex, - GMate, - Mate, - Ghost2LocalMap); - - candidateMate[v - StartIndex] = w; - + { + if (candidateMate[v - StartIndex] == u) { + // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + w = computeCandidateMate(verLocPtr[v - StartIndex], + verLocPtr[v - StartIndex + 1], + edgeLocWeight, 0, + verLocInd, + StartIndex, + EndIndex, + GMate, + Mate, + Ghost2LocalMap); + + candidateMate[v - StartIndex] = w; + #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")" << v << " Points to: " << w; - fflush(stdout); + cout << "\n(" << myRank << ")" << v << " Points to: " << w; + fflush(stdout); #endif - // If found a dominating edge: - if (w >= 0) - { - - if ((w < StartIndex) || (w > EndIndex)) - { // A ghost + // If found a dominating edge: + if (w >= 0) { + + if ((w < StartIndex) || (w > EndIndex)) { // A ghost #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a request message:"; - cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + cout << "\n(" << myRank << ")Sending a request message:"; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); #endif - option = 2; - - if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) - { - option = 1; - Mate[v - StartIndex] = w; // v is a local vertex - GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex - - } // End of if CandidateMate[w] = v - } // End of if a Ghost Vertex - else - { // w is a local vertex - if (candidateMate[w - StartIndex] == v) - { - option = 3; - Mate[v - StartIndex] = w; // v is a local vertex - Mate[w - StartIndex] = v; // w is a local vertex - + option = 2; + + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) { + option = 1; + Mate[v - StartIndex] = w; // v is a local vertex + GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex + + } // End of if CandidateMate[w] = v + } // End of if a Ghost Vertex + else { // w is a local vertex + if (candidateMate[w - StartIndex] == v) { + option = 3; + Mate[v - StartIndex] = w; // v is a local vertex + Mate[w - StartIndex] = v; // w is a local vertex + #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; - fflush(stdout); + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; + fflush(stdout); #endif - } // End of if(CandidateMate(w) = v - } // End of Else - } // End of if(w >=0) - else - option = 4; // End of Else: w == -1 - // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - } // End of If (candidateMate[v-StartIndex] == u - } // End of task - } // mateval < 0 - } // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: - else - { // Neighbor is a ghost vertex - + } // End of if(CandidateMate(w) = v + } // End of Else + } // End of if(w >=0) + else + option = 4; // End of Else: w == -1 + // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } // End of If (candidateMate[v-StartIndex] == u + } // End of task + } // mateval < 0 + } // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: + else { // Neighbor is a ghost vertex + #pragma omp critical - { - if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) - candidateMate[NLVer + Ghost2LocalMap[v]] = -1; - if (v != Mate[u - StartIndex]) - option = 5; // u is local - } // End of critical - } // End of Else //A Ghost Vertex - - switch (option) - { - case -1: - // No things to do - break; - case 1: - // Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v - privateU.push_back(v); - privateU.push_back(w); - (*myCard)++; + { + if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) + candidateMate[NLVer + Ghost2LocalMap[v]] = -1; + if (v != Mate[u - StartIndex]) + option = 5; // u is local + } // End of critical + } // End of Else //A Ghost Vertex + + switch (option) + { + case -1: + // No things to do + break; + case 1: + // Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v + privateU.push_back(v); + privateU.push_back(w); + (*myCard)++; #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; - fflush(stdout); + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; + fflush(stdout); #endif - // Decrement the counter: - PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr); - case 2: - - // Found a dominating edge, it is a ghost - ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - - // Build the Message Packet: - // Message[0] = v; // LOCAL - // Message[1] = w; // GHOST - // Message[2] = REQUEST; // TYPE - // Send a Request (Asynchronous) - // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); - - (*msgActual)++; - (*msgInd)++; - - privateQLocalVtx.push_back(v); - privateQGhostVtx.push_back(w); - privateQMsgType.push_back(REQUEST); - privateQOwner.push_back(ghostOwner); - break; - case 3: - privateU.push_back(v); - privateU.push_back(w); - (*myCard)++; - break; - case 4: - // Could not find a dominating vertex - adj11 = verLocPtr[v - StartIndex]; - adj12 = verLocPtr[v - StartIndex + 1]; - for (k1 = adj11; k1 < adj12; k1++) - { - w = verLocInd[k1]; - if ((w < StartIndex) || (w > EndIndex)) - { // A ghost - + // Decrement the counter: + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr); + case 2: + + // Found a dominating edge, it is a ghost + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + + // Build the Message Packet: + // Message[0] = v; // LOCAL + // Message[1] = w; // GHOST + // Message[2] = REQUEST; // TYPE + // Send a Request (Asynchronous) + // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); + + (*msgActual)++; + (*msgInd)++; + + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(REQUEST); + privateQOwner.push_back(ghostOwner); + break; + case 3: + privateU.push_back(v); + privateU.push_back(w); + (*myCard)++; + break; + case 4: + // Could not find a dominating vertex + adj11 = verLocPtr[v - StartIndex]; + adj12 = verLocPtr[v - StartIndex + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) { // A ghost + #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a failure message: "; - cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); - fflush(stdout); + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + fflush(stdout); #endif - - ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - - // Build the Message Packet: - // Message[0] = v; // LOCAL - // Message[1] = w; // GHOST - // Message[2] = FAILURE; // TYPE - // Send a Request (Asynchronous) - // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); - - (*msgActual)++; - (*msgInd)++; - - privateQLocalVtx.push_back(v); - privateQGhostVtx.push_back(w); - privateQMsgType.push_back(FAILURE); - privateQOwner.push_back(ghostOwner); - - } // End of if(GHOST) - } // End of for loop - break; - case 5: - default: - + + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + + // Build the Message Packet: + // Message[0] = v; // LOCAL + // Message[1] = w; // GHOST + // Message[2] = FAILURE; // TYPE + // Send a Request (Asynchronous) + // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); + + (*msgActual)++; + (*msgInd)++; + + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(FAILURE); + privateQOwner.push_back(ghostOwner); + + } // End of if(GHOST) + } // End of for loop + break; + case 5: + default: + #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a success message: "; - cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n"; - fflush(stdout); + cout << "\n(" << myRank << ")Sending a success message: "; + cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n"; + fflush(stdout); #endif - - ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); - - // Build the Message Packet: - // Message[0] = u; // LOCAL - // Message[1] = v; // GHOST - // Message[2] = SUCCESS; // TYPE - // Send a Request (Asynchronous) - // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); - - (*msgActual)++; - (*msgInd)++; - - privateQLocalVtx.push_back(u); - privateQGhostVtx.push_back(v); - privateQMsgType.push_back(SUCCESS); - privateQOwner.push_back(ghostOwner); - - break; - } // End of switch - } // End of inner for - } - } // End of outer for - - queuesTransfer(U, privateU, QLocalVtx, - QGhostVtx, - QMsgType, QOwner, privateQLocalVtx, - privateQGhostVtx, - privateQMsgType, - privateQOwner); - + + ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); + + // Build the Message Packet: + // Message[0] = u; // LOCAL + // Message[1] = v; // GHOST + // Message[2] = SUCCESS; // TYPE + // Send a Request (Asynchronous) + // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); + + (*msgActual)++; + (*msgInd)++; + + privateQLocalVtx.push_back(u); + privateQGhostVtx.push_back(v); + privateQMsgType.push_back(SUCCESS); + privateQOwner.push_back(ghostOwner); + + break; + } // End of switch + } // End of inner for + } + } // End of outer for + + queuesTransfer(U, privateU, QLocalVtx, + QGhostVtx, + QMsgType, QOwner, privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner); + } // End of while ( !U.empty() ) - + #ifdef COUNT_LOCAL_VERTEX printf("Count local vertexes: %ld for thread %d of processor %d\n", localVertices, omp_get_thread_num(), myRank); - + #endif } // End of parallel region - + // Send the messages - for (int i = initialSize; i < QOwner.size(); i++) - { - - Message[0] = QLocalVtx[i]; - Message[1] = QGhostVtx[i]; - Message[2] = QMsgType[i]; - ghostOwner = QOwner[i]; - - MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); +#ifdef DEBUG_HANG_ + cout << myRank<<" Sending: "<(), ghostOwner, ComputeTag, comm); + //cout << myRank<<" Sending to "<(), ghostOwner, ComputeTag, comm); } +#ifdef DEBUG_HANG_ + cout << myRank<<" Done sending messages"<(), MPI_ANY_SOURCE, ComputeTag, comm, &computeStatus); if (error_codeC != MPI_SUCCESS) { @@ -86,70 +88,66 @@ void processMessages( fflush(stdout); } Sender = computeStatus.MPI_SOURCE; - + //cout << " ...from "<(), Sender, BundleTag, comm, &computeStatus); - if (error_codeC != MPI_SUCCESS) - { - MPI_Error_string(error_codeC, error_message, &message_length); - cout << "\n*Error in call to MPI_Receive on processor " << myRank << " Error: " << error_message << "\n"; - fflush(stdout); - } + // Receive the message + //cout << myRank<<" Receiving from "<(), Sender, BundleTag, comm, &computeStatus); + if (error_codeC != MPI_SUCCESS) { + MPI_Error_string(error_codeC, error_message, &message_length); + cout << "\n*Error in call to MPI_Receive on processor " << myRank << " Error: " << error_message << "\n"; + fflush(stdout); + } #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Message Bundle After: " << endl; - for (int i = 0; i < bundleSize; i++) - cout << ReceiveBuffer[i] << ","; - cout << endl; - fflush(stdout); + cout << "\n(" << myRank << ")Message Bundle After: " << endl; + for (int i = 0; i < bundleSize; i++) + cout << ReceiveBuffer[i] << ","; + cout << endl; + fflush(stdout); #endif - } - else - { // Just a single message: + } else { // Just a single message: #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Received regular message from Process " << Sender << " u= " << Message[0] << " v= " << Message[1] << endl; - fflush(stdout); + cout << "\n(" << myRank << ")Received regular message from Process " << Sender << " u= " << Message[0] << " v= " << Message[1] << endl; + fflush(stdout); #endif - // Add the current message to Queue: - bundleSize = 3; //#of integers in the message - // Build the Message Buffer: - if (!ReceiveBuffer.empty()) - ReceiveBuffer.clear(); // Empty it out first - ReceiveBuffer.resize(bundleSize, -1); // Initialize - - ReceiveBuffer[0] = Message[0]; // u - ReceiveBuffer[1] = Message[1]; // v - ReceiveBuffer[2] = Message[2]; // message_type + // Add the current message to Queue: + bundleSize = 3; //#of integers in the message + // Build the Message Buffer: + if (!ReceiveBuffer.empty()) + ReceiveBuffer.clear(); // Empty it out first + ReceiveBuffer.resize(bundleSize, -1); // Initialize + + ReceiveBuffer[0] = Message[0]; // u + ReceiveBuffer[1] = Message[1]; // v + ReceiveBuffer[2] = Message[2]; // message_type } - + #ifdef DEBUG_GHOST_ - if ((v < StartIndex) || (v > EndIndex)) - { - cout << "\n(" << myRank << ") From ReceiveBuffer: This should not happen: u= " << u << " v= " << v << " Type= " << message_type << " StartIndex " << StartIndex << " EndIndex " << EndIndex << endl; - fflush(stdout); + if ((v < StartIndex) || (v > EndIndex)) { + cout << "\n(" << myRank << ") From ReceiveBuffer: This should not happen: u= " << u << " v= " << v << " Type= " << message_type << " StartIndex " << StartIndex << " EndIndex " << EndIndex << endl; + fflush(stdout); } #endif #ifdef PRINT_DEBUG_INFO_ @@ -158,172 +156,160 @@ void processMessages( #endif // Most of the time bundleSize == 3, thus, it's not worth parallelizing thi loop - for (MilanLongInt bundleCounter = 3; bundleCounter < bundleSize + 3; bundleCounter += 3) - { - u = ReceiveBuffer[bundleCounter - 3]; // GHOST - v = ReceiveBuffer[bundleCounter - 2]; // LOCAL - message_type = ReceiveBuffer[bundleCounter - 1]; // TYPE - - // CASE I: REQUEST - if (message_type == REQUEST) - { + for (MilanLongInt bundleCounter = 3; bundleCounter < bundleSize + 3; bundleCounter += 3) { + u = ReceiveBuffer[bundleCounter - 3]; // GHOST + v = ReceiveBuffer[bundleCounter - 2]; // LOCAL + message_type = ReceiveBuffer[bundleCounter - 1]; // TYPE + + // CASE I: REQUEST + if (message_type == REQUEST) { #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Message type is REQUEST" << endl; - fflush(stdout); + cout << "\n(" << myRank << ")Message type is REQUEST" << endl; + fflush(stdout); #endif #ifdef DEBUG_GHOST_ - if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) - { - cout << "\n(" << myRank << ") case 1 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl; - fflush(stdout); - } + if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) { + cout << "\n(" << myRank << ") case 1 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl; + fflush(stdout); + } #endif - if (Mate[v - StartIndex] == -1) - { // Process only if not already matched (v is local) - candidateMate[NLVer + Ghost2LocalMap[u]] = v; // Set CandidateMate for the ghost - if (candidateMate[v - StartIndex] == u) - { - GMate[Ghost2LocalMap[u]] = v; // u is ghost - Mate[v - StartIndex] = u; // v is local - U.push_back(v); - U.push_back(u); - (*myCard)++; + if (Mate[v - StartIndex] == -1) { + // Process only if not already matched (v is local) + candidateMate[NLVer + Ghost2LocalMap[u]] = v; // Set CandidateMate for the ghost + if (candidateMate[v - StartIndex] == u) { + GMate[Ghost2LocalMap[u]] = v; // u is ghost + Mate[v - StartIndex] = u; // v is local + U.push_back(v); + U.push_back(u); + (*myCard)++; #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")MATCH: (" << v << "," << u << ") " << endl; - fflush(stdout); + cout << "\n(" << myRank << ")MATCH: (" << v << "," << u << ") " << endl; + fflush(stdout); #endif - - PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); - } // End of if ( candidateMate[v-StartIndex] == u )e - } // End of if ( Mate[v] == -1 ) - } // End of REQUEST - else - { // CASE II: SUCCESS - if (message_type == SUCCESS) - { + + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); + } // End of if ( candidateMate[v-StartIndex] == u )e + } // End of if ( Mate[v] == -1 ) + } // End of REQUEST + else { // CASE II: SUCCESS + if (message_type == SUCCESS) { #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Message type is SUCCESS" << endl; - fflush(stdout); + cout << "\n(" << myRank << ")Message type is SUCCESS" << endl; + fflush(stdout); #endif - GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process it again - PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); + GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process it again + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); #ifdef DEBUG_GHOST_ - if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) - { - cout << "\n(" << myRank << ") case 2 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl; - fflush(stdout); - } + if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) { + cout << "\n(" << myRank << ") case 2 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl; + fflush(stdout); + } #endif - if (Mate[v - StartIndex] == -1) - { // Process only if not already matched ( v is local) - if (candidateMate[v - StartIndex] == u) - { - // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - w = computeCandidateMate(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], edgeLocWeight, k, verLocInd, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap); - candidateMate[v - StartIndex] = w; + if (Mate[v - StartIndex] == -1) { + // Process only if not already matched ( v is local) + if (candidateMate[v - StartIndex] == u) { + // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + w = computeCandidateMate(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], edgeLocWeight, k, + verLocInd, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap); + candidateMate[v - StartIndex] = w; #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")" << v << " Points to: " << w << endl; - fflush(stdout); + cout << "\n(" << myRank << ")" << v << " Points to: " << w << endl; + fflush(stdout); #endif - // If found a dominating edge: - if (w >= 0) - { - if ((w < StartIndex) || (w > EndIndex)) - { // w is a ghost - // Build the Message Packet: - Message[0] = v; // LOCAL - Message[1] = w; // GHOST - Message[2] = REQUEST; // TYPE - // Send a Request (Asynchronous) + // If found a dominating edge: + if (w >= 0) { + if ((w < StartIndex) || (w > EndIndex)) { + // w is a ghost + // Build the Message Packet: + Message[0] = v; // LOCAL + Message[1] = w; // GHOST + Message[2] = REQUEST; // TYPE + // Send a Request (Asynchronous) #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a request message: "; - cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; - fflush(stdout); + cout << "\n(" << myRank << ")Sending a request message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; + fflush(stdout); #endif - ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - //assert(ghostOwner != -1); - //assert(ghostOwner != myRank); - - MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); - (*msgInd)++; - (*msgActual)++; - if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) - { - Mate[v - StartIndex] = w; // v is local - GMate[Ghost2LocalMap[w]] = v; // w is ghost - U.push_back(v); - U.push_back(w); - (*myCard)++; + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + //assert(ghostOwner != -1); + //assert(ghostOwner != myRank); + //cout << myRank<<" Sending to "<(), ghostOwner, ComputeTag, comm); + (*msgInd)++; + (*msgActual)++; + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) { + Mate[v - StartIndex] = w; // v is local + GMate[Ghost2LocalMap[w]] = v; // w is ghost + U.push_back(v); + U.push_back(w); + (*myCard)++; #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl; - fflush(stdout); + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl; + fflush(stdout); #endif - - PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S); - } // End of if CandidateMate[w] = v - } // End of if a Ghost Vertex - else - { // w is a local vertex - if (candidateMate[w - StartIndex] == v) - { - Mate[v - StartIndex] = w; // v is local - Mate[w - StartIndex] = v; // w is local - // Q.push_back(u); - U.push_back(v); - U.push_back(w); - (*myCard)++; + + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S); + } // End of if CandidateMate[w] = v + } // End of if a Ghost Vertex + else { // w is a local vertex + if (candidateMate[w - StartIndex] == v) { + Mate[v - StartIndex] = w; // v is local + Mate[w - StartIndex] = v; // w is local + // Q.push_back(u); + U.push_back(v); + U.push_back(w); + (*myCard)++; #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl; - fflush(stdout); + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl; + fflush(stdout); #endif - } // End of if(CandidateMate(w) = v - } // End of Else - } // End of if(w >=0) - else - { // No dominant edge found - adj11 = verLocPtr[v - StartIndex]; - adj12 = verLocPtr[v - StartIndex + 1]; - for (k1 = adj11; k1 < adj12; k1++) - { - w = verLocInd[k1]; - if ((w < StartIndex) || (w > EndIndex)) - { // A ghost - // Build the Message Packet: - Message[0] = v; // LOCAL - Message[1] = w; // GHOST - Message[2] = FAILURE; // TYPE - // Send a Request (Asynchronous) + } // End of if(CandidateMate(w) = v + } // End of Else + } // End of if(w >=0) + else { // No dominant edge found + adj11 = verLocPtr[v - StartIndex]; + adj12 = verLocPtr[v - StartIndex + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) { + // A ghost + // Build the Message Packet: + Message[0] = v; // LOCAL + Message[1] = w; // GHOST + Message[2] = FAILURE; // TYPE + // Send a Request (Asynchronous) #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a failure message: "; - cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; - fflush(stdout); + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; + fflush(stdout); #endif - ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - //assert(ghostOwner != -1); - //assert(ghostOwner != myRank); - MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); - (*msgInd)++; - (*msgActual)++; - } // End of if(GHOST) - } // End of for loop - } // End of Else: w == -1 + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + //assert(ghostOwner != -1); + //assert(ghostOwner != myRank); + //cout << myRank<<" Sending to "<(), ghostOwner, ComputeTag, comm); + (*msgInd)++; + (*msgActual)++; + } // End of if(GHOST) + } // End of for loop + } // End of Else: w == -1 // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - } // End of if ( candidateMate[v-StartIndex] == u ) - } // End of if ( Mate[v] == -1 ) - } // End of if ( message_type == SUCCESS ) - else - { // CASE III: FAILURE + } // End of if ( candidateMate[v-StartIndex] == u ) + } // End of if ( Mate[v] == -1 ) + } // End of if ( message_type == SUCCESS ) + else { + // CASE III: FAILURE #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Message type is FAILURE" << endl; - fflush(stdout); + cout << "\n(" << myRank << ")Message type is FAILURE" << endl; + fflush(stdout); #endif - GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process this anymore - PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); // Decrease the counter - } // End of else: CASE III - } // End of else: CASE I + GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process this anymore + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); // Decrease the counter + } // End of else: CASE III + } // End of else: CASE I } - + return; -} \ No newline at end of file +} diff --git a/amgprec/impl/aggregator/queueTransfer.cpp b/amgprec/impl/aggregator/queueTransfer.cpp index 7200b43d..33c65749 100644 --- a/amgprec/impl/aggregator/queueTransfer.cpp +++ b/amgprec/impl/aggregator/queueTransfer.cpp @@ -32,4 +32,5 @@ void queuesTransfer(vector &U, privateQGhostVtx.clear(); privateQMsgType.clear(); privateQOwner.clear(); -} \ No newline at end of file + +} diff --git a/amgprec/impl/aggregator/sendBundledMessages.cpp b/amgprec/impl/aggregator/sendBundledMessages.cpp index f7fd2f78..80a88b94 100644 --- a/amgprec/impl/aggregator/sendBundledMessages.cpp +++ b/amgprec/impl/aggregator/sendBundledMessages.cpp @@ -38,108 +38,107 @@ void sendBundledMessages(MilanLongInt *numGhostEdges, #pragma omp task depend(inout \ : PCumulative, PMessageBundle, PSizeInfoMessages) depend(in \ : NumMessagesBundled, numProcs) - {try { + { + try { PMessageBundle.reserve(NumMessagesBundled * 3); // Three integers per message - PCumulative.reserve(numProcs + 1); // Similar to Row Pointer vector in CSR data structure - PSizeInfoMessages.reserve(numProcs * 3); // Buffer to hold the Size info message packets -} -catch (length_error) -{ - cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; - cout << "Not enough memory to allocate the internal variables \n"; - exit(1); -} -PMessageBundle.resize(NumMessagesBundled * 3, -1); // Initialize -PCumulative.resize(numProcs + 1, 0); // Only initialize the counter variable -PSizeInfoMessages.resize(numProcs * 3, 0); -} + PCumulative.reserve(numProcs + 1); // Similar to Row Pointer vector in CSR data structure + PSizeInfoMessages.reserve(numProcs * 3); // Buffer to hold the Size info message packets + } + catch (length_error) + { + cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; + cout << "Not enough memory to allocate the internal variables \n"; + exit(1); + } + PMessageBundle.resize(NumMessagesBundled * 3, -1); // Initialize + PCumulative.resize(numProcs + 1, 0); // Only initialize the counter variable + PSizeInfoMessages.resize(numProcs * 3, 0); + } #pragma omp task depend(inout \ - : PCumulative) depend(in \ + : PCumulative) depend(in \ : PCounter) -{ - for (i = 0; i < numProcs; i++) - PCumulative[i + 1] = PCumulative[i] + PCounter[i]; -} - + { + for (i = 0; i < numProcs; i++) + PCumulative[i + 1] = PCumulative[i] + PCounter[i]; + } + #pragma omp task depend(inout \ : PCounter) -{ - // Reuse PCounter to keep track of how many messages were inserted: - for (MilanInt i = 0; i < numProcs; i++) // Changed by Fabio to be an integer, addresses needs to be integers! - PCounter[i] = 0; -} + { + // Reuse PCounter to keep track of how many messages were inserted: + for (MilanInt i = 0; i < numProcs; i++) // Changed by Fabio to be an integer, addresses needs to be integers! + PCounter[i] = 0; + } // Build the Message Bundle packet: #pragma omp task depend(in \ : PCounter, QLocalVtx, QGhostVtx, QMsgType, QOwner, PMessageBundle, PCumulative) depend(out \ : myIndex, PMessageBundle, PCounter) { - for (i = 0; i < NumMessagesBundled; i++) - { - myIndex = (PCumulative[QOwner[i]] + PCounter[QOwner[i]]) * 3; - PMessageBundle[myIndex + 0] = QLocalVtx[i]; - PMessageBundle[myIndex + 1] = QGhostVtx[i]; - PMessageBundle[myIndex + 2] = QMsgType[i]; - PCounter[QOwner[i]]++; - } -} - + for (i = 0; i < NumMessagesBundled; i++) { + myIndex = (PCumulative[QOwner[i]] + PCounter[QOwner[i]]) * 3; + PMessageBundle[myIndex + 0] = QLocalVtx[i]; + PMessageBundle[myIndex + 1] = QGhostVtx[i]; + PMessageBundle[myIndex + 2] = QMsgType[i]; + PCounter[QOwner[i]]++; + } + } + // Send the Bundled Messages: Use ISend #pragma omp task depend(out \ : SRequest, SStatus) -{ - try - { - SRequest.reserve(numProcs * 2); // At most two messages per processor - SStatus.reserve(numProcs * 2); // At most two messages per processor - } - catch (length_error) - { - cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearchImmediateSend: \n"; - cout << "Not enough memory to allocate the internal variables \n"; - exit(1); - } -} - + { + try + { + SRequest.reserve(numProcs * 2); // At most two messages per processor + SStatus.reserve(numProcs * 2); // At most two messages per processor + } + catch (length_error) + { + cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearchImmediateSend: \n"; + cout << "Not enough memory to allocate the internal variables \n"; + exit(1); + } + } + // Send the Messages #pragma omp task depend(inout \ : SRequest, PSizeInfoMessages, PCumulative) depend(out \ : *msgActual, *msgInd) { - for (i = 0; i < numProcs; i++) - { // Changed by Fabio to be an integer, addresses needs to be integers! - if (i == myRank) // Do not send anything to yourself - continue; - // Send the Message with information about the size of next message: - // Build the Message Packet: - PSizeInfoMessages[i * 3 + 0] = (PCumulative[i + 1] - PCumulative[i]) * 3; // # of integers in the next message - PSizeInfoMessages[i * 3 + 1] = -1; // Dummy packet - PSizeInfoMessages[i * 3 + 2] = SIZEINFO; // TYPE - // Send a Request (Asynchronous) + for (i = 0; i < numProcs; i++) { // Changed by Fabio to be an integer, addresses needs to be integers! + if (i == myRank) // Do not send anything to yourself + continue; + // Send the Message with information about the size of next message: + // Build the Message Packet: + PSizeInfoMessages[i * 3 + 0] = (PCumulative[i + 1] - PCumulative[i]) * 3; // # of integers in the next message + PSizeInfoMessages[i * 3 + 1] = -1; // Dummy packet + PSizeInfoMessages[i * 3 + 2] = SIZEINFO; // TYPE + // Send a Request (Asynchronous) #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending bundled message to process " << i << " size: " << PSizeInfoMessages[i * 3 + 0] << endl; - fflush(stdout); + cout << "\n(" << myRank << ")Sending bundled message to process " << i << " size: " << PSizeInfoMessages[i * 3 + 0] << endl; + fflush(stdout); #endif - if (PSizeInfoMessages[i * 3 + 0] > 0) - { // Send only if it is a nonempty packet - MPI_Isend(&PSizeInfoMessages[i * 3 + 0], 3, TypeMap(), i, ComputeTag, comm, - &SRequest[(*msgInd)]); - (*msgActual)++; - (*msgInd)++; - // Now Send the message with the data packet: + if (PSizeInfoMessages[i * 3 + 0] > 0) + { // Send only if it is a nonempty packet + MPI_Isend(&PSizeInfoMessages[i * 3 + 0], 3, TypeMap(), i, ComputeTag, comm, + &SRequest[(*msgInd)]); + (*msgActual)++; + (*msgInd)++; + // Now Send the message with the data packet: #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")SendiFFng Bundle to : " << i << endl; - for (k = (PCumulative[i] * 3); k < (PCumulative[i] * 3 + PSizeInfoMessages[i * 3 + 0]); k++) - cout << PMessageBundle[k] << ","; - cout << endl; - fflush(stdout); + cout << "\n(" << myRank << ")SendiFFng Bundle to : " << i << endl; + for (k = (PCumulative[i] * 3); k < (PCumulative[i] * 3 + PSizeInfoMessages[i * 3 + 0]); k++) + cout << PMessageBundle[k] << ","; + cout << endl; + fflush(stdout); #endif - MPI_Isend(&PMessageBundle[PCumulative[i] * 3], PSizeInfoMessages[i * 3 + 0], - TypeMap(), i, BundleTag, comm, &SRequest[(*msgInd)]); - (*msgInd)++; - } // End of if size > 0 - } + MPI_Isend(&PMessageBundle[PCumulative[i] * 3], PSizeInfoMessages[i * 3 + 0], + TypeMap(), i, BundleTag, comm, &SRequest[(*msgInd)]); + (*msgInd)++; + } // End of if size > 0 + } } #pragma omp task depend(inout \ @@ -147,64 +146,64 @@ PSizeInfoMessages.resize(numProcs * 3, 0); { // Free up temporary memory: - PCumulative.clear(); - QLocalVtx.clear(); - QGhostVtx.clear(); - QMsgType.clear(); - QOwner.clear(); + PCumulative.clear(); + QLocalVtx.clear(); + QGhostVtx.clear(); + QMsgType.clear(); + QOwner.clear(); } #pragma omp task depend(inout : OneMessageSize, *BufferSize) depend(out : numMessagesToSend) depend(in : *numGhostEdges) { #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Number of Ghost edges = " << *numGhostEdges; - cout << "\n(" << myRank << ")Total number of potential message X 2 = " << *numGhostEdges * 2; - cout << "\n(" << myRank << ")Number messages already sent in bundles = " << NumMessagesBundled; - if (*numGhostEdges > 0) + cout << "\n(" << myRank << ")Number of Ghost edges = " << *numGhostEdges; + cout << "\n(" << myRank << ")Total number of potential message X 2 = " << *numGhostEdges * 2; + cout << "\n(" << myRank << ")Number messages already sent in bundles = " << NumMessagesBundled; + if (*numGhostEdges > 0) { - cout << "\n(" << myRank << ")Percentage of total = " << ((double)NumMessagesBundled / (double)(*numGhostEdges * 2)) * 100.0 << "% \n"; + cout << "\n(" << myRank << ")Percentage of total = " << ((double)NumMessagesBundled / (double)(*numGhostEdges * 2)) * 100.0 << "% \n"; } - fflush(stdout); + fflush(stdout); #endif - // Allocate memory for MPI Send messages: - /* WILL COME BACK HERE - NO NEED TO STORE ALL THIS MEMORY !! */ - OneMessageSize = 0; - MPI_Pack_size(3, TypeMap(), comm, &OneMessageSize); // Size of one message packet - // How many messages to send? - // Potentially three kinds of messages will be sent/received: - // Request, Success, Failure. - // But only two will be sent from a given processor. - // Substract the number of messages that have already been sent as bundled messages: - numMessagesToSend = (*numGhostEdges) * 2 - NumMessagesBundled; - *BufferSize = (OneMessageSize + MPI_BSEND_OVERHEAD) * numMessagesToSend; + // Allocate memory for MPI Send messages: + /* WILL COME BACK HERE - NO NEED TO STORE ALL THIS MEMORY !! */ + OneMessageSize = 0; + MPI_Pack_size(3, TypeMap(), comm, &OneMessageSize); // Size of one message packet + // How many messages to send? + // Potentially three kinds of messages will be sent/received: + // Request, Success, Failure. + // But only two will be sent from a given processor. + // Substract the number of messages that have already been sent as bundled messages: + numMessagesToSend = (*numGhostEdges) * 2 - NumMessagesBundled; + *BufferSize = (OneMessageSize + MPI_BSEND_OVERHEAD) * numMessagesToSend; } #pragma omp task depend(out : Buffer) depend(in : *BufferSize) -{ - Buffer = 0; + { + Buffer = 0; #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Size of One Message from PACK= " << OneMessageSize; - cout << "\n(" << myRank << ")Size of Message overhead = " << MPI_BSEND_OVERHEAD; - cout << "\n(" << myRank << ")Number of Ghost edges = " << *numGhostEdges; - cout << "\n(" << myRank << ")Number of remaining message = " << numMessagesToSend; - cout << "\n(" << myRank << ")BufferSize = " << (*BufferSize); - cout << "\n(" << myRank << ")Attaching Buffer on.. "; - fflush(stdout); + cout << "\n(" << myRank << ")Size of One Message from PACK= " << OneMessageSize; + cout << "\n(" << myRank << ")Size of Message overhead = " << MPI_BSEND_OVERHEAD; + cout << "\n(" << myRank << ")Number of Ghost edges = " << *numGhostEdges; + cout << "\n(" << myRank << ")Number of remaining message = " << numMessagesToSend; + cout << "\n(" << myRank << ")BufferSize = " << (*BufferSize); + cout << "\n(" << myRank << ")Attaching Buffer on.. "; + fflush(stdout); #endif - if ((*BufferSize) > 0) - { - Buffer = (MilanLongInt *)malloc((*BufferSize)); // Allocate memory - if (Buffer == 0) - { - cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; - cout << "Not enough memory to allocate for send buffer on process " << myRank << "\n"; - exit(1); - } - MPI_Buffer_attach(Buffer, *BufferSize); // Attach the Buffer - } + if ((*BufferSize) > 0) + { + Buffer = (MilanLongInt *)malloc((*BufferSize)); // Allocate memory + if (Buffer == 0) + { + cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; + cout << "Not enough memory to allocate for send buffer on process " << myRank << "\n"; + exit(1); + } + MPI_Buffer_attach(Buffer, *BufferSize); // Attach the Buffer + } + } } } } -} \ No newline at end of file