From 9b065602a8573eaf0c0f8f105e5b44ccb4fcc203 Mon Sep 17 00:00:00 2001 From: StefanoPetrilli Date: Wed, 20 Jul 2022 16:24:37 -0500 Subject: [PATCH] Fixed race condition in processExposedVertices --- .../impl/aggregator/processExposedVertex.cpp | 200 ++++++++++-------- 1 file changed, 108 insertions(+), 92 deletions(-) diff --git a/amgprec/impl/aggregator/processExposedVertex.cpp b/amgprec/impl/aggregator/processExposedVertex.cpp index c28a7c66..c53f2f53 100644 --- a/amgprec/impl/aggregator/processExposedVertex.cpp +++ b/amgprec/impl/aggregator/processExposedVertex.cpp @@ -32,14 +32,14 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, { MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0; - MilanInt ghostOwner = 0; + MilanInt ghostOwner = 0, option; -#pragma omp parallel private(k, w, v, k1, adj11, adj12, ghostOwner) firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(NUM_THREAD) +#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(NUM_THREAD) { -#pragma omp for reduction(+ \ - : PCounter[:numProcs]) schedule(static) +#pragma omp for reduction(+ : PCounter[:numProcs]) schedule(static) for (v = 0; v < NLVer; v++) { + option = -1; // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) k = candidateMate[v]; candidateMate[v] = verLocInd[k]; @@ -58,29 +58,75 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, if (w >= 0) { - if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) - { - w = computeCandidateMate(verLocPtr[v], - verLocPtr[v + 1], - edgeLocWeight, 0, - verLocInd, - StartIndex, - EndIndex, - GMate, - Mate, - Ghost2LocalMap); - candidateMate[v] = w; - } - - if (w >= 0) +#pragma omp critical(processExposed) { + if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) + { + w = computeCandidateMate(verLocPtr[v], + verLocPtr[v + 1], + edgeLocWeight, 0, + verLocInd, + StartIndex, + EndIndex, + GMate, + Mate, + Ghost2LocalMap); + candidateMate[v] = w; + } + + if (w >= 0) + { #pragma omp atomic - (*myCard)++; + (*myCard)++; + if ((w < StartIndex) || (w > EndIndex)) + { // w is a ghost vertex + option = 2; + + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) + { + option = 1; + Mate[v] = w; + GMate[Ghost2LocalMap[w]] = v + StartIndex; // w is a Ghost + + } // End of if CandidateMate[w] = v + + } // End of if a Ghost Vertex + else + { // w is a local vertex + + if (candidateMate[w - StartIndex] == (v + StartIndex)) + { + option = 3; + Mate[v] = w; // v is local + Mate[w - StartIndex] = v + StartIndex; // w is local + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ") "; + fflush(stdout); +#endif + + } // End of if ( candidateMate[w-StartIndex] == (v+StartIndex) ) + } // End of Else + + } // End of second if + + } // End critical processExposed + + } // End of if(w >=0) + else + { + // This piece of code is executed a really small amount of times + adj11 = verLocPtr[v]; + adj12 = verLocPtr[v + 1]; + for (k1 = adj11; k1 < adj12; k1++) + { + w = verLocInd[k1]; if ((w < StartIndex) || (w > EndIndex)) - { // w is a ghost vertex + { // A ghost + #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a request message (291):"; - cout << "\n(" << myRank << ")Local is: " << v + StartIndex << " Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); fflush(stdout); #endif #pragma omp atomic @@ -94,86 +140,56 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, privateQLocalVtx.push_back(v + StartIndex); privateQGhostVtx.push_back(w); - privateQMsgType.push_back(REQUEST); + privateQMsgType.push_back(FAILURE); privateQOwner.push_back(ghostOwner); - if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) - { - - privateU.push_back(v + StartIndex); - privateU.push_back(w); - Mate[v] = w; - // FIXME could this instruction create errors? - GMate[Ghost2LocalMap[w]] = v + StartIndex; // w is a Ghost - -#ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ")"; - fflush(stdout); -#endif - - // TODO refactor this!! - // Decrement the counter: - PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S); - } // End of if CandidateMate[w] = v - - } // End of if a Ghost Vertex - else - { // w is a local vertex - - if (candidateMate[w - StartIndex] == (v + StartIndex)) - { - privateU.push_back(v + StartIndex); - privateU.push_back(w); + } // End of if(GHOST) + } // End of for loop + } + // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - Mate[v] = w; // v is local - // FIXME this instruction could create errors - Mate[w - StartIndex] = v + StartIndex; // w is local + switch (option) + { + case -1: + break; + case 1: + privateU.push_back(v + StartIndex); + privateU.push_back(w); #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ") "; - fflush(stdout); + cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ")"; + fflush(stdout); #endif - } // End of if ( candidateMate[w-StartIndex] == (v+StartIndex) ) - } // End of Else - - continue; - } // End of second if - - } // End of if(w >=0) - - // This piece of code is executed a really small amount of times, I will not allocate a - // huge amount of memory for the private data structures. - adj11 = verLocPtr[v]; - adj12 = verLocPtr[v + 1]; - for (k1 = adj11; k1 < adj12; k1++) - { - w = verLocInd[k1]; - if ((w < StartIndex) || (w > EndIndex)) - { // A ghost - + // Decrement the counter: + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S); + case 2: #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a failure message: "; - cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); - fflush(stdout); + cout << "\n(" << myRank << ")Sending a request message (291):"; + cout << "\n(" << myRank << ")Local is: " << v + StartIndex << " Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; + fflush(stdout); #endif #pragma omp atomic - (*msgInd)++; + (*msgInd)++; #pragma omp atomic - (*NumMessagesBundled)++; - ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - assert(ghostOwner != -1); - assert(ghostOwner != myRank); - PCounter[ghostOwner]++; - - privateQLocalVtx.push_back(v + StartIndex); - privateQGhostVtx.push_back(w); - privateQMsgType.push_back(FAILURE); - privateQOwner.push_back(ghostOwner); - - } // End of if(GHOST) - } // End of for loop - // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + (*NumMessagesBundled)++; + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + assert(ghostOwner != -1); + assert(ghostOwner != myRank); + PCounter[ghostOwner]++; + + privateQLocalVtx.push_back(v + StartIndex); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(REQUEST); + privateQOwner.push_back(ghostOwner); + break; + case 3: + default: + privateU.push_back(v + StartIndex); + privateU.push_back(w); + break; + } + } // End of for ( v=0; v < NLVer; v++ ) queuesTransfer(U, privateU, QLocalVtx,