diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h index d1e26fbc..58053c18 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.h +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -66,7 +66,7 @@ using namespace std; #define NUM_THREAD 4 -#define UCHUNK 100000 +#define UCHUNK 1000 const MilanLongInt REQUEST = 1; const MilanLongInt SUCCESS = 2; @@ -323,7 +323,8 @@ extern "C" staticQueue &privateQLocalVtx, staticQueue &privateQGhostVtx, staticQueue &privateQMsgType, - staticQueue &privateQOwner); + staticQueue &privateQOwner, + omp_lock_t *MateLock); void sendBundledMessages(MilanLongInt *numGhostEdgesPtr, MilanInt *BufferSizePtr, diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index d5ac4394..ffe27f68 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -281,6 +281,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( vector UChunkBeingProcessed; UChunkBeingProcessed.reserve(UCHUNK); +//#define PRINT_DEBUG_INFO_ processMatchedVertices(NLVer, UChunkBeingProcessed, U, @@ -310,7 +311,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( privateQLocalVtx, privateQGhostVtx, privateQMsgType, - privateQOwner); + privateQOwner, + MateLock); ///////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////// SEND BUNDLED MESSAGES ///////////////////////////////////// diff --git a/amgprec/impl/aggregator/processMatchedVertices.cpp b/amgprec/impl/aggregator/processMatchedVertices.cpp index d766bc42..d92f1a57 100644 --- a/amgprec/impl/aggregator/processMatchedVertices.cpp +++ b/amgprec/impl/aggregator/processMatchedVertices.cpp @@ -32,30 +32,24 @@ void processMatchedVertices( staticQueue &privateQLocalVtx, staticQueue &privateQGhostVtx, staticQueue &privateQMsgType, - staticQueue &privateQOwner) + staticQueue &privateQOwner, + omp_lock_t *MateLock) { MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner; MilanLongInt myCard = *myCardPtr, msgInd = *msgIndPtr, NumMessagesBundled = *NumMessagesBundledPtr, S = *SPtr, privateMyCard = 0; - // TODO check that the queues arrives empty - assert(privateQGhostVtx.empty()); - assert(privateQLocalVtx.empty()); - assert(privateQMsgType.empty()); - assert(privateQOwner.empty()); - -#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner) firstprivate(privateMyCard, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(NUM_THREAD) - { - #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << "=========================************===============================" << endl; - fflush(stdout); - fflush(stdout); + cout << "\n(" << myRank << "=========================************===============================" << endl; + fflush(stdout); + fflush(stdout); #endif #ifdef COUNT_LOCAL_VERTEX - MilanLongInt localVertices = 0; + MilanLongInt localVertices = 0; #endif +#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner) firstprivate(privateMyCard, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(NUM_THREAD) + { // TODO what would be the optimal UCHUNK // TODO refactor @@ -89,23 +83,29 @@ void processMatchedVertices( if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex: -#pragma omp critical(innerProcessMatched) - { #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; - fflush(stdout); + cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; + fflush(stdout); #endif - // If the current vertex is pointing to a matched vertex and is not matched - // FIXME is there a way to make candidateMate private? - // for the moment it could generate an error. - if (not isAlreadyMatched(v, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap) and - candidateMate[v - StartIndex] == u) + // If the current vertex is pointing to a matched vertex and is not matched + // FIXME is there a way to make candidateMate private? + // for the moment it could generate an error. + if (not isAlreadyMatched(v, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) + { + + bool seh = false; +#pragma omp critical(prova) { + seh = candidateMate[v - StartIndex] != u; + } + if (seh) + continue; +#pragma omp critical(prova) + { // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - // Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) w = computeCandidateMate(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], edgeLocWeight, 0, @@ -117,48 +117,44 @@ void processMatchedVertices( Ghost2LocalMap); candidateMate[v - StartIndex] = w; + } - // End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")" << v << " Points to: " << w; - fflush(stdout); + cout << "\n(" << myRank << ")" << v << " Points to: " << w; + fflush(stdout); #endif - // If found a dominating edge: - if (w >= 0) - { - - // TODO is it possible to lock without a critical region? - // TODO there must be a more elegant and efficient way to do this - /* - while(true) { - if (omp_test_lock(&MateLock[v - StartIndex])) { - if (omp_test_lock(&MateLock[w - StartIndex])) break; - else omp_unset_lock(&MateLock[v - StartIndex]); - } - } - */ + // If found a dominating edge: + if (w >= 0) + { - if ((w < StartIndex) || (w > EndIndex)) - { // A ghost + if ((w < StartIndex) || (w > EndIndex)) + { // A ghost #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a request message:"; - cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + cout << "\n(" << myRank << ")Sending a request message:"; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); #endif - msgInd++; - NumMessagesBundled++; - ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - assert(ghostOwner != -1); - assert(ghostOwner != myRank); - PCounter[ghostOwner]++; + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + assert(ghostOwner != -1); + assert(ghostOwner != myRank); +#pragma omp atomic + PCounter[ghostOwner]++; +#pragma omp atomic + msgInd++; +#pragma omp atomic + NumMessagesBundled++; - privateQLocalVtx.push_back(v); - privateQGhostVtx.push_back(w); - privateQMsgType.push_back(REQUEST); - privateQOwner.push_back(ghostOwner); + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(REQUEST); + privateQOwner.push_back(ghostOwner); +#pragma omp critical(prova) + { if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) { + while (!omp_test_lock(&MateLock[v - StartIndex])) + ; Mate[v - StartIndex] = w; // v is a local vertex GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex // Q.push_back(u); @@ -170,16 +166,23 @@ void processMatchedVertices( fflush(stdout); #endif - // TODO refactor this // Decrement the counter: PROCESS_CROSS_EDGE(Counter, Ghost2LocalMap[w], &S); - + omp_unset_lock(&MateLock[v - StartIndex]); } // End of if CandidateMate[w] = v - } // End of if a Ghost Vertex - else - { // w is a local vertex + } + } // End of if a Ghost Vertex + else + { // w is a local vertex +#pragma omp critical(prova) + { if (candidateMate[w - StartIndex] == v) { + while (!omp_test_lock(&MateLock[v - StartIndex])) + ; + while (!omp_test_lock(&MateLock[w - StartIndex])) + ; + Mate[v - StartIndex] = w; // v is a local vertex Mate[w - StartIndex] = v; // w is a local vertex // Q.push_back(u); @@ -190,121 +193,120 @@ void processMatchedVertices( cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; fflush(stdout); #endif + omp_unset_lock(&MateLock[v - StartIndex]); + omp_unset_lock(&MateLock[w - StartIndex]); } // End of if(CandidateMate(w) = v - } // End of Else - - // omp_unset_lock(&MateLock[v - StartIndex]); - // omp_unset_lock(&MateLock[w - StartIndex]); + } + } // End of Else - } // End of if(w >=0) - else + } // End of if(w >=0) + else + { + adj11 = verLocPtr[v - StartIndex]; + adj12 = verLocPtr[v - StartIndex + 1]; + for (k1 = adj11; k1 < adj12; k1++) { - adj11 = verLocPtr[v - StartIndex]; - adj12 = verLocPtr[v - StartIndex + 1]; - for (k1 = adj11; k1 < adj12; k1++) - { - w = verLocInd[k1]; - if ((w < StartIndex) || (w > EndIndex)) - { // A ghost + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) + { // A ghost #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a failure message: "; - cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); - fflush(stdout); + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + fflush(stdout); #endif - msgInd++; - NumMessagesBundled++; - ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - assert(ghostOwner != -1); - assert(ghostOwner != myRank); - PCounter[ghostOwner]++; - privateQLocalVtx.push_back(v); - privateQGhostVtx.push_back(w); - privateQMsgType.push_back(FAILURE); - privateQOwner.push_back(ghostOwner); + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + assert(ghostOwner != -1); + assert(ghostOwner != myRank); +#pragma omp atomic + PCounter[ghostOwner]++; +#pragma omp atomic + msgInd++; +#pragma omp atomic + NumMessagesBundled++; - } // End of if(GHOST) - } // End of for loop - } // End of Else: w == -1 - // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(FAILURE); + privateQOwner.push_back(ghostOwner); - } // End of If (candidateMate[v-StartIndex] == u + } // End of if(GHOST) + } // End of for loop + } // End of Else: w == -1 + // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - } // End of critical region if + } // End of If (candidateMate[v-StartIndex] == u } // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: else { // Neighbor is a ghost vertex -#pragma omp critical(innerProcessMatched) + while (!omp_test_lock(&MateLock[u - StartIndex])) + ; +#pragma omp critical(prova) { - - // while(!omp_test_lock(&MateLock[u - StartIndex])); - if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) candidateMate[NLVer + Ghost2LocalMap[v]] = -1; - if (v != Mate[u - StartIndex]) - { // u is local - // Build the Message Packet: - // Message[0] = u; //LOCAL - // Message[1] = v; //GHOST - // Message[2] = SUCCESS; //TYPE - // Send a Request (Asynchronous) + } + if (v != Mate[u - StartIndex]) + { // u is local #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a success message: "; - cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n"; - fflush(stdout); + cout << "\n(" << myRank << ")Sending a success message: "; + cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n"; + fflush(stdout); #endif - msgInd++; - NumMessagesBundled++; - ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); - assert(ghostOwner != -1); - assert(ghostOwner != myRank); - PCounter[ghostOwner]++; - - privateQLocalVtx.push_back(u); - privateQGhostVtx.push_back(v); - privateQMsgType.push_back(SUCCESS); - privateQOwner.push_back(ghostOwner); - - } // End of If( v != Mate[u] ) + ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); + assert(ghostOwner != -1); + assert(ghostOwner != myRank); +#pragma omp atomic + PCounter[ghostOwner]++; +#pragma omp atomic + msgInd++; +#pragma omp atomic + NumMessagesBundled++; - // omp_unset_lock(&MateLock[u - StartIndex]); + privateQLocalVtx.push_back(u); + privateQGhostVtx.push_back(v); + privateQMsgType.push_back(SUCCESS); + privateQOwner.push_back(ghostOwner); - } // End of critical region - } // End of Else //A Ghost Vertex + } // End of If( v != Mate[u] ) - } // End of For Loop adj(u) + omp_unset_lock(&MateLock[u - StartIndex]); - } // End of if ( (u >= StartIndex) && (u <= EndIndex) ) //Process Only If a Local Vertex + } // End of Else //A Ghost Vertex - // Ask for the critical section only when a certain amount - // of data have been accumulated in the private queue - if (privateU.size() < UCHUNK && !U.empty()) - continue; + } // End of for - printf("Executed \n"); -#ifdef error + // TODO commenting that part of code might generate errors + // Ask for the critical section only when there are no more data to + // compute. + if (/*privateU.size() < UCHUNK &&*/ !U.empty()) + continue; #pragma omp critical(U) - { - while (!privateU.empty()) - U.push_back(privateU.pop_back()); - } -#endif + { + while (!privateU.empty()) + U.push_back(privateU.pop_back()); + } + #ifndef error - queuesTransfer(U, privateU, QLocalVtx, - QGhostVtx, - QMsgType, QOwner, privateQLocalVtx, - privateQGhostVtx, - privateQMsgType, - privateQOwner); +#pragma omp critical(privateMsg) + { + while (!privateQLocalVtx.empty()) + { + QLocalVtx.push_back(privateQLocalVtx.pop_back()); + QGhostVtx.push_back(privateQGhostVtx.pop_back()); + QMsgType.push_back(privateQMsgType.pop_back()); + QOwner.push_back(privateQOwner.pop_back()); + } + } #endif + } } } // End of while ( !U.empty() ) - queuesTransfer(U, privateU, QLocalVtx, QGhostVtx, QMsgType, QOwner, privateQLocalVtx, @@ -329,4 +331,4 @@ void processMatchedVertices( *msgIndPtr = msgInd; *NumMessagesBundledPtr = NumMessagesBundled; *SPtr = S; -} \ No newline at end of file +}