diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index 6e58d724..f747f1fc 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -1,7 +1,6 @@ #include "MatchBoxPC.h" #include #include - // *********************************************************************** // // MatchboxP: A C++ library for approximate weighted matching @@ -314,17 +313,17 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( #endif - /* - * Not parallelizable - */ + /* + * Not parallelizable + */ - for (i = 0; i < numGhostVertices; i++) { //O(|Ghost Vertices|) - verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i]; + for (i = 0; i < numGhostVertices; i++) { //O(|Ghost Vertices|) + verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i]; #ifdef PRINT_DEBUG_INFO_ - cout< 0) { + if (Counter[Ghost2LocalMap[w]] > 0) { Counter[Ghost2LocalMap[w]] -= 1; //Decrement if (Counter[Ghost2LocalMap[w]] == 0) { @@ -614,6 +613,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( } //End of if(w >=0) + //This piece of code is executed a really small amount of times, I will not allocate a + //huge amount of memory to the private data structures. adj11 = verLocPtr[v]; adj12 = verLocPtr[v + 1]; for (k1 = adj11; k1 < adj12; k1++) { @@ -632,28 +633,16 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( assert(ghostOwner != -1); assert(ghostOwner != myRank); PCounter[ghostOwner]++; - privateQLocalVtx.push_back(v + StartIndex); - privateQGhostVtx.push_back(w); - privateQMsgType.push_back(FAILURE); - privateQOwner.push_back(ghostOwner); + QLocalVtx.push_back(v + StartIndex); + QGhostVtx.push_back(w); + QMsgType.push_back(FAILURE); + QOwner.push_back(ghostOwner); } //End of if(GHOST) } //End of for loop //End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) } //End of for ( v=0; v < NLVer; v++ ) -#pragma omp critical(privateMsg) - { - while (!privateQLocalVtx.empty()) { - - QLocalVtx.push_back(privateQLocalVtx.pop_back()); - QGhostVtx.push_back(privateQGhostVtx.pop_back()); - QMsgType.push_back(privateQMsgType.pop_back()); - QOwner.push_back(privateQOwner.pop_back()); - - } - } - #pragma omp critical(U) { while (!privateU.empty()) @@ -668,261 +657,258 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( } #ifdef PRINT_DEBUG_INFO_ - cout<<"\n("< Us; - Us.reserve(UCHUNK); + //TODO what would be the optimal UCHUNK + vector Us; + Us.reserve(UCHUNK); - while( true ) { + while( true ) { - Us.clear(); + Us.clear(); #pragma omp critical(U) - { - //If U is emptu and there are no new node to add to U - if (U.empty() && privateU.empty()) - isEmpty = true; - else { - if (U.empty() && !privateU.empty()) // If U is empty but there are nodes in private U - while (!privateU.empty()) { - U.push_back(privateU.pop_front()); - myCard += privateMyCard; + { + //If U is emptu and there are no new node to add to U + if (U.empty() && privateU.empty()) + isEmpty = true; + else { + if (U.empty() && !privateU.empty()) // If U is empty but there are nodes in private U + while (!privateU.empty()) { + U.push_back(privateU.pop_front()); + myCard += privateMyCard; + } + for (int i = 0; i < UCHUNK; i++) { // Pop the new nodes + if (U.empty()) break; + Us.push_back(U.pop_front()); } - for (int i = 0; i < UCHUNK; i++) { // Pop the new nodes - if (U.empty()) break; - Us.push_back(U.pop_front()); } - } - } // End of critical U - if (isEmpty) break; + } // End of critical U + if (isEmpty) break; - for (MilanLongInt u : Us) - { + for (MilanLongInt u : Us) + { #ifdef PRINT_DEBUG_INFO_ - cout<<"\n("<= StartIndex) && (u <= EndIndex)) { //Process Only the Local Vertices + if ((u >= StartIndex) && (u <= EndIndex)) { //Process Only the Local Vertices #ifdef COUNT_LOCAL_VERTEX - localVertices ++; + localVertices ++; #endif - //Get the Adjacency list for u - adj1 = verLocPtr[u - StartIndex]; //Pointer - adj2 = verLocPtr[u - StartIndex + 1]; - for (k = adj1; k < adj2; k++) { - v = verLocInd[k]; - - if ((v >= StartIndex) && (v <= EndIndex)) { //If Local Vertex: -#pragma omp critical(innerProcessMatched) - { + //Get the Adjacency list for u + adj1 = verLocPtr[u - StartIndex]; //Pointer + adj2 = verLocPtr[u - StartIndex + 1]; + for (k = adj1; k < adj2; k++) { + v = verLocInd[k]; if ((v >= StartIndex) && (v <= EndIndex)) { //If Local Vertex: +#pragma omp critical(innerProcessMatched) + { - - //If the current vertex is pointing to a matched vertex and is not matched - //FIXME is there a way to make candidateMate private? - // for the moment it could generate an error. - if (not isAlreadyMatched(v, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap) and - candidateMate[v - StartIndex] == u) { - - - //Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - //Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) - w = computeCandidateMate(verLocPtr[v - StartIndex], - verLocPtr[v - StartIndex + 1], - edgeLocWeight, 0, - verLocInd, - StartIndex, - EndIndex, - GMate, - Mate, - Ghost2LocalMap); - - candidateMate[v - StartIndex] = w; - - //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) #ifdef PRINT_DEBUG_INFO_ - cout<<"\n("<= 0) { - - //TODO is it possible to lock without a critical region? - //TODO there must be a more elegant and efficient way to do this - while(true) { - if (omp_test_lock(&MateLock[v - StartIndex])) { - if (omp_test_lock(&MateLock[w - StartIndex])) break; - else omp_unset_lock(&MateLock[v - StartIndex]); - } - } - if ((w < StartIndex) || (w > EndIndex)) { //A ghost -#ifdef PRINT_DEBUG_INFO_ - cout<<"\n("<= 0) { - omp_unset_lock(&MateLock[v - StartIndex]); - omp_unset_lock(&MateLock[w - StartIndex]); + //TODO is it possible to lock without a critical region? + //TODO there must be a more elegant and efficient way to do this + while(true) { + if (omp_test_lock(&MateLock[v - StartIndex])) { + if (omp_test_lock(&MateLock[w - StartIndex])) break; + else omp_unset_lock(&MateLock[v - StartIndex]); + } + } - } //End of if(w >=0) - else { - adj11 = verLocPtr[v - StartIndex]; - adj12 = verLocPtr[v - StartIndex + 1]; - for (k1 = adj11; k1 < adj12; k1++) { - w = verLocInd[k1]; - if ((w < StartIndex) || (w > EndIndex)) { //A ghost + if ((w < StartIndex) || (w > EndIndex)) { //A ghost #ifdef PRINT_DEBUG_INFO_ - cout<<"\n("< 0) { + Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement + if (Counter[Ghost2LocalMap[w]] == 0) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0) { - Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement - if (Counter[Ghost2LocalMap[w]] == 0) { - S--; //Decrement S -#ifdef PRINT_DEBUG_INFO_ - cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,w) + } //End of if CandidateMate[w] = v + } //End of if a Ghost Vertex + else { //w is a local vertex + if (candidateMate[w - StartIndex] == v) { + Mate[v - StartIndex] = w; //v is a local vertex + Mate[w - StartIndex] = v; //w is a local vertex + //Q.push_back(u); + privateU.push_back(v); + privateU.push_back(w); + privateMyCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<=0) + else { + adj11 = verLocPtr[v - StartIndex]; + adj12 = verLocPtr[v - StartIndex + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) { //A ghost + +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + /* MPI_Bsend(&Message[0], 3, MPI_INT, inputSubGraph.findOwner(w), + ComputeTag, comm); */ + QLocalVtx.push_back(v); + QGhostVtx.push_back(w); + QMsgType.push_back(FAILURE); + //ghostOwner = inputSubGraph.findOwner(w); + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + assert(ghostOwner != -1); + assert(ghostOwner != myRank); + QOwner.push_back(ghostOwner); + PCounter[ghostOwner]++; + NumMessagesBundled++; + msgInd++; + } //End of if(GHOST) + } //End of for loop + } // End of Else: w == -1 + //End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + + } //End of If (candidateMate[v-StartIndex] == u + + } //End of critical region if + + } //End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: + else { //Neighbor is a ghost vertex #pragma omp critical(innerProcessMatched) - { + { - while(!omp_test_lock(&MateLock[u - StartIndex])); + while(!omp_test_lock(&MateLock[u - StartIndex])); - if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) - candidateMate[NLVer + Ghost2LocalMap[v]] = -1; - if (v != Mate[u - StartIndex]) { //u is local - //Build the Message Packet: - //Message[0] = u; //LOCAL - //Message[1] = v; //GHOST - //Message[2] = SUCCESS; //TYPE - //Send a Request (Asynchronous) + if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) + candidateMate[NLVer + Ghost2LocalMap[v]] = -1; + if (v != Mate[u - StartIndex]) { //u is local + //Build the Message Packet: + //Message[0] = u; //LOCAL + //Message[1] = v; //GHOST + //Message[2] = SUCCESS; //TYPE + //Send a Request (Asynchronous) - if (candidateMate[w - StartIndex] == v) { - Mate[v - StartIndex] = w; //v is a local vertex - Mate[w - StartIndex] = v; //w is a local vertex - privateU.push_back(v); - privateU.push_back(w); - privateMyCard++; #ifdef PRINT_DEBUG_INFO_ - cout<<"\n("<= StartIndex) && (u <= EndIndex) ) //Process Only If a Local Vertex + } //End of if ( (u >= StartIndex) && (u <= EndIndex) ) //Process Only If a Local Vertex - //Avoid to ask for the critical section if there is nothing to add - if (privateU.size() < UCHUNK && !U.empty()) continue; + //Avoid to ask for the critical section if there is nothing to add + if (privateU.size() < UCHUNK && !U.empty()) continue; #pragma omp critical(U) - { - while (!privateU.empty()) { - U.push_back(privateU.pop_front()); - } + { + while (!privateU.empty()) { + U.push_back(privateU.pop_front()); + } - myCard += privateMyCard; - } //End of critical U + myCard += privateMyCard; + } //End of critical U - } - } //End of while ( /*!Q.empty()*/ !U.empty() ) + } + } //End of while ( /*!Q.empty()*/ !U.empty() ) #pragma omp critical(privateMsg) { @@ -938,20 +924,20 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( #ifdef COUNT_LOCAL_VERTEX - printf("Count local vertexes: %ld for thread %d of processor %d\n", + printf("Count local vertexes: %ld for thread %d of processor %d\n", localVertices, omp_get_thread_num(), myRank); #endif - ///////////////////////// END OF PROCESS MATCHED VERTICES ///////////////////////// + ///////////////////////// END OF PROCESS MATCHED VERTICES ///////////////////////// #ifdef DEBUG_HANG_ - if (myRank == 0) cout<<"\n("< 0 ) { - Buffer = (MilanLongInt *) malloc(BufferSize); //Allocate memory - if ( Buffer == 0 ) { - cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; - cout<<"Not enough memory to allocate for send buffer on process "< 0 ) { + Buffer = (MilanLongInt *) malloc(BufferSize); //Allocate memory + if ( Buffer == 0 ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; + cout<<"Not enough memory to allocate for send buffer on process "< &GMate, - MilanLongInt* Mate, - map &Ghost2LocalMap - ) { + MilanLongInt StartIndex, + MilanLongInt EndIndex, + vector &GMate, + MilanLongInt* Mate, + map &Ghost2LocalMap +) { bool result = false; #pragma omp critical(Mate) @@ -1776,15 +1762,15 @@ inline bool isAlreadyMatched(MilanLongInt node, * @return */ inline MilanLongInt computeCandidateMate(MilanLongInt adj1, - MilanLongInt adj2, - MilanReal* edgeLocWeight, - MilanLongInt k, - MilanLongInt* verLocInd, - MilanLongInt StartIndex, - MilanLongInt EndIndex, - vector & GMate, - MilanLongInt* Mate, - map & Ghost2LocalMap) + MilanLongInt adj2, + MilanReal* edgeLocWeight, + MilanLongInt k, + MilanLongInt* verLocInd, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + vector & GMate, + MilanLongInt* Mate, + map & Ghost2LocalMap) { MilanInt w = -1; MilanReal heaviestEdgeWt = MilanRealMin; //Assign the smallest Value possible first LDBL_MIN