From aa45e2fe936db3d150aedb01409dfe933984cb5e Mon Sep 17 00:00:00 2001 From: StefanoPetrilli Date: Sat, 23 Jul 2022 05:14:26 -0500 Subject: [PATCH] processMatchedVerticesAndSendMessages.cpp unoptimized --- amgprec/impl/aggregator/Makefile | 2 +- amgprec/impl/aggregator/MatchBoxPC.h | 46 ++- ...mEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 178 +------- .../aggregator/processMatchedVertices.cpp | 78 +--- .../processMatchedVerticesAndSendMessages.cpp | 380 ++++++++++++++++++ amgprec/impl/aggregator/processMessages.cpp | 16 +- 6 files changed, 435 insertions(+), 265 deletions(-) create mode 100644 amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp diff --git a/amgprec/impl/aggregator/Makefile b/amgprec/impl/aggregator/Makefile index f1760822..b3b1ac94 100644 --- a/amgprec/impl/aggregator/Makefile +++ b/amgprec/impl/aggregator/Makefile @@ -70,6 +70,7 @@ findOwnerOfGhost.o \ computeCandidateMate.o \ parallelComputeCandidateMateB.o \ processMatchedVertices.o \ +processMatchedVerticesAndSendMessages.o \ processCrossEdge.o \ queueTransfer.o \ processMessages.o \ @@ -77,7 +78,6 @@ processExposedVertex.o \ algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.o \ algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.o - OBJS = $(FOBJS) $(MPCOBJS) LIBNAME=libamg_prec.a diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h index cb7d95e2..8bba9540 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.h +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -65,8 +65,8 @@ using namespace std; -const int NUM_THREAD = 2; -const int UCHUNK = 50; +const int NUM_THREAD = 4; +const int UCHUNK = 10; const MilanLongInt REQUEST = 1; const MilanLongInt SUCCESS = 2; @@ -293,6 +293,38 @@ extern "C" MilanLongInt *SPtr); void processMatchedVertices( + MilanLongInt NLVer, + vector &UChunkBeingProcessed, + staticQueue &U, + staticQueue &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *NumMessagesBundledPtr, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanReal *edgeLocWeight, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + staticQueue &privateQLocalVtx, + staticQueue &privateQGhostVtx, + staticQueue &privateQMsgType, + staticQueue &privateQOwner); + + void processMatchedVerticesAndSendMessages( MilanLongInt NLVer, vector &UChunkBeingProcessed, staticQueue &U, @@ -323,11 +355,11 @@ extern "C" staticQueue &privateQGhostVtx, staticQueue &privateQMsgType, staticQueue &privateQOwner, - bool sendMessages = false, - MPI_Comm comm = NULL, - MilanLongInt *msgActual = nullptr, - MilanLongInt *msgInd = nullptr, - vector &Message = DEFAULT_VECTOR); + bool sendMessages, + MPI_Comm comm, + MilanLongInt *msgActual, + MilanLongInt *msgInd, + vector &Message); void sendBundledMessages(MilanLongInt *numGhostEdgesPtr, MilanInt *BufferSizePtr, diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index 99fd57c3..d8e8bfb7 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -368,11 +368,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( /////////////////////////////////////////////////////////////////////////////////// /////////////////////////// PROCESS MATCHED VERTICES ////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// - ///* -//#define error -#ifdef error - processMatchedVertices(NLVer, + processMatchedVerticesAndSendMessages(NLVer, UChunkBeingProcessed, U, privateU, @@ -407,179 +404,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( &msgActual, &msgInd, Message); -#endif -#ifndef error - - while (!U.empty()) - { - - u = U.pop_front(); // Get an element from the queue -#ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")u: " << u; - fflush(stdout); -#endif - if ((u >= StartIndex) && (u <= EndIndex)) - { // Process Only If a Local Vertex - // Get the Adjacency list for u - adj1 = verLocPtr[u - StartIndex]; // Pointer - adj2 = verLocPtr[u - StartIndex + 1]; - for (k = adj1; k < adj2; k++) - { - v = verLocInd[k]; - if ((v >= StartIndex) && (v <= EndIndex)) - { // v is a Local Vertex: - // if (Mate[v - StartIndex] >= 0) // v is already matched - // continue; -#ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; - fflush(stdout); -#endif - // If the current vertex is pointing to a matched vertex and is not matched - if (Mate[v - StartIndex] < 0) - { - if (candidateMate[v - StartIndex] == u) - { // Only if pointing to the matched vertex - // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - w = computeCandidateMate(verLocPtr[v - StartIndex], - verLocPtr[v - StartIndex + 1], - edgeLocWeight, 0, - verLocInd, - StartIndex, - EndIndex, - GMate, - Mate, - Ghost2LocalMap); - - candidateMate[v - StartIndex] = w; - // End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) -#ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")" << v << " Points to: " << w; - fflush(stdout); -#endif - // If found a dominating edge: - if (w >= 0) - { - if ((w < StartIndex) || (w > EndIndex)) - { // w is a ghost - // Build the Message Packet: - Message[0] = v; // LOCAL - Message[1] = w; // GHOST - Message[2] = REQUEST; // TYPE - // Send a Request (Asynchronous) -#ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a request message:"; - cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); - fflush(stdout); -#endif - ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - assert(ghostOwner != -1); - assert(ghostOwner != myRank); - MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); - msgInd++; - msgActual++; - if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) - { - Mate[v - StartIndex] = w; // v is local - GMate[Ghost2LocalMap[w]] = v; // w is ghost - // Q.push_back(u); - U.push_back(v); - U.push_back(w); - myCard++; -#ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; - fflush(stdout); -#endif - - PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], &S); - - } // End of if CandidateMate[w] = v - } // End of if a Ghost Vertex - else - { // w is a local vertex - if (candidateMate[w - StartIndex] == v) - { - Mate[v - StartIndex] = w; // v is local - Mate[w - StartIndex] = v; // w is local - // Q.push_back(u); - U.push_back(v); - U.push_back(w); - myCard++; -#ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; - fflush(stdout); -#endif - } // End of if(CandidateMate(w) = v - } // End of Else - } // End of if(w >=0) - else - { // no dominating edge found: w == -1 - adj11 = verLocPtr[v - StartIndex]; - adj12 = verLocPtr[v - StartIndex + 1]; - for (k1 = adj11; k1 < adj12; k1++) - { - w = verLocInd[k1]; - if ((w < StartIndex) || (w > EndIndex)) - { // A ghost - // Build the Message Packet: - Message[0] = v; // LOCAL - Message[1] = w; // GHOST - Message[2] = FAILURE; // TYPE - // Send a Request (Asynchronous) -#ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a failure message: "; - cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); - fflush(stdout); -#endif - ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - assert(ghostOwner != -1); - assert(ghostOwner != myRank); - MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); - msgInd++; - msgActual++; - } // End of if(GHOST) - } // End of for loop - } // End of Else: w == -1 - // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - } // End of If (candidateMate[v-StartIndex] == u) - } // if (Mate[v - StartIndex] < 0) - } // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: - else - { // Neighbor v is a ghost vertex - if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) - candidateMate[NLVer + Ghost2LocalMap[v]] = -1; - if (v != Mate[u - StartIndex]) - { // u is a local vertex - // Build the Message Packet: - Message[0] = u; // LOCAL - Message[1] = v; // GHOST - Message[2] = SUCCESS; // TYPE - // Send a Request (Asynchronous) -#ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a success message: "; - cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs); - fflush(stdout); -#endif - ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); - assert(ghostOwner != -1); - assert(ghostOwner != myRank); - MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); - msgInd++; - msgActual++; -#ifdef DEBUG_GHOST_ - if ((u < StartIndex) || (u > EndIndex)) - { - cout << "\n(" << myRank << ") " << __LINE__ << " From Send: should not happen: u= " << u << " v= " << v << " StartIndex " << StartIndex << " EndIndex " << EndIndex << endl; - fflush(stdout); - } -#endif - - } // End of If( v != Mate[u] ) - } // End of Else //A Ghost Vertex - } // End of For Loop adj(u) - } // End of if ( (u >= StartIndex) && (u <= EndIndex) ) //Process Only If a Local Vertex - } // End of while ( !U.empty() ) -#endif ///////////////////////// END OF PROCESS MATCHED VERTICES ///////////////////////// diff --git a/amgprec/impl/aggregator/processMatchedVertices.cpp b/amgprec/impl/aggregator/processMatchedVertices.cpp index 5e233ce9..edb1f788 100644 --- a/amgprec/impl/aggregator/processMatchedVertices.cpp +++ b/amgprec/impl/aggregator/processMatchedVertices.cpp @@ -1,7 +1,5 @@ #include "MatchBoxPC.h" -//#define error - void processMatchedVertices( MilanLongInt NLVer, vector &UChunkBeingProcessed, @@ -32,12 +30,7 @@ void processMatchedVertices( staticQueue &privateQLocalVtx, staticQueue &privateQGhostVtx, staticQueue &privateQMsgType, - staticQueue &privateQOwner, - bool sendMessages, - MPI_Comm comm, - MilanLongInt *msgActual, - MilanLongInt *msgInd, - vector &Message) + staticQueue &privateQOwner) { MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner; @@ -53,7 +46,7 @@ void processMatchedVertices( #ifdef COUNT_LOCAL_VERTEX MilanLongInt localVertices = 0; #endif -#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) num_threads(NUM_THREAD) +#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) num_threads(NUM_THREAD) { while (!U.empty()) @@ -192,29 +185,10 @@ void processMatchedVertices( ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); assert(ghostOwner != -1); assert(ghostOwner != myRank); - if (sendMessages) - { - // Build the Message Packet: - Message[0] = v; // LOCAL - Message[1] = w; // GHOST - Message[2] = REQUEST; // TYPE - // Send a Request (Asynchronous) - - printf("Send case 2: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); - fflush(stdout); - MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); - #pragma omp atomic - (*msgActual)++; - } - else - { + PCounter[ghostOwner]++; #pragma omp atomic - PCounter[ghostOwner]++; -#pragma omp atomic - (*NumMessagesBundledPtr)++; - } - + (*NumMessagesBundledPtr)++; #pragma omp atomic (*msgIndPtr)++; @@ -248,28 +222,10 @@ void processMatchedVertices( ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); assert(ghostOwner != -1); assert(ghostOwner != myRank); - if (sendMessages) - { - // Build the Message Packet: - Message[0] = v; // LOCAL - Message[1] = w; // GHOST - Message[2] = FAILURE; // TYPE - // Send a Request (Asynchronous) - - printf("Send case 4: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); - fflush(stdout); - MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); -#pragma omp atomic - (*msgActual)++; - } - else - { #pragma omp atomic - PCounter[ghostOwner]++; + PCounter[ghostOwner]++; #pragma omp atomic - (*NumMessagesBundledPtr)++; - } - + (*NumMessagesBundledPtr)++; #pragma omp atomic (*msgIndPtr)++; @@ -293,27 +249,11 @@ void processMatchedVertices( ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); assert(ghostOwner != -1); assert(ghostOwner != myRank); - if (sendMessages) - { - // Build the Message Packet: - Message[0] = u; // LOCAL - Message[1] = v; // GHOST - Message[2] = SUCCESS; // TYPE - - // Send a Request (Asynchronous) - // printf("Send case 5: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); - fflush(stdout); - MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); -#pragma omp atomic - (*msgActual)++; - } - else - { + #pragma omp atomic - (*NumMessagesBundledPtr)++; + (*NumMessagesBundledPtr)++; #pragma omp atomic - PCounter[ghostOwner]++; - } + PCounter[ghostOwner]++; #pragma omp atomic (*msgIndPtr)++; diff --git a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp new file mode 100644 index 00000000..e61d561f --- /dev/null +++ b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp @@ -0,0 +1,380 @@ +#include "MatchBoxPC.h" + +void processMatchedVerticesAndSendMessages( + MilanLongInt NLVer, + vector &UChunkBeingProcessed, + staticQueue &U, + staticQueue &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *NumMessagesBundledPtr, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanReal *edgeLocWeight, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + staticQueue &privateQLocalVtx, + staticQueue &privateQGhostVtx, + staticQueue &privateQMsgType, + staticQueue &privateQOwner, + bool sendMessages, + MPI_Comm comm, + MilanLongInt *msgActual, + MilanLongInt *msgInd, + vector &Message) +{ + + MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner; + int option; + MilanLongInt mateVal; + + vector messagesToSend; + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << "=========================************===============================" << endl; + fflush(stdout); + fflush(stdout); +#endif + +#ifdef COUNT_LOCAL_VERTEX + MilanLongInt localVertices = 0; +#endif +#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) num_threads(NUM_THREAD) + { + + while (!U.empty()) + { + + extractUChunk(UChunkBeingProcessed, U, privateU); + + for (MilanLongInt u : UChunkBeingProcessed) + { +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")u: " << u; + fflush(stdout); +#endif + if ((u >= StartIndex) && (u <= EndIndex)) + { // Process Only the Local Vertices + +#ifdef COUNT_LOCAL_VERTEX + localVertices++; +#endif + + // Get the Adjacency list for u + adj1 = verLocPtr[u - StartIndex]; // Pointer + adj2 = verLocPtr[u - StartIndex + 1]; + for (k = adj1; k < adj2; k++) + { + option = -1; + v = verLocInd[k]; + + if ((v >= StartIndex) && (v <= EndIndex)) + { // If Local Vertex: + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; + fflush(stdout); +#endif +#pragma omp atomic read + mateVal = Mate[v - StartIndex]; + // If the current vertex is pointing to a matched vertex and is not matched + if (mateVal < 0) + { +#pragma omp critical + { + if (candidateMate[v - StartIndex] == u) + { + // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + w = computeCandidateMate(verLocPtr[v - StartIndex], + verLocPtr[v - StartIndex + 1], + edgeLocWeight, 0, + verLocInd, + StartIndex, + EndIndex, + GMate, + Mate, + Ghost2LocalMap); + + candidateMate[v - StartIndex] = w; + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")" << v << " Points to: " << w; + fflush(stdout); +#endif + // If found a dominating edge: + if (w >= 0) + { + + if ((w < StartIndex) || (w > EndIndex)) + { // A ghost +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a request message:"; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); +#endif + option = 2; + + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) + { + option = 1; + Mate[v - StartIndex] = w; // v is a local vertex + GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex + + } // End of if CandidateMate[w] = v + } // End of if a Ghost Vertex + else + { // w is a local vertex + if (candidateMate[w - StartIndex] == v) + { + option = 3; + Mate[v - StartIndex] = w; // v is a local vertex + Mate[w - StartIndex] = v; // w is a local vertex + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; + fflush(stdout); +#endif + } // End of if(CandidateMate(w) = v + } // End of Else + } // End of if(w >=0) + else + option = 4; // End of Else: w == -1 + // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } // End of If (candidateMate[v-StartIndex] == u + } // End of task + } // mateval < 0 + } // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: + else + { // Neighbor is a ghost vertex + +#pragma omp critical + { + if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) + candidateMate[NLVer + Ghost2LocalMap[v]] = -1; + if (v != Mate[u - StartIndex]) + option = 5; // u is local + } // End of critical + } // End of Else //A Ghost Vertex + + switch (option) + { + case -1: + // No things to do + break; + case 1: + // Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v + privateU.push_back(v); + privateU.push_back(w); +#pragma omp atomic + (*myCardPtr)++; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; + fflush(stdout); +#endif + // Decrement the counter: + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr); + case 2: + + // Found a dominating edge, it is a ghost + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + assert(ghostOwner != -1); + assert(ghostOwner != myRank); + if (sendMessages) + { + // Build the Message Packet: + Message[0] = v; // LOCAL + Message[1] = w; // GHOST + Message[2] = REQUEST; // TYPE + // Send a Request (Asynchronous) + + //printf("Send case 2: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); + //fflush(stdout); +#pragma omp critical(sendMessage) + { + messagesToSend.push_back(v); + messagesToSend.push_back(w); + messagesToSend.push_back(REQUEST); + messagesToSend.push_back(ghostOwner); + } + // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); + +#pragma omp atomic + (*msgActual)++; + } + else + { +#pragma omp atomic + PCounter[ghostOwner]++; +#pragma omp atomic + (*NumMessagesBundledPtr)++; + } + +#pragma omp atomic + (*msgIndPtr)++; + + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(REQUEST); + privateQOwner.push_back(ghostOwner); + break; + case 3: + privateU.push_back(v); + privateU.push_back(w); +#pragma omp atomic + (*myCardPtr)++; + break; + case 4: + // Could not find a dominating vertex + adj11 = verLocPtr[v - StartIndex]; + adj12 = verLocPtr[v - StartIndex + 1]; + for (k1 = adj11; k1 < adj12; k1++) + { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) + { // A ghost + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + fflush(stdout); +#endif + + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + assert(ghostOwner != -1); + assert(ghostOwner != myRank); + if (sendMessages) + { + // Build the Message Packet: + Message[0] = v; // LOCAL + Message[1] = w; // GHOST + Message[2] = FAILURE; // TYPE + // Send a Request (Asynchronous) + + //printf("Send case 4: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); + //fflush(stdout); +#pragma omp critical(sendMessage) + { + messagesToSend.push_back(v); + messagesToSend.push_back(w); + messagesToSend.push_back(FAILURE); + messagesToSend.push_back(ghostOwner); + } + // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); +#pragma omp atomic + (*msgActual)++; + } + else + { +#pragma omp atomic + PCounter[ghostOwner]++; +#pragma omp atomic + (*NumMessagesBundledPtr)++; + } + +#pragma omp atomic + (*msgIndPtr)++; + + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(FAILURE); + privateQOwner.push_back(ghostOwner); + + } // End of if(GHOST) + } // End of for loop + break; + case 5: + default: + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a success message: "; + cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n"; + fflush(stdout); +#endif + + ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); + assert(ghostOwner != -1); + assert(ghostOwner != myRank); + if (sendMessages) + { + // Build the Message Packet: + Message[0] = u; // LOCAL + Message[1] = v; // GHOST + Message[2] = SUCCESS; // TYPE + + // Send a Request (Asynchronous) + //printf("Send case 5: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); + //fflush(stdout); +#pragma omp critical(sendMessage) + { + messagesToSend.push_back(u); + messagesToSend.push_back(v); + messagesToSend.push_back(SUCCESS); + messagesToSend.push_back(ghostOwner); + } + // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); +#pragma omp atomic + (*msgActual)++; + } + else + { +#pragma omp atomic + (*NumMessagesBundledPtr)++; +#pragma omp atomic + PCounter[ghostOwner]++; + } + +#pragma omp atomic + (*msgIndPtr)++; + + privateQLocalVtx.push_back(u); + privateQGhostVtx.push_back(v); + privateQMsgType.push_back(SUCCESS); + privateQOwner.push_back(ghostOwner); + + break; + } // End of switch + + } // End of inner for + } + } // End of outer for + + queuesTransfer(U, privateU, QLocalVtx, + QGhostVtx, + QMsgType, QOwner, privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner); + + } // End of while ( !U.empty() ) + +#ifdef COUNT_LOCAL_VERTEX + printf("Count local vertexes: %ld for thread %d of processor %d\n", + localVertices, + omp_get_thread_num(), + myRank); + +#endif + } // End of parallel region + + for (int i = 0; i < messagesToSend.size(); i += 4) + { + Message[0] = messagesToSend[i]; + Message[1] = messagesToSend[i + 1]; + Message[2] = messagesToSend[i + 2]; + ghostOwner = messagesToSend[i + 3]; + MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); + } +} diff --git a/amgprec/impl/aggregator/processMessages.cpp b/amgprec/impl/aggregator/processMessages.cpp index 474453e3..c6cb2531 100644 --- a/amgprec/impl/aggregator/processMessages.cpp +++ b/amgprec/impl/aggregator/processMessages.cpp @@ -28,7 +28,7 @@ void processMessages( staticQueue &U) { -//#define PRINT_DEBUG_INFO_ + //#define PRINT_DEBUG_INFO_ MilanInt Sender; MPI_Status computeStatus; @@ -94,8 +94,6 @@ void processMessages( if (Message[2] == SIZEINFO) { - //printf("Inizio sizeinfo\n"); - fflush(stdout); #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")Received bundled message from Process " << Sender << " Size= " << Message[0] << endl; @@ -128,9 +126,6 @@ void processMessages( cout << endl; fflush(stdout); #endif - - //printf("Fine sizeinfo\n"); - fflush(stdout); } else { // Just a single message: @@ -162,8 +157,7 @@ void processMessages( fflush(stdout); #endif - - //Most of the time bundleSize == 3, thus, it's not worth parallelizing thi loop + // Most of the time bundleSize == 3, thus, it's not worth parallelizing thi loop for (MilanLongInt bundleCounter = 3; bundleCounter < bundleSize + 3; bundleCounter += 3) { u = ReceiveBuffer[bundleCounter - 3]; // GHOST @@ -325,10 +319,10 @@ void processMessages( cout << "\n(" << myRank << ")Message type is FAILURE" << endl; fflush(stdout); #endif - GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process this anymore + GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process this anymore PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); // Decrease the counter - } // End of else: CASE III - } // End of else: CASE I + } // End of else: CASE III + } // End of else: CASE I } return;