Modify MATCHBOXP to fix OpenMP. Performance to be reviewed

omp-walther
Salvatore Filippone 2 years ago
parent 32994c7ce8
commit ebe9b45177

@ -70,7 +70,7 @@
Statistics: ph0_time, ph1_time, ph2_time: Runtimes
Statistics: ph1_card, ph2_card : Size: |P| number of processes in the comm-world (number of matched edges in Phase 1 and Phase 2)
*/
//#define DEBUG_HANG_
#ifdef SERIAL_MPI
#else
@ -110,12 +110,19 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ") verDistance [" << verDistance[0] << "," << verDistance[1] << "," << verDistance[2] << "," << verDistance[3] << "]";
cout << "\n(" << myRank << ") verDistance [" ;
for (int i = 0; i < numProcs; i++)
cout << verDistance[i] << "," << verDistance[i+1];
cout << "]\n";
fflush(stdout);
#endif
#ifdef DEBUG_HANG_
if (myRank == 0)
cout << "\n(" << myRank << ") verDistance [" << verDistance[0] << "," << verDistance[1] << "," << verDistance[2] << "," << verDistance[3] << "]";
if (myRank == 0) {
cout << "\n(" << myRank << ") verDistance [" ;
for (int i = 0; i < numProcs; i++)
cout << verDistance[i] << "," ;
cout << verDistance[numProcs]<< "]\n";
}
fflush(stdout);
#endif
@ -211,6 +218,10 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
finishTime = MPI_Wtime();
*ph0_time = finishTime - startTime; // Time taken for Phase-0: Initialization
#ifdef DEBUG_HANG_
cout << myRank << " Finished initialization" << endl;
fflush(stdout);
#endif
startTime = MPI_Wtime();
@ -233,6 +244,17 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
edgeLocWeight,
candidateMate);
#ifdef DEBUG_HANG_
cout << myRank << " Finished Exposed Vertex" << endl;
fflush(stdout);
#if 0
cout << myRank << " candidateMate after parallelCompute " <<endl;
for (int i=0; i<NLVer; i++) {
cout << candidateMate[i] << " " ;
}
cout << endl;
#endif
#endif
/*
* PARALLEL_PROCESS_EXPOSED_VERTEX_B
* TODO: write comment
@ -272,6 +294,18 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
tempCounter.clear(); // Do not need this any more
#ifdef DEBUG_HANG_
cout << myRank << " Finished Exposed Vertex" << endl;
fflush(stdout);
#if 0
cout << myRank << " Mate after Exposed Vertices " <<endl;
for (int i=0; i<NLVer; i++) {
cout << Mate[i] << " " ;
}
cout << endl;
#endif
#endif
///////////////////////////////////////////////////////////////////////////////////
/////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
@ -311,6 +345,19 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
privateQMsgType,
privateQOwner);
#ifdef DEBUG_HANG_
cout << myRank << " Finished Process Vertices" << endl;
fflush(stdout);
#if 0
cout << myRank << " Mate after Matched Vertices " <<endl;
for (int i=0; i<NLVer; i++) {
cout << Mate[i] << " " ;
}
cout << endl;
#endif
#endif
/////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////// SEND BUNDLED MESSAGES /////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////
@ -339,6 +386,12 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
finishTime = MPI_Wtime();
*ph1_time = finishTime - startTime; // Time taken for Phase-1
#ifdef DEBUG_HANG_
cout << myRank << " Finished sendBundles" << endl;
fflush(stdout);
#endif
*ph1_card = myCard; // Cardinality at the end of Phase-1
startTime = MPI_Wtime();
/////////////////////////////////////////////////////////////////////////////////////////
@ -360,10 +413,9 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
fflush(stdout);
#endif
while (true)
{
while (true) {
#ifdef DEBUG_HANG_
if (myRank == 0)
//if (myRank == 0)
cout << "\n(" << myRank << ") Main loop" << endl;
fflush(stdout);
#endif
@ -408,12 +460,20 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
///////////////////////// END OF PROCESS MATCHED VERTICES /////////////////////////
//// BREAK IF NO MESSAGES EXPECTED /////////
#ifdef DEBUG_HANG_
#if 0
cout << myRank << " Mate after ProcessMatchedAndSend phase "<<S <<endl;
for (int i=0; i<NLVer; i++) {
cout << Mate[i] << " " ;
}
cout << endl;
#endif
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Deciding whether to break: S= " << S << endl;
#endif
if (S == 0)
{
if (S == 0) {
#ifdef DEBUG_HANG_
cout << "\n(" << myRank << ") Breaking out" << endl;
fflush(stdout);
@ -451,6 +511,15 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
U);
///////////////////////// END OF PROCESS MESSAGES /////////////////////////////////
#ifdef DEBUG_HANG_
#if 0
cout << myRank << " Mate after ProcessMessages phase "<<S <<endl;
for (int i=0; i<NLVer; i++) {
cout << Mate[i] << " " ;
}
cout << endl;
#endif
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Finished Message processing phase: S= " << S;
fflush(stdout);

@ -17,12 +17,9 @@ MilanLongInt firstComputeCandidateMate(MilanLongInt adj1,
MilanInt w = -1;
MilanReal heaviestEdgeWt = MilanRealMin; // Assign the smallest Value possible first LDBL_MIN
int finalK;
for (int k = adj1; k < adj2; k++)
{
for (int k = adj1; k < adj2; k++) {
if ((edgeLocWeight[k] > heaviestEdgeWt) ||
((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k])))
{
((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k]))) {
heaviestEdgeWt = edgeLocWeight[k];
w = verLocInd[k];
finalK = k;
@ -60,14 +57,12 @@ MilanLongInt computeCandidateMate(MilanLongInt adj1,
MilanInt w = -1;
MilanReal heaviestEdgeWt = MilanRealMin; // Assign the smallest Value possible first LDBL_MIN
for (k = adj1; k < adj2; k++)
{
for (k = adj1; k < adj2; k++) {
if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap))
continue;
if ((edgeLocWeight[k] > heaviestEdgeWt) ||
((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k])))
{
((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k]))) {
heaviestEdgeWt = edgeLocWeight[k];
w = verLocInd[k];
}

@ -14,8 +14,7 @@ void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer,
{
#pragma omp for schedule(static)
for (v = 0; v < NLVer; v++)
{
for (v = 0; v < NLVer; v++) {
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Processing: " << v + StartIndex << endl;
fflush(stdout);

@ -32,20 +32,20 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
{
MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0;
MilanInt ghostOwner = 0, option;
MilanInt ghostOwner = 0, option, igw;
#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) \
num_threads(NUM_THREAD)
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) \
default(shared) num_threads(NUM_THREAD)
{
#pragma omp for reduction(+ \
: PCounter[:numProcs], myCard \
[:1], msgInd \
[:1], NumMessagesBundled \
[:1]) schedule(static)
for (v = 0; v < NLVer; v++)
{
[:1]) \
schedule(static)
for (v = 0; v < NLVer; v++) {
option = -1;
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
k = candidateMate[v];
@ -67,8 +67,7 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
#pragma omp critical(processExposed)
{
if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap))
{
if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) {
w = computeCandidateMate(verLocPtr[v],
verLocPtr[v + 1],
edgeLocWeight, 0,
@ -81,15 +80,11 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
candidateMate[v] = w;
}
if (w >= 0)
{
if (w >= 0) {
(*myCard)++;
if ((w < StartIndex) || (w > EndIndex))
{ // w is a ghost vertex
if ((w < StartIndex) || (w > EndIndex)) { // w is a ghost vertex
option = 2;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex)
{
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) {
option = 1;
Mate[v] = w;
GMate[Ghost2LocalMap[w]] = v + StartIndex; // w is a Ghost
@ -97,11 +92,9 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
} // End of if CandidateMate[w] = v
} // End of if a Ghost Vertex
else
{ // w is a local vertex
else { // w is a local vertex
if (candidateMate[w - StartIndex] == (v + StartIndex))
{
if (candidateMate[w - StartIndex] == (v + StartIndex)) {
option = 3;
Mate[v] = w; // v is local
Mate[w - StartIndex] = v + StartIndex; // w is local
@ -119,16 +112,13 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
} // End critical processExposed
} // End of if(w >=0)
else
{
else {
// This piece of code is executed a really small amount of times
adj11 = verLocPtr[v];
adj12 = verLocPtr[v + 1];
for (k1 = adj11; k1 < adj12; k1++)
{
for (k1 = adj11; k1 < adj12; k1++) {
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: ";

@ -46,9 +46,10 @@ void processMatchedVertices(
#ifdef COUNT_LOCAL_VERTEX
MilanLongInt localVertices = 0;
#endif
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \
num_threads(NUM_THREAD) \
//#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, \
privateQMsgType, privateQOwner, UChunkBeingProcessed) \
default(shared) num_threads(NUM_THREAD) \
reduction(+ \
: msgInd[:1], PCounter \
[:numProcs], myCard \
@ -56,19 +57,16 @@ void processMatchedVertices(
[:1])
{
while (!U.empty())
{
while (!U.empty()) {
extractUChunk(UChunkBeingProcessed, U, privateU);
for (MilanLongInt u : UChunkBeingProcessed)
{
for (MilanLongInt u : UChunkBeingProcessed) {
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")u: " << u;
fflush(stdout);
#endif
if ((u >= StartIndex) && (u <= EndIndex))
{ // Process Only the Local Vertices
if ((u >= StartIndex) && (u <= EndIndex)) { // Process Only the Local Vertices
#ifdef COUNT_LOCAL_VERTEX
localVertices++;
@ -77,13 +75,11 @@ void processMatchedVertices(
// Get the Adjacency list for u
adj1 = verLocPtr[u - StartIndex]; // Pointer
adj2 = verLocPtr[u - StartIndex + 1];
for (k = adj1; k < adj2; k++)
{
for (k = adj1; k < adj2; k++) {
option = -1;
v = verLocInd[k];
if ((v >= StartIndex) && (v <= EndIndex))
{ // If Local Vertex:
if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v];
@ -92,12 +88,10 @@ void processMatchedVertices(
#pragma omp atomic read
mateVal = Mate[v - StartIndex];
// If the current vertex is pointing to a matched vertex and is not matched
if (mateVal < 0)
{
if (mateVal < 0) {
#pragma omp critical
{
if (candidateMate[v - StartIndex] == u)
{
if (candidateMate[v - StartIndex] == u) {
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
w = computeCandidateMate(verLocPtr[v - StartIndex],
verLocPtr[v - StartIndex + 1],
@ -116,29 +110,23 @@ void processMatchedVertices(
fflush(stdout);
#endif
// If found a dominating edge:
if (w >= 0)
{
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
if (w >= 0) {
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a request message:";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
#endif
option = 2;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v)
{
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) {
option = 1;
Mate[v - StartIndex] = w; // v is a local vertex
GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
} // End of if CandidateMate[w] = v
} // End of if a Ghost Vertex
else
{ // w is a local vertex
if (candidateMate[w - StartIndex] == v)
{
else { // w is a local vertex
if (candidateMate[w - StartIndex] == v) {
option = 3;
Mate[v - StartIndex] = w; // v is a local vertex
Mate[w - StartIndex] = v; // w is a local vertex
@ -157,8 +145,7 @@ void processMatchedVertices(
} // End of task
} // mateval < 0
} // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex:
else
{ // Neighbor is a ghost vertex
else { // Neighbor is a ghost vertex
#pragma omp critical
{
@ -211,11 +198,9 @@ void processMatchedVertices(
// Could not find a dominating vertex
adj11 = verLocPtr[v - StartIndex];
adj12 = verLocPtr[v - StartIndex + 1];
for (k1 = adj11; k1 < adj12; k1++)
{
for (k1 = adj11; k1 < adj12; k1++) {
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: ";

@ -1,5 +1,5 @@
#include "MatchBoxPC.h"
//#define DEBUG_HANG_
void processMatchedVerticesAndSendMessages(
MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed,
@ -50,8 +50,9 @@ void processMatchedVerticesAndSendMessages(
#ifdef COUNT_LOCAL_VERTEX
MilanLongInt localVertices = 0;
#endif
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \
//#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx,\
privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \
num_threads(NUM_THREAD) \
reduction(+ \
: msgInd[:1], PCounter \
@ -61,19 +62,16 @@ void processMatchedVerticesAndSendMessages(
[:1])
{
while (!U.empty())
{
while (!U.empty()) {
extractUChunk(UChunkBeingProcessed, U, privateU);
for (MilanLongInt u : UChunkBeingProcessed)
{
for (MilanLongInt u : UChunkBeingProcessed) {
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")u: " << u;
fflush(stdout);
#endif
if ((u >= StartIndex) && (u <= EndIndex))
{ // Process Only the Local Vertices
if ((u >= StartIndex) && (u <= EndIndex)) { // Process Only the Local Vertices
#ifdef COUNT_LOCAL_VERTEX
localVertices++;
@ -82,13 +80,11 @@ void processMatchedVerticesAndSendMessages(
// Get the Adjacency list for u
adj1 = verLocPtr[u - StartIndex]; // Pointer
adj2 = verLocPtr[u - StartIndex + 1];
for (k = adj1; k < adj2; k++)
{
for (k = adj1; k < adj2; k++) {
option = -1;
v = verLocInd[k];
if ((v >= StartIndex) && (v <= EndIndex))
{ // If Local Vertex:
if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v];
@ -97,12 +93,10 @@ void processMatchedVerticesAndSendMessages(
#pragma omp atomic read
mateVal = Mate[v - StartIndex];
// If the current vertex is pointing to a matched vertex and is not matched
if (mateVal < 0)
{
if (mateVal < 0) {
#pragma omp critical
{
if (candidateMate[v - StartIndex] == u)
{
if (candidateMate[v - StartIndex] == u) {
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
w = computeCandidateMate(verLocPtr[v - StartIndex],
verLocPtr[v - StartIndex + 1],
@ -121,29 +115,24 @@ void processMatchedVerticesAndSendMessages(
fflush(stdout);
#endif
// If found a dominating edge:
if (w >= 0)
{
if (w >= 0) {
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a request message:";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
#endif
option = 2;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v)
{
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) {
option = 1;
Mate[v - StartIndex] = w; // v is a local vertex
GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
} // End of if CandidateMate[w] = v
} // End of if a Ghost Vertex
else
{ // w is a local vertex
if (candidateMate[w - StartIndex] == v)
{
else { // w is a local vertex
if (candidateMate[w - StartIndex] == v) {
option = 3;
Mate[v - StartIndex] = w; // v is a local vertex
Mate[w - StartIndex] = v; // w is a local vertex
@ -162,8 +151,7 @@ void processMatchedVerticesAndSendMessages(
} // End of task
} // mateval < 0
} // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex:
else
{ // Neighbor is a ghost vertex
else { // Neighbor is a ghost vertex
#pragma omp critical
{
@ -219,11 +207,9 @@ void processMatchedVerticesAndSendMessages(
// Could not find a dominating vertex
adj11 = verLocPtr[v - StartIndex];
adj12 = verLocPtr[v - StartIndex + 1];
for (k1 = adj11; k1 < adj12; k1++)
{
for (k1 = adj11; k1 < adj12; k1++) {
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: ";
@ -302,14 +288,21 @@ void processMatchedVerticesAndSendMessages(
} // End of parallel region
// Send the messages
for (int i = initialSize; i < QOwner.size(); i++)
{
#ifdef DEBUG_HANG_
cout << myRank<<" Sending: "<<QOwner.size()-initialSize<<" messages" <<endl;
#endif
for (int i = initialSize; i < QOwner.size(); i++) {
Message[0] = QLocalVtx[i];
Message[1] = QGhostVtx[i];
Message[2] = QMsgType[i];
ghostOwner = QOwner[i];
//MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
//cout << myRank<<" Sending to "<<ghostOwner<<endl;
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
}
#ifdef DEBUG_HANG_
cout << myRank<<" Done sending messages"<<endl;
#endif
}

@ -1,4 +1,5 @@
#include "MatchBoxPC.h"
//#define DEBUG_HANG_
void processMessages(
MilanLongInt NLVer,
@ -78,6 +79,7 @@ void processMessages(
fflush(stdout);
#endif
//cout << myRank<<" Receiving ...";
error_codeC = MPI_Recv(&Message[0], 3, TypeMap<MilanLongInt>(), MPI_ANY_SOURCE, ComputeTag, comm, &computeStatus);
if (error_codeC != MPI_SUCCESS)
{
@ -86,14 +88,13 @@ void processMessages(
fflush(stdout);
}
Sender = computeStatus.MPI_SOURCE;
//cout << " ...from "<<Sender << endl;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Received message from Process " << Sender << " Type= " << Message[2] << endl;
fflush(stdout);
#endif
if (Message[2] == SIZEINFO)
{
if (Message[2] == SIZEINFO) {
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Received bundled message from Process " << Sender << " Size= " << Message[0] << endl;
@ -112,9 +113,9 @@ void processMessages(
fflush(stdout);
#endif
// Receive the message
//cout << myRank<<" Receiving from "<<Sender<<endl;
error_codeC = MPI_Recv(&ReceiveBuffer[0], bundleSize, TypeMap<MilanLongInt>(), Sender, BundleTag, comm, &computeStatus);
if (error_codeC != MPI_SUCCESS)
{
if (error_codeC != MPI_SUCCESS) {
MPI_Error_string(error_codeC, error_message, &message_length);
cout << "\n*Error in call to MPI_Receive on processor " << myRank << " Error: " << error_message << "\n";
fflush(stdout);
@ -126,9 +127,7 @@ void processMessages(
cout << endl;
fflush(stdout);
#endif
}
else
{ // Just a single message:
} else { // Just a single message:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Received regular message from Process " << Sender << " u= " << Message[0] << " v= " << Message[1] << endl;
fflush(stdout);
@ -146,8 +145,7 @@ void processMessages(
}
#ifdef DEBUG_GHOST_
if ((v < StartIndex) || (v > EndIndex))
{
if ((v < StartIndex) || (v > EndIndex)) {
cout << "\n(" << myRank << ") From ReceiveBuffer: This should not happen: u= " << u << " v= " << v << " Type= " << message_type << " StartIndex " << StartIndex << " EndIndex " << EndIndex << endl;
fflush(stdout);
}
@ -158,33 +156,29 @@ void processMessages(
#endif
// Most of the time bundleSize == 3, thus, it's not worth parallelizing thi loop
for (MilanLongInt bundleCounter = 3; bundleCounter < bundleSize + 3; bundleCounter += 3)
{
for (MilanLongInt bundleCounter = 3; bundleCounter < bundleSize + 3; bundleCounter += 3) {
u = ReceiveBuffer[bundleCounter - 3]; // GHOST
v = ReceiveBuffer[bundleCounter - 2]; // LOCAL
message_type = ReceiveBuffer[bundleCounter - 1]; // TYPE
// CASE I: REQUEST
if (message_type == REQUEST)
{
if (message_type == REQUEST) {
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message type is REQUEST" << endl;
fflush(stdout);
#endif
#ifdef DEBUG_GHOST_
if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer))
{
if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) {
cout << "\n(" << myRank << ") case 1 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl;
fflush(stdout);
}
#endif
if (Mate[v - StartIndex] == -1)
{ // Process only if not already matched (v is local)
if (Mate[v - StartIndex] == -1) {
// Process only if not already matched (v is local)
candidateMate[NLVer + Ghost2LocalMap[u]] = v; // Set CandidateMate for the ghost
if (candidateMate[v - StartIndex] == u)
{
if (candidateMate[v - StartIndex] == u) {
GMate[Ghost2LocalMap[u]] = v; // u is ghost
Mate[v - StartIndex] = u; // v is local
U.push_back(v);
@ -199,10 +193,8 @@ void processMessages(
} // End of if ( candidateMate[v-StartIndex] == u )e
} // End of if ( Mate[v] == -1 )
} // End of REQUEST
else
{ // CASE II: SUCCESS
if (message_type == SUCCESS)
{
else { // CASE II: SUCCESS
if (message_type == SUCCESS) {
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message type is SUCCESS" << endl;
fflush(stdout);
@ -210,28 +202,26 @@ void processMessages(
GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process it again
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S);
#ifdef DEBUG_GHOST_
if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer))
{
if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) {
cout << "\n(" << myRank << ") case 2 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl;
fflush(stdout);
}
#endif
if (Mate[v - StartIndex] == -1)
{ // Process only if not already matched ( v is local)
if (candidateMate[v - StartIndex] == u)
{
if (Mate[v - StartIndex] == -1) {
// Process only if not already matched ( v is local)
if (candidateMate[v - StartIndex] == u) {
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
w = computeCandidateMate(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], edgeLocWeight, k, verLocInd, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap);
w = computeCandidateMate(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], edgeLocWeight, k,
verLocInd, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap);
candidateMate[v - StartIndex] = w;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")" << v << " Points to: " << w << endl;
fflush(stdout);
#endif
// If found a dominating edge:
if (w >= 0)
{
if ((w < StartIndex) || (w > EndIndex))
{ // w is a ghost
if (w >= 0) {
if ((w < StartIndex) || (w > EndIndex)) {
// w is a ghost
// Build the Message Packet:
Message[0] = v; // LOCAL
Message[1] = w; // GHOST
@ -245,12 +235,11 @@ void processMessages(
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
//assert(ghostOwner != -1);
//assert(ghostOwner != myRank);
//cout << myRank<<" Sending to "<<ghostOwner<<endl;
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgInd)++;
(*msgActual)++;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v)
{
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) {
Mate[v - StartIndex] = w; // v is local
GMate[Ghost2LocalMap[w]] = v; // w is ghost
U.push_back(v);
@ -264,10 +253,8 @@ void processMessages(
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S);
} // End of if CandidateMate[w] = v
} // End of if a Ghost Vertex
else
{ // w is a local vertex
if (candidateMate[w - StartIndex] == v)
{
else { // w is a local vertex
if (candidateMate[w - StartIndex] == v) {
Mate[v - StartIndex] = w; // v is local
Mate[w - StartIndex] = v; // w is local
// Q.push_back(u);
@ -281,15 +268,13 @@ void processMessages(
} // End of if(CandidateMate(w) = v
} // End of Else
} // End of if(w >=0)
else
{ // No dominant edge found
else { // No dominant edge found
adj11 = verLocPtr[v - StartIndex];
adj12 = verLocPtr[v - StartIndex + 1];
for (k1 = adj11; k1 < adj12; k1++)
{
for (k1 = adj11; k1 < adj12; k1++) {
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
if ((w < StartIndex) || (w > EndIndex)) {
// A ghost
// Build the Message Packet:
Message[0] = v; // LOCAL
Message[1] = w; // GHOST
@ -303,6 +288,7 @@ void processMessages(
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
//assert(ghostOwner != -1);
//assert(ghostOwner != myRank);
//cout << myRank<<" Sending to "<<ghostOwner<<endl;
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgInd)++;
(*msgActual)++;
@ -313,8 +299,8 @@ void processMessages(
} // End of if ( candidateMate[v-StartIndex] == u )
} // End of if ( Mate[v] == -1 )
} // End of if ( message_type == SUCCESS )
else
{ // CASE III: FAILURE
else {
// CASE III: FAILURE
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message type is FAILURE" << endl;
fflush(stdout);

@ -32,4 +32,5 @@ void queuesTransfer(vector<MilanLongInt> &U,
privateQGhostVtx.clear();
privateQMsgType.clear();
privateQOwner.clear();
}

@ -38,57 +38,57 @@ void sendBundledMessages(MilanLongInt *numGhostEdges,
#pragma omp task depend(inout \
: PCumulative, PMessageBundle, PSizeInfoMessages) depend(in \
: NumMessagesBundled, numProcs)
{try {
{
try {
PMessageBundle.reserve(NumMessagesBundled * 3); // Three integers per message
PCumulative.reserve(numProcs + 1); // Similar to Row Pointer vector in CSR data structure
PSizeInfoMessages.reserve(numProcs * 3); // Buffer to hold the Size info message packets
}
catch (length_error)
{
}
catch (length_error)
{
cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n";
cout << "Not enough memory to allocate the internal variables \n";
exit(1);
}
PMessageBundle.resize(NumMessagesBundled * 3, -1); // Initialize
PCumulative.resize(numProcs + 1, 0); // Only initialize the counter variable
PSizeInfoMessages.resize(numProcs * 3, 0);
}
}
PMessageBundle.resize(NumMessagesBundled * 3, -1); // Initialize
PCumulative.resize(numProcs + 1, 0); // Only initialize the counter variable
PSizeInfoMessages.resize(numProcs * 3, 0);
}
#pragma omp task depend(inout \
: PCumulative) depend(in \
: PCounter)
{
{
for (i = 0; i < numProcs; i++)
PCumulative[i + 1] = PCumulative[i] + PCounter[i];
}
}
#pragma omp task depend(inout \
: PCounter)
{
{
// Reuse PCounter to keep track of how many messages were inserted:
for (MilanInt i = 0; i < numProcs; i++) // Changed by Fabio to be an integer, addresses needs to be integers!
PCounter[i] = 0;
}
}
// Build the Message Bundle packet:
#pragma omp task depend(in \
: PCounter, QLocalVtx, QGhostVtx, QMsgType, QOwner, PMessageBundle, PCumulative) depend(out \
: myIndex, PMessageBundle, PCounter)
{
for (i = 0; i < NumMessagesBundled; i++)
{
for (i = 0; i < NumMessagesBundled; i++) {
myIndex = (PCumulative[QOwner[i]] + PCounter[QOwner[i]]) * 3;
PMessageBundle[myIndex + 0] = QLocalVtx[i];
PMessageBundle[myIndex + 1] = QGhostVtx[i];
PMessageBundle[myIndex + 2] = QMsgType[i];
PCounter[QOwner[i]]++;
}
}
}
// Send the Bundled Messages: Use ISend
#pragma omp task depend(out \
: SRequest, SStatus)
{
{
try
{
SRequest.reserve(numProcs * 2); // At most two messages per processor
@ -100,15 +100,14 @@ PSizeInfoMessages.resize(numProcs * 3, 0);
cout << "Not enough memory to allocate the internal variables \n";
exit(1);
}
}
}
// Send the Messages
#pragma omp task depend(inout \
: SRequest, PSizeInfoMessages, PCumulative) depend(out \
: *msgActual, *msgInd)
{
for (i = 0; i < numProcs; i++)
{ // Changed by Fabio to be an integer, addresses needs to be integers!
for (i = 0; i < numProcs; i++) { // Changed by Fabio to be an integer, addresses needs to be integers!
if (i == myRank) // Do not send anything to yourself
continue;
// Send the Message with information about the size of next message:
@ -182,7 +181,7 @@ PSizeInfoMessages.resize(numProcs * 3, 0);
}
#pragma omp task depend(out : Buffer) depend(in : *BufferSize)
{
{
Buffer = 0;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Size of One Message from PACK= " << OneMessageSize;
@ -204,7 +203,7 @@ PSizeInfoMessages.resize(numProcs * 3, 0);
}
MPI_Buffer_attach(Buffer, *BufferSize); // Attach the Buffer
}
}
}
}
}
}
Loading…
Cancel
Save