Modify MATCHBOXP to fix OpenMP. Performance to be reviewed

omp-walther
Salvatore Filippone 2 years ago
parent 32994c7ce8
commit ebe9b45177

@ -70,7 +70,7 @@
Statistics: ph0_time, ph1_time, ph2_time: Runtimes Statistics: ph0_time, ph1_time, ph2_time: Runtimes
Statistics: ph1_card, ph2_card : Size: |P| number of processes in the comm-world (number of matched edges in Phase 1 and Phase 2) Statistics: ph1_card, ph2_card : Size: |P| number of processes in the comm-world (number of matched edges in Phase 1 and Phase 2)
*/ */
//#define DEBUG_HANG_
#ifdef SERIAL_MPI #ifdef SERIAL_MPI
#else #else
@ -110,17 +110,24 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
#endif #endif
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ") verDistance [" << verDistance[0] << "," << verDistance[1] << "," << verDistance[2] << "," << verDistance[3] << "]"; cout << "\n(" << myRank << ") verDistance [" ;
// Print each partition boundary exactly once: entries 0..numProcs-1 are each
// followed by a comma, then the last entry verDistance[numProcs] closes the
// list. The previous form emitted verDistance[i] and verDistance[i+1] on every
// iteration, so interior boundaries appeared twice and consecutive iterations
// ran together without a separator. This matches the layout of the
// DEBUG_HANG_ dump of the same array.
for (int i = 0; i < numProcs; i++)
  cout << verDistance[i] << "," ;
cout << verDistance[numProcs] << "]\n";
fflush(stdout); fflush(stdout);
#endif #endif
#ifdef DEBUG_HANG_ #ifdef DEBUG_HANG_
if (myRank == 0) if (myRank == 0) {
cout << "\n(" << myRank << ") verDistance [" << verDistance[0] << "," << verDistance[1] << "," << verDistance[2] << "," << verDistance[3] << "]"; cout << "\n(" << myRank << ") verDistance [" ;
for (int i = 0; i < numProcs; i++)
cout << verDistance[i] << "," ;
cout << verDistance[numProcs]<< "]\n";
}
fflush(stdout); fflush(stdout);
#endif #endif
MilanLongInt StartIndex = verDistance[myRank]; // The starting vertex owned by the current rank MilanLongInt StartIndex = verDistance[myRank]; // The starting vertex owned by the current rank
MilanLongInt EndIndex = verDistance[myRank + 1] - 1; // The ending vertex owned by the current rank MilanLongInt EndIndex = verDistance[myRank + 1] - 1; // The ending vertex owned by the current rank
MPI_Status computeStatus; MPI_Status computeStatus;
@ -211,7 +218,11 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
finishTime = MPI_Wtime(); finishTime = MPI_Wtime();
*ph0_time = finishTime - startTime; // Time taken for Phase-0: Initialization *ph0_time = finishTime - startTime; // Time taken for Phase-0: Initialization
#ifdef DEBUG_HANG_
cout << myRank << " Finished initialization" << endl;
fflush(stdout);
#endif
startTime = MPI_Wtime(); startTime = MPI_Wtime();
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
@ -233,6 +244,17 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
edgeLocWeight, edgeLocWeight,
candidateMate); candidateMate);
#ifdef DEBUG_HANG_
// Progress marker used when chasing hangs: each rank reports that it got past
// the step above. The label is distinct from the "Finished Exposed Vertex"
// marker printed after the exposed-vertex section, so the two phases can be
// told apart in the output.
// NOTE(review): the preceding call is only partly visible in this hunk; the
// label assumes it is the candidate-mate computation, as the disabled dump
// below ("candidateMate after parallelCompute") suggests -- confirm.
cout << myRank << " Finished Compute Candidate Mate" << endl;
fflush(stdout);
#if 0
// Disabled: dumps the first NLVer entries of candidateMate for this rank.
cout << myRank << " candidateMate after parallelCompute " <<endl;
for (int i=0; i<NLVer; i++) {
cout << candidateMate[i] << " " ;
}
cout << endl;
#endif
#endif
/* /*
* PARALLEL_PROCESS_EXPOSED_VERTEX_B * PARALLEL_PROCESS_EXPOSED_VERTEX_B
* TODO: write comment * TODO: write comment
@ -272,6 +294,18 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
tempCounter.clear(); // Do not need this any more tempCounter.clear(); // Do not need this any more
#ifdef DEBUG_HANG_
cout << myRank << " Finished Exposed Vertex" << endl;
fflush(stdout);
#if 0
cout << myRank << " Mate after Exposed Vertices " <<endl;
for (int i=0; i<NLVer; i++) {
cout << Mate[i] << " " ;
}
cout << endl;
#endif
#endif
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
/////////////////////////// PROCESS MATCHED VERTICES ////////////////////////////// /////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
@ -311,6 +345,19 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
privateQMsgType, privateQMsgType,
privateQOwner); privateQOwner);
#ifdef DEBUG_HANG_
cout << myRank << " Finished Process Vertices" << endl;
fflush(stdout);
#if 0
cout << myRank << " Mate after Matched Vertices " <<endl;
for (int i=0; i<NLVer; i++) {
cout << Mate[i] << " " ;
}
cout << endl;
#endif
#endif
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////// SEND BUNDLED MESSAGES ///////////////////////////////////// ///////////////////////////// SEND BUNDLED MESSAGES /////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
@ -339,6 +386,12 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
finishTime = MPI_Wtime(); finishTime = MPI_Wtime();
*ph1_time = finishTime - startTime; // Time taken for Phase-1 *ph1_time = finishTime - startTime; // Time taken for Phase-1
#ifdef DEBUG_HANG_
cout << myRank << " Finished sendBundles" << endl;
fflush(stdout);
#endif
*ph1_card = myCard; // Cardinality at the end of Phase-1 *ph1_card = myCard; // Cardinality at the end of Phase-1
startTime = MPI_Wtime(); startTime = MPI_Wtime();
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
@ -360,65 +413,72 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
fflush(stdout); fflush(stdout);
#endif #endif
while (true) while (true) {
{
#ifdef DEBUG_HANG_ #ifdef DEBUG_HANG_
if (myRank == 0) //if (myRank == 0)
cout << "\n(" << myRank << ") Main loop" << endl; cout << "\n(" << myRank << ") Main loop" << endl;
fflush(stdout); fflush(stdout);
#endif #endif
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
/////////////////////////// PROCESS MATCHED VERTICES ////////////////////////////// /////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
processMatchedVerticesAndSendMessages(NLVer, processMatchedVerticesAndSendMessages(NLVer,
UChunkBeingProcessed, UChunkBeingProcessed,
U, U,
privateU, privateU,
StartIndex, StartIndex,
EndIndex, EndIndex,
&myCard, &myCard,
&msgInd, &msgInd,
&NumMessagesBundled, &NumMessagesBundled,
&S, &S,
verLocPtr, verLocPtr,
verLocInd, verLocInd,
verDistance, verDistance,
PCounter, PCounter,
Counter, Counter,
myRank, myRank,
numProcs, numProcs,
candidateMate, candidateMate,
GMate, GMate,
Mate, Mate,
Ghost2LocalMap, Ghost2LocalMap,
edgeLocWeight, edgeLocWeight,
QLocalVtx, QLocalVtx,
QGhostVtx, QGhostVtx,
QMsgType, QMsgType,
QOwner, QOwner,
privateQLocalVtx, privateQLocalVtx,
privateQGhostVtx, privateQGhostVtx,
privateQMsgType, privateQMsgType,
privateQOwner, privateQOwner,
comm, comm,
&msgActual, &msgActual,
Message); Message);
///////////////////////// END OF PROCESS MATCHED VERTICES ///////////////////////// ///////////////////////// END OF PROCESS MATCHED VERTICES /////////////////////////
//// BREAK IF NO MESSAGES EXPECTED ///////// //// BREAK IF NO MESSAGES EXPECTED /////////
#ifdef DEBUG_HANG_
#if 0
cout << myRank << " Mate after ProcessMatchedAndSend phase "<<S <<endl;
for (int i=0; i<NLVer; i++) {
cout << Mate[i] << " " ;
}
cout << endl;
#endif
#endif
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Deciding whether to break: S= " << S << endl; cout << "\n(" << myRank << ")Deciding whether to break: S= " << S << endl;
#endif #endif
if (S == 0) if (S == 0) {
{
#ifdef DEBUG_HANG_ #ifdef DEBUG_HANG_
cout << "\n(" << myRank << ") Breaking out" << endl; cout << "\n(" << myRank << ") Breaking out" << endl;
fflush(stdout); fflush(stdout);
#endif #endif
break; break;
} }
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
/////////////////////////// PROCESS MESSAGES ////////////////////////////////////// /////////////////////////// PROCESS MESSAGES //////////////////////////////////////
@ -451,6 +511,15 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
U); U);
///////////////////////// END OF PROCESS MESSAGES ///////////////////////////////// ///////////////////////// END OF PROCESS MESSAGES /////////////////////////////////
#ifdef DEBUG_HANG_
#if 0
cout << myRank << " Mate after ProcessMessages phase "<<S <<endl;
for (int i=0; i<NLVer; i++) {
cout << Mate[i] << " " ;
}
cout << endl;
#endif
#endif
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Finished Message processing phase: S= " << S; cout << "\n(" << myRank << ")Finished Message processing phase: S= " << S;
fflush(stdout); fflush(stdout);
@ -482,4 +551,4 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
// End of algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate // End of algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate
#endif #endif
#endif #endif

@ -17,16 +17,13 @@ MilanLongInt firstComputeCandidateMate(MilanLongInt adj1,
MilanInt w = -1; MilanInt w = -1;
MilanReal heaviestEdgeWt = MilanRealMin; // Assign the smallest Value possible first LDBL_MIN MilanReal heaviestEdgeWt = MilanRealMin; // Assign the smallest Value possible first LDBL_MIN
int finalK; int finalK;
for (int k = adj1; k < adj2; k++) for (int k = adj1; k < adj2; k++) {
{ if ((edgeLocWeight[k] > heaviestEdgeWt) ||
((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k]))) {
if ((edgeLocWeight[k] > heaviestEdgeWt) || heaviestEdgeWt = edgeLocWeight[k];
((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k]))) w = verLocInd[k];
{ finalK = k;
heaviestEdgeWt = edgeLocWeight[k]; }
w = verLocInd[k];
finalK = k;
}
} // End of for loop } // End of for loop
return finalK; return finalK;
} }
@ -60,19 +57,17 @@ MilanLongInt computeCandidateMate(MilanLongInt adj1,
MilanInt w = -1; MilanInt w = -1;
MilanReal heaviestEdgeWt = MilanRealMin; // Assign the smallest Value possible first LDBL_MIN MilanReal heaviestEdgeWt = MilanRealMin; // Assign the smallest Value possible first LDBL_MIN
for (k = adj1; k < adj2; k++) for (k = adj1; k < adj2; k++) {
{ if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap))
if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) continue;
continue;
if ((edgeLocWeight[k] > heaviestEdgeWt) ||
if ((edgeLocWeight[k] > heaviestEdgeWt) || ((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k]))) {
((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k]))) heaviestEdgeWt = edgeLocWeight[k];
{ w = verLocInd[k];
heaviestEdgeWt = edgeLocWeight[k]; }
w = verLocInd[k];
}
} // End of for loop } // End of for loop
// End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) // End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
return w; return w;
} }

@ -14,8 +14,7 @@ void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer,
{ {
#pragma omp for schedule(static) #pragma omp for schedule(static)
for (v = 0; v < NLVer; v++) for (v = 0; v < NLVer; v++) {
{
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Processing: " << v + StartIndex << endl; cout << "\n(" << myRank << ")Processing: " << v + StartIndex << endl;
fflush(stdout); fflush(stdout);

@ -29,23 +29,23 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
vector<MilanLongInt> &privateQGhostVtx, vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType, vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner) vector<MilanInt> &privateQOwner)
{ {
MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0; MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0;
MilanInt ghostOwner = 0, option; MilanInt ghostOwner = 0, option, igw;
#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \ #pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) \ firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) \
num_threads(NUM_THREAD) default(shared) num_threads(NUM_THREAD)
{ {
#pragma omp for reduction(+ \ #pragma omp for reduction(+ \
: PCounter[:numProcs], myCard \ : PCounter[:numProcs], myCard \
[:1], msgInd \ [:1], msgInd \
[:1], NumMessagesBundled \ [:1], NumMessagesBundled \
[:1]) schedule(static) [:1]) \
for (v = 0; v < NLVer; v++) schedule(static)
{ for (v = 0; v < NLVer; v++) {
option = -1; option = -1;
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
k = candidateMate[v]; k = candidateMate[v];
@ -67,91 +67,81 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
#pragma omp critical(processExposed) #pragma omp critical(processExposed)
{ {
if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) {
{ w = computeCandidateMate(verLocPtr[v],
w = computeCandidateMate(verLocPtr[v], verLocPtr[v + 1],
verLocPtr[v + 1], edgeLocWeight, 0,
edgeLocWeight, 0, verLocInd,
verLocInd, StartIndex,
StartIndex, EndIndex,
EndIndex, GMate,
GMate, Mate,
Mate, Ghost2LocalMap);
Ghost2LocalMap); candidateMate[v] = w;
candidateMate[v] = w;
} }
if (w >= 0) if (w >= 0) {
{ (*myCard)++;
(*myCard)++; if ((w < StartIndex) || (w > EndIndex)) { // w is a ghost vertex
if ((w < StartIndex) || (w > EndIndex)) option = 2;
{ // w is a ghost vertex if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) {
option = 2; option = 1;
Mate[v] = w;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) GMate[Ghost2LocalMap[w]] = v + StartIndex; // w is a Ghost
{
option = 1; } // End of if CandidateMate[w] = v
Mate[v] = w;
GMate[Ghost2LocalMap[w]] = v + StartIndex; // w is a Ghost } // End of if a Ghost Vertex
else { // w is a local vertex
} // End of if CandidateMate[w] = v
if (candidateMate[w - StartIndex] == (v + StartIndex)) {
} // End of if a Ghost Vertex option = 3;
else Mate[v] = w; // v is local
{ // w is a local vertex Mate[w - StartIndex] = v + StartIndex; // w is local
if (candidateMate[w - StartIndex] == (v + StartIndex))
{
option = 3;
Mate[v] = w; // v is local
Mate[w - StartIndex] = v + StartIndex; // w is local
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ") "; cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ") ";
fflush(stdout); fflush(stdout);
#endif #endif
} // End of if ( candidateMate[w-StartIndex] == (v+StartIndex) ) } // End of if ( candidateMate[w-StartIndex] == (v+StartIndex) )
} // End of Else } // End of Else
} // End of second if } // End of second if
} // End critical processExposed } // End critical processExposed
} // End of if(w >=0) } // End of if(w >=0)
else else {
{ // This piece of code is executed a really small amount of times
// This piece of code is executed a really small amount of times adj11 = verLocPtr[v];
adj11 = verLocPtr[v]; adj12 = verLocPtr[v + 1];
adj12 = verLocPtr[v + 1]; for (k1 = adj11; k1 < adj12; k1++) {
for (k1 = adj11; k1 < adj12; k1++) w = verLocInd[k1];
{ if ((w < StartIndex) || (w > EndIndex)) { // A ghost
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: "; cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
fflush(stdout); fflush(stdout);
#endif #endif
(*msgInd)++; (*msgInd)++;
(*NumMessagesBundled)++; (*NumMessagesBundled)++;
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// assert(ghostOwner != -1); // assert(ghostOwner != -1);
// assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
privateQLocalVtx.push_back(v + StartIndex); privateQLocalVtx.push_back(v + StartIndex);
privateQGhostVtx.push_back(w); privateQGhostVtx.push_back(w);
privateQMsgType.push_back(FAILURE); privateQMsgType.push_back(FAILURE);
privateQOwner.push_back(ghostOwner); privateQOwner.push_back(ghostOwner);
} // End of if(GHOST) } // End of if(GHOST)
} // End of for loop } // End of for loop
} }
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
switch (option) switch (option)
{ {
case -1: case -1:
@ -202,4 +192,4 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
privateQOwner); privateQOwner);
} // End of parallel region } // End of parallel region
} }

@ -46,264 +46,249 @@ void processMatchedVertices(
#ifdef COUNT_LOCAL_VERTEX #ifdef COUNT_LOCAL_VERTEX
MilanLongInt localVertices = 0; MilanLongInt localVertices = 0;
#endif #endif
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ //#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \ firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, \
num_threads(NUM_THREAD) \ privateQMsgType, privateQOwner, UChunkBeingProcessed) \
reduction(+ \ default(shared) num_threads(NUM_THREAD) \
: msgInd[:1], PCounter \ reduction(+ \
[:numProcs], myCard \ : msgInd[:1], PCounter \
[:1], NumMessagesBundled \ [:numProcs], myCard \
[:1], NumMessagesBundled \
[:1]) [:1])
{ {
while (!U.empty()) while (!U.empty()) {
{
extractUChunk(UChunkBeingProcessed, U, privateU);
extractUChunk(UChunkBeingProcessed, U, privateU);
for (MilanLongInt u : UChunkBeingProcessed) {
for (MilanLongInt u : UChunkBeingProcessed)
{
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")u: " << u; cout << "\n(" << myRank << ")u: " << u;
fflush(stdout); fflush(stdout);
#endif #endif
if ((u >= StartIndex) && (u <= EndIndex)) if ((u >= StartIndex) && (u <= EndIndex)) { // Process Only the Local Vertices
{ // Process Only the Local Vertices
#ifdef COUNT_LOCAL_VERTEX #ifdef COUNT_LOCAL_VERTEX
localVertices++; localVertices++;
#endif #endif
// Get the Adjacency list for u // Get the Adjacency list for u
adj1 = verLocPtr[u - StartIndex]; // Pointer adj1 = verLocPtr[u - StartIndex]; // Pointer
adj2 = verLocPtr[u - StartIndex + 1]; adj2 = verLocPtr[u - StartIndex + 1];
for (k = adj1; k < adj2; k++) for (k = adj1; k < adj2; k++) {
{ option = -1;
option = -1; v = verLocInd[k];
v = verLocInd[k];
if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex:
if ((v >= StartIndex) && (v <= EndIndex))
{ // If Local Vertex:
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v];
fflush(stdout); fflush(stdout);
#endif #endif
#pragma omp atomic read #pragma omp atomic read
mateVal = Mate[v - StartIndex]; mateVal = Mate[v - StartIndex];
// If the current vertex is pointing to a matched vertex and is not matched // If the current vertex is pointing to a matched vertex and is not matched
if (mateVal < 0) if (mateVal < 0) {
{
#pragma omp critical #pragma omp critical
{ {
if (candidateMate[v - StartIndex] == u) if (candidateMate[v - StartIndex] == u) {
{ // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) w = computeCandidateMate(verLocPtr[v - StartIndex],
w = computeCandidateMate(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1],
verLocPtr[v - StartIndex + 1], edgeLocWeight, 0,
edgeLocWeight, 0, verLocInd,
verLocInd, StartIndex,
StartIndex, EndIndex,
EndIndex, GMate,
GMate, Mate,
Mate, Ghost2LocalMap);
Ghost2LocalMap);
candidateMate[v - StartIndex] = w;
candidateMate[v - StartIndex] = w;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")" << v << " Points to: " << w; cout << "\n(" << myRank << ")" << v << " Points to: " << w;
fflush(stdout); fflush(stdout);
#endif #endif
// If found a dominating edge: // If found a dominating edge:
if (w >= 0) if (w >= 0) {
{ if ((w < StartIndex) || (w > EndIndex)) { // A ghost
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a request message:"; cout << "\n(" << myRank << ")Sending a request message:";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
#endif #endif
option = 2; option = 2;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) {
{ option = 1;
option = 1; Mate[v - StartIndex] = w; // v is a local vertex
Mate[v - StartIndex] = w; // v is a local vertex GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
} // End of if CandidateMate[w] = v
} // End of if CandidateMate[w] = v } // End of if a Ghost Vertex
} // End of if a Ghost Vertex else { // w is a local vertex
else if (candidateMate[w - StartIndex] == v) {
{ // w is a local vertex option = 3;
if (candidateMate[w - StartIndex] == v) Mate[v - StartIndex] = w; // v is a local vertex
{ Mate[w - StartIndex] = v; // w is a local vertex
option = 3;
Mate[v - StartIndex] = w; // v is a local vertex
Mate[w - StartIndex] = v; // w is a local vertex
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout); fflush(stdout);
#endif #endif
} // End of if(CandidateMate(w) = v } // End of if(CandidateMate(w) = v
} // End of Else } // End of Else
} // End of if(w >=0) } // End of if(w >=0)
else else
option = 4; // End of Else: w == -1 option = 4; // End of Else: w == -1
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
} // End of If (candidateMate[v-StartIndex] == u } // End of If (candidateMate[v-StartIndex] == u
} // End of task } // End of task
} // mateval < 0 } // mateval < 0
} // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: } // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex:
else else { // Neighbor is a ghost vertex
{ // Neighbor is a ghost vertex
#pragma omp critical #pragma omp critical
{ {
if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) if (candidateMate[NLVer + Ghost2LocalMap[v]] == u)
candidateMate[NLVer + Ghost2LocalMap[v]] = -1; candidateMate[NLVer + Ghost2LocalMap[v]] = -1;
if (v != Mate[u - StartIndex]) if (v != Mate[u - StartIndex])
option = 5; // u is local option = 5; // u is local
} // End of critical } // End of critical
} // End of Else //A Ghost Vertex } // End of Else //A Ghost Vertex
switch (option) switch (option)
{ {
case -1: case -1:
// No things to do // No things to do
break; break;
case 1: case 1:
// Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v // Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v
privateU.push_back(v); privateU.push_back(v);
privateU.push_back(w); privateU.push_back(w);
(*myCard)++; (*myCard)++;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout); fflush(stdout);
#endif #endif
// Decrement the counter: // Decrement the counter:
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr); PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr);
case 2: case 2:
// Found a dominating edge, it is a ghost // Found a dominating edge, it is a ghost
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// assert(ghostOwner != -1); // assert(ghostOwner != -1);
// assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
(*NumMessagesBundled)++; (*NumMessagesBundled)++;
(*msgInd)++; (*msgInd)++;
privateQLocalVtx.push_back(v); privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w); privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST); privateQMsgType.push_back(REQUEST);
privateQOwner.push_back(ghostOwner); privateQOwner.push_back(ghostOwner);
break; break;
case 3: case 3:
privateU.push_back(v); privateU.push_back(v);
privateU.push_back(w); privateU.push_back(w);
(*myCard)++; (*myCard)++;
break; break;
case 4: case 4:
// Could not find a dominating vertex // Could not find a dominating vertex
adj11 = verLocPtr[v - StartIndex]; adj11 = verLocPtr[v - StartIndex];
adj12 = verLocPtr[v - StartIndex + 1]; adj12 = verLocPtr[v - StartIndex + 1];
for (k1 = adj11; k1 < adj12; k1++) for (k1 = adj11; k1 < adj12; k1++) {
{ w = verLocInd[k1];
w = verLocInd[k1]; if ((w < StartIndex) || (w > EndIndex)) { // A ghost
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: "; cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
fflush(stdout); fflush(stdout);
#endif #endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// assert(ghostOwner != -1); // assert(ghostOwner != -1);
// assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
(*NumMessagesBundled)++; (*NumMessagesBundled)++;
(*msgInd)++; (*msgInd)++;
privateQLocalVtx.push_back(v); privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w); privateQGhostVtx.push_back(w);
privateQMsgType.push_back(FAILURE); privateQMsgType.push_back(FAILURE);
privateQOwner.push_back(ghostOwner); privateQOwner.push_back(ghostOwner);
} // End of if(GHOST) } // End of if(GHOST)
} // End of for loop } // End of for loop
break; break;
case 5: case 5:
default: default:
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a success message: "; cout << "\n(" << myRank << ")Sending a success message: ";
cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n"; cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n";
fflush(stdout); fflush(stdout);
#endif #endif
ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
// assert(ghostOwner != -1); // assert(ghostOwner != -1);
// assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
(*NumMessagesBundled)++; (*NumMessagesBundled)++;
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
(*msgInd)++; (*msgInd)++;
privateQLocalVtx.push_back(u); privateQLocalVtx.push_back(u);
privateQGhostVtx.push_back(v); privateQGhostVtx.push_back(v);
privateQMsgType.push_back(SUCCESS); privateQMsgType.push_back(SUCCESS);
privateQOwner.push_back(ghostOwner); privateQOwner.push_back(ghostOwner);
break; break;
} // End of switch } // End of switch
} // End of inner for } // End of inner for
} }
} // End of outer for } // End of outer for
queuesTransfer(U, privateU, QLocalVtx, queuesTransfer(U, privateU, QLocalVtx,
QGhostVtx, QGhostVtx,
QMsgType, QOwner, privateQLocalVtx, QMsgType, QOwner, privateQLocalVtx,
privateQGhostVtx, privateQGhostVtx,
privateQMsgType, privateQMsgType,
privateQOwner); privateQOwner);
#pragma omp critical(U) #pragma omp critical(U)
{ {
U.insert(U.end(), privateU.begin(), privateU.end()); U.insert(U.end(), privateU.begin(), privateU.end());
} }
privateU.clear(); privateU.clear();
#pragma omp critical(sendMessageTransfer) #pragma omp critical(sendMessageTransfer)
{ {
QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end()); QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end());
QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end()); QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end());
QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end()); QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end());
QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end()); QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end());
} }
privateQLocalVtx.clear(); privateQLocalVtx.clear();
privateQGhostVtx.clear(); privateQGhostVtx.clear();
privateQMsgType.clear(); privateQMsgType.clear();
privateQOwner.clear(); privateQOwner.clear();
} // End of while ( !U.empty() ) } // End of while ( !U.empty() )
#ifdef COUNT_LOCAL_VERTEX #ifdef COUNT_LOCAL_VERTEX
printf("Count local vertexes: %ld for thread %d of processor %d\n", printf("Count local vertexes: %ld for thread %d of processor %d\n",
localVertices, localVertices,
omp_get_thread_num(), omp_get_thread_num(),
myRank); myRank);
#endif #endif
} // End of parallel region } // End of parallel region
} }

@ -1,39 +1,39 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
//#define DEBUG_HANG_
void processMatchedVerticesAndSendMessages( void processMatchedVerticesAndSendMessages(
MilanLongInt NLVer, MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed, vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U, vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU, vector<MilanLongInt> &privateU,
MilanLongInt StartIndex, MilanLongInt StartIndex,
MilanLongInt EndIndex, MilanLongInt EndIndex,
MilanLongInt *myCard, MilanLongInt *myCard,
MilanLongInt *msgInd, MilanLongInt *msgInd,
MilanLongInt *NumMessagesBundled, MilanLongInt *NumMessagesBundled,
MilanLongInt *SPtr, MilanLongInt *SPtr,
MilanLongInt *verLocPtr, MilanLongInt *verLocPtr,
MilanLongInt *verLocInd, MilanLongInt *verLocInd,
MilanLongInt *verDistance, MilanLongInt *verDistance,
MilanLongInt *PCounter, MilanLongInt *PCounter,
vector<MilanLongInt> &Counter, vector<MilanLongInt> &Counter,
MilanInt myRank, MilanInt myRank,
MilanInt numProcs, MilanInt numProcs,
MilanLongInt *candidateMate, MilanLongInt *candidateMate,
vector<MilanLongInt> &GMate, vector<MilanLongInt> &GMate,
MilanLongInt *Mate, MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap, map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
MilanReal *edgeLocWeight, MilanReal *edgeLocWeight,
vector<MilanLongInt> &QLocalVtx, vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx, vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType, vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner, vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx, vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx, vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType, vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner, vector<MilanInt> &privateQOwner,
MPI_Comm comm, MPI_Comm comm,
MilanLongInt *msgActual, MilanLongInt *msgActual,
vector<MilanLongInt> &Message) vector<MilanLongInt> &Message)
{ {
MilanLongInt initialSize = QLocalVtx.size(); MilanLongInt initialSize = QLocalVtx.size();
@ -50,266 +50,259 @@ void processMatchedVerticesAndSendMessages(
#ifdef COUNT_LOCAL_VERTEX #ifdef COUNT_LOCAL_VERTEX
MilanLongInt localVertices = 0; MilanLongInt localVertices = 0;
#endif #endif
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ //#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \ firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx,\
num_threads(NUM_THREAD) \ privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \
reduction(+ \ num_threads(NUM_THREAD) \
: msgInd[:1], PCounter \ reduction(+ \
[:numProcs], myCard \ : msgInd[:1], PCounter \
[:1], NumMessagesBundled \ [:numProcs], myCard \
[:1], msgActual \ [:1], NumMessagesBundled \
[:1], msgActual \
[:1]) [:1])
{ {
while (!U.empty()) while (!U.empty()) {
{
extractUChunk(UChunkBeingProcessed, U, privateU);
extractUChunk(UChunkBeingProcessed, U, privateU);
for (MilanLongInt u : UChunkBeingProcessed) {
for (MilanLongInt u : UChunkBeingProcessed)
{
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")u: " << u; cout << "\n(" << myRank << ")u: " << u;
fflush(stdout); fflush(stdout);
#endif #endif
if ((u >= StartIndex) && (u <= EndIndex)) if ((u >= StartIndex) && (u <= EndIndex)) { // Process Only the Local Vertices
{ // Process Only the Local Vertices
#ifdef COUNT_LOCAL_VERTEX #ifdef COUNT_LOCAL_VERTEX
localVertices++; localVertices++;
#endif #endif
// Get the Adjacency list for u // Get the Adjacency list for u
adj1 = verLocPtr[u - StartIndex]; // Pointer adj1 = verLocPtr[u - StartIndex]; // Pointer
adj2 = verLocPtr[u - StartIndex + 1]; adj2 = verLocPtr[u - StartIndex + 1];
for (k = adj1; k < adj2; k++) for (k = adj1; k < adj2; k++) {
{ option = -1;
option = -1; v = verLocInd[k];
v = verLocInd[k];
if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex:
if ((v >= StartIndex) && (v <= EndIndex))
{ // If Local Vertex:
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v];
fflush(stdout); fflush(stdout);
#endif #endif
#pragma omp atomic read #pragma omp atomic read
mateVal = Mate[v - StartIndex]; mateVal = Mate[v - StartIndex];
// If the current vertex is pointing to a matched vertex and is not matched // If the current vertex is pointing to a matched vertex and is not matched
if (mateVal < 0) if (mateVal < 0) {
{
#pragma omp critical #pragma omp critical
{ {
if (candidateMate[v - StartIndex] == u) if (candidateMate[v - StartIndex] == u) {
{ // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) w = computeCandidateMate(verLocPtr[v - StartIndex],
w = computeCandidateMate(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1],
verLocPtr[v - StartIndex + 1], edgeLocWeight, 0,
edgeLocWeight, 0, verLocInd,
verLocInd, StartIndex,
StartIndex, EndIndex,
EndIndex, GMate,
GMate, Mate,
Mate, Ghost2LocalMap);
Ghost2LocalMap);
candidateMate[v - StartIndex] = w;
candidateMate[v - StartIndex] = w;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")" << v << " Points to: " << w; cout << "\n(" << myRank << ")" << v << " Points to: " << w;
fflush(stdout); fflush(stdout);
#endif #endif
// If found a dominating edge: // If found a dominating edge:
if (w >= 0) if (w >= 0) {
{
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a request message:"; cout << "\n(" << myRank << ")Sending a request message:";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
#endif #endif
option = 2; option = 2;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) {
{ option = 1;
option = 1; Mate[v - StartIndex] = w; // v is a local vertex
Mate[v - StartIndex] = w; // v is a local vertex GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
} // End of if CandidateMate[w] = v
} // End of if CandidateMate[w] = v } // End of if a Ghost Vertex
} // End of if a Ghost Vertex else { // w is a local vertex
else if (candidateMate[w - StartIndex] == v) {
{ // w is a local vertex option = 3;
if (candidateMate[w - StartIndex] == v) Mate[v - StartIndex] = w; // v is a local vertex
{ Mate[w - StartIndex] = v; // w is a local vertex
option = 3;
Mate[v - StartIndex] = w; // v is a local vertex
Mate[w - StartIndex] = v; // w is a local vertex
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout); fflush(stdout);
#endif #endif
} // End of if(CandidateMate(w) = v } // End of if(CandidateMate(w) = v
} // End of Else } // End of Else
} // End of if(w >=0) } // End of if(w >=0)
else else
option = 4; // End of Else: w == -1 option = 4; // End of Else: w == -1
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
} // End of If (candidateMate[v-StartIndex] == u } // End of If (candidateMate[v-StartIndex] == u
} // End of task } // End of task
} // mateval < 0 } // mateval < 0
} // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: } // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex:
else else { // Neighbor is a ghost vertex
{ // Neighbor is a ghost vertex
#pragma omp critical #pragma omp critical
{ {
if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) if (candidateMate[NLVer + Ghost2LocalMap[v]] == u)
candidateMate[NLVer + Ghost2LocalMap[v]] = -1; candidateMate[NLVer + Ghost2LocalMap[v]] = -1;
if (v != Mate[u - StartIndex]) if (v != Mate[u - StartIndex])
option = 5; // u is local option = 5; // u is local
} // End of critical } // End of critical
} // End of Else //A Ghost Vertex } // End of Else //A Ghost Vertex
switch (option) switch (option)
{ {
case -1: case -1:
// No things to do // No things to do
break; break;
case 1: case 1:
// Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v // Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v
privateU.push_back(v); privateU.push_back(v);
privateU.push_back(w); privateU.push_back(w);
(*myCard)++; (*myCard)++;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout); fflush(stdout);
#endif #endif
// Decrement the counter: // Decrement the counter:
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr); PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr);
case 2: case 2:
// Found a dominating edge, it is a ghost // Found a dominating edge, it is a ghost
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// Build the Message Packet: // Build the Message Packet:
// Message[0] = v; // LOCAL // Message[0] = v; // LOCAL
// Message[1] = w; // GHOST // Message[1] = w; // GHOST
// Message[2] = REQUEST; // TYPE // Message[2] = REQUEST; // TYPE
// Send a Request (Asynchronous) // Send a Request (Asynchronous)
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm); // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgActual)++; (*msgActual)++;
(*msgInd)++; (*msgInd)++;
privateQLocalVtx.push_back(v); privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w); privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST); privateQMsgType.push_back(REQUEST);
privateQOwner.push_back(ghostOwner); privateQOwner.push_back(ghostOwner);
break; break;
case 3: case 3:
privateU.push_back(v); privateU.push_back(v);
privateU.push_back(w); privateU.push_back(w);
(*myCard)++; (*myCard)++;
break; break;
case 4: case 4:
// Could not find a dominating vertex // Could not find a dominating vertex
adj11 = verLocPtr[v - StartIndex]; adj11 = verLocPtr[v - StartIndex];
adj12 = verLocPtr[v - StartIndex + 1]; adj12 = verLocPtr[v - StartIndex + 1];
for (k1 = adj11; k1 < adj12; k1++) for (k1 = adj11; k1 < adj12; k1++) {
{ w = verLocInd[k1];
w = verLocInd[k1]; if ((w < StartIndex) || (w > EndIndex)) { // A ghost
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: "; cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
fflush(stdout); fflush(stdout);
#endif #endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// Build the Message Packet: // Build the Message Packet:
// Message[0] = v; // LOCAL // Message[0] = v; // LOCAL
// Message[1] = w; // GHOST // Message[1] = w; // GHOST
// Message[2] = FAILURE; // TYPE // Message[2] = FAILURE; // TYPE
// Send a Request (Asynchronous) // Send a Request (Asynchronous)
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm); // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgActual)++; (*msgActual)++;
(*msgInd)++; (*msgInd)++;
privateQLocalVtx.push_back(v); privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w); privateQGhostVtx.push_back(w);
privateQMsgType.push_back(FAILURE); privateQMsgType.push_back(FAILURE);
privateQOwner.push_back(ghostOwner); privateQOwner.push_back(ghostOwner);
} // End of if(GHOST) } // End of if(GHOST)
} // End of for loop } // End of for loop
break; break;
case 5: case 5:
default: default:
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a success message: "; cout << "\n(" << myRank << ")Sending a success message: ";
cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n"; cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n";
fflush(stdout); fflush(stdout);
#endif #endif
ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
// Build the Message Packet: // Build the Message Packet:
// Message[0] = u; // LOCAL // Message[0] = u; // LOCAL
// Message[1] = v; // GHOST // Message[1] = v; // GHOST
// Message[2] = SUCCESS; // TYPE // Message[2] = SUCCESS; // TYPE
// Send a Request (Asynchronous) // Send a Request (Asynchronous)
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm); // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgActual)++; (*msgActual)++;
(*msgInd)++; (*msgInd)++;
privateQLocalVtx.push_back(u); privateQLocalVtx.push_back(u);
privateQGhostVtx.push_back(v); privateQGhostVtx.push_back(v);
privateQMsgType.push_back(SUCCESS); privateQMsgType.push_back(SUCCESS);
privateQOwner.push_back(ghostOwner); privateQOwner.push_back(ghostOwner);
break; break;
} // End of switch } // End of switch
} // End of inner for } // End of inner for
} }
} // End of outer for } // End of outer for
queuesTransfer(U, privateU, QLocalVtx, queuesTransfer(U, privateU, QLocalVtx,
QGhostVtx, QGhostVtx,
QMsgType, QOwner, privateQLocalVtx, QMsgType, QOwner, privateQLocalVtx,
privateQGhostVtx, privateQGhostVtx,
privateQMsgType, privateQMsgType,
privateQOwner); privateQOwner);
} // End of while ( !U.empty() ) } // End of while ( !U.empty() )
#ifdef COUNT_LOCAL_VERTEX #ifdef COUNT_LOCAL_VERTEX
printf("Count local vertexes: %ld for thread %d of processor %d\n", printf("Count local vertexes: %ld for thread %d of processor %d\n",
localVertices, localVertices,
omp_get_thread_num(), omp_get_thread_num(),
myRank); myRank);
#endif #endif
} // End of parallel region } // End of parallel region
// Send the messages // Send the messages
for (int i = initialSize; i < QOwner.size(); i++) #ifdef DEBUG_HANG_
{ cout << myRank<<" Sending: "<<QOwner.size()-initialSize<<" messages" <<endl;
#endif
Message[0] = QLocalVtx[i]; for (int i = initialSize; i < QOwner.size(); i++) {
Message[1] = QGhostVtx[i];
Message[2] = QMsgType[i]; Message[0] = QLocalVtx[i];
ghostOwner = QOwner[i]; Message[1] = QGhostVtx[i];
Message[2] = QMsgType[i];
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm); ghostOwner = QOwner[i];
//MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
//cout << myRank<<" Sending to "<<ghostOwner<<endl;
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
} }
#ifdef DEBUG_HANG_
cout << myRank<<" Done sending messages"<<endl;
#endif
} }

@ -1,4 +1,5 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
//#define DEBUG_HANG_
void processMessages( void processMessages(
MilanLongInt NLVer, MilanLongInt NLVer,
@ -78,6 +79,7 @@ void processMessages(
fflush(stdout); fflush(stdout);
#endif #endif
//cout << myRank<<" Receiving ...";
error_codeC = MPI_Recv(&Message[0], 3, TypeMap<MilanLongInt>(), MPI_ANY_SOURCE, ComputeTag, comm, &computeStatus); error_codeC = MPI_Recv(&Message[0], 3, TypeMap<MilanLongInt>(), MPI_ANY_SOURCE, ComputeTag, comm, &computeStatus);
if (error_codeC != MPI_SUCCESS) if (error_codeC != MPI_SUCCESS)
{ {
@ -86,70 +88,66 @@ void processMessages(
fflush(stdout); fflush(stdout);
} }
Sender = computeStatus.MPI_SOURCE; Sender = computeStatus.MPI_SOURCE;
//cout << " ...from "<<Sender << endl;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Received message from Process " << Sender << " Type= " << Message[2] << endl; cout << "\n(" << myRank << ")Received message from Process " << Sender << " Type= " << Message[2] << endl;
fflush(stdout); fflush(stdout);
#endif #endif
if (Message[2] == SIZEINFO) if (Message[2] == SIZEINFO) {
{
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Received bundled message from Process " << Sender << " Size= " << Message[0] << endl; cout << "\n(" << myRank << ")Received bundled message from Process " << Sender << " Size= " << Message[0] << endl;
fflush(stdout); fflush(stdout);
#endif #endif
bundleSize = Message[0]; //#of integers in the message bundleSize = Message[0]; //#of integers in the message
// Build the Message Buffer: // Build the Message Buffer:
if (!ReceiveBuffer.empty()) if (!ReceiveBuffer.empty())
ReceiveBuffer.clear(); // Empty it out first ReceiveBuffer.clear(); // Empty it out first
ReceiveBuffer.resize(bundleSize, -1); // Initialize ReceiveBuffer.resize(bundleSize, -1); // Initialize
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message Bundle Before: " << endl; cout << "\n(" << myRank << ")Message Bundle Before: " << endl;
for (int i = 0; i < bundleSize; i++) for (int i = 0; i < bundleSize; i++)
cout << ReceiveBuffer[i] << ","; cout << ReceiveBuffer[i] << ",";
cout << endl; cout << endl;
fflush(stdout); fflush(stdout);
#endif #endif
// Receive the message // Receive the message
error_codeC = MPI_Recv(&ReceiveBuffer[0], bundleSize, TypeMap<MilanLongInt>(), Sender, BundleTag, comm, &computeStatus); //cout << myRank<<" Receiving from "<<Sender<<endl;
if (error_codeC != MPI_SUCCESS) error_codeC = MPI_Recv(&ReceiveBuffer[0], bundleSize, TypeMap<MilanLongInt>(), Sender, BundleTag, comm, &computeStatus);
{ if (error_codeC != MPI_SUCCESS) {
MPI_Error_string(error_codeC, error_message, &message_length); MPI_Error_string(error_codeC, error_message, &message_length);
cout << "\n*Error in call to MPI_Receive on processor " << myRank << " Error: " << error_message << "\n"; cout << "\n*Error in call to MPI_Receive on processor " << myRank << " Error: " << error_message << "\n";
fflush(stdout); fflush(stdout);
} }
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message Bundle After: " << endl; cout << "\n(" << myRank << ")Message Bundle After: " << endl;
for (int i = 0; i < bundleSize; i++) for (int i = 0; i < bundleSize; i++)
cout << ReceiveBuffer[i] << ","; cout << ReceiveBuffer[i] << ",";
cout << endl; cout << endl;
fflush(stdout); fflush(stdout);
#endif #endif
} } else { // Just a single message:
else
{ // Just a single message:
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Received regular message from Process " << Sender << " u= " << Message[0] << " v= " << Message[1] << endl; cout << "\n(" << myRank << ")Received regular message from Process " << Sender << " u= " << Message[0] << " v= " << Message[1] << endl;
fflush(stdout); fflush(stdout);
#endif #endif
// Add the current message to Queue: // Add the current message to Queue:
bundleSize = 3; //#of integers in the message bundleSize = 3; //#of integers in the message
// Build the Message Buffer: // Build the Message Buffer:
if (!ReceiveBuffer.empty()) if (!ReceiveBuffer.empty())
ReceiveBuffer.clear(); // Empty it out first ReceiveBuffer.clear(); // Empty it out first
ReceiveBuffer.resize(bundleSize, -1); // Initialize ReceiveBuffer.resize(bundleSize, -1); // Initialize
ReceiveBuffer[0] = Message[0]; // u ReceiveBuffer[0] = Message[0]; // u
ReceiveBuffer[1] = Message[1]; // v ReceiveBuffer[1] = Message[1]; // v
ReceiveBuffer[2] = Message[2]; // message_type ReceiveBuffer[2] = Message[2]; // message_type
} }
#ifdef DEBUG_GHOST_ #ifdef DEBUG_GHOST_
if ((v < StartIndex) || (v > EndIndex)) if ((v < StartIndex) || (v > EndIndex)) {
{ cout << "\n(" << myRank << ") From ReceiveBuffer: This should not happen: u= " << u << " v= " << v << " Type= " << message_type << " StartIndex " << StartIndex << " EndIndex " << EndIndex << endl;
cout << "\n(" << myRank << ") From ReceiveBuffer: This should not happen: u= " << u << " v= " << v << " Type= " << message_type << " StartIndex " << StartIndex << " EndIndex " << EndIndex << endl; fflush(stdout);
fflush(stdout);
} }
#endif #endif
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
@ -158,172 +156,160 @@ void processMessages(
#endif #endif
// Most of the time bundleSize == 3, thus, it's not worth parallelizing thi loop // Most of the time bundleSize == 3, thus, it's not worth parallelizing thi loop
for (MilanLongInt bundleCounter = 3; bundleCounter < bundleSize + 3; bundleCounter += 3) for (MilanLongInt bundleCounter = 3; bundleCounter < bundleSize + 3; bundleCounter += 3) {
{ u = ReceiveBuffer[bundleCounter - 3]; // GHOST
u = ReceiveBuffer[bundleCounter - 3]; // GHOST v = ReceiveBuffer[bundleCounter - 2]; // LOCAL
v = ReceiveBuffer[bundleCounter - 2]; // LOCAL message_type = ReceiveBuffer[bundleCounter - 1]; // TYPE
message_type = ReceiveBuffer[bundleCounter - 1]; // TYPE
// CASE I: REQUEST
// CASE I: REQUEST if (message_type == REQUEST) {
if (message_type == REQUEST)
{
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message type is REQUEST" << endl; cout << "\n(" << myRank << ")Message type is REQUEST" << endl;
fflush(stdout); fflush(stdout);
#endif #endif
#ifdef DEBUG_GHOST_ #ifdef DEBUG_GHOST_
if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) {
{ cout << "\n(" << myRank << ") case 1 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl;
cout << "\n(" << myRank << ") case 1 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl; fflush(stdout);
fflush(stdout); }
}
#endif #endif
if (Mate[v - StartIndex] == -1) if (Mate[v - StartIndex] == -1) {
{ // Process only if not already matched (v is local) // Process only if not already matched (v is local)
candidateMate[NLVer + Ghost2LocalMap[u]] = v; // Set CandidateMate for the ghost candidateMate[NLVer + Ghost2LocalMap[u]] = v; // Set CandidateMate for the ghost
if (candidateMate[v - StartIndex] == u) if (candidateMate[v - StartIndex] == u) {
{ GMate[Ghost2LocalMap[u]] = v; // u is ghost
GMate[Ghost2LocalMap[u]] = v; // u is ghost Mate[v - StartIndex] = u; // v is local
Mate[v - StartIndex] = u; // v is local U.push_back(v);
U.push_back(v); U.push_back(u);
U.push_back(u); (*myCard)++;
(*myCard)++;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << u << ") " << endl; cout << "\n(" << myRank << ")MATCH: (" << v << "," << u << ") " << endl;
fflush(stdout); fflush(stdout);
#endif #endif
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S);
} // End of if ( candidateMate[v-StartIndex] == u )e } // End of if ( candidateMate[v-StartIndex] == u )e
} // End of if ( Mate[v] == -1 ) } // End of if ( Mate[v] == -1 )
} // End of REQUEST } // End of REQUEST
else else { // CASE II: SUCCESS
{ // CASE II: SUCCESS if (message_type == SUCCESS) {
if (message_type == SUCCESS)
{
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message type is SUCCESS" << endl; cout << "\n(" << myRank << ")Message type is SUCCESS" << endl;
fflush(stdout); fflush(stdout);
#endif #endif
GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process it again GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process it again
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S);
#ifdef DEBUG_GHOST_ #ifdef DEBUG_GHOST_
if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) {
{ cout << "\n(" << myRank << ") case 2 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl;
cout << "\n(" << myRank << ") case 2 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl; fflush(stdout);
fflush(stdout); }
}
#endif #endif
if (Mate[v - StartIndex] == -1) if (Mate[v - StartIndex] == -1) {
{ // Process only if not already matched ( v is local) // Process only if not already matched ( v is local)
if (candidateMate[v - StartIndex] == u) if (candidateMate[v - StartIndex] == u) {
{ // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) w = computeCandidateMate(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], edgeLocWeight, k,
w = computeCandidateMate(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], edgeLocWeight, k, verLocInd, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap); verLocInd, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap);
candidateMate[v - StartIndex] = w; candidateMate[v - StartIndex] = w;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")" << v << " Points to: " << w << endl; cout << "\n(" << myRank << ")" << v << " Points to: " << w << endl;
fflush(stdout); fflush(stdout);
#endif #endif
// If found a dominating edge: // If found a dominating edge:
if (w >= 0) if (w >= 0) {
{ if ((w < StartIndex) || (w > EndIndex)) {
if ((w < StartIndex) || (w > EndIndex)) // w is a ghost
{ // w is a ghost // Build the Message Packet:
// Build the Message Packet: Message[0] = v; // LOCAL
Message[0] = v; // LOCAL Message[1] = w; // GHOST
Message[1] = w; // GHOST Message[2] = REQUEST; // TYPE
Message[2] = REQUEST; // TYPE // Send a Request (Asynchronous)
// Send a Request (Asynchronous)
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a request message: "; cout << "\n(" << myRank << ")Sending a request message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl;
fflush(stdout); fflush(stdout);
#endif #endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
//assert(ghostOwner != -1); //assert(ghostOwner != -1);
//assert(ghostOwner != myRank); //assert(ghostOwner != myRank);
//cout << myRank<<" Sending to "<<ghostOwner<<endl;
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm); MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgInd)++; (*msgInd)++;
(*msgActual)++; (*msgActual)++;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) {
{ Mate[v - StartIndex] = w; // v is local
Mate[v - StartIndex] = w; // v is local GMate[Ghost2LocalMap[w]] = v; // w is ghost
GMate[Ghost2LocalMap[w]] = v; // w is ghost U.push_back(v);
U.push_back(v); U.push_back(w);
U.push_back(w); (*myCard)++;
(*myCard)++;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl; cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl;
fflush(stdout); fflush(stdout);
#endif #endif
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S); PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S);
} // End of if CandidateMate[w] = v } // End of if CandidateMate[w] = v
} // End of if a Ghost Vertex } // End of if a Ghost Vertex
else else { // w is a local vertex
{ // w is a local vertex if (candidateMate[w - StartIndex] == v) {
if (candidateMate[w - StartIndex] == v) Mate[v - StartIndex] = w; // v is local
{ Mate[w - StartIndex] = v; // w is local
Mate[v - StartIndex] = w; // v is local // Q.push_back(u);
Mate[w - StartIndex] = v; // w is local U.push_back(v);
// Q.push_back(u); U.push_back(w);
U.push_back(v); (*myCard)++;
U.push_back(w);
(*myCard)++;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl; cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl;
fflush(stdout); fflush(stdout);
#endif #endif
} // End of if(CandidateMate(w) = v } // End of if(CandidateMate(w) = v
} // End of Else } // End of Else
} // End of if(w >=0) } // End of if(w >=0)
else else { // No dominant edge found
{ // No dominant edge found adj11 = verLocPtr[v - StartIndex];
adj11 = verLocPtr[v - StartIndex]; adj12 = verLocPtr[v - StartIndex + 1];
adj12 = verLocPtr[v - StartIndex + 1]; for (k1 = adj11; k1 < adj12; k1++) {
for (k1 = adj11; k1 < adj12; k1++) w = verLocInd[k1];
{ if ((w < StartIndex) || (w > EndIndex)) {
w = verLocInd[k1]; // A ghost
if ((w < StartIndex) || (w > EndIndex)) // Build the Message Packet:
{ // A ghost Message[0] = v; // LOCAL
// Build the Message Packet: Message[1] = w; // GHOST
Message[0] = v; // LOCAL Message[2] = FAILURE; // TYPE
Message[1] = w; // GHOST // Send a Request (Asynchronous)
Message[2] = FAILURE; // TYPE
// Send a Request (Asynchronous)
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: "; cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl;
fflush(stdout); fflush(stdout);
#endif #endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
//assert(ghostOwner != -1); //assert(ghostOwner != -1);
//assert(ghostOwner != myRank); //assert(ghostOwner != myRank);
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm); //cout << myRank<<" Sending to "<<ghostOwner<<endl;
(*msgInd)++; MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgActual)++; (*msgInd)++;
} // End of if(GHOST) (*msgActual)++;
} // End of for loop } // End of if(GHOST)
} // End of Else: w == -1 } // End of for loop
} // End of Else: w == -1
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
} // End of if ( candidateMate[v-StartIndex] == u ) } // End of if ( candidateMate[v-StartIndex] == u )
} // End of if ( Mate[v] == -1 ) } // End of if ( Mate[v] == -1 )
} // End of if ( message_type == SUCCESS ) } // End of if ( message_type == SUCCESS )
else else {
{ // CASE III: FAILURE // CASE III: FAILURE
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message type is FAILURE" << endl; cout << "\n(" << myRank << ")Message type is FAILURE" << endl;
fflush(stdout); fflush(stdout);
#endif #endif
GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process this anymore GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process this anymore
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); // Decrease the counter PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); // Decrease the counter
} // End of else: CASE III } // End of else: CASE III
} // End of else: CASE I } // End of else: CASE I
} }
return; return;
} }

@ -32,4 +32,5 @@ void queuesTransfer(vector<MilanLongInt> &U,
privateQGhostVtx.clear(); privateQGhostVtx.clear();
privateQMsgType.clear(); privateQMsgType.clear();
privateQOwner.clear(); privateQOwner.clear();
}
}

@ -38,108 +38,107 @@ void sendBundledMessages(MilanLongInt *numGhostEdges,
#pragma omp task depend(inout \ #pragma omp task depend(inout \
: PCumulative, PMessageBundle, PSizeInfoMessages) depend(in \ : PCumulative, PMessageBundle, PSizeInfoMessages) depend(in \
: NumMessagesBundled, numProcs) : NumMessagesBundled, numProcs)
{try { {
try {
PMessageBundle.reserve(NumMessagesBundled * 3); // Three integers per message PMessageBundle.reserve(NumMessagesBundled * 3); // Three integers per message
PCumulative.reserve(numProcs + 1); // Similar to Row Pointer vector in CSR data structure PCumulative.reserve(numProcs + 1); // Similar to Row Pointer vector in CSR data structure
PSizeInfoMessages.reserve(numProcs * 3); // Buffer to hold the Size info message packets PSizeInfoMessages.reserve(numProcs * 3); // Buffer to hold the Size info message packets
} }
catch (length_error) catch (length_error)
{ {
cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n";
cout << "Not enough memory to allocate the internal variables \n"; cout << "Not enough memory to allocate the internal variables \n";
exit(1); exit(1);
} }
PMessageBundle.resize(NumMessagesBundled * 3, -1); // Initialize PMessageBundle.resize(NumMessagesBundled * 3, -1); // Initialize
PCumulative.resize(numProcs + 1, 0); // Only initialize the counter variable PCumulative.resize(numProcs + 1, 0); // Only initialize the counter variable
PSizeInfoMessages.resize(numProcs * 3, 0); PSizeInfoMessages.resize(numProcs * 3, 0);
} }
#pragma omp task depend(inout \ #pragma omp task depend(inout \
: PCumulative) depend(in \ : PCumulative) depend(in \
: PCounter) : PCounter)
{ {
for (i = 0; i < numProcs; i++) for (i = 0; i < numProcs; i++)
PCumulative[i + 1] = PCumulative[i] + PCounter[i]; PCumulative[i + 1] = PCumulative[i] + PCounter[i];
} }
#pragma omp task depend(inout \ #pragma omp task depend(inout \
: PCounter) : PCounter)
{ {
// Reuse PCounter to keep track of how many messages were inserted: // Reuse PCounter to keep track of how many messages were inserted:
for (MilanInt i = 0; i < numProcs; i++) // Changed by Fabio to be an integer, addresses needs to be integers! for (MilanInt i = 0; i < numProcs; i++) // Changed by Fabio to be an integer, addresses needs to be integers!
PCounter[i] = 0; PCounter[i] = 0;
} }
// Build the Message Bundle packet: // Build the Message Bundle packet:
#pragma omp task depend(in \ #pragma omp task depend(in \
: PCounter, QLocalVtx, QGhostVtx, QMsgType, QOwner, PMessageBundle, PCumulative) depend(out \ : PCounter, QLocalVtx, QGhostVtx, QMsgType, QOwner, PMessageBundle, PCumulative) depend(out \
: myIndex, PMessageBundle, PCounter) : myIndex, PMessageBundle, PCounter)
{ {
for (i = 0; i < NumMessagesBundled; i++) for (i = 0; i < NumMessagesBundled; i++) {
{ myIndex = (PCumulative[QOwner[i]] + PCounter[QOwner[i]]) * 3;
myIndex = (PCumulative[QOwner[i]] + PCounter[QOwner[i]]) * 3; PMessageBundle[myIndex + 0] = QLocalVtx[i];
PMessageBundle[myIndex + 0] = QLocalVtx[i]; PMessageBundle[myIndex + 1] = QGhostVtx[i];
PMessageBundle[myIndex + 1] = QGhostVtx[i]; PMessageBundle[myIndex + 2] = QMsgType[i];
PMessageBundle[myIndex + 2] = QMsgType[i]; PCounter[QOwner[i]]++;
PCounter[QOwner[i]]++; }
} }
}
// Send the Bundled Messages: Use ISend // Send the Bundled Messages: Use ISend
#pragma omp task depend(out \ #pragma omp task depend(out \
: SRequest, SStatus) : SRequest, SStatus)
{ {
try try
{ {
SRequest.reserve(numProcs * 2); // At most two messages per processor SRequest.reserve(numProcs * 2); // At most two messages per processor
SStatus.reserve(numProcs * 2); // At most two messages per processor SStatus.reserve(numProcs * 2); // At most two messages per processor
} }
catch (length_error) catch (length_error)
{ {
cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearchImmediateSend: \n"; cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearchImmediateSend: \n";
cout << "Not enough memory to allocate the internal variables \n"; cout << "Not enough memory to allocate the internal variables \n";
exit(1); exit(1);
} }
} }
// Send the Messages // Send the Messages
#pragma omp task depend(inout \ #pragma omp task depend(inout \
: SRequest, PSizeInfoMessages, PCumulative) depend(out \ : SRequest, PSizeInfoMessages, PCumulative) depend(out \
: *msgActual, *msgInd) : *msgActual, *msgInd)
{ {
for (i = 0; i < numProcs; i++) for (i = 0; i < numProcs; i++) { // Changed by Fabio to be an integer, addresses needs to be integers!
{ // Changed by Fabio to be an integer, addresses needs to be integers! if (i == myRank) // Do not send anything to yourself
if (i == myRank) // Do not send anything to yourself continue;
continue; // Send the Message with information about the size of next message:
// Send the Message with information about the size of next message: // Build the Message Packet:
// Build the Message Packet: PSizeInfoMessages[i * 3 + 0] = (PCumulative[i + 1] - PCumulative[i]) * 3; // # of integers in the next message
PSizeInfoMessages[i * 3 + 0] = (PCumulative[i + 1] - PCumulative[i]) * 3; // # of integers in the next message PSizeInfoMessages[i * 3 + 1] = -1; // Dummy packet
PSizeInfoMessages[i * 3 + 1] = -1; // Dummy packet PSizeInfoMessages[i * 3 + 2] = SIZEINFO; // TYPE
PSizeInfoMessages[i * 3 + 2] = SIZEINFO; // TYPE // Send a Request (Asynchronous)
// Send a Request (Asynchronous)
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending bundled message to process " << i << " size: " << PSizeInfoMessages[i * 3 + 0] << endl; cout << "\n(" << myRank << ")Sending bundled message to process " << i << " size: " << PSizeInfoMessages[i * 3 + 0] << endl;
fflush(stdout); fflush(stdout);
#endif #endif
if (PSizeInfoMessages[i * 3 + 0] > 0) if (PSizeInfoMessages[i * 3 + 0] > 0)
{ // Send only if it is a nonempty packet { // Send only if it is a nonempty packet
MPI_Isend(&PSizeInfoMessages[i * 3 + 0], 3, TypeMap<MilanLongInt>(), i, ComputeTag, comm, MPI_Isend(&PSizeInfoMessages[i * 3 + 0], 3, TypeMap<MilanLongInt>(), i, ComputeTag, comm,
&SRequest[(*msgInd)]); &SRequest[(*msgInd)]);
(*msgActual)++; (*msgActual)++;
(*msgInd)++; (*msgInd)++;
// Now Send the message with the data packet: // Now Send the message with the data packet:
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")SendiFFng Bundle to : " << i << endl; cout << "\n(" << myRank << ")SendiFFng Bundle to : " << i << endl;
for (k = (PCumulative[i] * 3); k < (PCumulative[i] * 3 + PSizeInfoMessages[i * 3 + 0]); k++) for (k = (PCumulative[i] * 3); k < (PCumulative[i] * 3 + PSizeInfoMessages[i * 3 + 0]); k++)
cout << PMessageBundle[k] << ","; cout << PMessageBundle[k] << ",";
cout << endl; cout << endl;
fflush(stdout); fflush(stdout);
#endif #endif
MPI_Isend(&PMessageBundle[PCumulative[i] * 3], PSizeInfoMessages[i * 3 + 0], MPI_Isend(&PMessageBundle[PCumulative[i] * 3], PSizeInfoMessages[i * 3 + 0],
TypeMap<MilanLongInt>(), i, BundleTag, comm, &SRequest[(*msgInd)]); TypeMap<MilanLongInt>(), i, BundleTag, comm, &SRequest[(*msgInd)]);
(*msgInd)++; (*msgInd)++;
} // End of if size > 0 } // End of if size > 0
} }
} }
#pragma omp task depend(inout \ #pragma omp task depend(inout \
@ -147,64 +146,64 @@ PSizeInfoMessages.resize(numProcs * 3, 0);
{ {
// Free up temporary memory: // Free up temporary memory:
PCumulative.clear(); PCumulative.clear();
QLocalVtx.clear(); QLocalVtx.clear();
QGhostVtx.clear(); QGhostVtx.clear();
QMsgType.clear(); QMsgType.clear();
QOwner.clear(); QOwner.clear();
} }
#pragma omp task depend(inout : OneMessageSize, *BufferSize) depend(out : numMessagesToSend) depend(in : *numGhostEdges) #pragma omp task depend(inout : OneMessageSize, *BufferSize) depend(out : numMessagesToSend) depend(in : *numGhostEdges)
{ {
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Number of Ghost edges = " << *numGhostEdges; cout << "\n(" << myRank << ")Number of Ghost edges = " << *numGhostEdges;
cout << "\n(" << myRank << ")Total number of potential message X 2 = " << *numGhostEdges * 2; cout << "\n(" << myRank << ")Total number of potential message X 2 = " << *numGhostEdges * 2;
cout << "\n(" << myRank << ")Number messages already sent in bundles = " << NumMessagesBundled; cout << "\n(" << myRank << ")Number messages already sent in bundles = " << NumMessagesBundled;
if (*numGhostEdges > 0) if (*numGhostEdges > 0)
{ {
cout << "\n(" << myRank << ")Percentage of total = " << ((double)NumMessagesBundled / (double)(*numGhostEdges * 2)) * 100.0 << "% \n"; cout << "\n(" << myRank << ")Percentage of total = " << ((double)NumMessagesBundled / (double)(*numGhostEdges * 2)) * 100.0 << "% \n";
} }
fflush(stdout); fflush(stdout);
#endif #endif
// Allocate memory for MPI Send messages: // Allocate memory for MPI Send messages:
/* WILL COME BACK HERE - NO NEED TO STORE ALL THIS MEMORY !! */ /* WILL COME BACK HERE - NO NEED TO STORE ALL THIS MEMORY !! */
OneMessageSize = 0; OneMessageSize = 0;
MPI_Pack_size(3, TypeMap<MilanLongInt>(), comm, &OneMessageSize); // Size of one message packet MPI_Pack_size(3, TypeMap<MilanLongInt>(), comm, &OneMessageSize); // Size of one message packet
// How many messages to send? // How many messages to send?
// Potentially three kinds of messages will be sent/received: // Potentially three kinds of messages will be sent/received:
// Request, Success, Failure. // Request, Success, Failure.
// But only two will be sent from a given processor. // But only two will be sent from a given processor.
// Subtract the number of messages that have already been sent as bundled messages: // Subtract the number of messages that have already been sent as bundled messages:
numMessagesToSend = (*numGhostEdges) * 2 - NumMessagesBundled; numMessagesToSend = (*numGhostEdges) * 2 - NumMessagesBundled;
*BufferSize = (OneMessageSize + MPI_BSEND_OVERHEAD) * numMessagesToSend; *BufferSize = (OneMessageSize + MPI_BSEND_OVERHEAD) * numMessagesToSend;
} }
#pragma omp task depend(out : Buffer) depend(in : *BufferSize) #pragma omp task depend(out : Buffer) depend(in : *BufferSize)
{ {
Buffer = 0; Buffer = 0;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Size of One Message from PACK= " << OneMessageSize; cout << "\n(" << myRank << ")Size of One Message from PACK= " << OneMessageSize;
cout << "\n(" << myRank << ")Size of Message overhead = " << MPI_BSEND_OVERHEAD; cout << "\n(" << myRank << ")Size of Message overhead = " << MPI_BSEND_OVERHEAD;
cout << "\n(" << myRank << ")Number of Ghost edges = " << *numGhostEdges; cout << "\n(" << myRank << ")Number of Ghost edges = " << *numGhostEdges;
cout << "\n(" << myRank << ")Number of remaining message = " << numMessagesToSend; cout << "\n(" << myRank << ")Number of remaining message = " << numMessagesToSend;
cout << "\n(" << myRank << ")BufferSize = " << (*BufferSize); cout << "\n(" << myRank << ")BufferSize = " << (*BufferSize);
cout << "\n(" << myRank << ")Attaching Buffer on.. "; cout << "\n(" << myRank << ")Attaching Buffer on.. ";
fflush(stdout); fflush(stdout);
#endif #endif
if ((*BufferSize) > 0) if ((*BufferSize) > 0)
{ {
Buffer = (MilanLongInt *)malloc((*BufferSize)); // Allocate memory Buffer = (MilanLongInt *)malloc((*BufferSize)); // Allocate memory
if (Buffer == 0) if (Buffer == 0)
{ {
cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n";
cout << "Not enough memory to allocate for send buffer on process " << myRank << "\n"; cout << "Not enough memory to allocate for send buffer on process " << myRank << "\n";
exit(1); exit(1);
} }
MPI_Buffer_attach(Buffer, *BufferSize); // Attach the Buffer MPI_Buffer_attach(Buffer, *BufferSize); // Attach the Buffer
} }
}
} }
} }
} }
}
Loading…
Cancel
Save