processMatchedVertices parallelized

omp-walther
StefanoPetrilli 2 years ago
parent 71d4cdc319
commit 9ab54adf3f

@ -36,6 +36,7 @@ void processMatchedVertices(
{
MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner;
int option;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
@ -45,56 +46,55 @@ void processMatchedVertices(
#ifdef COUNT_LOCAL_VERTEX
MilanLongInt localVertices = 0;
#endif
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner) firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(NUM_THREAD)
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(NUM_THREAD)
{
// TODO what would be the optimal UCHUNK
// TODO refactor
vector<MilanLongInt> UChunkBeingProcessed;
UChunkBeingProcessed.reserve(UCHUNK);
// TODO what would be the optimal UCHUNK
// TODO refactor
vector<MilanLongInt> UChunkBeingProcessed;
UChunkBeingProcessed.reserve(UCHUNK);
while (!U.empty())
{
while (!U.empty())
{
extractUChunk(UChunkBeingProcessed, U, privateU);
extractUChunk(UChunkBeingProcessed, U, privateU);
for (MilanLongInt u : UChunkBeingProcessed)
{
for (MilanLongInt u : UChunkBeingProcessed)
{
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")u: " << u;
fflush(stdout);
cout << "\n(" << myRank << ")u: " << u;
fflush(stdout);
#endif
if ((u >= StartIndex) && (u <= EndIndex))
{ // Process Only the Local Vertices
if ((u >= StartIndex) && (u <= EndIndex))
{ // Process Only the Local Vertices
#ifdef COUNT_LOCAL_VERTEX
localVertices++;
localVertices++;
#endif
// Get the Adjacency list for u
adj1 = verLocPtr[u - StartIndex]; // Pointer
adj2 = verLocPtr[u - StartIndex + 1];
for (k = adj1; k < adj2; k++)
{
v = verLocInd[k];
// Get the Adjacency list for u
adj1 = verLocPtr[u - StartIndex]; // Pointer
adj2 = verLocPtr[u - StartIndex + 1];
for (k = adj1; k < adj2; k++)
{
option = -1;
v = verLocInd[k];
if ((v >= StartIndex) && (v <= EndIndex))
{ // If Local Vertex:
if ((v >= StartIndex) && (v <= EndIndex))
{ // If Local Vertex:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v];
fflush(stdout);
cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v];
fflush(stdout);
#endif
// If the current vertex is pointing to a matched vertex and is not matched
// FIXME is there a way to make candidateMate private?
// for the moment it could generate an error.
if (not isAlreadyMatched(v, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap))
{
// If the current vertex is pointing to a matched vertex and is not matched
if (not isAlreadyMatched(v, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap))
{
#pragma omp critical
{
if (candidateMate[v - StartIndex] == u)
{
if (candidateMate[v - StartIndex] == u)
{
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
w = computeCandidateMate(verLocPtr[v - StartIndex],
verLocPtr[v - StartIndex + 1],
@ -122,171 +122,189 @@ void processMatchedVertices(
cout << "\n(" << myRank << ")Sending a request message:";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
#endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1);
assert(ghostOwner != myRank);
#pragma omp atomic
PCounter[ghostOwner]++;
#pragma omp atomic
(*msgIndPtr)++;
#pragma omp atomic
(*NumMessagesBundledPtr)++;
privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST);
privateQOwner.push_back(ghostOwner);
option = 2;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v)
{
option = 1;
Mate[v - StartIndex] = w; // v is a local vertex
GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
privateU.push_back(v);
privateU.push_back(w);
#pragma omp atomic
(*myCardPtr)++;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout);
#endif
// Decrement the counter:
PROCESS_CROSS_EDGE(Counter, Ghost2LocalMap[w], SPtr);
} // End of if CandidateMate[w] = v
} // End of if a Ghost Vertex
} // End of if a Ghost Vertex
else
{ // w is a local vertex
if (candidateMate[w - StartIndex] == v)
{
option = 3;
Mate[v - StartIndex] = w; // v is a local vertex
Mate[w - StartIndex] = v; // w is a local vertex
privateU.push_back(v);
privateU.push_back(w);
#pragma omp atomic
(*myCardPtr)++;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout);
#endif
} // End of if(CandidateMate(w) = v
} // End of Else
} // End of Else
} // End of if(w >=0)
else
{
adj11 = verLocPtr[v - StartIndex];
adj12 = verLocPtr[v - StartIndex + 1];
for (k1 = adj11; k1 < adj12; k1++)
{
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
else option = 4;// End of Else: w == -1
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
}
} // End of task
} // End of If (candidateMate[v-StartIndex] == u
} // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex:
else
{ // Neighbor is a ghost vertex
#pragma omp critical
{
if (candidateMate[NLVer + Ghost2LocalMap[v]] == u)
candidateMate[NLVer + Ghost2LocalMap[v]] = -1;
if (v != Mate[u - StartIndex]) option = 5; // u is local
} // End of critical
} // End of Else //A Ghost Vertex
switch (option)
{
case -1:
// No things to do
break;
case 1:
// Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v
privateU.push_back(v);
privateU.push_back(w);
#pragma omp atomic
(*myCardPtr)++;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
fflush(stdout);
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout);
#endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1);
assert(ghostOwner != myRank);
case 2:
// Found a dominating edge, it is a ghost
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1);
assert(ghostOwner != myRank);
#pragma omp atomic
PCounter[ghostOwner]++;
#pragma omp atomic
PCounter[ghostOwner]++;
(*msgIndPtr)++;
#pragma omp atomic
(*msgIndPtr)++;
(*NumMessagesBundledPtr)++;
privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST);
privateQOwner.push_back(ghostOwner);
break;
case 3:
privateU.push_back(v);
privateU.push_back(w);
#pragma omp atomic
(*NumMessagesBundledPtr)++;
(*myCardPtr)++;
break;
case 4:
// Could not find a dominating vertex
adj11 = verLocPtr[v - StartIndex];
adj12 = verLocPtr[v - StartIndex + 1];
for (k1 = adj11; k1 < adj12; k1++)
{
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(FAILURE);
privateQOwner.push_back(ghostOwner);
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
fflush(stdout);
#endif
} // End of if(GHOST)
} // End of for loop
} // End of Else: w == -1
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
}
} // End of task
} // End of If (candidateMate[v-StartIndex] == u
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1);
assert(ghostOwner != myRank);
#pragma omp atomic
PCounter[ghostOwner]++;
#pragma omp atomic
(*msgIndPtr)++;
#pragma omp atomic
(*NumMessagesBundledPtr)++;
} // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex:
else
{ // Neighbor is a ghost vertex
privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(FAILURE);
privateQOwner.push_back(ghostOwner);
#pragma omp critical
{
if (candidateMate[NLVer + Ghost2LocalMap[v]] == u)
candidateMate[NLVer + Ghost2LocalMap[v]] = -1;
if (v != Mate[u - StartIndex])
{ // u is local
} // End of if(GHOST)
} // End of for loop
break;
default:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a success message: ";
cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n";
fflush(stdout);
cout << "\n(" << myRank << ")Sending a success message: ";
cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n";
fflush(stdout);
#endif
ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
assert(ghostOwner != -1);
assert(ghostOwner != myRank);
ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
assert(ghostOwner != -1);
assert(ghostOwner != myRank);
#pragma omp atomic
PCounter[ghostOwner]++;
PCounter[ghostOwner]++;
#pragma omp atomic
(*msgIndPtr)++;
(*msgIndPtr)++;
#pragma omp atomic
(*NumMessagesBundledPtr)++;
privateQLocalVtx.push_back(u);
privateQGhostVtx.push_back(v);
privateQMsgType.push_back(SUCCESS);
privateQOwner.push_back(ghostOwner);
(*NumMessagesBundledPtr)++;
privateQLocalVtx.push_back(u);
privateQGhostVtx.push_back(v);
privateQMsgType.push_back(SUCCESS);
privateQOwner.push_back(ghostOwner);
} // End of If( v != Mate[u] )
break;
} //End of switch
} // End of task
} // End of Else //A Ghost Vertex
} // End of inner for
} // End of inner for
// TODO privateU.size() < UCHUNK could be commented but it generate errors, why?
if (privateU.size() > UCHUNK || U.empty())
{
// TODO privateU.size() < UCHUNK could be commented but it generate errors, why?
if (privateU.size() > UCHUNK || U.empty())
{
#pragma omp critical(U)
{
while (!privateU.empty())
U.push_back(privateU.pop_back());
}
{
while (!privateU.empty())
U.push_back(privateU.pop_back());
}
#ifndef error
#pragma omp critical(privateMsg)
{
while (!privateQLocalVtx.empty())
{
while (!privateQLocalVtx.empty())
{
QLocalVtx.push_back(privateQLocalVtx.pop_back());
QGhostVtx.push_back(privateQGhostVtx.pop_back());
QMsgType.push_back(privateQMsgType.pop_back());
QOwner.push_back(privateQOwner.pop_back());
}
QLocalVtx.push_back(privateQLocalVtx.pop_back());
QGhostVtx.push_back(privateQGhostVtx.pop_back());
QMsgType.push_back(privateQMsgType.pop_back());
QOwner.push_back(privateQOwner.pop_back());
}
}
#endif
} // End of private.size()
}
} // End of outer for
} // End of while ( !U.empty() )
queuesTransfer(U, privateU, QLocalVtx,
QGhostVtx,
QMsgType, QOwner, privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
} // End of private.size()
}
} // End of outer for
} // End of while ( !U.empty() )
queuesTransfer(U, privateU, QLocalVtx,
QGhostVtx,
QMsgType, QOwner, privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
#ifdef COUNT_LOCAL_VERTEX
printf("Count local vertexes: %ld for thread %d of processor %d\n",
localVertices,
omp_get_thread_num(),
myRank);
printf("Count local vertexes: %ld for thread %d of processor %d\n",
localVertices,
omp_get_thread_num(),
myRank);
#endif
} // End of parallel region

Loading…
Cancel
Save