processMatchedVertices rollback to critical regions

omp-walther
StefanoPetrilli 2 years ago
parent 1374f21ba8
commit 71d4cdc319

@ -66,7 +66,7 @@
using namespace std; using namespace std;
#define NUM_THREAD 4 #define NUM_THREAD 4
#define UCHUNK 1000 #define UCHUNK 5
const MilanLongInt REQUEST = 1; const MilanLongInt REQUEST = 1;
const MilanLongInt SUCCESS = 2; const MilanLongInt SUCCESS = 2;
@ -295,7 +295,6 @@ extern "C"
void processMatchedVertices( void processMatchedVertices(
MilanLongInt NLVer, MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed,
staticQueue &U, staticQueue &U,
staticQueue &privateU, staticQueue &privateU,
MilanLongInt StartIndex, MilanLongInt StartIndex,
@ -326,25 +325,25 @@ extern "C"
staticQueue &privateQOwner, staticQueue &privateQOwner,
omp_lock_t *MateLock); omp_lock_t *MateLock);
void sendBundledMessages(MilanLongInt *numGhostEdgesPtr, void sendBundledMessages(MilanLongInt *numGhostEdgesPtr,
MilanInt *BufferSizePtr, MilanInt *BufferSizePtr,
MilanLongInt *Buffer, MilanLongInt *Buffer,
vector<MilanLongInt> &PCumulative, vector<MilanLongInt> &PCumulative,
vector<MilanLongInt> &PMessageBundle, vector<MilanLongInt> &PMessageBundle,
vector<MilanLongInt> &PSizeInfoMessages, vector<MilanLongInt> &PSizeInfoMessages,
MilanLongInt *PCounter, MilanLongInt *PCounter,
MilanLongInt NumMessagesBundled, MilanLongInt NumMessagesBundled,
MilanLongInt *msgActualPtr, MilanLongInt *msgActualPtr,
MilanLongInt *MessageIndexPtr, MilanLongInt *MessageIndexPtr,
MilanInt numProcs, MilanInt numProcs,
MilanInt myRank, MilanInt myRank,
MPI_Comm comm, MPI_Comm comm,
vector<MilanLongInt> &QLocalVtx, vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx, vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType, vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner, vector<MilanInt> &QOwner,
vector<MPI_Request> &SRequest, vector<MPI_Request> &SRequest,
vector<MPI_Status> &SStatus); vector<MPI_Status> &SStatus);
void processMessages( void processMessages(
MilanLongInt NLVer, MilanLongInt NLVer,

@ -278,12 +278,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
/////////////////////////// PROCESS MATCHED VERTICES ////////////////////////////// /////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
vector<MilanLongInt> UChunkBeingProcessed;
UChunkBeingProcessed.reserve(UCHUNK);
//#define PRINT_DEBUG_INFO_
processMatchedVertices(NLVer, processMatchedVertices(NLVer,
UChunkBeingProcessed, //UChunkBeingProcessed,
U, U,
privateU, privateU,
StartIndex, StartIndex,

@ -1,5 +1,7 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
//TODO parallelize this
///Find the owner of a ghost node: ///Find the owner of a ghost node:
MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance,
MilanInt myRank, MilanInt numProcs) { MilanInt myRank, MilanInt numProcs) {

@ -4,7 +4,6 @@
void processMatchedVertices( void processMatchedVertices(
MilanLongInt NLVer, MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed,
staticQueue &U, staticQueue &U,
staticQueue &privateU, staticQueue &privateU,
MilanLongInt StartIndex, MilanLongInt StartIndex,
@ -37,7 +36,6 @@ void processMatchedVertices(
{ {
MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner; MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl; cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout); fflush(stdout);
@ -50,276 +48,246 @@ void processMatchedVertices(
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner) firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(NUM_THREAD) #pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner) firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(NUM_THREAD)
{ {
// TODO what would be the optimal UCHUNK // TODO what would be the optimal UCHUNK
// TODO refactor // TODO refactor
vector<MilanLongInt> UChunkBeingProcessed; vector<MilanLongInt> UChunkBeingProcessed;
UChunkBeingProcessed.reserve(UCHUNK); UChunkBeingProcessed.reserve(UCHUNK);
while (!U.empty()) while (!U.empty())
{ {
extractUChunk(UChunkBeingProcessed, U, privateU); extractUChunk(UChunkBeingProcessed, U, privateU);
for (MilanLongInt u : UChunkBeingProcessed) for (MilanLongInt u : UChunkBeingProcessed)
{ {
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")u: " << u; cout << "\n(" << myRank << ")u: " << u;
fflush(stdout); fflush(stdout);
#endif #endif
if ((u >= StartIndex) && (u <= EndIndex)) if ((u >= StartIndex) && (u <= EndIndex))
{ // Process Only the Local Vertices { // Process Only the Local Vertices
#ifdef COUNT_LOCAL_VERTEX #ifdef COUNT_LOCAL_VERTEX
localVertices++; localVertices++;
#endif #endif
// Get the Adjacency list for u // Get the Adjacency list for u
adj1 = verLocPtr[u - StartIndex]; // Pointer adj1 = verLocPtr[u - StartIndex]; // Pointer
adj2 = verLocPtr[u - StartIndex + 1]; adj2 = verLocPtr[u - StartIndex + 1];
for (k = adj1; k < adj2; k++) for (k = adj1; k < adj2; k++)
{ {
v = verLocInd[k]; v = verLocInd[k];
if ((v >= StartIndex) && (v <= EndIndex)) if ((v >= StartIndex) && (v <= EndIndex))
{ // If Local Vertex: { // If Local Vertex:
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v];
fflush(stdout); fflush(stdout);
#endif #endif
// If the current vertex is pointing to a matched vertex and is not matched // If the current vertex is pointing to a matched vertex and is not matched
// FIXME is there a way to make candidateMate private? // FIXME is there a way to make candidateMate private?
// for the moment it could generate an error. // for the moment it could generate an error.
if (not isAlreadyMatched(v, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) if (not isAlreadyMatched(v, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap))
{
bool seh = false;
#pragma omp critical(prova)
{
seh = candidateMate[v - StartIndex] != u;
}
if (seh)
continue;
#pragma omp critical(prova)
{ {
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) #pragma omp critical
w = computeCandidateMate(verLocPtr[v - StartIndex], {
verLocPtr[v - StartIndex + 1], if (candidateMate[v - StartIndex] == u)
edgeLocWeight, 0, {
verLocInd, // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
StartIndex, w = computeCandidateMate(verLocPtr[v - StartIndex],
EndIndex, verLocPtr[v - StartIndex + 1],
GMate, edgeLocWeight, 0,
Mate, verLocInd,
Ghost2LocalMap); StartIndex,
EndIndex,
candidateMate[v - StartIndex] = w; GMate,
} Mate,
Ghost2LocalMap);
candidateMate[v - StartIndex] = w;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")" << v << " Points to: " << w; cout << "\n(" << myRank << ")" << v << " Points to: " << w;
fflush(stdout); fflush(stdout);
#endif #endif
// If found a dominating edge: // If found a dominating edge:
if (w >= 0) if (w >= 0)
{ {
if ((w < StartIndex) || (w > EndIndex)) if ((w < StartIndex) || (w > EndIndex))
{ // A ghost { // A ghost
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a request message:"; cout << "\n(" << myRank << ")Sending a request message:";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
#endif #endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1); assert(ghostOwner != -1);
assert(ghostOwner != myRank); assert(ghostOwner != myRank);
#pragma omp atomic #pragma omp atomic
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
#pragma omp atomic #pragma omp atomic
(*msgIndPtr)++; (*msgIndPtr)++;
#pragma omp atomic #pragma omp atomic
(*NumMessagesBundledPtr)++; (*NumMessagesBundledPtr)++;
privateQLocalVtx.push_back(v); privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w); privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST); privateQMsgType.push_back(REQUEST);
privateQOwner.push_back(ghostOwner); privateQOwner.push_back(ghostOwner);
#pragma omp critical(prova) if (candidateMate[NLVer + Ghost2LocalMap[w]] == v)
{ {
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) Mate[v - StartIndex] = w; // v is a local vertex
{ GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
while (!omp_test_lock(&MateLock[v - StartIndex])) privateU.push_back(v);
; privateU.push_back(w);
Mate[v - StartIndex] = w; // v is a local vertex
GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
// Q.push_back(u);
privateU.push_back(v);
privateU.push_back(w);
#pragma omp atomic #pragma omp atomic
(*myCardPtr)++; (*myCardPtr)++;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout); fflush(stdout);
#endif #endif
// Decrement the counter: // Decrement the counter:
PROCESS_CROSS_EDGE(Counter, Ghost2LocalMap[w], SPtr); PROCESS_CROSS_EDGE(Counter, Ghost2LocalMap[w], SPtr);
omp_unset_lock(&MateLock[v - StartIndex]); } // End of if CandidateMate[w] = v
} // End of if CandidateMate[w] = v } // End of if a Ghost Vertex
} else
} // End of if a Ghost Vertex { // w is a local vertex
else if (candidateMate[w - StartIndex] == v)
{ // w is a local vertex {
#pragma omp critical(prova) Mate[v - StartIndex] = w; // v is a local vertex
{ Mate[w - StartIndex] = v; // w is a local vertex
if (candidateMate[w - StartIndex] == v) privateU.push_back(v);
{ privateU.push_back(w);
while (!omp_test_lock(&MateLock[v - StartIndex]))
;
while (!omp_test_lock(&MateLock[w - StartIndex]))
;
Mate[v - StartIndex] = w; // v is a local vertex
Mate[w - StartIndex] = v; // w is a local vertex
// Q.push_back(u);
privateU.push_back(v);
privateU.push_back(w);
#pragma omp atomic #pragma omp atomic
(*myCardPtr)++; (*myCardPtr)++;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout); fflush(stdout);
#endif #endif
omp_unset_lock(&MateLock[v - StartIndex]); } // End of if(CandidateMate(w) = v
omp_unset_lock(&MateLock[w - StartIndex]); } // End of Else
} // End of if(CandidateMate(w) = v
} } // End of if(w >=0)
} // End of Else else
{
} // End of if(w >=0) adj11 = verLocPtr[v - StartIndex];
else adj12 = verLocPtr[v - StartIndex + 1];
{ for (k1 = adj11; k1 < adj12; k1++)
adj11 = verLocPtr[v - StartIndex]; {
adj12 = verLocPtr[v - StartIndex + 1]; w = verLocInd[k1];
for (k1 = adj11; k1 < adj12; k1++) if ((w < StartIndex) || (w > EndIndex))
{ { // A ghost
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: "; cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
fflush(stdout); fflush(stdout);
#endif #endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1); assert(ghostOwner != -1);
assert(ghostOwner != myRank); assert(ghostOwner != myRank);
#pragma omp atomic #pragma omp atomic
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
#pragma omp atomic #pragma omp atomic
(*msgIndPtr)++; (*msgIndPtr)++;
#pragma omp atomic #pragma omp atomic
(*NumMessagesBundledPtr)++; (*NumMessagesBundledPtr)++;
privateQLocalVtx.push_back(v); privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w); privateQGhostVtx.push_back(w);
privateQMsgType.push_back(FAILURE); privateQMsgType.push_back(FAILURE);
privateQOwner.push_back(ghostOwner); privateQOwner.push_back(ghostOwner);
} // End of if(GHOST) } // End of if(GHOST)
} // End of for loop } // End of for loop
} // End of Else: w == -1 } // End of Else: w == -1
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
}
} // End of If (candidateMate[v-StartIndex] == u } // End of task
} // End of If (candidateMate[v-StartIndex] == u
} // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex:
else } // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex:
{ // Neighbor is a ghost vertex else
{ // Neighbor is a ghost vertex
while (!omp_test_lock(&MateLock[u - StartIndex]))
; #pragma omp critical
#pragma omp critical(prova) {
{ if (candidateMate[NLVer + Ghost2LocalMap[v]] == u)
if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) candidateMate[NLVer + Ghost2LocalMap[v]] = -1;
candidateMate[NLVer + Ghost2LocalMap[v]] = -1; if (v != Mate[u - StartIndex])
} { // u is local
if (v != Mate[u - StartIndex])
{ // u is local
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a success message: "; cout << "\n(" << myRank << ")Sending a success message: ";
cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n"; cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n";
fflush(stdout); fflush(stdout);
#endif #endif
ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
assert(ghostOwner != -1); assert(ghostOwner != -1);
assert(ghostOwner != myRank); assert(ghostOwner != myRank);
#pragma omp atomic #pragma omp atomic
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
#pragma omp atomic #pragma omp atomic
(*msgIndPtr)++; (*msgIndPtr)++;
#pragma omp atomic #pragma omp atomic
(*NumMessagesBundledPtr)++; (*NumMessagesBundledPtr)++;
privateQLocalVtx.push_back(u); privateQLocalVtx.push_back(u);
privateQGhostVtx.push_back(v); privateQGhostVtx.push_back(v);
privateQMsgType.push_back(SUCCESS); privateQMsgType.push_back(SUCCESS);
privateQOwner.push_back(ghostOwner); privateQOwner.push_back(ghostOwner);
} // End of If( v != Mate[u] )
omp_unset_lock(&MateLock[u - StartIndex]);
} // End of Else //A Ghost Vertex } // End of If( v != Mate[u] )
} // End of for } // End of task
} // End of Else //A Ghost Vertex
} // End of inner for
// TODO commenting that part of code might generate errors // TODO privateU.size() < UCHUNK could be commented but it generate errors, why?
// Ask for the critical section only when there are no more data to if (privateU.size() > UCHUNK || U.empty())
// compute. {
if (/*privateU.size() < UCHUNK &&*/ !U.empty())
continue;
#pragma omp critical(U) #pragma omp critical(U)
{ {
while (!privateU.empty()) while (!privateU.empty())
U.push_back(privateU.pop_back()); U.push_back(privateU.pop_back());
} }
#ifndef error #ifndef error
#pragma omp critical(privateMsg) #pragma omp critical(privateMsg)
{ {
while (!privateQLocalVtx.empty()) while (!privateQLocalVtx.empty())
{ {
QLocalVtx.push_back(privateQLocalVtx.pop_back()); QLocalVtx.push_back(privateQLocalVtx.pop_back());
QGhostVtx.push_back(privateQGhostVtx.pop_back()); QGhostVtx.push_back(privateQGhostVtx.pop_back());
QMsgType.push_back(privateQMsgType.pop_back()); QMsgType.push_back(privateQMsgType.pop_back());
QOwner.push_back(privateQOwner.pop_back()); QOwner.push_back(privateQOwner.pop_back());
} }
} }
#endif #endif
} } // End of private.size()
} }
} // End of while ( !U.empty() ) } // End of outer for
queuesTransfer(U, privateU, QLocalVtx, } // End of while ( !U.empty() )
QGhostVtx, queuesTransfer(U, privateU, QLocalVtx,
QMsgType, QOwner, privateQLocalVtx, QGhostVtx,
privateQGhostVtx, QMsgType, QOwner, privateQLocalVtx,
privateQMsgType, privateQGhostVtx,
privateQOwner); privateQMsgType,
privateQOwner);
#ifdef COUNT_LOCAL_VERTEX #ifdef COUNT_LOCAL_VERTEX
printf("Count local vertexes: %ld for thread %d of processor %d\n", printf("Count local vertexes: %ld for thread %d of processor %d\n",
localVertices, localVertices,
omp_get_thread_num(), omp_get_thread_num(),
myRank); myRank);
#endif #endif
} } // End of parallel region
} }

Loading…
Cancel
Save