fix private queues in PARALLEL_PROCESS_EXPOSED_VERTEX_B

omp-walther
StefanoPetrilli 3 years ago
parent 63b7602d3a
commit 6dcae6d0c1

@ -274,35 +274,35 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
edgeLocWeight,
candidateMate);
PARALLEL_PROCESS_EXPOSED_VERTEX_B(NLVer,
candidateMate,
verLocInd,
verLocPtr,
StartIndex,
EndIndex,
Mate,
GMate,
Ghost2LocalMap,
edgeLocWeight,
&myCard,
&msgInd,
&NumMessagesBundled,
&S,
verDistance,
PCounter,
Counter,
myRank,
numProcs,
U,
privateU,
QLocalVtx,
QGhostVtx,
QMsgType,
QOwner,
privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
PARALLEL_PROCESS_EXPOSED_VERTEX_B(NLVer,
candidateMate,
verLocInd,
verLocPtr,
StartIndex,
EndIndex,
Mate,
GMate,
Ghost2LocalMap,
edgeLocWeight,
&myCard,
&msgInd,
&NumMessagesBundled,
&S,
verDistance,
PCounter,
Counter,
myRank,
numProcs,
U,
privateU,
QLocalVtx,
QGhostVtx,
QMsgType,
QOwner,
privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
tempCounter.clear(); // Do not need this any more
@ -455,6 +455,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout);
#endif
// TODO refactor this
// Decrement the counter:
// Start: PARALLEL_PROCESS_CROSS_EDGE_B(v,w)
if (Counter[Ghost2LocalMap[w]] > 0)
@ -579,29 +581,25 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
// Avoid to ask for the critical section if there is nothing to add
if (privateU.size() < UCHUNK && !U.empty())
continue;
#pragma omp critical(U)
{
while (!privateU.empty())
{
U.push_back(privateU.pop_front());
}
myCard += privateMyCard;
} // End of critical U
queuesTransfer(U, privateU, QLocalVtx,
QGhostVtx,
QMsgType, QOwner, privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
}
} // End of while ( /*!Q.empty()*/ !U.empty() )
#pragma omp critical(privateMsg)
#pragma omp critical
{
while (!privateQLocalVtx.empty())
{
QLocalVtx.push_back(privateQLocalVtx.pop_front());
QGhostVtx.push_back(privateQGhostVtx.pop_front());
QMsgType.push_back(privateQMsgType.pop_front());
QOwner.push_back(privateQOwner.pop_front());
}
myCard += privateMyCard;
}
queuesTransfer(U, privateU, QLocalVtx,
QGhostVtx,
QMsgType, QOwner, privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
#ifdef COUNT_LOCAL_VERTEX
printf("Count local vertexes: %ld for thread %d of processor %d\n",

@ -11,14 +11,9 @@
/*
* PARALLEL_PROCESS_EXPOSED_VERTEX_B
* The sequential version could be a bit more
* efficient.
* TODO: write comment
*
* TODO: Maybe it is possible to append the values of QLocalVtx, QGhostVtx, QMsgType and QOwner
* first in a local variable and then, only at the end, append them to the real data structure
* to remove the critical sections.
*
* TODO: Test when it's more efficient to execute this code
* TODO: Test when it's actually more efficient to execute this code
* in parallel.
*/
@ -119,22 +114,11 @@ inline void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
assert(ghostOwner != myRank);
PCounter[ghostOwner]++;
/*
//TODO why does it fail if I use a private data structure???
privateQLocalVtx.push_back(v + StartIndex);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST);
privateQOwner.push_back(ghostOwner);
*/
#pragma omp critical(MSG)
{
QLocalVtx.push_back(v + StartIndex);
QGhostVtx.push_back(w);
QMsgType.push_back(REQUEST);
QOwner.push_back(ghostOwner);
} // end of critical region
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex)
{
@ -149,6 +133,8 @@ inline void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ")";
fflush(stdout);
#endif
//TODO refactor this!!
// Decrement the counter:
// Start: PARALLEL_PROCESS_CROSS_EDGE_B(v)
#pragma omp critical
@ -218,10 +204,11 @@ inline void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
assert(ghostOwner != -1);
assert(ghostOwner != myRank);
PCounter[ghostOwner]++;
QLocalVtx.push_back(v + StartIndex);
QGhostVtx.push_back(w);
QMsgType.push_back(FAILURE);
QOwner.push_back(ghostOwner);
privateQLocalVtx.push_back(v + StartIndex);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(FAILURE);
privateQOwner.push_back(ghostOwner);
} // End of if(GHOST)
} // End of for loop

@ -22,17 +22,17 @@ inline void queuesTransfer(staticQueue &U,
#pragma omp critical(U)
{
while (!privateU.empty())
U.push_back(privateU.pop_front());
U.push_back(privateU.pop_back());
}
#pragma omp critical(privateMsg)
{
while (!privateQLocalVtx.empty())
{
QLocalVtx.push_back(privateQLocalVtx.pop_front());
QGhostVtx.push_back(privateQGhostVtx.pop_front());
QMsgType.push_back(privateQMsgType.pop_front());
QOwner.push_back(privateQOwner.pop_front());
QLocalVtx.push_back(privateQLocalVtx.pop_back());
QGhostVtx.push_back(privateQGhostVtx.pop_back());
QMsgType.push_back(privateQMsgType.pop_back());
QOwner.push_back(privateQOwner.pop_back());
}
}
}
Loading…
Cancel
Save