Further optimizations PARALLEL_PROCESS_EXPOSED_VERTEX_B

omp-walther
StefanoPetrilli 3 years ago
parent e2ca97ca47
commit b079d71f30

@ -164,6 +164,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
for (int i = 0; i < numProcs; i++) for (int i = 0; i < numProcs; i++)
PCounter[i] = 0; PCounter[i] = 0;
MilanLongInt NumMessagesBundled; MilanLongInt NumMessagesBundled;
MilanInt ghostOwner; // Changed by Fabio to be an integer, addresses needs to be integers! MilanInt ghostOwner; // Changed by Fabio to be an integer, addresses needs to be integers!
//vector<MilanLongInt> candidateMate; //vector<MilanLongInt> candidateMate;
@ -213,15 +214,12 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
MilanLongInt privateMyCard = 0; MilanLongInt privateMyCard = 0;
staticQueue U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner; staticQueue U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner;
/*
staticQueue privateReqQLocalVtx, privateReqQGhostVtx, privateReqQMsgType, privateReqQOwner;
*/
bool isEmpty; bool isEmpty;
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
double Ghost2LocalInitialization = MPI_Wtime(); double Ghost2LocalInitialization = MPI_Wtime();
#endif #endif
#pragma omp parallel private(insertMe, k, u, w, v, k1, adj1, adj2, adj11, adj12, heaviestEdgeWt, ghostOwner, privateU, privateMyCard, isEmpty, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner /*, privateReqQLocalVtx, privateReqQGhostVtx, privateReqQMsgType, privateReqQOwner*/) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4) #pragma omp parallel private(insertMe, k, u, w, v, k1, adj1, adj2, adj11, adj12, heaviestEdgeWt, ghostOwner, privateU, privateMyCard, isEmpty, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4)
{ {
// TODO comments about the reduction // TODO comments about the reduction
@ -402,7 +400,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
* Create the Queue Data Structure for the Dominating Set * Create the Queue Data Structure for the Dominating Set
* *
* I had to declare the staticQueue U before the parallel region * I had to declare the staticQueue U before the parallel region
* to have it in the correct scope. Since we can't chane the dimension * to have it in the correct scope. Since we can't change the dimension
* of a staticQueue I had to destroy the previous object and instantiate * of a staticQueue I had to destroy the previous object and instantiate
* a new one of the correct size. * a new one of the correct size.
*/ */
@ -462,102 +460,103 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
* in parallel. * in parallel.
*/ */
MilanLongInt size = numGhostEdges; //TODO how can I decide a meaningfull size? MilanLongInt size = numGhostVertices; //TODO how can I decide a more meaningfull size?
//Fail messages //Fail messages
privateQLocalVtx.~staticQueue(); privateQLocalVtx.~staticQueue();
privateQGhostVtx.~staticQueue(); privateQGhostVtx.~staticQueue();
privateQMsgType.~staticQueue(); privateQMsgType.~staticQueue();
privateQOwner.~staticQueue(); privateQOwner.~staticQueue();
//Request messages privateU.~staticQueue();
/*
privateReqQLocalVtx.~staticQueue(); new(&privateU) staticQueue(NLVer + numGhostVertices); //TODO how can I put a meaningfull size?
privateReqQGhostVtx.~staticQueue();
privateReqQMsgType.~staticQueue();
privateReqQOwner.~staticQueue();
*/
new(&privateQLocalVtx) staticQueue(size); new(&privateQLocalVtx) staticQueue(size);
new(&privateQGhostVtx) staticQueue(size); new(&privateQGhostVtx) staticQueue(size);
new(&privateQMsgType) staticQueue(size); new(&privateQMsgType) staticQueue(size);
new(&privateQOwner) staticQueue(size); new(&privateQOwner) staticQueue(size);
/*
new(&privateReqQLocalVtx) staticQueue(size);
new(&privateReqQGhostVtx) staticQueue(size);
new(&privateReqQMsgType) staticQueue(size);
new(&privateReqQOwner) staticQueue(size);
*/
#pragma omp for reduction(+: msgInd, NumMessagesBundled, myCard, PCounter[:numProcs]) schedule(static) #pragma omp for reduction(+: msgInd, NumMessagesBundled, myCard, PCounter[:numProcs]) schedule(static)
for ( v=0; v < NLVer; v++ ) for (v = 0; v < NLVer; v++) {
{ //Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
//Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) k = candidateMate[v];
k = candidateMate[v]; candidateMate[v] = verLocInd[k];
candidateMate[v] = verLocInd[k]; w = candidateMate[v];
w = candidateMate[v];
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Processing: "<<v+StartIndex<<endl; fflush(stdout); cout<<"\n("<<myRank<<")Processing: "<<v+StartIndex<<endl; fflush(stdout);
#endif #endif
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")"<<v+StartIndex<<" Points to: "<<w; fflush(stdout); cout<<"\n("<<myRank<<")"<<v+StartIndex<<" Points to: "<<w; fflush(stdout);
#endif #endif
//If found a dominating edge:
if (w >= 0) {
//If found a dominating edge: if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) {
if (w >= 0) { w = computeCandidateMate(verLocPtr[v],
verLocPtr[v + 1],
//This piece of code is actually executed under 0.01% of the times edgeLocWeight, 0,
verLocInd,
if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) { StartIndex,
w = computeCandidateMate(verLocPtr[v], EndIndex,
verLocPtr[v + 1], GMate,
edgeLocWeight, 0, Mate,
verLocInd, Ghost2LocalMap);
StartIndex, candidateMate[v] = w;
EndIndex, }
GMate,
Mate,
Ghost2LocalMap);
candidateMate[v] = w;
}
if (w >= 0) { if (w >= 0) {
myCard++; myCard++;
if ((w < StartIndex) || (w > EndIndex)) { //w is a ghost vertex if ((w < StartIndex) || (w > EndIndex)) { //w is a ghost vertex
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Sending a request message (291):"; cout<<"\n("<<myRank<<")Sending a request message (291):";
cout<<"\n("<<myRank<<")Local is: "<<v+StartIndex<<" Ghost is "<<w<<" Owner is: "<< findOwnerOfGhost(w, verDistance, myRank, numProcs) <<endl; cout<<"\n("<<myRank<<")Local is: "<<v+StartIndex<<" Ghost is "<<w<<" Owner is: "<< findOwnerOfGhost(w, verDistance, myRank, numProcs) <<endl;
fflush(stdout); fflush(stdout);
#endif #endif
msgInd++; msgInd++;
NumMessagesBundled++; NumMessagesBundled++;
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1); assert(ghostOwner != -1);
assert(ghostOwner != myRank); assert(ghostOwner != myRank);
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
#pragma omp critical(Mate)
{
QLocalVtx.push_back(v + StartIndex); //TODO whyyyyy does it fail if I use a private data structure???
QGhostVtx.push_back(w); /*
QMsgType.push_back(REQUEST); privateQLocalVtx.push_back(v + StartIndex);
QOwner.push_back(ghostOwner); privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST);
privateQOwner.push_back(ghostOwner);
*/
#pragma omp critical(MSG)
{
QLocalVtx.push_back(v + StartIndex);
QGhostVtx.push_back(w);
QMsgType.push_back(REQUEST);
QOwner.push_back(ghostOwner);
} // end of critical region
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) { if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) {
Mate[v] = w; privateU.push_back(v + StartIndex);
GMate[Ghost2LocalMap[w]] = v + StartIndex; //w is a Ghost privateU.push_back(w);
U.push_back(v + StartIndex); Mate[v] = w;
U.push_back(w); //FIXME could this instruction create errors?
GMate[Ghost2LocalMap[w]] = v + StartIndex; //w is a Ghost
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")MATCH: ("<<v+StartIndex<<","<<w<<")"; fflush(stdout); cout<<"\n("<<myRank<<")MATCH: ("<<v+StartIndex<<","<<w<<")"; fflush(stdout);
#endif #endif
//Decrement the counter: //Decrement the counter:
//Start: PARALLEL_PROCESS_CROSS_EDGE_B(v) //Start: PARALLEL_PROCESS_CROSS_EDGE_B(v)
if (Counter[Ghost2LocalMap[w]] > 0) { #pragma omp critical
{
if (Counter[Ghost2LocalMap[w]] > 0) {
Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement Counter[Ghost2LocalMap[w]] -= 1; //Decrement
if (Counter[Ghost2LocalMap[w]] == 0) { if (Counter[Ghost2LocalMap[w]] == 0) {
S--; //Decrement S S--; //Decrement S
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
@ -565,79 +564,86 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
fflush(stdout); fflush(stdout);
#endif #endif
} }
} //End of if Counter[w] > 0 }
//End: PARALLEL_PROCESS_CROSS_EDGE_B(v) } //End of if Counter[w] > 0
} //End of if CandidateMate[w] = v //End: PARALLEL_PROCESS_CROSS_EDGE_B(v)
} // end of critical region } //End of if CandidateMate[w] = v
} //End of if a Ghost Vertex
else { // w is a local vertex
} //End of if a Ghost Vertex
if (candidateMate[w - StartIndex] == (v + StartIndex)) { else { // w is a local vertex
#pragma omp critical(Mate)
{ if (candidateMate[w - StartIndex] == (v + StartIndex)) {
Mate[v] = w; //v is local privateU.push_back(v + StartIndex);
Mate[w - StartIndex] = v + StartIndex; //w is local privateU.push_back(w);
//Q.push_back(u);
U.push_back(v + StartIndex); Mate[v] = w; //v is local
U.push_back(w); //FIXME this instruction could create errors
Mate[w - StartIndex] = v + StartIndex; //w is local
#ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")MATCH: ("<<v+StartIndex<<","<<w<<") "; fflush(stdout);
#endif #ifdef PRINT_DEBUG_INFO_
} //End of critical cout<<"\n("<<myRank<<")MATCH: ("<<v+StartIndex<<","<<w<<") "; fflush(stdout);
#endif
} //End of if ( candidateMate[w-StartIndex] == (v+StartIndex) )
} //End of Else } //End of if ( candidateMate[w-StartIndex] == (v+StartIndex) )
continue; } //End of Else
} //End of second if continue;
} //End of if(w >=0) } //End of second if
//if (w < 0) { -- if it arrives here this one if is useless, it is certainly -1 } //End of if(w >=0)
adj11 = verLocPtr[v];
adj12 = verLocPtr[v + 1]; //This piece of code is executed a really small amount of times, I will not allocate a
for (k1 = adj11; k1 < adj12; k1++) { //huge amount of memory to the private data structures.
w = verLocInd[k1]; adj11 = verLocPtr[v];
if ((w < StartIndex) || (w > EndIndex)) { //A ghost adj12 = verLocPtr[v + 1];
//Build the Message Packet: for (k1 = adj11; k1 < adj12; k1++) {
//Message[0] = v+StartIndex; //LOCAL w = verLocInd[k1];
//Message[1] = w; //GHOST if ((w < StartIndex) || (w > EndIndex)) { //A ghost
//Message[2] = FAILURE; //TYPE
//Send a Request (Asynchronous) #ifdef PRINT_DEBUG_INFO_
#ifdef PRINT_DEBUG_INFO_ cout<<"\n("<<myRank<<")Sending a failure message: ";
cout<<"\n("<<myRank<<")Sending a failure message: "; cout<<"\n("<<myRank<<")Ghost is "<<w<<" Owner is: "<<findOwnerOfGhost(w, verDistance, myRank, numProcs);
cout<<"\n("<<myRank<<")Ghost is "<<w<<" Owner is: "<<findOwnerOfGhost(w, verDistance, myRank, numProcs); fflush(stdout);
fflush(stdout);
#endif #endif
/* MPI_Bsend(&Message[0], 3, MPI_INT, inputSubGraph.findOwner(w),
ComputeTag, comm); */ msgInd++;
NumMessagesBundled++; NumMessagesBundled++;
msgInd++; ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); assert(ghostOwner != -1);
assert(ghostOwner != -1); assert(ghostOwner != myRank);
assert(ghostOwner != myRank); PCounter[ghostOwner]++;
PCounter[ghostOwner]++; QLocalVtx.push_back(v + StartIndex);
privateQLocalVtx.push_back(v + StartIndex); QGhostVtx.push_back(w);
privateQGhostVtx.push_back(w); QMsgType.push_back(FAILURE);
privateQMsgType.push_back(FAILURE); QOwner.push_back(ghostOwner);
privateQOwner.push_back(ghostOwner);
} //End of if(GHOST)
} //End of if(GHOST) } //End of for loop
} //End of for loop //End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
//} // End of Else: w == -1 } //End of for ( v=0; v < NLVer; v++ )
//End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
} //End of for ( v=0; v < NLVer; v++ )
#pragma omp critical(privateMsg) #pragma omp critical(privateMsg)
{ {
while (!privateQLocalVtx.empty()) while (!privateQLocalVtx.empty())
{
QLocalVtx.push_back(privateQLocalVtx.pop_back());
QGhostVtx.push_back(privateQGhostVtx.pop_back());
QMsgType.push_back(privateQMsgType.pop_back());
QOwner.push_back(privateQOwner.pop_back());
}
}
#pragma omp critical(U)
{ {
QLocalVtx.push_back(privateQLocalVtx.pop_front()); while (!privateU.empty())
QGhostVtx.push_back(privateQGhostVtx.pop_front()); {
QMsgType.push_back(privateQMsgType.pop_front()); U.push_back(privateU.pop_front());
QOwner.push_back(privateQOwner.pop_front()); }
} }
}
tempCounter.clear(); //Do not need this any more tempCounter.clear(); //Do not need this any more
@ -649,10 +655,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
/////////////////////////// PROCESS MATCHED VERTICES ////////////////////////////// /////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
privateU.~staticQueue();
new(&privateU) staticQueue(1000); //TODO how can I put a meaningfull size?
isEmpty = false; isEmpty = false;
#ifdef COUNT_LOCAL_VERTEX #ifdef COUNT_LOCAL_VERTEX

Loading…
Cancel
Save