Refactoring private queues, still not working

omp-walther
StefanoPetrilli 3 years ago
parent deab695294
commit b5e52d31f5

@@ -179,7 +179,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 MilanLongInt StartIndex, MilanLongInt EndIndex,
 MilanLongInt* numGhostEdgesPtr,
 MilanLongInt* numGhostVerticesPtr,
-MilanLongInt* insertMePtr,
 MilanLongInt* S,
 MilanLongInt* verLocInd,
 MilanLongInt* verLocPtr,
@@ -196,7 +195,12 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 vector<MilanLongInt>& QMsgType,
 vector<MilanInt>& QOwner,
 MilanLongInt* &candidateMate,
-staticQueue& U
+staticQueue& U,
+staticQueue& privateU,
+staticQueue& privateQLocalVtx,
+staticQueue& privateQGhostVtx,
+staticQueue& privateQMsgType,
+staticQueue& privateQOwner
 );
 void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP

@@ -185,7 +185,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
 //Build the Ghost Vertex Set: Vg
 map <MilanLongInt, MilanLongInt> Ghost2LocalMap; //Map each ghost vertex to a local vertex
 vector <MilanLongInt> Counter; //Store the edge count for each ghost vertex
-MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe = 0; //Number of Ghost vertices
+MilanLongInt numGhostVertices = 0, numGhostEdges = 0; //Number of Ghost vertices
 #ifdef PRINT_DEBUG_INFO_
 cout<<"\n("<<myRank<<")About to compute Ghost Vertices..."; fflush(stdout);
@@ -218,7 +218,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
 initialize(NLVer, NLEdge, StartIndex,
 EndIndex, &numGhostEdges,
-&numGhostVertices, &insertMe, &S,
+&numGhostVertices, &S,
 verLocInd, verLocPtr,
 MateLock,
 Ghost2LocalMap, Counter,
@@ -226,20 +226,27 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
 tempCounter, GMate,
 Message, QLocalVtx,
 QGhostVtx, QMsgType, QOwner,
-candidateMate, U);
+candidateMate, U,
+privateU,
+privateQLocalVtx,
+privateQGhostVtx,
+privateQMsgType,
+privateQOwner
+);
 finishTime = MPI_Wtime();
 *ph0_time = finishTime - startTime; //Time taken for Phase-0: Initialization
 startTime = MPI_Wtime();
 /////////////////////////////////////////////////////////////////////////////////////////
 //////////////////////////////////// INITIALIZATION /////////////////////////////////////
 /////////////////////////////////////////////////////////////////////////////////////////
 //Compute the Initial Matching Set:
-#pragma omp parallel private(insertMe, k, u, w, v, k1, adj1, adj2, adj11, adj12, heaviestEdgeWt, ghostOwner, privateU, privateMyCard, isEmpty, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4)
+#pragma omp parallel private(k, u, w, v, k1, adj1, adj2, adj11, adj12, heaviestEdgeWt, ghostOwner, privateMyCard, isEmpty) firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(4)
 {
 /*
 * OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B has been split from
@@ -272,21 +279,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
 */
-MilanLongInt size = numGhostVertices; //TODO how can I decide a more meaningfull size?
-//Fail messages
-privateQLocalVtx.~staticQueue();
-privateQGhostVtx.~staticQueue();
-privateQMsgType.~staticQueue();
-privateQOwner.~staticQueue();
-privateU.~staticQueue();
-new(&privateU) staticQueue(NLVer + numGhostVertices); //TODO how can I put a meaningfull size?
-new(&privateQLocalVtx) staticQueue(size);
-new(&privateQGhostVtx) staticQueue(size);
-new(&privateQMsgType) staticQueue(size);
-new(&privateQOwner) staticQueue(size);
 #pragma omp for reduction(+: msgInd, NumMessagesBundled, myCard, PCounter[:numProcs]) schedule(static)
 for (v = 0; v < NLVer; v++) {
 //Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
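A side note on the reduction(+: msgInd, NumMessagesBundled, myCard, PCounter[:numProcs]) clause that survives this hunk: PCounter[:numProcs] is an OpenMP array-section reduction, so each thread accumulates into its own zero-initialized copy of the whole array and the copies are summed element-wise when the loop finishes. A minimal sketch of that mechanism, with made-up sizes and an illustrative owner array rather than anything from the real code:

#include <cstdio>
#include <vector>

int main()
{
    const int numProcs = 4;              // illustrative process count
    const long nEdges = 1000;            // illustrative edge count
    std::vector<int> owner(nEdges);      // illustrative owning rank of each ghost edge
    for (long i = 0; i < nEdges; i++) owner[i] = (int)(i % numProcs);

    long PCounter[numProcs] = {0};       // per-owner message counters

    // Each thread accumulates into a private, zero-initialized copy of
    // PCounter[0:numProcs]; OpenMP sums the copies into the original afterwards.
#pragma omp parallel for reduction(+ : PCounter[:numProcs]) schedule(static)
    for (long i = 0; i < nEdges; i++)
        PCounter[owner[i]]++;

    for (int p = 0; p < numProcs; p++)
        printf("messages for rank %d: %ld\n", p, PCounter[p]);
    return 0;
}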
@@ -334,8 +326,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
 assert(ghostOwner != myRank);
 PCounter[ghostOwner]++;
 //TODO why does it fail if I use a private data structure???
 /*
 privateQLocalVtx.push_back(v + StartIndex);
 privateQGhostVtx.push_back(w);
@@ -351,6 +343,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
 QMsgType.push_back(REQUEST);
 QOwner.push_back(ghostOwner);
 } // end of critical region
 if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) {
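Incidentally, the commented-out privateQLocalVtx/privateQGhostVtx push_back calls above show the pattern this refactoring is aiming for: fill thread-private queues with no locking, then merge them into the shared Q* containers once per thread instead of entering the critical region for every ghost edge. The commit does not get there yet; the following is only a sketch of that pattern, using std::vector as a stand-in for staticQueue and illustrative names and arguments, not the real loop body:

#include <vector>

void gatherRequests(const std::vector<long> &ghostOf,   // ghost vertex per local vertex (illustrative)
                    const std::vector<int> &ownerOf,    // owning rank per local vertex (illustrative)
                    std::vector<long> &QLocalVtx,
                    std::vector<long> &QGhostVtx,
                    std::vector<int> &QOwner)
{
#pragma omp parallel
    {
        // Thread-private buffers: no synchronization while filling them.
        std::vector<long> privateQLocalVtx, privateQGhostVtx;
        std::vector<int> privateQOwner;

#pragma omp for schedule(static)
        for (long v = 0; v < (long)ghostOf.size(); v++) {
            privateQLocalVtx.push_back(v);
            privateQGhostVtx.push_back(ghostOf[v]);
            privateQOwner.push_back(ownerOf[v]);
        }

        // One critical section per thread, not one per ghost edge.
#pragma omp critical(mergeQueues)
        {
            QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end());
            QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end());
            QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end());
        }
    }
}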

@@ -80,9 +80,11 @@ class staticQueue
 MilanLongInt squeueTail;
 MilanLongInt NumNodes;
+//FIXME I had to comment this piece of code in order to make everything work.
+// why?
 //Prevent Assignment and Pass by Value:
-staticQueue(const staticQueue& src);
-staticQueue& operator=(const staticQueue& rhs);
+//staticQueue(const staticQueue& src);
+//staticQueue& operator=(const staticQueue& rhs);
 public:
 //Constructors and Destructors
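A plausible answer to the FIXME above (an inference, not something the commit states): the new firstprivate(privateU, privateQLocalVtx, ...) clause copy-constructs each thread's queue from the shared original, and firstprivate requires an accessible copy constructor, so the private declaration had to be commented out to let the compiler generate a public one. A minimal stand-in sketch; it writes an explicit deep-copying constructor for clarity, whereas the commit relies on the implicitly generated copy:

class Queue {                    // stand-in for staticQueue, not the real class
    long *data;
    long cap;
    // Declaring the copy constructor private (as staticQueue originally did)
    // would make firstprivate(q) below ill-formed:
    //   Queue(const Queue &src);
public:
    explicit Queue(long n) : data(new long[n]), cap(n) {}
    Queue(const Queue &src) : data(new long[src.cap]), cap(src.cap) {} // one deep copy per thread
    ~Queue() { delete[] data; }
    long capacity() const { return cap; }
};

int main()
{
    Queue q(64);                 // sized once, before the parallel region
#pragma omp parallel firstprivate(q) num_threads(4)
    {
        // Each thread gets its own copy-constructed q; with private(q) the
        // copies would be default-constructed instead (and Queue has no
        // default constructor, so private(q) would not even compile here).
        (void)q.capacity();
    }
    return 0;
}

Whether the implicitly generated copy is deep or shallow depends on what staticQueue stores internally; if it only copies a raw pointer, the per-thread copies would still share one buffer, which might be related to the "still not working" in the commit title.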

@@ -12,7 +12,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 MilanLongInt StartIndex, MilanLongInt EndIndex,
 MilanLongInt* numGhostEdgesPtr,
 MilanLongInt* numGhostVerticesPtr,
-MilanLongInt* insertMePtr,
 MilanLongInt* S,
 MilanLongInt* verLocInd,
 MilanLongInt* verLocPtr,
@@ -29,7 +28,12 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 vector<MilanLongInt>& QMsgType,
 vector<MilanInt>& QOwner,
 MilanLongInt* &candidateMate,
-staticQueue& U
+staticQueue& U,
+staticQueue& privateU,
+staticQueue& privateQLocalVtx,
+staticQueue& privateQGhostVtx,
+staticQueue& privateQMsgType,
+staticQueue& privateQOwner
 )
 {
@@ -37,7 +41,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 MilanLongInt adj1, adj2;
 int i, v, k, w;
 // index that starts with zero to |Vg| - 1
 map<MilanLongInt, MilanLongInt>::iterator storedAlready;
@@ -64,10 +67,9 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 *
 * Despite the critical region it is still productive to
 * parallelize this for because the critical region is executed
-* only when a ghost edge is found and ghost edges are a minority.
+* only when a ghost edge is found and ghost edges are a minority,
+* circa 3.5% during the tests.
 */
-// TODO comments about the reduction
 #pragma omp for reduction(+ : numGhostEdges)
 for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice
 insertMe = verLocInd[i];
@@ -90,8 +92,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 } //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
 } //End of for(ghost vertices)
 #pragma omp single
 {
 //numGhostEdges = atomicNumGhostEdges;
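For orientation (the hunk does not show the full loop body): the counting phase combines a reduction(+ : numGhostEdges) for the cheap counter with a critical region that is entered only for the rare ghost endpoints, which is what the comment about the circa 3.5% hit rate refers to. A hypothetical sketch of such a loop, with simplified types and names, not the actual implementation:

#include <map>
#include <vector>

void countGhosts(long NLEdge, long StartIndex, long EndIndex,
                 const std::vector<long> &verLocInd,
                 std::map<long, long> &Ghost2LocalMap,
                 std::vector<long> &Counter,
                 long &numGhostEdgesOut)
{
    long numGhostEdges = 0;

#pragma omp parallel for reduction(+ : numGhostEdges)
    for (long i = 0; i < NLEdge; i++) {
        long w = verLocInd[i];
        if (w < StartIndex || w > EndIndex) { // endpoint owned by another rank
            numGhostEdges++;                  // cheap path, handled by the reduction
#pragma omp critical(ghostMap)
            {
                // Entered only for ghost endpoints, so the serialization is rare.
                std::map<long, long>::iterator it = Ghost2LocalMap.find(w);
                if (it == Ghost2LocalMap.end()) {
                    Ghost2LocalMap[w] = (long)Counter.size(); // new ghost vertex -> local index
                    Counter.push_back(1);                     // first edge touching it
                } else {
                    Counter[it->second]++;                    // another edge to a known ghost
                }
            }
        }
    }
    numGhostEdgesOut = numGhostEdges;
}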
@@ -143,7 +143,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 /*
 * Not parallelizable
 */
 for (i = 0; i < numGhostVertices; i++) { //O(|Ghost Vertices|)
 verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i];
 #ifdef PRINT_DEBUG_INFO_
@@ -163,6 +162,10 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 fflush(stdout);
 #endif
+#ifdef TIME_TRACKER
+double verGhostIndInitialization = MPI_Wtime();
+#endif
 /*
 * OMP verGhostIndInitialization
 *
@@ -175,13 +178,8 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 * Despite the critical region it's still useful to
 * parallelize the for because the ghost nodes
 * are a minority hence the critical region is executed
-* few times.
+* few times, circa 3.5% of the time in the tests.
 */
-#ifdef TIME_TRACKER
-double verGhostIndInitialization = MPI_Wtime();
-#endif
 #pragma omp for nowait schedule(static)
 for (v = 0; v < NLVer; v++) {
 adj1 = verLocPtr[v]; //Vertex Pointer
@@ -192,17 +190,14 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 #pragma omp critical
 {
 insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert
-verGhostInd[insertMe] = v + StartIndex; //Add the adjacency
 tempCounter[Ghost2LocalMap[w]]++; //Increment the counter
 }
+verGhostInd[insertMe] = v + StartIndex; //Add the adjacency
 } //End of if((w < StartIndex) || (w > EndIndex))
 } //End of for(k)
 } //End of for (v)
-}
-#pragma omp single
-{
+} // End of parallel region
 #ifdef TIME_TRACKER
 verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization;
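This hunk also shrinks the critical region: only computing the insertion slot and bumping tempCounter stay under the lock, and the write to verGhostInd happens outside it using the slot the thread just reserved. That is race-free only if insertMe is private to each thread, which the hunk itself does not show and is assumed here. A sketch of the pattern under that assumption:

#include <vector>

// Each thread reserves a slot for ghost vertex g under the lock, then fills
// the slot without holding the lock; distinct slots mean no race on the write.
void addAdjacency(long v, long g,
                  const std::vector<long> &verGhostPtr,
                  std::vector<long> &tempCounter,
                  std::vector<long> &verGhostInd)
{
    long insertMe; // must be local to the calling thread
#pragma omp critical(reserveSlot)
    {
        insertMe = verGhostPtr[g] + tempCounter[g]; // where to insert
        tempCounter[g]++;                           // reserve the slot
    }
    verGhostInd[insertMe] = v;                      // write outside the lock
}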
@@ -216,11 +211,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 cout<<endl; fflush(stdout);
 #endif
-Message.resize(3, -1);
-//message_type = 0;
-//NumMessagesBundled = 0;
-//ghostOwner = 0;
 try {
 QLocalVtx.reserve(numGhostEdges); //Local Vertex
 QGhostVtx.reserve(numGhostEdges); //Ghost Vertex
@@ -232,23 +222,19 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 exit(1);
 }
-} // end of single region
 #ifdef PRINT_DEBUG_INFO_
 cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
 #endif
 *numGhostEdgesPtr = numGhostEdges;
 *numGhostVerticesPtr = numGhostVertices;
-*insertMePtr = insertMe;
 //Allocate Data Structures:
 /*
 * candidateMate was a vector and has been replaced with a raw array
-* there is no point in using the vector (or maybe there is???)
+* there is no point in using the vector (or maybe there is (???))
 * so I replaced it with an array which is slightly faster
 */
-//candidateMate = new MilanLongInt[NLVer + numGhostVertices];
 candidateMate = new MilanLongInt[NLVer + numGhostVertices];
@@ -267,7 +253,6 @@ cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
 *S = numGhostVertices; //Initialize S with number of Ghost Vertices
 /*
 * Create the Queue Data Structure for the Dominating Set
 *
@@ -276,6 +261,14 @@ cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
 * of a staticQueue I had to destroy the previous object and instantiate
 * a new one of the correct size.
 */
-U.~staticQueue();
 new(&U) staticQueue(NLVer + numGhostVertices);
+//TODO how can I decide a more meaningful size?
+MilanLongInt size = numGhostVertices;
+new(&privateU) staticQueue(NLVer + numGhostVertices); //TODO how can I put a meaningful size?
+new(&privateQLocalVtx) staticQueue(size);
+new(&privateQGhostVtx) staticQueue(size);
+new(&privateQMsgType) staticQueue(size);
+new(&privateQOwner) staticQueue(size);
 }
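The per-thread queue construction that the parallel region used to perform (see the block removed further up) now happens once here, via placement new, rebuilding each staticQueue with the size that is only known after the ghost vertices have been counted. A generic sketch of that reconstruct-in-place idiom with a stand-in class, not the real staticQueue:

#include <cstddef>
#include <new>

// Stand-in for staticQueue: capacity fixed at construction, no assignment,
// so the only way to change its size is to rebuild it in place.
class FixedQueue {
    long *buf;
    std::size_t cap;
    FixedQueue &operator=(const FixedQueue &); // not implemented: no assignment
public:
    explicit FixedQueue(std::size_t n = 0) : buf(n ? new long[n] : 0), cap(n) {}
    ~FixedQueue() { delete[] buf; }
    std::size_t capacity() const { return cap; }
};

int main()
{
    FixedQueue U;                 // default-constructed before the size is known
    std::size_t size = 1024;      // e.g. NLVer + numGhostVertices, known only later

    U.~FixedQueue();              // release the old state...
    new (&U) FixedQueue(size);    // ...and rebuild the object in the same storage
    // Skipping the explicit destructor call, as the lines above do, is only
    // harmless when the old object owns nothing that needs releasing.

    return (U.capacity() == size) ? 0 : 1;
}

A reserve()-style member function, or wrapping the queue in std::optional, would usually be a cleaner way to defer sizing, but either would require changing staticQueue itself.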
