Refactoring private queues, still not working

omp-walther
StefanoPetrilli 2 years ago
parent deab695294
commit b5e52d31f5

@ -179,7 +179,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt StartIndex, MilanLongInt EndIndex,
MilanLongInt* numGhostEdgesPtr,
MilanLongInt* numGhostVerticesPtr,
MilanLongInt* insertMePtr,
MilanLongInt* S,
MilanLongInt* verLocInd,
MilanLongInt* verLocPtr,
@ -196,7 +195,12 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
vector<MilanLongInt>& QMsgType,
vector<MilanInt>& QOwner,
MilanLongInt* &candidateMate,
staticQueue& U
staticQueue& U,
staticQueue& privateU,
staticQueue& privateQLocalVtx,
staticQueue& privateQGhostVtx,
staticQueue& privateQMsgType,
staticQueue& privateQOwner
);
void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP

@ -185,7 +185,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
//Build the Ghost Vertex Set: Vg
map <MilanLongInt, MilanLongInt> Ghost2LocalMap; //Map each ghost vertex to a local vertex
vector <MilanLongInt> Counter; //Store the edge count for each ghost vertex
MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe = 0; //Number of Ghost vertices
MilanLongInt numGhostVertices = 0, numGhostEdges = 0; //Number of Ghost vertices
#ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")About to compute Ghost Vertices..."; fflush(stdout);
@ -218,7 +218,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
initialize(NLVer, NLEdge, StartIndex,
EndIndex, &numGhostEdges,
&numGhostVertices, &insertMe, &S,
&numGhostVertices, &S,
verLocInd, verLocPtr,
MateLock,
Ghost2LocalMap, Counter,
@ -226,20 +226,27 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
tempCounter, GMate,
Message, QLocalVtx,
QGhostVtx, QMsgType, QOwner,
candidateMate, U);
candidateMate, U,
privateU,
privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner
);
finishTime = MPI_Wtime();
*ph0_time = finishTime - startTime; //Time taken for Phase-0: Initialization
startTime = MPI_Wtime();
/////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////// INITIALIZATION /////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////
//Compute the Initial Matching Set:
#pragma omp parallel private(insertMe, k, u, w, v, k1, adj1, adj2, adj11, adj12, heaviestEdgeWt, ghostOwner, privateU, privateMyCard, isEmpty, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4)
#pragma omp parallel private(k, u, w, v, k1, adj1, adj2, adj11, adj12, heaviestEdgeWt, ghostOwner, privateMyCard, isEmpty) firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(4)
{
/*
* OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B has been splitted from
@ -272,21 +279,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
*/
MilanLongInt size = numGhostVertices; //TODO how can I decide a more meaningfull size?
//Fail messages
privateQLocalVtx.~staticQueue();
privateQGhostVtx.~staticQueue();
privateQMsgType.~staticQueue();
privateQOwner.~staticQueue();
privateU.~staticQueue();
new(&privateU) staticQueue(NLVer + numGhostVertices); //TODO how can I put a meaningfull size?
new(&privateQLocalVtx) staticQueue(size);
new(&privateQGhostVtx) staticQueue(size);
new(&privateQMsgType) staticQueue(size);
new(&privateQOwner) staticQueue(size);
#pragma omp for reduction(+: msgInd, NumMessagesBundled, myCard, PCounter[:numProcs]) schedule(static)
for (v = 0; v < NLVer; v++) {
//Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
@ -334,8 +326,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
assert(ghostOwner != myRank);
PCounter[ghostOwner]++;
//TODO why does it fail if I use a private data structure???
/*
privateQLocalVtx.push_back(v + StartIndex);
privateQGhostVtx.push_back(w);
@ -351,6 +343,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
QMsgType.push_back(REQUEST);
QOwner.push_back(ghostOwner);
} // end of critical region
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) {

@ -80,9 +80,11 @@ class staticQueue
MilanLongInt squeueTail;
MilanLongInt NumNodes;
//FIXME I had to comment this piece of code in order to make everything work.
// why?
//Prevent Assignment and Pass by Value:
staticQueue(const staticQueue& src);
staticQueue& operator=(const staticQueue& rhs);
//staticQueue(const staticQueue& src);
//staticQueue& operator=(const staticQueue& rhs);
public:
//Constructors and Destructors

@ -12,7 +12,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt StartIndex, MilanLongInt EndIndex,
MilanLongInt* numGhostEdgesPtr,
MilanLongInt* numGhostVerticesPtr,
MilanLongInt* insertMePtr,
MilanLongInt* S,
MilanLongInt* verLocInd,
MilanLongInt* verLocPtr,
@ -29,7 +28,12 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
vector<MilanLongInt>& QMsgType,
vector<MilanInt>& QOwner,
MilanLongInt* &candidateMate,
staticQueue& U
staticQueue& U,
staticQueue& privateU,
staticQueue& privateQLocalVtx,
staticQueue& privateQGhostVtx,
staticQueue& privateQMsgType,
staticQueue& privateQOwner
)
{
@ -37,7 +41,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt adj1, adj2;
int i, v, k, w;
// index that starts with zero to |Vg| - 1
map<MilanLongInt, MilanLongInt>::iterator storedAlready;
@ -64,10 +67,9 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
*
* Despite the critical region it is still productive to
* parallelize this for because the critical region is exeuted
* only when a ghost edge is found and ghost edges are a minority.
* only when a ghost edge is found and ghost edges are a minority,
* circa 3.5% during the tests.
*/
// TODO comments about the reduction
#pragma omp for reduction(+ : numGhostEdges)
for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice
insertMe = verLocInd[i];
@ -90,8 +92,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
} //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
} //End of for(ghost vertices)
#pragma omp single
{
//numGhostEdges = atomicNumGhostEdges;
@ -143,7 +143,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
/*
* Not parallelizable
*/
for (i = 0; i < numGhostVertices; i++) { //O(|Ghost Vertices|)
verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i];
#ifdef PRINT_DEBUG_INFO_
@ -163,6 +162,10 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
fflush(stdout);
#endif
#ifdef TIME_TRACKER
double verGhostIndInitialization = MPI_Wtime();
#endif
/*
* OMP verGhostIndInitialization
*
@ -175,13 +178,8 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
* Despite the critical region it's still useful to
* parallelize the for cause the ghost nodes
* are a minority hence the critical region is executed
* few times.
* few times, circa 3.5% of the times in the tests.
*/
#ifdef TIME_TRACKER
double verGhostIndInitialization = MPI_Wtime();
#endif
#pragma omp for nowait schedule(static)
for (v = 0; v < NLVer; v++) {
adj1 = verLocPtr[v]; //Vertex Pointer
@ -192,17 +190,14 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
#pragma omp critical
{
insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert
verGhostInd[insertMe] = v + StartIndex; //Add the adjacency
tempCounter[Ghost2LocalMap[w]]++; //Increment the counter
}
verGhostInd[insertMe] = v + StartIndex; //Add the adjacency
} //End of if((w < StartIndex) || (w > EndIndex))
} //End of for(k)
} //End of for (v)
}
#pragma omp single
{
} // End of parallel region
#ifdef TIME_TRACKER
verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization;
@ -216,11 +211,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
cout<<endl; fflush(stdout);
#endif
Message.resize(3, -1);
//message_type = 0;
//NumMessagesBundled = 0;
//ghostOwner = 0;
try {
QLocalVtx.reserve(numGhostEdges); //Local Vertex
QGhostVtx.reserve(numGhostEdges); //Ghost Vertex
@ -232,23 +222,19 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
exit(1);
}
} // end of single region
#ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
#endif
*numGhostEdgesPtr = numGhostEdges;
*numGhostVerticesPtr = numGhostVertices;
*insertMePtr = insertMe;
//Allocate Data Structures:
/*
* candidateMate was a vector and has been replaced with a raw array
* there is no point in using the vector (or maybe there is???)
* there is no point in using the vector (or maybe there is (???))
* so I replaced it with an array wich is slightly faster
*/
//candidateMate = new MilanLongInt[NLVer + numGhostVertices];
candidateMate = new MilanLongInt[NLVer + numGhostVertices];
@ -267,7 +253,6 @@ cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
*S = numGhostVertices; //Initialize S with number of Ghost Vertices
/*
* Create the Queue Data Structure for the Dominating Set
*
@ -276,6 +261,14 @@ cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
* of a staticQueue I had to destroy the previous object and instantiate
* a new one of the correct size.
*/
U.~staticQueue();
new(&U) staticQueue(NLVer + numGhostVertices);
//TODO how can I decide a more meaningfull size?
MilanLongInt size = numGhostVertices;
new(&privateU) staticQueue(NLVer + numGhostVertices); //TODO how can I put a meaningfull size?
new(&privateQLocalVtx) staticQueue(size);
new(&privateQGhostVtx) staticQueue(size);
new(&privateQMsgType) staticQueue(size);
new(&privateQOwner) staticQueue(size);
}

Loading…
Cancel
Save