Initialize parallelized with task

omp-walther
StefanoPetrilli 3 years ago
parent b5e52d31f5
commit 7741abd45d

@ -326,9 +326,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
assert(ghostOwner != myRank); assert(ghostOwner != myRank);
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
//TODO why does it fail if I use a private data structure???
/* /*
//TODO why does it fail if I use a private data structure???
privateQLocalVtx.push_back(v + StartIndex); privateQLocalVtx.push_back(v + StartIndex);
privateQGhostVtx.push_back(w); privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST); privateQMsgType.push_back(REQUEST);
@ -343,7 +342,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
QMsgType.push_back(REQUEST); QMsgType.push_back(REQUEST);
QOwner.push_back(ghostOwner); QOwner.push_back(ghostOwner);
} // end of critical region } // end of critical region
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) { if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) {
@ -430,6 +429,20 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
//End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) //End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
} //End of for ( v=0; v < NLVer; v++ ) } //End of for ( v=0; v < NLVer; v++ )
#pragma omp critical(privateMsg)
{
while (!privateQLocalVtx.empty()) {
QLocalVtx.push_back(privateQLocalVtx.pop_front());
QGhostVtx.push_back(privateQGhostVtx.pop_front());
QMsgType.push_back(privateQMsgType.pop_front());
QOwner.push_back(privateQOwner.pop_front());
}
}
#pragma omp critical(U) #pragma omp critical(U)
{ {
while (!privateU.empty()) while (!privateU.empty())
@ -699,16 +712,17 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
} }
} //End of while ( /*!Q.empty()*/ !U.empty() ) } //End of while ( /*!Q.empty()*/ !U.empty() )
#pragma omp critical(privateMsg) #pragma omp critical(privateMsg)
{ {
while (!privateQLocalVtx.empty()) { while (!privateQLocalVtx.empty()) {
QLocalVtx.push_back(privateQLocalVtx.pop_back()); QLocalVtx.push_back(privateQLocalVtx.pop_front());
QGhostVtx.push_back(privateQGhostVtx.pop_back()); QGhostVtx.push_back(privateQGhostVtx.pop_front());
QMsgType.push_back(privateQMsgType.pop_back()); QMsgType.push_back(privateQMsgType.pop_front());
QOwner.push_back(privateQOwner.pop_back()); QOwner.push_back(privateQOwner.pop_front());
} }
} }

@ -8,6 +8,8 @@
#include "dataStrStaticQueue.h" #include "dataStrStaticQueue.h"
#include "omp.h" #include "omp.h"
#define NUM_THREAD 4
inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt StartIndex, MilanLongInt EndIndex, MilanLongInt StartIndex, MilanLongInt EndIndex,
MilanLongInt* numGhostEdgesPtr, MilanLongInt* numGhostEdgesPtr,
@ -44,17 +46,19 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
// index that starts with zero to |Vg| - 1 // index that starts with zero to |Vg| - 1
map<MilanLongInt, MilanLongInt>::iterator storedAlready; map<MilanLongInt, MilanLongInt>::iterator storedAlready;
#pragma omp parallel private(insertMe, k, w, v, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4) #pragma omp parallel private(insertMe, k, w, v, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(NUM_THREAD)
{ {
#pragma omp single
{
//Initialize the locks //Initialize the locks
//TODO this can be executed as task in parallel with other unparallelizable tasks //TODO this can be executed as task in parallel with other unparallelizable tasks
//TODO destroy the locks //TODO destroy the locks
#pragma omp for schedule(static) #pragma omp taskloop num_tasks(NUM_THREAD)
for(i = 0; i < NLVer; i++) for(i = 0; i < NLVer; i++)
omp_init_lock(&MateLock[i]); omp_init_lock(&MateLock[i]);
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
double Ghost2LocalInitialization = MPI_Wtime(); double Ghost2LocalInitialization = MPI_Wtime();
#endif #endif
@ -70,7 +74,7 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
* only when a ghost edge is found and ghost edges are a minority, * only when a ghost edge is found and ghost edges are a minority,
* circa 3.5% during the tests. * circa 3.5% during the tests.
*/ */
#pragma omp for reduction(+ : numGhostEdges) #pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ : numGhostEdges) depend ( out : numGhostEdges, Counter, Ghost2LocalMap )
for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice
insertMe = verLocInd[i]; insertMe = verLocInd[i];
//cout<<"InsertMe on Process "<<myRank<<" is: "<<insertMe<<endl; //cout<<"InsertMe on Process "<<myRank<<" is: "<<insertMe<<endl;
@ -92,9 +96,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
} //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) ) } //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
} //End of for(ghost vertices) } //End of for(ghost vertices)
#pragma omp single
{
//numGhostEdges = atomicNumGhostEdges;
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization; Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization;
fprintf(stderr, "Ghost2LocalInitialization time: %f\n", Ghost2LocalInitialization); fprintf(stderr, "Ghost2LocalInitialization time: %f\n", Ghost2LocalInitialization);
@ -114,6 +115,9 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
} }
#endif #endif
#pragma omp task depend ( out : verGhostPtr, tempCounter, verGhostInd, GMate) depend ( in : numGhostVertices)
{
//Initialize adjacency Lists for Ghost Vertices: //Initialize adjacency Lists for Ghost Vertices:
try { try {
verGhostPtr.reserve(numGhostVertices + 1); //Pointer Vector verGhostPtr.reserve(numGhostVertices + 1); //Pointer Vector
@ -139,18 +143,17 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
double verGhostPtrInitialization = MPI_Wtime(); double verGhostPtrInitialization = MPI_Wtime();
#endif #endif
} // End of task
/* #pragma omp task depent ( out : verGhostPtr ) depend ( in : Counter, numGhostVertices)
* Not parallelizable {
*/
for (i = 0; i < numGhostVertices; i++) { //O(|Ghost Vertices|) for (i = 0; i < numGhostVertices; i++) { //O(|Ghost Vertices|)
verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i]; verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i];
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<verGhostPtr[i]<<"\t"; fflush(stdout); cout<<verGhostPtr[i]<<"\t"; fflush(stdout);
#endif #endif
} }
} // End of single region }//End of task
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
verGhostPtrInitialization = MPI_Wtime() - verGhostPtrInitialization; verGhostPtrInitialization = MPI_Wtime() - verGhostPtrInitialization;
fprintf(stderr, "verGhostPtrInitialization time: %f\n", verGhostPtrInitialization); fprintf(stderr, "verGhostPtrInitialization time: %f\n", verGhostPtrInitialization);
@ -180,7 +183,7 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
* are a minority hence the critical region is executed * are a minority hence the critical region is executed
* few times, circa 3.5% of the times in the tests. * few times, circa 3.5% of the times in the tests.
*/ */
#pragma omp for nowait schedule(static) #pragma omp taskloop num_tasks(NUM_THREAD) depend ( in : insertMe, Ghost2LocalMap, tempCounter) depend ( out : verGhostInd)
for (v = 0; v < NLVer; v++) { for (v = 0; v < NLVer; v++) {
adj1 = verLocPtr[v]; //Vertex Pointer adj1 = verLocPtr[v]; //Vertex Pointer
adj2 = verLocPtr[v + 1]; adj2 = verLocPtr[v + 1];
@ -211,6 +214,8 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
cout<<endl; fflush(stdout); cout<<endl; fflush(stdout);
#endif #endif
#pragma omp task depend ( in : numGhostEdges) depend ( out : QLocalVtx, QGhostVtx, QMsgType, QOwner )
{
try { try {
QLocalVtx.reserve(numGhostEdges); //Local Vertex QLocalVtx.reserve(numGhostEdges); //Local Vertex
QGhostVtx.reserve(numGhostEdges); //Ghost Vertex QGhostVtx.reserve(numGhostEdges); //Ghost Vertex
@ -221,6 +226,10 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
cout << "Not enough memory to allocate the internal variables \n"; cout << "Not enough memory to allocate the internal variables \n";
exit(1); exit(1);
} }
}
#pragma omp task depend( in : numGhostEdges, numGhostVertices ) depend ( out : candidateMate, S, U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner)
{
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout); cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
@ -231,7 +240,7 @@ cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
//Allocate Data Structures: //Allocate Data Structures:
/* /*
* candidateMate was a vector and has been replaced with a raw array * candidateMate was a vector and has been replaced with an array
* there is no point in using the vector (or maybe there is (???)) * there is no point in using the vector (or maybe there is (???))
* so I replaced it with an array which is slightly faster * so I replaced it with an array which is slightly faster
*/ */
@ -271,4 +280,6 @@ cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
new(&privateQGhostVtx) staticQueue(size); new(&privateQGhostVtx) staticQueue(size);
new(&privateQMsgType) staticQueue(size); new(&privateQMsgType) staticQueue(size);
new(&privateQOwner) staticQueue(size); new(&privateQOwner) staticQueue(size);
}
} // End of single
} }

Loading…
Cancel
Save