Initialize parallelized with task

omp-walther
StefanoPetrilli 3 years ago
parent b5e52d31f5
commit 7741abd45d

@ -326,9 +326,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
assert(ghostOwner != myRank);
PCounter[ghostOwner]++;
//TODO why does it fail if I use a private data structure???
/*
//TODO why does it fail if I use a private data structure???
privateQLocalVtx.push_back(v + StartIndex);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST);
@ -343,7 +342,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
QMsgType.push_back(REQUEST);
QOwner.push_back(ghostOwner);
} // end of critical region
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) {
@ -430,6 +429,20 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
//End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
} //End of for ( v=0; v < NLVer; v++ )
#pragma omp critical(privateMsg)
{
while (!privateQLocalVtx.empty()) {
QLocalVtx.push_back(privateQLocalVtx.pop_front());
QGhostVtx.push_back(privateQGhostVtx.pop_front());
QMsgType.push_back(privateQMsgType.pop_front());
QOwner.push_back(privateQOwner.pop_front());
}
}
#pragma omp critical(U)
{
while (!privateU.empty())
@ -699,16 +712,17 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
}
} //End of while ( /*!Q.empty()*/ !U.empty() )
#pragma omp critical(privateMsg)
#pragma omp critical(privateMsg)
{
while (!privateQLocalVtx.empty()) {
QLocalVtx.push_back(privateQLocalVtx.pop_back());
QGhostVtx.push_back(privateQGhostVtx.pop_back());
QMsgType.push_back(privateQMsgType.pop_back());
QOwner.push_back(privateQOwner.pop_back());
QLocalVtx.push_back(privateQLocalVtx.pop_front());
QGhostVtx.push_back(privateQGhostVtx.pop_front());
QMsgType.push_back(privateQMsgType.pop_front());
QOwner.push_back(privateQOwner.pop_front());
}
}

@ -8,6 +8,8 @@
#include "dataStrStaticQueue.h"
#include "omp.h"
#define NUM_THREAD 4
inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt StartIndex, MilanLongInt EndIndex,
MilanLongInt* numGhostEdgesPtr,
@ -44,17 +46,19 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
// index that starts with zero to |Vg| - 1
map<MilanLongInt, MilanLongInt>::iterator storedAlready;
#pragma omp parallel private(insertMe, k, w, v, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4)
#pragma omp parallel private(insertMe, k, w, v, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(NUM_THREAD)
{
#pragma omp single
{
//Initialize the locks
//TODO this can be executed as task in parallel with other unparallelizable tasks
//TODO destroy the locks
#pragma omp for schedule(static)
#pragma omp taskloop num_tasks(NUM_THREAD)
for(i = 0; i < NLVer; i++)
omp_init_lock(&MateLock[i]);
#ifdef TIME_TRACKER
double Ghost2LocalInitialization = MPI_Wtime();
#endif
@ -70,7 +74,7 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
* only when a ghost edge is found and ghost edges are a minority,
* circa 3.5% during the tests.
*/
#pragma omp for reduction(+ : numGhostEdges)
#pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ : numGhostEdges) depend ( out : numGhostEdges, Counter, Ghost2LocalMap )
for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice
insertMe = verLocInd[i];
//cout<<"InsertMe on Process "<<myRank<<" is: "<<insertMe<<endl;
@ -92,9 +96,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
} //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
} //End of for(ghost vertices)
#pragma omp single
{
//numGhostEdges = atomicNumGhostEdges;
#ifdef TIME_TRACKER
Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization;
fprintf(stderr, "Ghost2LocalInitialization time: %f\n", Ghost2LocalInitialization);
@ -114,6 +115,9 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
}
#endif
#pragma omp task depend ( out : verGhostPtr, tempCounter, verGhostInd, GMate) depend ( in : numGhostVertices)
{
//Initialize adjacency Lists for Ghost Vertices:
try {
verGhostPtr.reserve(numGhostVertices + 1); //Pointer Vector
@ -139,18 +143,17 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
double verGhostPtrInitialization = MPI_Wtime();
#endif
} // End of task
/*
* Not parallelizable
*/
#pragma omp task depent ( out : verGhostPtr ) depend ( in : Counter, numGhostVertices)
{
for (i = 0; i < numGhostVertices; i++) { //O(|Ghost Vertices|)
verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i];
#ifdef PRINT_DEBUG_INFO_
cout<<verGhostPtr[i]<<"\t"; fflush(stdout);
#endif
}
} // End of single region
}//End of task
#ifdef TIME_TRACKER
verGhostPtrInitialization = MPI_Wtime() - verGhostPtrInitialization;
fprintf(stderr, "verGhostPtrInitialization time: %f\n", verGhostPtrInitialization);
@ -180,7 +183,7 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
* are a minority, hence the critical region is executed
* rarely, circa 3.5% of the time in the tests.
*/
#pragma omp for nowait schedule(static)
#pragma omp taskloop num_tasks(NUM_THREAD) depend ( in : insertMe, Ghost2LocalMap, tempCounter) depend ( out : verGhostInd)
for (v = 0; v < NLVer; v++) {
adj1 = verLocPtr[v]; //Vertex Pointer
adj2 = verLocPtr[v + 1];
@ -211,6 +214,8 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
cout<<endl; fflush(stdout);
#endif
#pragma omp task depend ( in : numGhostEdges) depend ( out : QLocalVtx, QGhostVtx, QMsgType, QOwner )
{
try {
QLocalVtx.reserve(numGhostEdges); //Local Vertex
QGhostVtx.reserve(numGhostEdges); //Ghost Vertex
@ -221,6 +226,10 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
cout << "Not enough memory to allocate the internal variables \n";
exit(1);
}
}
#pragma omp task depend( in : numGhostEdges, numGhostVertices ) depend ( out : candidateMate, S, U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner)
{
#ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
@ -231,7 +240,7 @@ cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
//Allocate Data Structures:
/*
* candidateMate was a vector and has been replaced with a raw array
* candidateMate was a vector and has been replaced with an array
* there is no point in using the vector (or maybe there is (???))
* so I replaced it with an array which is slightly faster
*/
@ -271,4 +280,6 @@ cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
new(&privateQGhostVtx) staticQueue(size);
new(&privateQMsgType) staticQueue(size);
new(&privateQOwner) staticQueue(size);
}
} // End of single
}

Loading…
Cancel
Save