diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index b4ead45d..9d5b6417 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -326,9 +326,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( assert(ghostOwner != myRank); PCounter[ghostOwner]++; - //TODO why does it fail if I use a private data structure??? - /* + //TODO why does it fail if I use a private data structure??? privateQLocalVtx.push_back(v + StartIndex); privateQGhostVtx.push_back(w); privateQMsgType.push_back(REQUEST); @@ -343,7 +342,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( QMsgType.push_back(REQUEST); QOwner.push_back(ghostOwner); } // end of critical region - + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) { @@ -430,6 +429,20 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( //End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) } //End of for ( v=0; v < NLVer; v++ ) + + #pragma omp critical(privateMsg) + { + while (!privateQLocalVtx.empty()) { + + QLocalVtx.push_back(privateQLocalVtx.pop_front()); + QGhostVtx.push_back(privateQGhostVtx.pop_front()); + QMsgType.push_back(privateQMsgType.pop_front()); + QOwner.push_back(privateQOwner.pop_front()); + + } + + } + #pragma omp critical(U) { while (!privateU.empty()) @@ -699,16 +712,17 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( } } //End of while ( /*!Q.empty()*/ !U.empty() ) -#pragma omp critical(privateMsg) + #pragma omp critical(privateMsg) { while (!privateQLocalVtx.empty()) { - QLocalVtx.push_back(privateQLocalVtx.pop_back()); - QGhostVtx.push_back(privateQGhostVtx.pop_back()); - QMsgType.push_back(privateQMsgType.pop_back()); - QOwner.push_back(privateQOwner.pop_back()); + QLocalVtx.push_back(privateQLocalVtx.pop_front()); + QGhostVtx.push_back(privateQGhostVtx.pop_front()); + QMsgType.push_back(privateQMsgType.pop_front()); + QOwner.push_back(privateQOwner.pop_front()); } + } diff --git a/amgprec/impl/aggregator/initialize.cpp b/amgprec/impl/aggregator/initialize.cpp index 117057b5..908bd1d3 100644 --- a/amgprec/impl/aggregator/initialize.cpp +++ b/amgprec/impl/aggregator/initialize.cpp @@ -8,6 +8,8 @@ #include "dataStrStaticQueue.h" #include "omp.h" +#define NUM_THREAD 4 + inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, MilanLongInt StartIndex, MilanLongInt EndIndex, MilanLongInt* numGhostEdgesPtr, @@ -44,17 +46,19 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, // index that starts with zero to |Vg| - 1 map::iterator storedAlready; -#pragma omp parallel private(insertMe, k, w, v, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4) +#pragma omp parallel private(insertMe, k, w, v, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(NUM_THREAD) { + #pragma omp single + { + //Initialize the locks //TODO this can be executed as task in parallel with other unparallelizable tasks //TODO destroy the locks -#pragma omp for schedule(static) +#pragma omp taskloop num_tasks(NUM_THREAD) for(i = 0; i < NLVer; i++) omp_init_lock(&MateLock[i]); - - + #ifdef TIME_TRACKER double Ghost2LocalInitialization = MPI_Wtime(); #endif @@ -70,7 +74,7 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, * only when a ghost edge is found and ghost edges are a minority, * circa 3.5% during the tests. */ -#pragma omp for reduction(+ : numGhostEdges) +#pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ : numGhostEdges) depend ( out : numGhostEdges, Counter, Ghost2LocalMap ) for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice insertMe = verLocInd[i]; //cout<<"InsertMe on Process "< EndIndex) ) } //End of for(ghost vertices) - #pragma omp single - { - //numGhostEdges = atomicNumGhostEdges; #ifdef TIME_TRACKER Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization; fprintf(stderr, "Ghost2LocalInitialization time: %f\n", Ghost2LocalInitialization); @@ -114,6 +115,9 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, } #endif + #pragma omp task depend ( out : verGhostPtr, tempCounter, verGhostInd, GMate) depend ( in : numGhostVertices) + { + //Initialize adjacency Lists for Ghost Vertices: try { verGhostPtr.reserve(numGhostVertices + 1); //Pointer Vector @@ -139,18 +143,17 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, double verGhostPtrInitialization = MPI_Wtime(); #endif + } // End of task - /* - * Not parallelizable - */ +#pragma omp task depent ( out : verGhostPtr ) depend ( in : Counter, numGhostVertices) + { for (i = 0; i < numGhostVertices; i++) { //O(|Ghost Vertices|) verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i]; #ifdef PRINT_DEBUG_INFO_ cout<