diff --git a/amgprec/impl/aggregator/initialize.cpp b/amgprec/impl/aggregator/initialize.cpp index 21210c34..c5ae3f26 100644 --- a/amgprec/impl/aggregator/initialize.cpp +++ b/amgprec/impl/aggregator/initialize.cpp @@ -8,7 +8,7 @@ #include "dataStrStaticQueue.h" #include "omp.h" -#define NUM_THREAD 12 +#define NUM_THREAD 4 inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, MilanLongInt StartIndex, MilanLongInt EndIndex, @@ -50,7 +50,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, #pragma omp single { - // Initialize the locks #pragma omp taskloop num_tasks(NUM_THREAD) for (i = 0; i < NLVer; i++) @@ -71,32 +70,38 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, * only when a ghost edge is found and ghost edges are a minority, * circa 3.5% during the tests. */ -#pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ \ - : numGhostEdges) depend(out \ - : numGhostEdges, Counter, Ghost2LocalMap) - for (i = 0; i < NLEdge; i++) - { // O(m) - Each edge stored twice - insertMe = verLocInd[i]; - if ((insertMe < StartIndex) || (insertMe > EndIndex)) - { // Find a ghost - numGhostEdges++; + +#pragma omp task depend(out \ + : numGhostEdges, Counter, Ghost2LocalMap, insertMe, storedAlready, numGhostVertices) + { + +#pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ \ + : numGhostEdges) + for (i = 0; i < NLEdge; i++) + { // O(m) - Each edge stored twice + insertMe = verLocInd[i]; + if ((insertMe < StartIndex) || (insertMe > EndIndex)) + { // Find a ghost + numGhostEdges++; #pragma omp critical - { - storedAlready = Ghost2LocalMap.find(insertMe); - if (storedAlready != Ghost2LocalMap.end()) - { // Has already been added - Counter[storedAlready->second]++; // Increment the counter + { + storedAlready = Ghost2LocalMap.find(insertMe); + if (storedAlready != Ghost2LocalMap.end()) + { // Has already been added + Counter[storedAlready->second]++; // Increment the counter + } + else + { // Insert an entry for the ghost: + Ghost2LocalMap[insertMe] = numGhostVertices; // Add a map entry + Counter.push_back(1); // Initialize the counter + numGhostVertices++; // Increment the number of ghost vertices + } // End of else() } - else - { // Insert an entry for the ghost: - Ghost2LocalMap[insertMe] = numGhostVertices; // Add a map entry - Counter.push_back(1); // Initialize the counter - numGhostVertices++; // Increment the number of ghost vertices - } // End of else() - } - } // End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) ) - } // End of for(ghost vertices) + } // End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) ) + } // End of for(ghost vertices) + } // end of task depend + // numGhostEdges = atomicNumGhostEdges; #ifdef TIME_TRACKER Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization; fprintf(stderr, "Ghost2LocalInitialization time: %f\n", Ghost2LocalInitialization); @@ -121,7 +126,7 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, #pragma omp task depend(out \ : verGhostPtr, tempCounter, verGhostInd, GMate) depend(in \ - : numGhostVertices) + : numGhostVertices, numGhostEdges) { // Initialize adjacency Lists for Ghost Vertices: @@ -151,7 +156,7 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, } // End of task -#pragma omp task depent(out \ +#pragma omp task depend(out \ : verGhostPtr) depend(in \ : Counter, numGhostVertices) { @@ -198,123 +203,126 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, * are a minority hence the critical region is executed * few times, circa 3.5% of the times in the tests. */ -#pragma omp taskloop num_tasks(NUM_THREAD) depend(in \ - : insertMe, Ghost2LocalMap, tempCounter) depend(out \ - : verGhostInd) - for (v = 0; v < NLVer; v++) +#pragma omp task depend(in \ + : insertMe, Ghost2LocalMap, tempCounter, verGhostPtr) depend(out \ + : verGhostInd) { - adj1 = verLocPtr[v]; // Vertex Pointer - adj2 = verLocPtr[v + 1]; - for (k = adj1; k < adj2; k++) +#pragma omp taskloop num_tasks(NUM_THREAD) + for (v = 0; v < NLVer; v++) { - w = verLocInd[k]; // Get the adjacent vertex - if ((w < StartIndex) || (w > EndIndex)) - { // Find a ghost + adj1 = verLocPtr[v]; // Vertex Pointer + adj2 = verLocPtr[v + 1]; + for (k = adj1; k < adj2; k++) + { + w = verLocInd[k]; // Get the adjacent vertex + if ((w < StartIndex) || (w > EndIndex)) + { // Find a ghost #pragma omp critical - { - insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; // Where to insert - tempCounter[Ghost2LocalMap[w]]++; // Increment the counter - } - verGhostInd[insertMe] = v + StartIndex; // Add the adjacency - } // End of if((w < StartIndex) || (w > EndIndex)) - } // End of for(k) - } // End of for (v) - - } // End of parallel region + { + insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; // Where to insert + tempCounter[Ghost2LocalMap[w]]++; // Increment the counter + } + verGhostInd[insertMe] = v + StartIndex; // Add the adjacency + } // End of if((w < StartIndex) || (w > EndIndex)) + } // End of for(k) + } // End of for (v) + } // end of tasklopp #ifdef TIME_TRACKER - verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization; - fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization); + verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization; + fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization); #endif #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Ghost Vertex Index: "; - for (v = 0; v < numGhostEdges; v++) - cout << verGhostInd[v] << "\t"; - cout << endl; - fflush(stdout); + cout << "\n(" << myRank << ")Ghost Vertex Index: "; + for (v = 0; v < numGhostEdges; v++) + cout << verGhostInd[v] << "\t"; + cout << endl; + fflush(stdout); #endif #pragma omp task depend(in \ : numGhostEdges) depend(out \ : QLocalVtx, QGhostVtx, QMsgType, QOwner) - { - try { - QLocalVtx.reserve(numGhostEdges); // Local Vertex - QGhostVtx.reserve(numGhostEdges); // Ghost Vertex - QMsgType.reserve(numGhostEdges); // Message Type (Request/Failure) - QOwner.reserve(numGhostEdges); // Owner of the ghost: COmpute once and use later - } - catch (length_error) - { - cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; - cout << "Not enough memory to allocate the internal variables \n"; - exit(1); - } - } + try + { + QLocalVtx.reserve(numGhostEdges); // Local Vertex + QGhostVtx.reserve(numGhostEdges); // Ghost Vertex + QMsgType.reserve(numGhostEdges); // Message Type (Request/Failure) + QOwner.reserve(numGhostEdges); // Owner of the ghost: COmpute once and use later + } + catch (length_error) + { + cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; + cout << "Not enough memory to allocate the internal variables \n"; + exit(1); + } + } // end of task #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Allocating CandidateMate.. "; - fflush(stdout); + cout << "\n(" << myRank << ")Allocating CandidateMate.. "; + fflush(stdout); #endif #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << "=========================************===============================" << endl; - fflush(stdout); - fflush(stdout); + cout << "\n(" << myRank << "=========================************===============================" << endl; + fflush(stdout); + fflush(stdout); #endif #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl; - fflush(stdout); - fflush(stdout); + cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl; + fflush(stdout); + fflush(stdout); #endif + #ifdef DEBUG_HANG_ - if (myRank == 0) - cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl; - fflush(stdout); + if (myRank == 0) + cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl; + fflush(stdout); #endif #pragma omp task depend(in \ : numGhostEdges, numGhostVertices) depend(out \ : candidateMate, S, U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) - { - - - //The values calculated in this function are sent back to the calling function - *numGhostEdgesPtr = numGhostEdges; - *numGhostVerticesPtr = numGhostVertices; - - // Allocate Data Structures: - /* - * candidateMate was a vector and has been replaced with an array - * there is no point in using the vector (or maybe there is (???)) - * so I replaced it with an array wich is slightly faster - */ - candidateMate = new MilanLongInt[NLVer + numGhostVertices]; - - *S = numGhostVertices; // Initialize S with number of Ghost Vertices + { - /* - * Create the Queue Data Structure for the Dominating Set - * - * I had to declare the staticuQueue U before the parallel region - * to have it in the correct scope. Since we can't change the dimension - * of a staticQueue I had to destroy the previous object and instantiate - * a new one of the correct size. - */ - new (&U) staticQueue(NLVer + numGhostVertices); - - // TODO how can I decide a more meaningfull size? - MilanLongInt size = numGhostVertices; - - // Initialize the privte data structure - new (&privateU) staticQueue(NLVer + numGhostVertices); // TODO how can I put a meaningfull size? - new (&privateQLocalVtx) staticQueue(size); - new (&privateQGhostVtx) staticQueue(size); - new (&privateQMsgType) staticQueue(size); - new (&privateQOwner) staticQueue(size); - } - } // End of single + // The values calculated in this function are sent back to the calling function + *numGhostEdgesPtr = numGhostEdges; + *numGhostVerticesPtr = numGhostVertices; + + // Allocate Data Structures: + /* + * candidateMate was a vector and has been replaced with an array + * there is no point in using the vector (or maybe there is (???)) + * so I replaced it with an array wich is slightly faster + */ + candidateMate = new MilanLongInt[NLVer + numGhostVertices]; + + *S = numGhostVertices; // Initialize S with number of Ghost Vertices + + /* + * Create the Queue Data Structure for the Dominating Set + * + * I had to declare the staticuQueue U before the parallel region + * to have it in the correct scope. Since we can't change the dimension + * of a staticQueue I had to destroy the previous object and instantiate + * a new one of the correct size. + */ + new (&U) staticQueue(NLVer + numGhostVertices); + + // TODO how can I decide a more meaningfull size? + MilanLongInt size = numGhostVertices; + + // Initialize the privte data structure + new (&privateU) staticQueue(NLVer + numGhostVertices); // TODO how can I put a meaningfull size? + new (&privateQLocalVtx) staticQueue(size); + new (&privateQGhostVtx) staticQueue(size); + new (&privateQMsgType) staticQueue(size); + new (&privateQOwner) staticQueue(size); + } // end of task + + } // End of single region + } // End of parallel region } diff --git a/exec.sh b/exec.sh index 3bb7bd90..50edf4ad 100755 --- a/exec.sh +++ b/exec.sh @@ -1,3 +1,4 @@ +rm amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.o make all cd samples/advanced/pdegen make amg_d_pde3d