From ea040ae5ee05e517e3e9779a2e602e43e5c9d555 Mon Sep 17 00:00:00 2001 From: StefanoPetrilli Date: Sun, 26 Jun 2022 04:48:49 -0500 Subject: [PATCH] Reformat initialize, refactoring of initialize completed --- ...mEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 1 + amgprec/impl/aggregator/initialize.cpp | 435 ++++++++++-------- 2 files changed, 236 insertions(+), 200 deletions(-) diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index 9d5b6417..d6c58852 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -214,6 +214,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( bool isEmpty; //Declare the locks + // TODO destroy the locks omp_lock_t MateLock[NLVer]; initialize(NLVer, NLEdge, StartIndex, diff --git a/amgprec/impl/aggregator/initialize.cpp b/amgprec/impl/aggregator/initialize.cpp index 908bd1d3..21210c34 100644 --- a/amgprec/impl/aggregator/initialize.cpp +++ b/amgprec/impl/aggregator/initialize.cpp @@ -8,35 +8,34 @@ #include "dataStrStaticQueue.h" #include "omp.h" -#define NUM_THREAD 4 +#define NUM_THREAD 12 inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, - MilanLongInt StartIndex, MilanLongInt EndIndex, - MilanLongInt* numGhostEdgesPtr, - MilanLongInt* numGhostVerticesPtr, - MilanLongInt* S, - MilanLongInt* verLocInd, - MilanLongInt* verLocPtr, - omp_lock_t* MateLock, - map &Ghost2LocalMap, - vector & Counter, - vector & verGhostPtr, - vector & verGhostInd, - vector & tempCounter, - vector & GMate, - vector& Message, - vector& QLocalVtx, - vector& QGhostVtx, - vector& QMsgType, - vector& QOwner, - MilanLongInt* &candidateMate, - staticQueue& U, - staticQueue& privateU, - staticQueue& privateQLocalVtx, - staticQueue& privateQGhostVtx, - staticQueue& privateQMsgType, - staticQueue& privateQOwner - ) + MilanLongInt StartIndex, MilanLongInt EndIndex, + MilanLongInt *numGhostEdgesPtr, + MilanLongInt *numGhostVerticesPtr, + MilanLongInt *S, + MilanLongInt *verLocInd, + MilanLongInt *verLocPtr, + omp_lock_t *MateLock, + map &Ghost2LocalMap, + vector &Counter, + vector &verGhostPtr, + vector &verGhostInd, + vector &tempCounter, + vector &GMate, + vector &Message, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + MilanLongInt *&candidateMate, + staticQueue &U, + staticQueue &privateU, + staticQueue &privateQLocalVtx, + staticQueue &privateQGhostVtx, + staticQueue &privateQMsgType, + staticQueue &privateQOwner) { MilanLongInt insertMe = 0, numGhostEdges = 0, numGhostVertices = 0; @@ -48,53 +47,55 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, #pragma omp parallel private(insertMe, k, w, v, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(NUM_THREAD) { - - #pragma omp single + +#pragma omp single { - //Initialize the locks - //TODO this can be executed as task in parallel with other unparallelizable tasks - //TODO destroy the locks + // Initialize the locks #pragma omp taskloop num_tasks(NUM_THREAD) - for(i = 0; i < NLVer; i++) - omp_init_lock(&MateLock[i]); - + for (i = 0; i < NLVer; i++) + omp_init_lock(&MateLock[i]); + #ifdef TIME_TRACKER - double Ghost2LocalInitialization = MPI_Wtime(); + double Ghost2LocalInitialization = MPI_Wtime(); #endif - /* - * OMP Ghost2LocalInitialization - * The cycle analyzes all the edges and when finds a ghost edge - * puts it in the Ghost2LocalMap. - * A critical region is needed when inserting data in the map. - * - * Despite the critical region it is still productive to - * parallelize this for because the critical region is exeuted - * only when a ghost edge is found and ghost edges are a minority, - * circa 3.5% during the tests. - */ -#pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ : numGhostEdges) depend ( out : numGhostEdges, Counter, Ghost2LocalMap ) - for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice - insertMe = verLocInd[i]; - //cout<<"InsertMe on Process "< EndIndex)) { //Find a ghost - numGhostEdges++; + /* + * OMP Ghost2LocalInitialization + * This loop analyzes all the edges and when finds a ghost edge + * puts it in the Ghost2LocalMap. + * A critical region is needed when inserting data in the map. + * + * Despite the critical region it is still productive to + * parallelize this cycle because the critical region is exeuted + * only when a ghost edge is found and ghost edges are a minority, + * circa 3.5% during the tests. + */ +#pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ \ + : numGhostEdges) depend(out \ + : numGhostEdges, Counter, Ghost2LocalMap) + for (i = 0; i < NLEdge; i++) + { // O(m) - Each edge stored twice + insertMe = verLocInd[i]; + if ((insertMe < StartIndex) || (insertMe > EndIndex)) + { // Find a ghost + numGhostEdges++; #pragma omp critical - { - storedAlready = Ghost2LocalMap.find(insertMe); - if (storedAlready != Ghost2LocalMap.end()) { //Has already been added - //cout<<"Process "<first<<" - "<second<second]++; //Increment the counter - } else { //Insert an entry for the ghost: - //cout<<"Process "<second<<" - "<first<<" : "<second]<second << " - " << storedAlready->first << " : " << Counter[storedAlready->second] << endl; fflush(stdout); storedAlready++; - } while ( storedAlready != Ghost2LocalMap.end() ); + } while (storedAlready != Ghost2LocalMap.end()); } #endif - #pragma omp task depend ( out : verGhostPtr, tempCounter, verGhostInd, GMate) depend ( in : numGhostVertices) - { +#pragma omp task depend(out \ + : verGhostPtr, tempCounter, verGhostInd, GMate) depend(in \ + : numGhostVertices) + { - //Initialize adjacency Lists for Ghost Vertices: - try { - verGhostPtr.reserve(numGhostVertices + 1); //Pointer Vector - tempCounter.reserve(numGhostVertices); //Pointer Vector - verGhostInd.reserve(numGhostEdges); //Index Vector - GMate.reserve(numGhostVertices); //Ghost Mate Vector - } catch (length_error) { - cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; - cout << "Not enough memory to allocate the internal variables \n"; - exit(1); - } - //Initialize the Vectors: - verGhostPtr.resize(numGhostVertices + 1, 0); //Pointer Vector - tempCounter.resize(numGhostVertices, 0); //Temporary Counter - verGhostInd.resize(numGhostEdges, -1); //Index Vector - GMate.resize(numGhostVertices, -1); //Temporary Counter - verGhostPtr[0] = 0; //The first value + // Initialize adjacency Lists for Ghost Vertices: + try + { + verGhostPtr.reserve(numGhostVertices + 1); // Pointer Vector + tempCounter.reserve(numGhostVertices); // Pointer Vector + verGhostInd.reserve(numGhostEdges); // Index Vector + GMate.reserve(numGhostVertices); // Ghost Mate Vector + } + catch (length_error) + { + cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; + cout << "Not enough memory to allocate the internal variables \n"; + exit(1); + } + // Initialize the Vectors: + verGhostPtr.resize(numGhostVertices + 1, 0); // Pointer Vector + tempCounter.resize(numGhostVertices, 0); // Temporary Counter + verGhostInd.resize(numGhostEdges, -1); // Index Vector + GMate.resize(numGhostVertices, -1); // Temporary Counter + verGhostPtr[0] = 0; // The first value #ifdef PRINT_DEBUG_INFO_ - cout<<"\n("< 0 ) - cout< 0) + cout << verGhostPtr[numGhostVertices] << "\n"; + fflush(stdout); #endif #ifdef TIME_TRACKER - double verGhostIndInitialization = MPI_Wtime(); + double verGhostIndInitialization = MPI_Wtime(); #endif - /* - * OMP verGhostIndInitialization - * - * In this cycle the verGhostInd is initialized - * with the datas related to ghost edges. - * The check to see if a node is a ghost node is - * executed in paralle and when a ghost node - * is found a critical region is started. - * - * Despite the critical region it's still useful to - * parallelize the for cause the ghost nodes - * are a minority hence the critical region is executed - * few times, circa 3.5% of the times in the tests. - */ -#pragma omp taskloop num_tasks(NUM_THREAD) depend ( in : insertMe, Ghost2LocalMap, tempCounter) depend ( out : verGhostInd) - for (v = 0; v < NLVer; v++) { - adj1 = verLocPtr[v]; //Vertex Pointer - adj2 = verLocPtr[v + 1]; - for (k = adj1; k < adj2; k++) { - w = verLocInd[k]; //Get the adjacent vertex - if ((w < StartIndex) || (w > EndIndex)) { //Find a ghost + /* + * OMP verGhostIndInitialization + * + * In this cycle the verGhostInd is initialized + * with the datas related to ghost edges. + * The check to see if a node is a ghost node is + * executed in paralle and when a ghost node + * is found a critical region is started. + * + * Despite the critical region it's still useful to + * parallelize the for cause the ghost nodes + * are a minority hence the critical region is executed + * few times, circa 3.5% of the times in the tests. + */ +#pragma omp taskloop num_tasks(NUM_THREAD) depend(in \ + : insertMe, Ghost2LocalMap, tempCounter) depend(out \ + : verGhostInd) + for (v = 0; v < NLVer; v++) + { + adj1 = verLocPtr[v]; // Vertex Pointer + adj2 = verLocPtr[v + 1]; + for (k = adj1; k < adj2; k++) + { + w = verLocInd[k]; // Get the adjacent vertex + if ((w < StartIndex) || (w > EndIndex)) + { // Find a ghost #pragma omp critical - { - insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert - tempCounter[Ghost2LocalMap[w]]++; //Increment the counter - } - verGhostInd[insertMe] = v + StartIndex; //Add the adjacency - } //End of if((w < StartIndex) || (w > EndIndex)) - } //End of for(k) - } //End of for (v) + { + insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; // Where to insert + tempCounter[Ghost2LocalMap[w]]++; // Increment the counter + } + verGhostInd[insertMe] = v + StartIndex; // Add the adjacency + } // End of if((w < StartIndex) || (w > EndIndex)) + } // End of for(k) + } // End of for (v) - } // End of parallel region + } // End of parallel region #ifdef TIME_TRACKER - verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization; - fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization); + verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization; + fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization); #endif #ifdef PRINT_DEBUG_INFO_ - cout<<"\n("<