From 0a8debe43af0fda4208fb6e6fd9764cbe5ca4159 Mon Sep 17 00:00:00 2001 From: StefanoPetrilli Date: Sun, 1 May 2022 15:26:47 -0500 Subject: [PATCH] Single parallel regions with multiple for cycles Added OMP for testing --- amgprec/impl/aggregator/Makefile | 2 +- amgprec/impl/aggregator/MatchBoxPC.cpp | 3 +- ...DomEdgesLinearSearchMesgBndlSmallMateC.cpp | 191 ++++++++++-------- exec.sh | 2 +- 4 files changed, 115 insertions(+), 83 deletions(-) diff --git a/amgprec/impl/aggregator/Makefile b/amgprec/impl/aggregator/Makefile index 0444e60d..d857a3b0 100644 --- a/amgprec/impl/aggregator/Makefile +++ b/amgprec/impl/aggregator/Makefile @@ -4,7 +4,7 @@ INCDIR=../../../include MODDIR=../../../modules HERE=../.. -FINCLUDES=$(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(PSBLAS_INCLUDES) -fopenmp +FINCLUDES=$(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(PSBLAS_INCLUDES) CXXINCLUDES=$(FMFLAG)$(HERE) $(FMFLAG)$(INCDIR) $(FMFLAG)/. #CINCLUDES= -I${SUPERLU_INCDIR} -I${HSL_INCDIR} -I${SPRAL_INCDIR} -I/home/users/pasqua/Ambra/BootCMatch/include -lBCM -L/home/users/pasqua/Ambra/BootCMatch/lib -lm diff --git a/amgprec/impl/aggregator/MatchBoxPC.cpp b/amgprec/impl/aggregator/MatchBoxPC.cpp index fc30e8fd..270c6d04 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.cpp +++ b/amgprec/impl/aggregator/MatchBoxPC.cpp @@ -66,6 +66,7 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, myRank,NLVer, NLEdge,verDistance[0],verDistance[1]); #endif +#define TIME_TRACKER #ifdef TIME_TRACKER double tmr = MPI_Wtime(); #endif @@ -80,7 +81,7 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, #ifdef TIME_TRACKER tmr = MPI_Wtime() - tmr; - fprintf(stderr, "Elaboration time: %f\n", tmr); + fprintf(stderr, "Elaboration time: %f for %lld\n", tmr, static_cast<long long>(NLEdge)); #endif #endif diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp index 
6f5dd9be..818c9f07 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp @@ -124,7 +124,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC( //inputSubGraph.getStartEndIndices(StartIndex, EndIndex); MilanLongInt StartIndex = verDistance[myRank]; //The starting vertex owned by the current rank //MilanLongInt EndIndex = verDistance[myRank+1]; //The ending vertex owned by the current rank - MilanLongInt EndIndex = verDistance[myRank+1]-1; //The ending vertex owned by the current rank + MilanLongInt EndIndex = verDistance[myRank + 1] - 1; //The ending vertex owned by the current rank MPI_Status computeStatus; const int ComputeTag = 7; //Predefined tag @@ -135,8 +135,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC( int message_length; //MilanLongInt NLVer=0, NLEdge=0, StartIndex=0, EndIndex=0; - MilanLongInt msgActual=0, msgInd=0; - MilanReal heaviestEdgeWt=0.0f; //Assumes positive weight + MilanLongInt msgActual = 0, msgInd = 0; + MilanReal heaviestEdgeWt = 0.0f; //Assumes positive weight MilanReal startTime, finishTime; //MilanReal Precision = MPI_Wtick(); //Get the precision of the MPI Timer startTime = MPI_Wtime(); @@ -150,18 +150,18 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC( cout<<"\n("< Ghost2LocalMap; //Map each ghost vertex to a local vertex + map Ghost2LocalMap; //Map each ghost vertex to a local vertex // index that starts with zero to |Vg| - 1 map::iterator storedAlready; - vector Counter; //Store the edge count for each ghost vertex - MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe=0; //Number of Ghost vertices + vector Counter; //Store the edge count for each ghost vertex + MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe = 0; //Number of Ghost vertices #ifdef PRINT_DEBUG_INFO_ cout<<"\n("< verGhostPtr, verGhostInd, tempCounter; + //Mate 
array for ghost vertices: + vector GMate; //Proportional to the number of ghost vertices + #ifdef TIME_TRACKER double Ghost2LocalInitialization = MPI_Wtime(); #endif -#pragma omp parallel for private(insertMe) firstprivate(StartIndex, EndIndex) default(shared) - for ( i=0; i EndIndex) ) { //Find a ghost +//#define OMP +#ifdef OMP +#pragma omp parallel private(insertMe, k, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4) + { +#endif + //printf("Id %d\n", omp_get_thread_num()); + +#ifdef OMP +#pragma omp for +#endif + for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice + insertMe = verLocInd[i]; + //cout<<"InsertMe on Process "< EndIndex)) { //Find a ghost +#ifdef OMP #pragma omp critical - { - numGhostEdges++; - storedAlready = Ghost2LocalMap.find(insertMe); - if (storedAlready != Ghost2LocalMap.end()) { //Has already been added - //cout<<"Process "<first<<" - "<second<second]++; //Increment the counter - } else { //Insert an entry for the ghost: - //cout<<"Process "<first<<" - "<second<second]++; //Increment the counter + } else { //Insert an entry for the ghost: + //cout<<"Process "<second<<" - "<first<<" : "<second]<second<<" - "<first<<" : "<second]< verGhostPtr, verGhostInd, tempCounter; - //Mate array for ghost vertices: - vector GMate; //Proportional to the number of ghost vertices - try { - verGhostPtr.reserve(numGhostVertices+1); //Pointer Vector - tempCounter.reserve(numGhostVertices); //Pointer Vector - verGhostInd.reserve(numGhostEdges); //Index Vector - GMate.reserve(numGhostVertices); //Ghost Mate Vector - } catch ( length_error ) { - cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; - cout<<"Not enough memory to allocate the internal variables \n"; - exit(1); - } - //Initialize the Vectors: - verGhostPtr.resize(numGhostVertices+1, 0); //Pointer Vector - tempCounter.resize(numGhostVertices, 0); //Temporary Counter - verGhostInd.resize(numGhostEdges, -1); //Index Vector - 
GMate.resize(numGhostVertices, -1); //Temporary Counter - verGhostPtr[0] = 0; //The first value + + //Initialize adjacency Lists for Ghost Vertices: + try { + verGhostPtr.reserve(numGhostVertices + 1); //Pointer Vector + tempCounter.reserve(numGhostVertices); //Pointer Vector + verGhostInd.reserve(numGhostEdges); //Index Vector + GMate.reserve(numGhostVertices); //Ghost Mate Vector + } catch (length_error) { + cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; + cout << "Not enough memory to allocate the internal variables \n"; + exit(1); + } + //Initialize the Vectors: + verGhostPtr.resize(numGhostVertices + 1, 0); //Pointer Vector + tempCounter.resize(numGhostVertices, 0); //Temporary Counter + verGhostInd.resize(numGhostEdges, -1); //Index Vector + GMate.resize(numGhostVertices, -1); //Temporary Counter + verGhostPtr[0] = 0; //The first value #ifdef PRINT_DEBUG_INFO_ - cout<<"\n("< 0 ) cout< EndIndex) ) { //Find a ghost +#ifdef OMP #pragma omp critical { +#endif insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert verGhostInd[insertMe] = v + StartIndex; //Add the adjacency tempCounter[Ghost2LocalMap[w]]++; //Increment the counter +#ifdef OMP } +#endif } //End of if((w < StartIndex) || (w > EndIndex)) } //End of for(k) } //End of for (v) tempCounter.clear(); //Do not need this any more +#ifdef OMP + } //end of parallel region +#endif #ifdef TIME_TRACKER verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization; fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization); diff --git a/exec.sh b/exec.sh index 02f4012a..d6e77a21 100755 --- a/exec.sh +++ b/exec.sh @@ -2,6 +2,6 @@ make all cd samples/advanced/pdegen make amg_d_pde3d cd runs -mpirun -np 8 amg_d_pde3d amg_pde3d.inp +mpirun -np 2 amg_d_pde3d amg_pde3d.inp