Single parallel region with multiple for loops

Added OMP for testing
omp-walther
StefanoPetrilli 3 years ago
parent 8f6dc5fac2
commit 0a8debe43a

@ -4,7 +4,7 @@ INCDIR=../../../include
MODDIR=../../../modules MODDIR=../../../modules
HERE=../.. HERE=../..
FINCLUDES=$(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(PSBLAS_INCLUDES) -fopenmp FINCLUDES=$(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(PSBLAS_INCLUDES)
CXXINCLUDES=$(FMFLAG)$(HERE) $(FMFLAG)$(INCDIR) $(FMFLAG)/. CXXINCLUDES=$(FMFLAG)$(HERE) $(FMFLAG)$(INCDIR) $(FMFLAG)/.
#CINCLUDES= -I${SUPERLU_INCDIR} -I${HSL_INCDIR} -I${SPRAL_INCDIR} -I/home/users/pasqua/Ambra/BootCMatch/include -lBCM -L/home/users/pasqua/Ambra/BootCMatch/lib -lm #CINCLUDES= -I${SUPERLU_INCDIR} -I${HSL_INCDIR} -I${SPRAL_INCDIR} -I/home/users/pasqua/Ambra/BootCMatch/include -lBCM -L/home/users/pasqua/Ambra/BootCMatch/lib -lm

@ -66,6 +66,7 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge,
myRank,NLVer, NLEdge,verDistance[0],verDistance[1]); myRank,NLVer, NLEdge,verDistance[0],verDistance[1]);
#endif #endif
#define TIME_TRACKER
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
double tmr = MPI_Wtime(); double tmr = MPI_Wtime();
#endif #endif
@ -80,7 +81,7 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge,
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
tmr = MPI_Wtime() - tmr; tmr = MPI_Wtime() - tmr;
fprintf(stderr, "Elaboration time: %f\n", tmr); fprintf(stderr, "Elaboration time: %f for $ld\n", tmr, NLEdge);
#endif #endif
#endif #endif

@ -124,7 +124,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
//inputSubGraph.getStartEndIndices(StartIndex, EndIndex); //inputSubGraph.getStartEndIndices(StartIndex, EndIndex);
MilanLongInt StartIndex = verDistance[myRank]; //The starting vertex owned by the current rank MilanLongInt StartIndex = verDistance[myRank]; //The starting vertex owned by the current rank
//MilanLongInt EndIndex = verDistance[myRank+1]; //The ending vertex owned by the current rank //MilanLongInt EndIndex = verDistance[myRank+1]; //The ending vertex owned by the current rank
MilanLongInt EndIndex = verDistance[myRank+1]-1; //The ending vertex owned by the current rank MilanLongInt EndIndex = verDistance[myRank + 1] - 1; //The ending vertex owned by the current rank
MPI_Status computeStatus; MPI_Status computeStatus;
const int ComputeTag = 7; //Predefined tag const int ComputeTag = 7; //Predefined tag
@ -135,8 +135,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
int message_length; int message_length;
//MilanLongInt NLVer=0, NLEdge=0, StartIndex=0, EndIndex=0; //MilanLongInt NLVer=0, NLEdge=0, StartIndex=0, EndIndex=0;
MilanLongInt msgActual=0, msgInd=0; MilanLongInt msgActual = 0, msgInd = 0;
MilanReal heaviestEdgeWt=0.0f; //Assumes positive weight MilanReal heaviestEdgeWt = 0.0f; //Assumes positive weight
MilanReal startTime, finishTime; MilanReal startTime, finishTime;
//MilanReal Precision = MPI_Wtick(); //Get the precision of the MPI Timer //MilanReal Precision = MPI_Wtick(); //Get the precision of the MPI Timer
startTime = MPI_Wtime(); startTime = MPI_Wtime();
@ -150,18 +150,18 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
cout<<"\n("<<myRank<<")StartIndex: "<<StartIndex<<" EndIndex: "<<EndIndex; fflush(stdout); cout<<"\n("<<myRank<<")StartIndex: "<<StartIndex<<" EndIndex: "<<EndIndex; fflush(stdout);
#endif #endif
//Other Variables: //Other Variables:
MilanLongInt u=-1, v=-1, w=-1, i=0; MilanLongInt u = -1, v = -1, w = -1, i = 0;
MilanLongInt k=-1, adj1=-1, adj2=-1; MilanLongInt k = -1, adj1 = -1, adj2 = -1;
MilanLongInt k1=-1, adj11=-1, adj12=-1; MilanLongInt k1 = -1, adj11 = -1, adj12 = -1;
MilanLongInt myCard = 0; MilanLongInt myCard = 0;
MilanInt Sender=0; // This is the rank of the sending nodes, it has to be an integer! Fabio MilanInt Sender = 0; // This is the rank of the sending nodes, it has to be an integer! Fabio
//Build the Ghost Vertex Set: Vg //Build the Ghost Vertex Set: Vg
map<MilanLongInt, MilanLongInt> Ghost2LocalMap; //Map each ghost vertex to a local vertex map <MilanLongInt, MilanLongInt> Ghost2LocalMap; //Map each ghost vertex to a local vertex
// index that starts with zero to |Vg| - 1 // index that starts with zero to |Vg| - 1
map<MilanLongInt, MilanLongInt>::iterator storedAlready; map<MilanLongInt, MilanLongInt>::iterator storedAlready;
vector<MilanLongInt> Counter; //Store the edge count for each ghost vertex vector <MilanLongInt> Counter; //Store the edge count for each ghost vertex
MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe=0; //Number of Ghost vertices MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe = 0; //Number of Ghost vertices
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")About to compute Ghost Vertices..."; fflush(stdout); cout<<"\n("<<myRank<<")About to compute Ghost Vertices..."; fflush(stdout);
#endif #endif
@ -180,101 +180,123 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
* only when a ghost edge is found and ghost edges are a minority. * only when a ghost edge is found and ghost edges are a minority.
*/ */
//Define Adjacency Lists for Ghost Vertices:
//cout<<"Building Ghost data structures ... \n\n";
vector <MilanLongInt> verGhostPtr, verGhostInd, tempCounter;
//Mate array for ghost vertices:
vector <MilanLongInt> GMate; //Proportional to the number of ghost vertices
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
double Ghost2LocalInitialization = MPI_Wtime(); double Ghost2LocalInitialization = MPI_Wtime();
#endif #endif
#pragma omp parallel for private(insertMe) firstprivate(StartIndex, EndIndex) default(shared) //#define OMP
for ( i=0; i<NLEdge; i++ ) { //O(m) - Each edge stored twice #ifdef OMP
insertMe = verLocInd[i]; #pragma omp parallel private(insertMe, k, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4)
//cout<<"InsertMe on Process "<<myRank<<" is: "<<insertMe<<endl; {
if ( (insertMe < StartIndex) || (insertMe > EndIndex) ) { //Find a ghost #endif
//printf("Id %d\n", omp_get_thread_num());
#ifdef OMP
#pragma omp for
#endif
for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice
insertMe = verLocInd[i];
//cout<<"InsertMe on Process "<<myRank<<" is: "<<insertMe<<endl;
if ((insertMe < StartIndex) || (insertMe > EndIndex)) { //Find a ghost
#ifdef OMP
#pragma omp critical #pragma omp critical
{ {
numGhostEdges++; #endif
storedAlready = Ghost2LocalMap.find(insertMe); numGhostEdges++;
if (storedAlready != Ghost2LocalMap.end()) { //Has already been added storedAlready = Ghost2LocalMap.find(insertMe);
//cout<<"Process "<<myRank<<" found: "<<storedAlready->first<<" - "<<storedAlready->second<<endl; if (storedAlready != Ghost2LocalMap.end()) { //Has already been added
Counter[storedAlready->second]++; //Increment the counter //cout<<"Process "<<myRank<<" found: "<<storedAlready->first<<" - "<<storedAlready->second<<endl;
} else { //Insert an entry for the ghost: Counter[storedAlready->second]++; //Increment the counter
//cout<<"Process "<<myRank<<" * New insert: Key="<<insertMe<< " : Value="<<numGhostVertices<<endl; } else { //Insert an entry for the ghost:
Ghost2LocalMap[insertMe] = numGhostVertices; //Add a map entry //cout<<"Process "<<myRank<<" * New insert: Key="<<insertMe<< " : Value="<<numGhostVertices<<endl;
Counter.push_back(1); //Initialize the counter Ghost2LocalMap[insertMe] = numGhostVertices; //Add a map entry
numGhostVertices++; //Increment the number of ghost vertices Counter.push_back(1); //Initialize the counter
} //End of else() numGhostVertices++; //Increment the number of ghost vertices
} } //End of else()
} //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) ) #ifdef OMP
} //End of for(ghost vertices) }
#endif
} //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
} //End of for(ghost vertices)
#ifdef OMP
#pragma omp single
{
#endif
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization; Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization;
fprintf(stderr, "Ghost2LocalInitialization time: %f\n", Ghost2LocalInitialization); fprintf(stderr, "Ghost2LocalInitialization time: %f\n", Ghost2LocalInitialization);
#endif #endif
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")NGhosts:" << numGhostVertices << " GhostEdges: "<<numGhostEdges; cout<<"\n("<<myRank<<")NGhosts:" << numGhostVertices << " GhostEdges: "<<numGhostEdges;
if (!Ghost2LocalMap.empty()) { if (!Ghost2LocalMap.empty()) {
cout<<"\n("<<myRank<<")Final Map : on process "; cout<<"\n("<<myRank<<")Final Map : on process ";
cout<<"\n("<<myRank<<")Key \t Value \t Counter \n"; fflush(stdout); cout<<"\n("<<myRank<<")Key \t Value \t Counter \n"; fflush(stdout);
storedAlready = Ghost2LocalMap.begin(); storedAlready = Ghost2LocalMap.begin();
do { do {
cout<<storedAlready->second<<" - "<<storedAlready->first<<" : "<<Counter[storedAlready->second]<<endl; cout<<storedAlready->second<<" - "<<storedAlready->first<<" : "<<Counter[storedAlready->second]<<endl;
fflush(stdout); fflush(stdout);
storedAlready++; storedAlready++;
} while ( storedAlready != Ghost2LocalMap.end() ); } while ( storedAlready != Ghost2LocalMap.end() );
} }
#endif #endif
//Build Adjacency Lists for Ghost Vertices:
//cout<<"Building Ghost data structures ... \n\n"; //Initialize adjacency Lists for Ghost Vertices:
vector<MilanLongInt> verGhostPtr, verGhostInd, tempCounter; try {
//Mate array for ghost vertices: verGhostPtr.reserve(numGhostVertices + 1); //Pointer Vector
vector<MilanLongInt> GMate; //Proportional to the number of ghost vertices tempCounter.reserve(numGhostVertices); //Pointer Vector
try { verGhostInd.reserve(numGhostEdges); //Index Vector
verGhostPtr.reserve(numGhostVertices+1); //Pointer Vector GMate.reserve(numGhostVertices); //Ghost Mate Vector
tempCounter.reserve(numGhostVertices); //Pointer Vector } catch (length_error) {
verGhostInd.reserve(numGhostEdges); //Index Vector cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n";
GMate.reserve(numGhostVertices); //Ghost Mate Vector cout << "Not enough memory to allocate the internal variables \n";
} catch ( length_error ) { exit(1);
cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; }
cout<<"Not enough memory to allocate the internal variables \n"; //Initialize the Vectors:
exit(1); verGhostPtr.resize(numGhostVertices + 1, 0); //Pointer Vector
} tempCounter.resize(numGhostVertices, 0); //Temporary Counter
//Initialize the Vectors: verGhostInd.resize(numGhostEdges, -1); //Index Vector
verGhostPtr.resize(numGhostVertices+1, 0); //Pointer Vector GMate.resize(numGhostVertices, -1); //Temporary Counter
tempCounter.resize(numGhostVertices, 0); //Temporary Counter verGhostPtr[0] = 0; //The first value
verGhostInd.resize(numGhostEdges, -1); //Index Vector
GMate.resize(numGhostVertices, -1); //Temporary Counter
verGhostPtr[0] = 0; //The first value
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Ghost Vertex Pointer: "; fflush(stdout); cout<<"\n("<<myRank<<")Ghost Vertex Pointer: "; fflush(stdout);
#endif #endif
#define TIME_TRACKER
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
double verGhostPtrInitialization = MPI_Wtime(); double verGhostPtrInitialization = MPI_Wtime();
#endif #endif
/* #ifdef OMP
* OMP verGhostPtrInitialization }
* #endif
*/ /*
* OMP verGhostPtrInitialization
*
*/
#pragma omp parallel for default(shared) #ifdef OMP
for ( i=0; i<numGhostVertices; i++ ) { //O(|Ghost Vertices|) #pragma omp for nowait
verGhostPtr[i+1] = verGhostPtr[i] + Counter[i]; #endif
for (i = 0; i < numGhostVertices; i++) { //O(|Ghost Vertices|)
verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i];
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<verGhostPtr[i]<<"\t"; fflush(stdout); cout<<verGhostPtr[i]<<"\t"; fflush(stdout);
#endif #endif
} }
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
verGhostPtrInitialization = MPI_Wtime() - verGhostPtrInitialization; verGhostPtrInitialization = MPI_Wtime() - verGhostPtrInitialization;
fprintf(stderr, "verGhostPtrInitialization time: %f\n", verGhostPtrInitialization); fprintf(stderr, "verGhostPtrInitialization time: %f\n", verGhostPtrInitialization);
#endif #endif
#undef TIME_TRACKER
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
if ( numGhostVertices > 0 ) if ( numGhostVertices > 0 )
cout<<verGhostPtr[numGhostVertices]<<"\n"; cout<<verGhostPtr[numGhostVertices]<<"\n";
@ -299,24 +321,33 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
double verGhostIndInitialization = MPI_Wtime(); double verGhostIndInitialization = MPI_Wtime();
#endif #endif
#pragma omp parallel for private(insertMe, k, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) #ifdef OMP
#pragma omp for
#endif
for ( v=0; v < NLVer; v++ ) { for ( v=0; v < NLVer; v++ ) {
adj1 = verLocPtr[v]; //Vertex Pointer adj1 = verLocPtr[v]; //Vertex Pointer
adj2 = verLocPtr[v+1]; adj2 = verLocPtr[v+1];
for( k = adj1; k < adj2; k++ ) { for( k = adj1; k < adj2; k++ ) {
w = verLocInd[k]; //Get the adjacent vertex w = verLocInd[k]; //Get the adjacent vertex
if ( (w < StartIndex) || (w > EndIndex) ) { //Find a ghost if ( (w < StartIndex) || (w > EndIndex) ) { //Find a ghost
#ifdef OMP
#pragma omp critical #pragma omp critical
{ {
#endif
insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert
verGhostInd[insertMe] = v + StartIndex; //Add the adjacency verGhostInd[insertMe] = v + StartIndex; //Add the adjacency
tempCounter[Ghost2LocalMap[w]]++; //Increment the counter tempCounter[Ghost2LocalMap[w]]++; //Increment the counter
#ifdef OMP
} }
#endif
} //End of if((w < StartIndex) || (w > EndIndex)) } //End of if((w < StartIndex) || (w > EndIndex))
} //End of for(k) } //End of for(k)
} //End of for (v) } //End of for (v)
tempCounter.clear(); //Do not need this any more tempCounter.clear(); //Do not need this any more
#ifdef OMP
} //end of parallel region
#endif
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization; verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization;
fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization); fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization);

@ -2,6 +2,6 @@ make all
cd samples/advanced/pdegen cd samples/advanced/pdegen
make amg_d_pde3d make amg_d_pde3d
cd runs cd runs
mpirun -np 8 amg_d_pde3d amg_pde3d.inp mpirun -np 2 amg_d_pde3d amg_pde3d.inp

Loading…
Cancel
Save