Reformat initialize, refactoring of initialize completed

omp-walther
StefanoPetrilli 3 years ago
parent 7741abd45d
commit ea040ae5ee

@ -214,6 +214,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
bool isEmpty; bool isEmpty;
//Declare the locks //Declare the locks
// TODO destroy the locks
omp_lock_t MateLock[NLVer]; omp_lock_t MateLock[NLVer];
initialize(NLVer, NLEdge, StartIndex, initialize(NLVer, NLEdge, StartIndex,

@ -8,35 +8,34 @@
#include "dataStrStaticQueue.h" #include "dataStrStaticQueue.h"
#include "omp.h" #include "omp.h"
#define NUM_THREAD 4 #define NUM_THREAD 12
inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt StartIndex, MilanLongInt EndIndex, MilanLongInt StartIndex, MilanLongInt EndIndex,
MilanLongInt* numGhostEdgesPtr, MilanLongInt *numGhostEdgesPtr,
MilanLongInt* numGhostVerticesPtr, MilanLongInt *numGhostVerticesPtr,
MilanLongInt* S, MilanLongInt *S,
MilanLongInt* verLocInd, MilanLongInt *verLocInd,
MilanLongInt* verLocPtr, MilanLongInt *verLocPtr,
omp_lock_t* MateLock, omp_lock_t *MateLock,
map <MilanLongInt, MilanLongInt> &Ghost2LocalMap, map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
vector <MilanLongInt>& Counter, vector<MilanLongInt> &Counter,
vector <MilanLongInt>& verGhostPtr, vector<MilanLongInt> &verGhostPtr,
vector <MilanLongInt>& verGhostInd, vector<MilanLongInt> &verGhostInd,
vector <MilanLongInt>& tempCounter, vector<MilanLongInt> &tempCounter,
vector <MilanLongInt>& GMate, vector<MilanLongInt> &GMate,
vector<MilanLongInt>& Message, vector<MilanLongInt> &Message,
vector<MilanLongInt>& QLocalVtx, vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt>& QGhostVtx, vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt>& QMsgType, vector<MilanLongInt> &QMsgType,
vector<MilanInt>& QOwner, vector<MilanInt> &QOwner,
MilanLongInt* &candidateMate, MilanLongInt *&candidateMate,
staticQueue& U, staticQueue &U,
staticQueue& privateU, staticQueue &privateU,
staticQueue& privateQLocalVtx, staticQueue &privateQLocalVtx,
staticQueue& privateQGhostVtx, staticQueue &privateQGhostVtx,
staticQueue& privateQMsgType, staticQueue &privateQMsgType,
staticQueue& privateQOwner staticQueue &privateQOwner)
)
{ {
MilanLongInt insertMe = 0, numGhostEdges = 0, numGhostVertices = 0; MilanLongInt insertMe = 0, numGhostEdges = 0, numGhostVertices = 0;
@ -48,53 +47,55 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
#pragma omp parallel private(insertMe, k, w, v, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(NUM_THREAD) #pragma omp parallel private(insertMe, k, w, v, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(NUM_THREAD)
{ {
#pragma omp single #pragma omp single
{ {
//Initialize the locks // Initialize the locks
//TODO this can be executed as task in parallel with other unparallelizable tasks
//TODO destroy the locks
#pragma omp taskloop num_tasks(NUM_THREAD) #pragma omp taskloop num_tasks(NUM_THREAD)
for(i = 0; i < NLVer; i++) for (i = 0; i < NLVer; i++)
omp_init_lock(&MateLock[i]); omp_init_lock(&MateLock[i]);
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
double Ghost2LocalInitialization = MPI_Wtime(); double Ghost2LocalInitialization = MPI_Wtime();
#endif #endif
/* /*
* OMP Ghost2LocalInitialization * OMP Ghost2LocalInitialization
* The cycle analyzes all the edges and when it finds a ghost edge * This loop analyzes all the edges and when it finds a ghost edge
* puts it in the Ghost2LocalMap. * puts it in the Ghost2LocalMap.
* A critical region is needed when inserting data in the map. * A critical region is needed when inserting data in the map.
* *
* Despite the critical region it is still productive to * Despite the critical region it is still productive to
* parallelize this for because the critical region is executed * parallelize this cycle because the critical region is executed
* only when a ghost edge is found and ghost edges are a minority, * only when a ghost edge is found and ghost edges are a minority,
* circa 3.5% during the tests. * circa 3.5% during the tests.
*/ */
#pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ : numGhostEdges) depend ( out : numGhostEdges, Counter, Ghost2LocalMap ) #pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ \
for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice : numGhostEdges) depend(out \
insertMe = verLocInd[i]; : numGhostEdges, Counter, Ghost2LocalMap)
//cout<<"InsertMe on Process "<<myRank<<" is: "<<insertMe<<endl; for (i = 0; i < NLEdge; i++)
if ((insertMe < StartIndex) || (insertMe > EndIndex)) { //Find a ghost { // O(m) - Each edge stored twice
numGhostEdges++; insertMe = verLocInd[i];
if ((insertMe < StartIndex) || (insertMe > EndIndex))
{ // Find a ghost
numGhostEdges++;
#pragma omp critical #pragma omp critical
{ {
storedAlready = Ghost2LocalMap.find(insertMe); storedAlready = Ghost2LocalMap.find(insertMe);
if (storedAlready != Ghost2LocalMap.end()) { //Has already been added if (storedAlready != Ghost2LocalMap.end())
//cout<<"Process "<<myRank<<" found: "<<storedAlready->first<<" - "<<storedAlready->second<<endl; { // Has already been added
Counter[storedAlready->second]++; //Increment the counter Counter[storedAlready->second]++; // Increment the counter
} else { //Insert an entry for the ghost: }
//cout<<"Process "<<myRank<<" * New insert: Key="<<insertMe<< " : Value="<<numGhostVertices<<endl; else
Ghost2LocalMap[insertMe] = numGhostVertices; //Add a map entry { // Insert an entry for the ghost:
Counter.push_back(1); //Initialize the counter Ghost2LocalMap[insertMe] = numGhostVertices; // Add a map entry
numGhostVertices++; //Increment the number of ghost vertices Counter.push_back(1); // Initialize the counter
} //End of else() numGhostVertices++; // Increment the number of ghost vertices
} } // End of else()
} //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) ) }
} //End of for(ghost vertices) } // End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
} // End of for(ghost vertices)
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization; Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization;
@ -102,184 +103,218 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
#endif #endif
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")NGhosts:" << numGhostVertices << " GhostEdges: "<<numGhostEdges; cout << "\n(" << myRank << ")NGhosts:" << numGhostVertices << " GhostEdges: " << numGhostEdges;
if (!Ghost2LocalMap.empty()) { if (!Ghost2LocalMap.empty())
cout<<"\n("<<myRank<<")Final Map : on process "; {
cout<<"\n("<<myRank<<")Key \t Value \t Counter \n"; fflush(stdout); cout << "\n(" << myRank << ")Final Map : on process ";
cout << "\n(" << myRank << ")Key \t Value \t Counter \n";
fflush(stdout);
storedAlready = Ghost2LocalMap.begin(); storedAlready = Ghost2LocalMap.begin();
do { do
cout<<storedAlready->second<<" - "<<storedAlready->first<<" : "<<Counter[storedAlready->second]<<endl; {
cout << storedAlready->second << " - " << storedAlready->first << " : " << Counter[storedAlready->second] << endl;
fflush(stdout); fflush(stdout);
storedAlready++; storedAlready++;
} while ( storedAlready != Ghost2LocalMap.end() ); } while (storedAlready != Ghost2LocalMap.end());
} }
#endif #endif
#pragma omp task depend ( out : verGhostPtr, tempCounter, verGhostInd, GMate) depend ( in : numGhostVertices) #pragma omp task depend(out \
{ : verGhostPtr, tempCounter, verGhostInd, GMate) depend(in \
: numGhostVertices)
{
//Initialize adjacency Lists for Ghost Vertices: // Initialize adjacency Lists for Ghost Vertices:
try { try
verGhostPtr.reserve(numGhostVertices + 1); //Pointer Vector {
tempCounter.reserve(numGhostVertices); //Pointer Vector verGhostPtr.reserve(numGhostVertices + 1); // Pointer Vector
verGhostInd.reserve(numGhostEdges); //Index Vector tempCounter.reserve(numGhostVertices); // Pointer Vector
GMate.reserve(numGhostVertices); //Ghost Mate Vector verGhostInd.reserve(numGhostEdges); // Index Vector
} catch (length_error) { GMate.reserve(numGhostVertices); // Ghost Mate Vector
cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; }
cout << "Not enough memory to allocate the internal variables \n"; catch (length_error)
exit(1); {
} cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n";
//Initialize the Vectors: cout << "Not enough memory to allocate the internal variables \n";
verGhostPtr.resize(numGhostVertices + 1, 0); //Pointer Vector exit(1);
tempCounter.resize(numGhostVertices, 0); //Temporary Counter }
verGhostInd.resize(numGhostEdges, -1); //Index Vector // Initialize the Vectors:
GMate.resize(numGhostVertices, -1); //Temporary Counter verGhostPtr.resize(numGhostVertices + 1, 0); // Pointer Vector
verGhostPtr[0] = 0; //The first value tempCounter.resize(numGhostVertices, 0); // Temporary Counter
verGhostInd.resize(numGhostEdges, -1); // Index Vector
GMate.resize(numGhostVertices, -1); // Temporary Counter
verGhostPtr[0] = 0; // The first value
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Ghost Vertex Pointer: "; fflush(stdout); cout << "\n(" << myRank << ")Ghost Vertex Pointer: ";
fflush(stdout);
#endif #endif
#ifdef TIME_TRACKER } // End of task
double verGhostPtrInitialization = MPI_Wtime();
#endif
} // End of task #pragma omp task depent(out \
: verGhostPtr) depend(in \
: Counter, numGhostVertices)
{
#pragma omp task depent ( out : verGhostPtr ) depend ( in : Counter, numGhostVertices) #ifdef TIME_TRACKER
{ double verGhostPtrInitialization = MPI_Wtime();
for (i = 0; i < numGhostVertices; i++) { //O(|Ghost Vertices|) #endif
verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i]; for (i = 0; i < numGhostVertices; i++)
{ // O(|Ghost Vertices|)
verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i];
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<verGhostPtr[i]<<"\t"; fflush(stdout); cout << verGhostPtr[i] << "\t";
fflush(stdout);
#endif #endif
} }
}//End of task
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
verGhostPtrInitialization = MPI_Wtime() - verGhostPtrInitialization; verGhostPtrInitialization = MPI_Wtime() - verGhostPtrInitialization;
fprintf(stderr, "verGhostPtrInitialization time: %f\n", verGhostPtrInitialization); fprintf(stderr, "verGhostPtrInitialization time: %f\n", verGhostPtrInitialization);
#endif #endif
} // End of task
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
if ( numGhostVertices > 0 ) if (numGhostVertices > 0)
cout<<verGhostPtr[numGhostVertices]<<"\n"; cout << verGhostPtr[numGhostVertices] << "\n";
fflush(stdout); fflush(stdout);
#endif #endif
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
double verGhostIndInitialization = MPI_Wtime(); double verGhostIndInitialization = MPI_Wtime();
#endif #endif
/* /*
* OMP verGhostIndInitialization * OMP verGhostIndInitialization
* *
* In this cycle the verGhostInd is initialized * In this cycle the verGhostInd is initialized
* with the data related to ghost edges. * with the data related to ghost edges.
* The check to see if a node is a ghost node is * The check to see if a node is a ghost node is
* executed in parallel and when a ghost node * executed in parallel and when a ghost node
* is found a critical region is started. * is found a critical region is started.
* *
* Despite the critical region it's still useful to * Despite the critical region it's still useful to
* parallelize the loop because the ghost nodes * parallelize the loop because the ghost nodes
* are a minority hence the critical region is executed * are a minority hence the critical region is executed
* few times, circa 3.5% of the times in the tests. * few times, circa 3.5% of the times in the tests.
*/ */
#pragma omp taskloop num_tasks(NUM_THREAD) depend ( in : insertMe, Ghost2LocalMap, tempCounter) depend ( out : verGhostInd) #pragma omp taskloop num_tasks(NUM_THREAD) depend(in \
for (v = 0; v < NLVer; v++) { : insertMe, Ghost2LocalMap, tempCounter) depend(out \
adj1 = verLocPtr[v]; //Vertex Pointer : verGhostInd)
adj2 = verLocPtr[v + 1]; for (v = 0; v < NLVer; v++)
for (k = adj1; k < adj2; k++) { {
w = verLocInd[k]; //Get the adjacent vertex adj1 = verLocPtr[v]; // Vertex Pointer
if ((w < StartIndex) || (w > EndIndex)) { //Find a ghost adj2 = verLocPtr[v + 1];
for (k = adj1; k < adj2; k++)
{
w = verLocInd[k]; // Get the adjacent vertex
if ((w < StartIndex) || (w > EndIndex))
{ // Find a ghost
#pragma omp critical #pragma omp critical
{ {
insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; // Where to insert
tempCounter[Ghost2LocalMap[w]]++; //Increment the counter tempCounter[Ghost2LocalMap[w]]++; // Increment the counter
} }
verGhostInd[insertMe] = v + StartIndex; //Add the adjacency verGhostInd[insertMe] = v + StartIndex; // Add the adjacency
} //End of if((w < StartIndex) || (w > EndIndex)) } // End of if((w < StartIndex) || (w > EndIndex))
} //End of for(k) } // End of for(k)
} //End of for (v) } // End of for (v)
} // End of parallel region } // End of parallel region
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization; verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization;
fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization); fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization);
#endif #endif
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Ghost Vertex Index: "; cout << "\n(" << myRank << ")Ghost Vertex Index: ";
for ( v=0; v < numGhostEdges; v++ ) for (v = 0; v < numGhostEdges; v++)
cout<<verGhostInd[v]<<"\t"; cout << verGhostInd[v] << "\t";
cout<<endl; fflush(stdout); cout << endl;
fflush(stdout);
#endif #endif
#pragma omp task depend ( in : numGhostEdges) depend ( out : QLocalVtx, QGhostVtx, QMsgType, QOwner ) #pragma omp task depend(in \
{ : numGhostEdges) depend(out \
try { : QLocalVtx, QGhostVtx, QMsgType, QOwner)
QLocalVtx.reserve(numGhostEdges); //Local Vertex {
QGhostVtx.reserve(numGhostEdges); //Ghost Vertex try
QMsgType.reserve(numGhostEdges); //Message Type (Request/Failure) {
QOwner.reserve(numGhostEdges); //Owner of the ghost: COmpute once and use later QLocalVtx.reserve(numGhostEdges); // Local Vertex
} catch (length_error) { QGhostVtx.reserve(numGhostEdges); // Ghost Vertex
QMsgType.reserve(numGhostEdges); // Message Type (Request/Failure)
QOwner.reserve(numGhostEdges); // Owner of the ghost: COmpute once and use later
}
catch (length_error)
{
cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n";
cout << "Not enough memory to allocate the internal variables \n"; cout << "Not enough memory to allocate the internal variables \n";
exit(1); exit(1);
} }
} }
#pragma omp task depend( in : numGhostEdges, numGhostVertices ) depend ( out : candidateMate, S, U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner)
{
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout); cout << "\n(" << myRank << ")Allocating CandidateMate.. ";
fflush(stdout);
#endif #endif
*numGhostEdgesPtr = numGhostEdges; #ifdef PRINT_DEBUG_INFO_
*numGhostVerticesPtr = numGhostVertices; cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
//Allocate Data Structures: fflush(stdout);
/*
* candidateMate was a vector and has been replaced with an array
* there is no point in using the vector (or maybe there is (???))
* so I replaced it with an array wich is slightly faster
*/
candidateMate = new MilanLongInt[NLVer + numGhostVertices];
#ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<"=========================************==============================="<<endl; fflush(stdout);
fflush(stdout);
#endif #endif
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<") Setup Time :"<< *ph0_time <<endl; fflush(stdout); cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl;
fflush(stdout); fflush(stdout);
fflush(stdout);
#endif #endif
#ifdef DEBUG_HANG_ #ifdef DEBUG_HANG_
if (myRank == 0) cout<<"\n("<<myRank<<") Setup Time :"<< *ph0_time <<endl; fflush(stdout); if (myRank == 0)
cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl;
fflush(stdout);
#endif #endif
*S = numGhostVertices; //Initialize S with number of Ghost Vertices #pragma omp task depend(in \
: numGhostEdges, numGhostVertices) depend(out \
/* : candidateMate, S, U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner)
* Create the Queue Data Structure for the Dominating Set {
*
* I had to declare the staticuQueue U before the parallel region
* to have it in the correct scope. Since we can't change the dimension //The values calculated in this function are sent back to the calling function
* of a staticQueue I had to destroy the previous object and instantiate *numGhostEdgesPtr = numGhostEdges;
* a new one of the correct size. *numGhostVerticesPtr = numGhostVertices;
*/
new(&U) staticQueue(NLVer + numGhostVertices); // Allocate Data Structures:
/*
//TODO how can I decide a more meaningfull size? * candidateMate was a vector and has been replaced with an array
MilanLongInt size = numGhostVertices; * there is no point in using the vector (or maybe there is (???))
* so I replaced it with an array wich is slightly faster
new(&privateU) staticQueue(NLVer + numGhostVertices); //TODO how can I put a meaningfull size? */
new(&privateQLocalVtx) staticQueue(size); candidateMate = new MilanLongInt[NLVer + numGhostVertices];
new(&privateQGhostVtx) staticQueue(size);
new(&privateQMsgType) staticQueue(size); *S = numGhostVertices; // Initialize S with number of Ghost Vertices
new(&privateQOwner) staticQueue(size);
} /*
* Create the Queue Data Structure for the Dominating Set
*
* I had to declare the staticuQueue U before the parallel region
* to have it in the correct scope. Since we can't change the dimension
* of a staticQueue I had to destroy the previous object and instantiate
* a new one of the correct size.
*/
new (&U) staticQueue(NLVer + numGhostVertices);
// TODO how can I decide a more meaningfull size?
MilanLongInt size = numGhostVertices;
// Initialize the private data structure
new (&privateU) staticQueue(NLVer + numGhostVertices); // TODO how can I put a meaningfull size?
new (&privateQLocalVtx) staticQueue(size);
new (&privateQGhostVtx) staticQueue(size);
new (&privateQMsgType) staticQueue(size);
new (&privateQOwner) staticQueue(size);
}
} // End of single } // End of single
} }

Loading…
Cancel
Save