initialize fix

omp-walther
StefanoPetrilli 2 years ago
parent ea040ae5ee
commit 1aca17cd44

@@ -8,7 +8,7 @@
 #include "dataStrStaticQueue.h"
 #include "omp.h"
-#define NUM_THREAD 12
+#define NUM_THREAD 4
 inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
                        MilanLongInt StartIndex, MilanLongInt EndIndex,
@@ -50,7 +50,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 #pragma omp single
     {
         // Initialize the locks
 #pragma omp taskloop num_tasks(NUM_THREAD)
         for (i = 0; i < NLVer; i++)
@@ -71,32 +70,38 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
      * only when a ghost edge is found and ghost edges are a minority,
      * circa 3.5% during the tests.
      */
-#pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ : numGhostEdges) depend(out : numGhostEdges, Counter, Ghost2LocalMap)
+#pragma omp task depend(out : numGhostEdges, Counter, Ghost2LocalMap, insertMe, storedAlready, numGhostVertices)
+        {
+#pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ : numGhostEdges)
             for (i = 0; i < NLEdge; i++)
             { // O(m) - Each edge stored twice
                 insertMe = verLocInd[i];
                 if ((insertMe < StartIndex) || (insertMe > EndIndex))
                 { // Find a ghost
                     numGhostEdges++;
 #pragma omp critical
                     {
                         storedAlready = Ghost2LocalMap.find(insertMe);
                         if (storedAlready != Ghost2LocalMap.end())
                         { // Has already been added
                             Counter[storedAlready->second]++; // Increment the counter
                         }
                         else
                         { // Insert an entry for the ghost:
                             Ghost2LocalMap[insertMe] = numGhostVertices; // Add a map entry
                             Counter.push_back(1);                        // Initialize the counter
                             numGhostVertices++;                          // Increment the number of ghost vertices
                         } // End of else()
                     }
                 } // End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
             }     // End of for(ghost vertices)
-        // numGhostEdges = atomicNumGhostEdges;
+        } // end of task depend
 #ifdef TIME_TRACKER
     Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization;
     fprintf(stderr, "Ghost2LocalInitialization time: %f\n", Ghost2LocalInitialization);
@@ -121,7 +126,7 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
-#pragma omp task depend(out : verGhostPtr, tempCounter, verGhostInd, GMate) depend(in : numGhostVertices)
+#pragma omp task depend(out : verGhostPtr, tempCounter, verGhostInd, GMate) depend(in : numGhostVertices, numGhostEdges)
     {
         // Initialize adjacency Lists for Ghost Vertices:
@@ -151,7 +156,7 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
     } // End of task
-#pragma omp task depent(out : verGhostPtr) depend(in : Counter, numGhostVertices)
+#pragma omp task depend(out : verGhostPtr) depend(in : Counter, numGhostVertices)
     {
@@ -198,123 +203,126 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
      * are a minority hence the critical region is executed
      * few times, circa 3.5% of the times in the tests.
      */
-#pragma omp taskloop num_tasks(NUM_THREAD) depend(in : insertMe, Ghost2LocalMap, tempCounter) depend(out : verGhostInd)
+#pragma omp task depend(in : insertMe, Ghost2LocalMap, tempCounter, verGhostPtr) depend(out : verGhostInd)
+    {
+#pragma omp taskloop num_tasks(NUM_THREAD)
         for (v = 0; v < NLVer; v++)
         {
             adj1 = verLocPtr[v]; // Vertex Pointer
             adj2 = verLocPtr[v + 1];
             for (k = adj1; k < adj2; k++)
             {
                 w = verLocInd[k]; // Get the adjacent vertex
                 if ((w < StartIndex) || (w > EndIndex))
                 { // Find a ghost
 #pragma omp critical
                     {
                         insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; // Where to insert
                         tempCounter[Ghost2LocalMap[w]]++;                                           // Increment the counter
                     }
                     verGhostInd[insertMe] = v + StartIndex; // Add the adjacency
                 } // End of if((w < StartIndex) || (w > EndIndex))
             }     // End of for(k)
         }         // End of for (v)
-        } // End of parallel region
+    } // end of taskloop
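The loop above fills each ghost vertex's slice of verGhostInd by reserving the next free slot under a critical section; only the pointer-plus-counter read-modify-write is protected, and the adjacency itself is written outside it. A minimal standalone sketch of that slot-reservation pattern, with illustrative names and data (slicePtr, used, ind and target are not the project's variables):

#include <omp.h>
#include <cstdio>

int main() {
    // Each "ghost" owns a contiguous slice of ind[]; slice sizes match the counts exactly.
    const int nGhosts = 3;
    int slicePtr[nGhosts + 1] = {0, 2, 5, 9};               // CSR-style slice boundaries
    int used[nGhosts] = {0, 0, 0};                          // slots filled so far, per ghost
    int target[9] = {0, 1, 2, 1, 2, 2, 0, 1, 2};            // ghost hit by each edge
    int ind[9];

#pragma omp parallel for
    for (int e = 0; e < 9; e++) {
        int g = target[e]; // stand-in for Ghost2LocalMap[w]
        int slot;
#pragma omp critical
        {
            slot = slicePtr[g] + used[g]; // where to insert
            used[g]++;                    // reserve the slot
        }
        ind[slot] = e; // write outside the critical section
    }

    for (int e = 0; e < 9; e++)
        printf("%d ", ind[e]);
    printf("\n");
    return 0;
}

In the real code the slice boundaries come from verGhostPtr, which is built from Counter in an earlier task; that is why verGhostPtr now appears in the depend(in) list of this task.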
 #ifdef TIME_TRACKER
     verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization;
     fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization);
 #endif
 #ifdef PRINT_DEBUG_INFO_
     cout << "\n(" << myRank << ")Ghost Vertex Index: ";
     for (v = 0; v < numGhostEdges; v++)
         cout << verGhostInd[v] << "\t";
     cout << endl;
     fflush(stdout);
 #endif
 #pragma omp task depend(in : numGhostEdges) depend(out : QLocalVtx, QGhostVtx, QMsgType, QOwner)
     {
         try
         {
             QLocalVtx.reserve(numGhostEdges); // Local Vertex
             QGhostVtx.reserve(numGhostEdges); // Ghost Vertex
             QMsgType.reserve(numGhostEdges);  // Message Type (Request/Failure)
             QOwner.reserve(numGhostEdges);    // Owner of the ghost: Compute once and use later
         }
         catch (length_error)
         {
             cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n";
             cout << "Not enough memory to allocate the internal variables \n";
             exit(1);
         }
-    }
+    } // end of task
 #ifdef PRINT_DEBUG_INFO_
     cout << "\n(" << myRank << ")Allocating CandidateMate.. ";
     fflush(stdout);
 #endif
 #ifdef PRINT_DEBUG_INFO_
     cout << "\n(" << myRank << "=========================************===============================" << endl;
     fflush(stdout);
     fflush(stdout);
 #endif
 #ifdef PRINT_DEBUG_INFO_
     cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl;
     fflush(stdout);
     fflush(stdout);
 #endif
 #ifdef DEBUG_HANG_
     if (myRank == 0)
         cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl;
     fflush(stdout);
 #endif
 #pragma omp task depend(in : numGhostEdges, numGhostVertices) depend(out : candidateMate, S, U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner)
     {
         // The values calculated in this function are sent back to the calling function
         *numGhostEdgesPtr = numGhostEdges;
         *numGhostVerticesPtr = numGhostVertices;

         // Allocate Data Structures:
         /*
          * candidateMate was a vector and has been replaced with an array
          * there is no point in using the vector (or maybe there is (???))
          * so I replaced it with an array which is slightly faster
          */
         candidateMate = new MilanLongInt[NLVer + numGhostVertices];

         *S = numGhostVertices; // Initialize S with number of Ghost Vertices

         /*
          * Create the Queue Data Structure for the Dominating Set
          *
          * I had to declare the staticQueue U before the parallel region
          * to have it in the correct scope. Since we can't change the dimension
          * of a staticQueue I had to destroy the previous object and instantiate
          * a new one of the correct size.
          */
         new (&U) staticQueue(NLVer + numGhostVertices);

         // TODO how can I decide a more meaningful size?
         MilanLongInt size = numGhostVertices;

         // Initialize the private data structure
         new (&privateU) staticQueue(NLVer + numGhostVertices); // TODO how can I put a meaningful size?
         new (&privateQLocalVtx) staticQueue(size);
         new (&privateQGhostVtx) staticQueue(size);
         new (&privateQMsgType) staticQueue(size);
         new (&privateQOwner) staticQueue(size);
-    }
-    } // End of single
+    } // end of task
+    } // End of single region
+} // End of parallel region
 }
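The comment in the last task explains why U and the private queues are declared before the parallel region and then rebuilt with placement new once the correct size is known: a staticQueue cannot be resized after construction. A minimal sketch of that idiom with a toy type standing in for staticQueue (FixedBuf and its members are illustrative, not part of the project); the sketch calls the destructor explicitly before reconstructing, which the idiom strictly requires so the placeholder object's resources are not leaked:

#include <new>
#include <cstdio>

struct FixedBuf {
    int *data;
    int cap;
    explicit FixedBuf(int n = 0) : data(n ? new int[n] : nullptr), cap(n) {}
    ~FixedBuf() { delete[] data; }
};

int main() {
    FixedBuf U(0);            // declared before the size is known
    int correctSize = 42;     // e.g. NLVer + numGhostVertices

    U.~FixedBuf();                   // destroy the placeholder first
    new (&U) FixedBuf(correctSize);  // construct the real object in the same storage

    printf("capacity: %d\n", U.cap);
    return 0;
} // U's destructor runs normally at scope exit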

@@ -1,3 +1,4 @@
+rm amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.o
 make all
 cd samples/advanced/pdegen
 make amg_d_pde3d
