You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
amg4psblas/amgprec/impl/aggregator/initialize.cpp

306 lines
14 KiB
C++

#include "MatchBoxPC.h"
#ifdef OMP
void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt StartIndex, MilanLongInt EndIndex,
MilanLongInt *numGhostEdges,
MilanLongInt *numGhostVertices,
MilanLongInt *S,
MilanLongInt *verLocInd,
MilanLongInt *verLocPtr,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
vector<MilanLongInt> &Counter,
vector<MilanLongInt> &verGhostPtr,
vector<MilanLongInt> &verGhostInd,
vector<MilanLongInt> &tempCounter,
vector<MilanLongInt> &GMate,
vector<MilanLongInt> &Message,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
MilanLongInt *&candidateMate,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner)
{
MilanLongInt insertMe = 0;
MilanLongInt adj1, adj2;
int i, v, k, w;
// index that starts with zero to |Vg| - 1
map<MilanLongInt, MilanLongInt>::iterator storedAlready;
#pragma omp parallel private(insertMe, k, w, v, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(NUM_THREAD)
{
#pragma omp single
{
#ifdef TIME_TRACKER
double Ghost2LocalInitialization = MPI_Wtime();
#endif
/*
* OMP Ghost2LocalInitialization
* This loop analyzes all the edges and when finds a ghost edge
* puts it in the Ghost2LocalMap.
* A critical region is needed when inserting data in the map.
*
* Despite the critical region it is still productive to
* parallelize this cycle because the critical region is exeuted
* only when a ghost edge is found and ghost edges are a minority,
* circa 3.5% during the tests.
*/
#pragma omp task depend(out \
: *numGhostEdges, Counter, Ghost2LocalMap, insertMe, storedAlready, *numGhostVertices)
{
#pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ \
: numGhostEdges[:1])
for (i = 0; i < NLEdge; i++)
{ // O(m) - Each edge stored twice
insertMe = verLocInd[i];
if ((insertMe < StartIndex) || (insertMe > EndIndex))
{ // Find a ghost
(*numGhostEdges)++;
#pragma omp critical
{
storedAlready = Ghost2LocalMap.find(insertMe);
if (storedAlready != Ghost2LocalMap.end())
{ // Has already been added
Counter[storedAlready->second]++; // Increment the counter
}
else
{ // Insert an entry for the ghost:
Ghost2LocalMap[insertMe] = *numGhostVertices; // Add a map entry
Counter.push_back(1); // Initialize the counter
(*numGhostVertices)++; // Increment the number of ghost vertices
} // End of else()
}
} // End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
} // End of for(ghost vertices)
} // end of task depend
// *numGhostEdges = atomicNumGhostEdges;
#ifdef TIME_TRACKER
Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization;
fprintf(stderr, "Ghost2LocalInitialization time: %f\n", Ghost2LocalInitialization);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")NGhosts:" << *numGhostVertices << " GhostEdges: " << *numGhostEdges;
if (!Ghost2LocalMap.empty())
{
cout << "\n(" << myRank << ")Final Map : on process ";
cout << "\n(" << myRank << ")Key \t Value \t Counter \n";
fflush(stdout);
storedAlready = Ghost2LocalMap.begin();
do
{
cout << storedAlready->second << " - " << storedAlready->first << " : " << Counter[storedAlready->second] << endl;
fflush(stdout);
storedAlready++;
} while (storedAlready != Ghost2LocalMap.end());
}
#endif
#pragma omp task depend(out \
: verGhostPtr, tempCounter, verGhostInd, GMate) depend(in \
: *numGhostVertices, *numGhostEdges)
{
// Initialize adjacency Lists for Ghost Vertices:
try
{
verGhostPtr.reserve(*numGhostVertices + 1); // Pointer Vector
tempCounter.reserve(*numGhostVertices); // Pointer Vector
verGhostInd.reserve(*numGhostEdges); // Index Vector
GMate.reserve(*numGhostVertices); // Ghost Mate Vector
}
catch (length_error)
{
cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n";
cout << "Not enough memory to allocate the internal variables \n";
exit(1);
}
// Initialize the Vectors:
verGhostPtr.resize(*numGhostVertices + 1, 0); // Pointer Vector
tempCounter.resize(*numGhostVertices, 0); // Temporary Counter
verGhostInd.resize(*numGhostEdges, -1); // Index Vector
GMate.resize(*numGhostVertices, -1); // Temporary Counter
verGhostPtr[0] = 0; // The first value
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Ghost Vertex Pointer: ";
fflush(stdout);
#endif
} // End of task
#pragma omp task depend(out \
: verGhostPtr) depend(in \
: Counter, *numGhostVertices)
{
#ifdef TIME_TRACKER
double verGhostPtrInitialization = MPI_Wtime();
#endif
for (i = 0; i < *numGhostVertices; i++)
{ // O(|Ghost Vertices|)
verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i];
#ifdef PRINT_DEBUG_INFO_
cout << verGhostPtr[i] << "\t";
fflush(stdout);
#endif
}
#ifdef TIME_TRACKER
verGhostPtrInitialization = MPI_Wtime() - verGhostPtrInitialization;
fprintf(stderr, "verGhostPtrInitialization time: %f\n", verGhostPtrInitialization);
#endif
} // End of task
#ifdef PRINT_DEBUG_INFO_
if (*numGhostVertices > 0)
cout << verGhostPtr[*numGhostVertices] << "\n";
fflush(stdout);
#endif
#ifdef TIME_TRACKER
double verGhostIndInitialization = MPI_Wtime();
#endif
/*
* OMP verGhostIndInitialization
*
* In this cycle the verGhostInd is initialized
* with the datas related to ghost edges.
* The check to see if a node is a ghost node is
* executed in paralle and when a ghost node
* is found a critical region is started.
*
* Despite the critical region it's still useful to
* parallelize the for cause the ghost nodes
* are a minority hence the critical region is executed
* few times, circa 3.5% of the times in the tests.
*/
#pragma omp task depend(in \
: insertMe, Ghost2LocalMap, tempCounter, verGhostPtr) depend(out \
: verGhostInd)
{
#pragma omp taskloop num_tasks(NUM_THREAD)
for (v = 0; v < NLVer; v++)
{
adj1 = verLocPtr[v]; // Vertex Pointer
adj2 = verLocPtr[v + 1];
for (k = adj1; k < adj2; k++)
{
w = verLocInd[k]; // Get the adjacent vertex
if ((w < StartIndex) || (w > EndIndex))
{ // Find a ghost
#pragma omp critical
{
insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; // Where to insert
tempCounter[Ghost2LocalMap[w]]++; // Increment the counter
}
verGhostInd[insertMe] = v + StartIndex; // Add the adjacency
} // End of if((w < StartIndex) || (w > EndIndex))
} // End of for(k)
} // End of for (v)
} // end of tasklopp
#ifdef TIME_TRACKER
verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization;
fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Ghost Vertex Index: ";
for (v = 0; v < *numGhostEdges; v++)
cout << verGhostInd[v] << "\t";
cout << endl;
fflush(stdout);
#endif
#pragma omp task depend(in \
: *numGhostEdges) depend(out \
: QLocalVtx, QGhostVtx, QMsgType, QOwner)
{
try
{
QLocalVtx.reserve(*numGhostEdges); // Local Vertex
QGhostVtx.reserve(*numGhostEdges); // Ghost Vertex
QMsgType.reserve(*numGhostEdges); // Message Type (Request/Failure)
QOwner.reserve(*numGhostEdges); // Owner of the ghost: COmpute once and use later
}
catch (length_error)
{
cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n";
cout << "Not enough memory to allocate the internal variables \n";
exit(1);
}
} // end of task
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Allocating CandidateMate.. ";
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl;
fflush(stdout);
fflush(stdout);
#endif
#ifdef DEBUG_HANG_
if (myRank == 0)
cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl;
fflush(stdout);
#endif
#pragma omp task depend(in \
: *numGhostVertices) depend(out \
: candidateMate, S, U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner)
{
// Allocate Data Structures:
/*
* candidateMate was a vector and has been replaced with an array
* there is no point in using the vector (or maybe there is (???))
* so I replaced it with an array wich is slightly faster
*/
candidateMate = new MilanLongInt[NLVer + (*numGhostVertices)];
*S = (*numGhostVertices); // Initialize S with number of Ghost Vertices
/*
* Create the Queue Data Structure for the Dominating Set
*
* I had to declare the staticuQueue U before the parallel region
* to have it in the correct scope. Since we can't change the dimension
* of a staticQueue I had to destroy the previous object and instantiate
* a new one of the correct size.
*/
//new (&U) staticQueue(NLVer + (*numGhostVertices));
U.reserve(NLVer + (*numGhostVertices));
// Initialize the private vectors
privateQLocalVtx.reserve(*numGhostVertices);
privateQGhostVtx.reserve(*numGhostVertices);
privateQMsgType.reserve(*numGhostVertices);
privateQOwner.reserve(*numGhostVertices);
privateU.reserve(*numGhostVertices);
} // end of task
} // End of single region
} // End of parallel region
}
#endif