Replaced some staticQueues with vectors for performance reasons

omp-walther
StefanoPetrilli 2 years ago
parent 066c1a5e62
commit 500403dbda

@ -189,10 +189,10 @@ extern "C"
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
staticQueue &privateQLocalVtx,
staticQueue &privateQGhostVtx,
staticQueue &privateQMsgType,
staticQueue &privateQOwner);
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner);
bool isAlreadyMatched(MilanLongInt node,
MilanLongInt StartIndex,
@ -233,10 +233,10 @@ extern "C"
MilanLongInt *&candidateMate,
staticQueue &U,
staticQueue &privateU,
staticQueue &privateQLocalVtx,
staticQueue &privateQGhostVtx,
staticQueue &privateQMsgType,
staticQueue &privateQOwner);
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner);
void clean(MilanLongInt NLVer,
MilanInt myRank,
@ -284,10 +284,10 @@ extern "C"
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
staticQueue &privateQLocalVtx,
staticQueue &privateQGhostVtx,
staticQueue &privateQMsgType,
staticQueue &privateQOwner);
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner);
void PROCESS_CROSS_EDGE(MilanLongInt *edge,
MilanLongInt *SPtr);
@ -319,10 +319,10 @@ extern "C"
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
staticQueue &privateQLocalVtx,
staticQueue &privateQGhostVtx,
staticQueue &privateQMsgType,
staticQueue &privateQOwner);
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner);
void processMatchedVerticesAndSendMessages(
MilanLongInt NLVer,
@ -351,6 +351,10 @@ extern "C"
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner,
MPI_Comm comm,
MilanLongInt *msgActual,
vector<MilanLongInt> &Message);

@ -182,7 +182,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
vector<MilanLongInt> GMate; // Proportional to the number of ghost vertices
MilanLongInt S;
MilanLongInt privateMyCard = 0;
staticQueue U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner;
staticQueue U, privateU;
vector<MilanLongInt> PCumulative, PMessageBundle, PSizeInfoMessages;
vector<MPI_Request> SRequest; // Requests that are used for each send message
vector<MPI_Status> SStatus; // Status of sent messages, used in MPI_Wait
@ -190,6 +190,9 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
MilanInt BufferSize;
MilanLongInt *Buffer;
vector<MilanLongInt> privateQLocalVtx, privateQGhostVtx, privateQMsgType;
vector<MilanInt> privateQOwner;
initialize(NLVer, NLEdge, StartIndex,
EndIndex, &numGhostEdges,
&numGhostVertices, &S,
@ -370,34 +373,38 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
///////////////////////////////////////////////////////////////////////////////////
processMatchedVerticesAndSendMessages(NLVer,
UChunkBeingProcessed,
U,
privateU,
StartIndex,
EndIndex,
&myCard,
&msgInd,
&NumMessagesBundled,
&S,
verLocPtr,
verLocInd,
verDistance,
PCounter,
Counter,
myRank,
numProcs,
candidateMate,
GMate,
Mate,
Ghost2LocalMap,
edgeLocWeight,
QLocalVtx,
QGhostVtx,
QMsgType,
QOwner,
comm,
&msgActual,
Message);
UChunkBeingProcessed,
U,
privateU,
StartIndex,
EndIndex,
&myCard,
&msgInd,
&NumMessagesBundled,
&S,
verLocPtr,
verLocInd,
verDistance,
PCounter,
Counter,
myRank,
numProcs,
candidateMate,
GMate,
Mate,
Ghost2LocalMap,
edgeLocWeight,
QLocalVtx,
QGhostVtx,
QMsgType,
QOwner,
privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner,
comm,
&msgActual,
Message);
///////////////////////// END OF PROCESS MATCHED VERTICES /////////////////////////

@ -21,10 +21,10 @@ void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt *&candidateMate,
staticQueue &U,
staticQueue &privateU,
staticQueue &privateQLocalVtx,
staticQueue &privateQGhostVtx,
staticQueue &privateQMsgType,
staticQueue &privateQOwner)
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner)
{
MilanLongInt insertMe = 0;
@ -295,10 +295,11 @@ void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
// Initialize the privte data structure
new (&privateU) staticQueue(NLVer + (*numGhostVertices)); // TODO how can I put a meaningfull size?
new (&privateQLocalVtx) staticQueue(size);
new (&privateQGhostVtx) staticQueue(size);
new (&privateQMsgType) staticQueue(size);
new (&privateQOwner) staticQueue(size);
privateQLocalVtx.reserve(*numGhostVertices);
privateQGhostVtx.reserve(*numGhostVertices);
privateQMsgType.reserve(*numGhostVertices);
privateQOwner.reserve(*numGhostVertices);
} // end of task
} // End of single region

@ -25,10 +25,10 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
staticQueue &privateQLocalVtx,
staticQueue &privateQGhostVtx,
staticQueue &privateQMsgType,
staticQueue &privateQOwner)
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner)
{
MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0;
@ -39,8 +39,11 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
num_threads(NUM_THREAD)
{
#pragma omp for reduction(+ \
: PCounter[:numProcs], myCard[:1], msgInd[:1], NumMessagesBundled[:1]) schedule(static)
#pragma omp for reduction(+ \
: PCounter[:numProcs], myCard \
[:1], msgInd \
[:1], NumMessagesBundled \
[:1]) schedule(static)
for (v = 0; v < NLVer; v++)
{
option = -1;

@ -27,10 +27,10 @@ void processMatchedVertices(
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
staticQueue &privateQLocalVtx,
staticQueue &privateQGhostVtx,
staticQueue &privateQMsgType,
staticQueue &privateQOwner)
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner)
{
MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner;

@ -27,6 +27,10 @@ void processMatchedVerticesAndSendMessages(
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner,
MPI_Comm comm,
MilanLongInt *msgActual,
vector<MilanLongInt> &Message)
@ -37,13 +41,6 @@ void processMatchedVerticesAndSendMessages(
int option;
MilanLongInt mateVal;
// TODO reserve!!!
vector<MilanLongInt> privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner;
privateQLocalVtx.reserve(100000);
privateQGhostVtx.reserve(100000);
privateQMsgType.reserve(100000);
privateQOwner.reserve(100000);
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
@ -53,7 +50,7 @@ void processMatchedVerticesAndSendMessages(
#ifdef COUNT_LOCAL_VERTEX
MilanLongInt localVertices = 0;
#endif
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \
num_threads(NUM_THREAD) \
reduction(+ \
@ -286,25 +283,12 @@ void processMatchedVerticesAndSendMessages(
}
} // End of outer for
#pragma omp critical(U)
{
while (!privateU.empty())
U.push_back(privateU.pop_back());
}
#pragma omp critical(sendMessageTransfer)
{
QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end());
QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end());
QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end());
QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end());
privateQLocalVtx.clear();
privateQGhostVtx.clear();
privateQMsgType.clear();
privateQOwner.clear();
}
queuesTransfer(U, privateU, QLocalVtx,
QGhostVtx,
QMsgType, QOwner, privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
} // End of while ( !U.empty() )
@ -317,7 +301,7 @@ void processMatchedVerticesAndSendMessages(
#endif
} // End of parallel region
//Send the messages
// Send the messages
for (int i = initialSize; i < QOwner.size(); i++)
{

@ -1,32 +1,34 @@
#include "MatchBoxPC.h"
void queuesTransfer(staticQueue &U,
staticQueue &privateU,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
staticQueue &privateQLocalVtx,
staticQueue &privateQGhostVtx,
staticQueue &privateQMsgType,
staticQueue &privateQOwner)
staticQueue &privateU,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner)
{
#pragma omp critical(U)
{
while (!privateU.empty())
U.push_back(privateU.pop_back());
}
#pragma omp critical(privateMsg)
#pragma omp critical(sendMessageTransfer)
{
while (!privateQLocalVtx.empty())
{
QLocalVtx.push_back(privateQLocalVtx.pop_back());
QGhostVtx.push_back(privateQGhostVtx.pop_back());
QMsgType.push_back(privateQMsgType.pop_back());
QOwner.push_back(privateQOwner.pop_back());
}
QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end());
QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end());
QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end());
QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end());
}
privateQLocalVtx.clear();
privateQGhostVtx.clear();
privateQMsgType.clear();
privateQOwner.clear();
}
Loading…
Cancel
Save