Improved performance of processMatchedVerticesAndSendMessages.cpp

omp-walther
StefanoPetrilli 2 years ago
parent 5efee20041
commit 1ab166b38b

@@ -355,7 +355,6 @@ extern "C"
                 staticQueue &privateQGhostVtx,
                 staticQueue &privateQMsgType,
                 staticQueue &privateQOwner,
-                bool sendMessages,
                 MPI_Comm comm,
                 MilanLongInt *msgActual,
                 vector<MilanLongInt> &Message);

@@ -399,7 +399,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
                 privateQGhostVtx,
                 privateQMsgType,
                 privateQOwner,
-                true,
                 comm,
                 &msgActual,
                 Message);

@@ -31,7 +31,6 @@ void processMatchedVerticesAndSendMessages(
                 staticQueue &privateQGhostVtx,
                 staticQueue &privateQMsgType,
                 staticQueue &privateQOwner,
-                bool sendMessages,
                 MPI_Comm comm,
                 MilanLongInt *msgActual,
                 vector<MilanLongInt> &Message)
@@ -41,7 +40,7 @@ void processMatchedVerticesAndSendMessages(
     int option;
     MilanLongInt mateVal;
-    vector<MilanLongInt> messagesToSend;
+    vector<MilanLongInt> privatemessagesToSend, messagesToSend;
 #ifdef PRINT_DEBUG_INFO_
     cout << "\n(" << myRank << "=========================************===============================" << endl;
@@ -52,16 +51,15 @@
 #ifdef COUNT_LOCAL_VERTEX
     MilanLongInt localVertices = 0;
 #endif
-#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
-    firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) \
-    default(shared) \
-    num_threads(NUM_THREAD) \
-    reduction(+ \
-              : msgInd[:1], PCounter \
-              [:numProcs], myCard \
-              [:1], NumMessagesBundled \
-              [:1], msgActual \
-              [:1])
+#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option, privatemessagesToSend) \
+    firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \
+    num_threads(NUM_THREAD) \
+    reduction(+ \
+              : msgInd[:1], PCounter \
+              [:numProcs], myCard \
+              [:1], NumMessagesBundled \
+              [:1], msgActual \
+              [:1])
     {
         while (!U.empty())
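The directive above carries the core of the change: privatemessagesToSend is added to the private clause, so every thread buffers its outgoing messages locally, while the shared counters are still combined through OpenMP array-section reductions (msgInd[:1], PCounter[:numProcs], and so on), a feature that needs OpenMP 4.5 or later. As a point of reference, a minimal self-contained sketch of reducing over a pointer target and an array section, using illustrative names rather than the library's, is:

// Illustrative only: shows the reduction style used in the pragma above,
// not the library's actual loop body.
#include <cstdio>

int main()
{
    const int numProcs = 4;
    long msgIndStorage = 0;
    long *msgInd = &msgIndStorage; // reduced through msgInd[:1], as above
    long PCounter[numProcs] = {0}; // reduced element-wise through PCounter[:numProcs]

#pragma omp parallel default(shared) num_threads(4) \
    reduction(+ : msgInd[:1], PCounter[:numProcs])
    {
#pragma omp for
        for (int i = 0; i < 1000; i++) // stand-in for the per-vertex work
        {
            msgInd[0]++;              // each thread increments its private copy
            PCounter[i % numProcs]++; // private copies are summed at the join
        }
    }
    printf("msgInd = %ld, PCounter[0] = %ld\n", *msgInd, PCounter[0]);
    return 0;
}

Each thread works on zero-initialized private copies of the listed storage, and the per-thread results are summed back into the originals when the parallel region ends.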
@@ -199,33 +197,22 @@ num_threads(NUM_THREAD)
                     ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
                     // assert(ghostOwner != -1);
                     // assert(ghostOwner != myRank);
-                    if (sendMessages)
-                    {
-                        // Build the Message Packet:
-                        Message[0] = v; // LOCAL
-                        Message[1] = w; // GHOST
-                        Message[2] = REQUEST; // TYPE
-                        // Send a Request (Asynchronous)
-                        // printf("Send case 2: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]);
-                        // fflush(stdout);
-#pragma omp critical(sendMessage)
-                        {
-                            messagesToSend.push_back(v);
-                            messagesToSend.push_back(w);
-                            messagesToSend.push_back(REQUEST);
-                            messagesToSend.push_back(ghostOwner);
-                        }
-                        // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
-                        (*msgActual)++;
-                    }
-                    else
-                    {
-                        PCounter[ghostOwner]++;
-                        (*NumMessagesBundled)++;
-                    }
+                    // Build the Message Packet:
+                    Message[0] = v; // LOCAL
+                    Message[1] = w; // GHOST
+                    Message[2] = REQUEST; // TYPE
+                    // Send a Request (Asynchronous)
+                    // printf("Send case 2: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]);
+                    // fflush(stdout);
+                    privatemessagesToSend.push_back(v);
+                    privatemessagesToSend.push_back(w);
+                    privatemessagesToSend.push_back(REQUEST);
+                    privatemessagesToSend.push_back(ghostOwner);
+                    // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
+                    (*msgActual)++;
                     (*msgInd)++;
                     privateQLocalVtx.push_back(v);
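The hunk above shows the pattern that repeats for all three message types: the per-message #pragma omp critical(sendMessage) around the shared messagesToSend is gone, and each record (local vertex, ghost vertex, message type, destination owner) is appended to the thread-private privatemessagesToSend with no locking at all. A rough standalone sketch of that buffer-then-merge idea, assuming the private buffers are folded into the shared vector once per thread as the final hunk below does, could look like this:

// Illustrative only: mirrors the buffer-then-merge pattern, not the matching loop itself.
#include <cstdio>
#include <vector>

int main()
{
    std::vector<long> messagesToSend;  // shared result buffer
    std::vector<long> privateMessages; // one default-constructed copy per thread (private below)

#pragma omp parallel private(privateMessages) default(shared)
    {
#pragma omp for
        for (long v = 0; v < 1000; v++)
        {
            long w = v + 1, type = 1, owner = v % 4; // stand-ins for the real fields
            privateMessages.push_back(v);            // lock-free appends while processing
            privateMessages.push_back(w);
            privateMessages.push_back(type);
            privateMessages.push_back(owner);
        }
#pragma omp critical(sendMessageTransfer) // one short critical section per thread
        messagesToSend.insert(messagesToSend.end(),
                              privateMessages.begin(), privateMessages.end());
    }
    printf("staged %zu values (%zu messages)\n",
           messagesToSend.size(), messagesToSend.size() / 4);
    return 0;
}

Compared with taking the critical section on every push, each thread now contends for the lock exactly once, so the serialization cost no longer grows with the number of messages.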
@@ -257,32 +244,22 @@ num_threads(NUM_THREAD)
                     ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
                     // assert(ghostOwner != -1);
                     // assert(ghostOwner != myRank);
-                    if (sendMessages)
-                    {
-                        // Build the Message Packet:
-                        Message[0] = v; // LOCAL
-                        Message[1] = w; // GHOST
-                        Message[2] = FAILURE; // TYPE
-                        // Send a Request (Asynchronous)
-                        // printf("Send case 4: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]);
-                        // fflush(stdout);
-#pragma omp critical(sendMessage)
-                        {
-                            messagesToSend.push_back(v);
-                            messagesToSend.push_back(w);
-                            messagesToSend.push_back(FAILURE);
-                            messagesToSend.push_back(ghostOwner);
-                        }
-                        // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
-                        (*msgActual)++;
-                    }
-                    else
-                    {
-                        PCounter[ghostOwner]++;
-                        (*NumMessagesBundled)++;
-                    }
+                    // Build the Message Packet:
+                    Message[0] = v; // LOCAL
+                    Message[1] = w; // GHOST
+                    Message[2] = FAILURE; // TYPE
+                    // Send a Request (Asynchronous)
+                    // printf("Send case 4: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]);
+                    // fflush(stdout);
+                    privatemessagesToSend.push_back(v);
+                    privatemessagesToSend.push_back(w);
+                    privatemessagesToSend.push_back(FAILURE);
+                    privatemessagesToSend.push_back(ghostOwner);
+                    // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
+                    (*msgActual)++;
                     (*msgInd)++;
                     privateQLocalVtx.push_back(v);
@@ -305,32 +282,22 @@ num_threads(NUM_THREAD)
                     ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
                     // assert(ghostOwner != -1);
                     // assert(ghostOwner != myRank);
-                    if (sendMessages)
-                    {
-                        // Build the Message Packet:
-                        Message[0] = u; // LOCAL
-                        Message[1] = v; // GHOST
-                        Message[2] = SUCCESS; // TYPE
-                        // Send a Request (Asynchronous)
-                        // printf("Send case 5: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]);
-                        // fflush(stdout);
-#pragma omp critical(sendMessage)
-                        {
-                            messagesToSend.push_back(u);
-                            messagesToSend.push_back(v);
-                            messagesToSend.push_back(SUCCESS);
-                            messagesToSend.push_back(ghostOwner);
-                        }
-                        // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
-                        (*msgActual)++;
-                    }
-                    else
-                    {
-                        (*NumMessagesBundled)++;
-                        PCounter[ghostOwner]++;
-                    }
+                    // Build the Message Packet:
+                    Message[0] = u; // LOCAL
+                    Message[1] = v; // GHOST
+                    Message[2] = SUCCESS; // TYPE
+                    // Send a Request (Asynchronous)
+                    // printf("Send case 5: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]);
+                    // fflush(stdout);
+                    privatemessagesToSend.push_back(u);
+                    privatemessagesToSend.push_back(v);
+                    privatemessagesToSend.push_back(SUCCESS);
+                    privatemessagesToSend.push_back(ghostOwner);
+                    // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
+                    (*msgActual)++;
                     (*msgInd)++;
                     privateQLocalVtx.push_back(u);
@@ -340,11 +307,17 @@ num_threads(NUM_THREAD)
                         break;
                     } // End of switch
                 } // End of inner for
             }
         } // End of outer for
+#pragma omp critical(sendMessageTransfer)
+        {
+            messagesToSend.insert(messagesToSend.end(), privatemessagesToSend.begin(), privatemessagesToSend.end());
+            privatemessagesToSend.clear();
+        }
         queuesTransfer(U, privateU, QLocalVtx,
                        QGhostVtx,
                        QMsgType, QOwner, privateQLocalVtx,
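After the merge above, messagesToSend holds every thread's records as a flat sequence of four values per message, and msgActual already counts them through the reduction. The commented-out MPI_Bsend lines suggest the actual sends are issued later from this buffer; a hedged sketch of such a drain loop, assuming the records are consumed in groups of four, that MilanLongInt maps to MPI_LONG, and that a send buffer has already been attached with MPI_Buffer_attach (which MPI_Bsend requires), might look like:

// Hedged sketch, not the library's code: drains the flat (local, ghost, type, owner)
// records staged above and issues one buffered MPI send per record.
#include <cstddef>
#include <vector>
#include <mpi.h>

void drainStagedMessages(const std::vector<long> &messagesToSend,
                         int ComputeTag, MPI_Comm comm)
{
    std::vector<long> Message(3);
    for (std::size_t k = 0; k + 3 < messagesToSend.size(); k += 4)
    {
        Message[0] = messagesToSend[k];     // LOCAL vertex
        Message[1] = messagesToSend[k + 1]; // GHOST vertex
        Message[2] = messagesToSend[k + 2]; // REQUEST / FAILURE / SUCCESS
        int ghostOwner = static_cast<int>(messagesToSend[k + 3]);
        // MPI_LONG stands in for TypeMap<MilanLongInt>(); adjust to the project's type map.
        MPI_Bsend(&Message[0], 3, MPI_LONG, ghostOwner, ComputeTag, comm);
    }
}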
