Improved performance of processMatchedVerticesAndSendMessages.cpp

omp-walther
StefanoPetrilli 3 years ago
parent 5efee20041
commit 1ab166b38b

@ -355,7 +355,6 @@ extern "C"
staticQueue &privateQGhostVtx, staticQueue &privateQGhostVtx,
staticQueue &privateQMsgType, staticQueue &privateQMsgType,
staticQueue &privateQOwner, staticQueue &privateQOwner,
bool sendMessages,
MPI_Comm comm, MPI_Comm comm,
MilanLongInt *msgActual, MilanLongInt *msgActual,
vector<MilanLongInt> &Message); vector<MilanLongInt> &Message);

@ -399,7 +399,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
privateQGhostVtx, privateQGhostVtx,
privateQMsgType, privateQMsgType,
privateQOwner, privateQOwner,
true,
comm, comm,
&msgActual, &msgActual,
Message); Message);

@ -31,7 +31,6 @@ void processMatchedVerticesAndSendMessages(
staticQueue &privateQGhostVtx, staticQueue &privateQGhostVtx,
staticQueue &privateQMsgType, staticQueue &privateQMsgType,
staticQueue &privateQOwner, staticQueue &privateQOwner,
bool sendMessages,
MPI_Comm comm, MPI_Comm comm,
MilanLongInt *msgActual, MilanLongInt *msgActual,
vector<MilanLongInt> &Message) vector<MilanLongInt> &Message)
@ -41,7 +40,7 @@ void processMatchedVerticesAndSendMessages(
int option; int option;
MilanLongInt mateVal; MilanLongInt mateVal;
vector<MilanLongInt> messagesToSend; vector<MilanLongInt> privatemessagesToSend, messagesToSend;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl; cout << "\n(" << myRank << "=========================************===============================" << endl;
@ -52,9 +51,8 @@ void processMatchedVerticesAndSendMessages(
#ifdef COUNT_LOCAL_VERTEX #ifdef COUNT_LOCAL_VERTEX
MilanLongInt localVertices = 0; MilanLongInt localVertices = 0;
#endif #endif
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ #pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option, privatemessagesToSend) \
firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) \ firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \
default(shared) \
num_threads(NUM_THREAD) \ num_threads(NUM_THREAD) \
reduction(+ \ reduction(+ \
: msgInd[:1], PCounter \ : msgInd[:1], PCounter \
@ -199,8 +197,7 @@ num_threads(NUM_THREAD)
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// assert(ghostOwner != -1); // assert(ghostOwner != -1);
// assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
if (sendMessages)
{
// Build the Message Packet: // Build the Message Packet:
Message[0] = v; // LOCAL Message[0] = v; // LOCAL
Message[1] = w; // GHOST Message[1] = w; // GHOST
@ -209,23 +206,13 @@ num_threads(NUM_THREAD)
// printf("Send case 2: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); // printf("Send case 2: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]);
// fflush(stdout); // fflush(stdout);
#pragma omp critical(sendMessage) privatemessagesToSend.push_back(v);
{ privatemessagesToSend.push_back(w);
messagesToSend.push_back(v); privatemessagesToSend.push_back(REQUEST);
messagesToSend.push_back(w); privatemessagesToSend.push_back(ghostOwner);
messagesToSend.push_back(REQUEST);
messagesToSend.push_back(ghostOwner);
}
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm); // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgActual)++; (*msgActual)++;
}
else
{
PCounter[ghostOwner]++;
(*NumMessagesBundled)++;
}
(*msgInd)++; (*msgInd)++;
privateQLocalVtx.push_back(v); privateQLocalVtx.push_back(v);
@ -257,8 +244,7 @@ num_threads(NUM_THREAD)
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// assert(ghostOwner != -1); // assert(ghostOwner != -1);
// assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
if (sendMessages)
{
// Build the Message Packet: // Build the Message Packet:
Message[0] = v; // LOCAL Message[0] = v; // LOCAL
Message[1] = w; // GHOST Message[1] = w; // GHOST
@ -267,22 +253,13 @@ num_threads(NUM_THREAD)
// printf("Send case 4: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); // printf("Send case 4: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]);
// fflush(stdout); // fflush(stdout);
#pragma omp critical(sendMessage) privatemessagesToSend.push_back(v);
{ privatemessagesToSend.push_back(w);
messagesToSend.push_back(v); privatemessagesToSend.push_back(FAILURE);
messagesToSend.push_back(w); privatemessagesToSend.push_back(ghostOwner);
messagesToSend.push_back(FAILURE);
messagesToSend.push_back(ghostOwner);
}
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm); // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgActual)++;
}
else
{
PCounter[ghostOwner]++;
(*NumMessagesBundled)++;
}
(*msgActual)++;
(*msgInd)++; (*msgInd)++;
privateQLocalVtx.push_back(v); privateQLocalVtx.push_back(v);
@ -305,8 +282,7 @@ num_threads(NUM_THREAD)
ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
// assert(ghostOwner != -1); // assert(ghostOwner != -1);
// assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
if (sendMessages)
{
// Build the Message Packet: // Build the Message Packet:
Message[0] = u; // LOCAL Message[0] = u; // LOCAL
Message[1] = v; // GHOST Message[1] = v; // GHOST
@ -314,23 +290,14 @@ num_threads(NUM_THREAD)
// Send a Request (Asynchronous) // Send a Request (Asynchronous)
// printf("Send case 5: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); // printf("Send case 5: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]);
// fflush(stdout); // fflush(stdout);)
#pragma omp critical(sendMessage) privatemessagesToSend.push_back(u);
{ privatemessagesToSend.push_back(v);
messagesToSend.push_back(u); privatemessagesToSend.push_back(SUCCESS);
messagesToSend.push_back(v); privatemessagesToSend.push_back(ghostOwner);
messagesToSend.push_back(SUCCESS);
messagesToSend.push_back(ghostOwner);
}
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm); // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgActual)++;
}
else
{
(*NumMessagesBundled)++;
PCounter[ghostOwner]++;
}
(*msgActual)++;
(*msgInd)++; (*msgInd)++;
privateQLocalVtx.push_back(u); privateQLocalVtx.push_back(u);
@ -340,11 +307,17 @@ num_threads(NUM_THREAD)
break; break;
} // End of switch } // End of switch
} // End of inner for } // End of inner for
} }
} // End of outer for } // End of outer for
#pragma omp critical(sendMessageTransfer)
{
messagesToSend.insert(messagesToSend.end(), privatemessagesToSend.begin(), privatemessagesToSend.end());
privatemessagesToSend.clear();
}
queuesTransfer(U, privateU, QLocalVtx, queuesTransfer(U, privateU, QLocalVtx,
QGhostVtx, QGhostVtx,
QMsgType, QOwner, privateQLocalVtx, QMsgType, QOwner, privateQLocalVtx,

Loading…
Cancel
Save