Optimization: replaced all unnecessary atomics with reductions

omp-walther
StefanoPetrilli 2 years ago
parent aa45e2fe93
commit 5efee20041

@ -358,7 +358,6 @@ extern "C"
bool sendMessages, bool sendMessages,
MPI_Comm comm, MPI_Comm comm,
MilanLongInt *msgActual, MilanLongInt *msgActual,
MilanLongInt *msgInd,
vector<MilanLongInt> &Message); vector<MilanLongInt> &Message);
void sendBundledMessages(MilanLongInt *numGhostEdgesPtr, void sendBundledMessages(MilanLongInt *numGhostEdgesPtr,

@ -402,7 +402,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
true, true,
comm, comm,
&msgActual, &msgActual,
&msgInd,
Message); Message);
///////////////////////// END OF PROCESS MATCHED VERTICES ///////////////////////// ///////////////////////// END OF PROCESS MATCHED VERTICES /////////////////////////

@ -57,13 +57,13 @@ void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
#pragma omp task depend(out \ #pragma omp task depend(out \
: *numGhostEdges, Counter, Ghost2LocalMap, insertMe, storedAlready, *numGhostVertices) : *numGhostEdges, Counter, Ghost2LocalMap, insertMe, storedAlready, *numGhostVertices)
{ {
#pragma omp taskloop num_tasks(NUM_THREAD) #pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ \
: numGhostEdges[:1])
for (i = 0; i < NLEdge; i++) for (i = 0; i < NLEdge; i++)
{ // O(m) - Each edge stored twice { // O(m) - Each edge stored twice
insertMe = verLocInd[i]; insertMe = verLocInd[i];
if ((insertMe < StartIndex) || (insertMe > EndIndex)) if ((insertMe < StartIndex) || (insertMe > EndIndex))
{ // Find a ghost { // Find a ghost
#pragma omp atomic
(*numGhostEdges)++; (*numGhostEdges)++;
#pragma omp critical #pragma omp critical
{ {

@ -34,9 +34,13 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0; MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0;
MilanInt ghostOwner = 0, option; MilanInt ghostOwner = 0, option;
#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(NUM_THREAD) #pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) \
num_threads(NUM_THREAD)
{ {
#pragma omp for reduction(+ : PCounter[:numProcs]) schedule(static) #pragma omp for reduction(+ \
: PCounter[:numProcs], myCard[:1], msgInd[:1], NumMessagesBundled[:1]) schedule(static)
for (v = 0; v < NLVer; v++) for (v = 0; v < NLVer; v++)
{ {
option = -1; option = -1;
@ -76,7 +80,6 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
if (w >= 0) if (w >= 0)
{ {
#pragma omp atomic
(*myCard)++; (*myCard)++;
if ((w < StartIndex) || (w > EndIndex)) if ((w < StartIndex) || (w > EndIndex))
{ // w is a ghost vertex { // w is a ghost vertex
@ -129,13 +132,11 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
fflush(stdout); fflush(stdout);
#endif #endif
#pragma omp atomic
(*msgInd)++; (*msgInd)++;
#pragma omp atomic
(*NumMessagesBundled)++; (*NumMessagesBundled)++;
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1); // assert(ghostOwner != -1);
assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
privateQLocalVtx.push_back(v + StartIndex); privateQLocalVtx.push_back(v + StartIndex);
@ -169,13 +170,11 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
cout << "\n(" << myRank << ")Local is: " << v + StartIndex << " Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; cout << "\n(" << myRank << ")Local is: " << v + StartIndex << " Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl;
fflush(stdout); fflush(stdout);
#endif #endif
#pragma omp atomic
(*msgInd)++; (*msgInd)++;
#pragma omp atomic
(*NumMessagesBundled)++; (*NumMessagesBundled)++;
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1); // assert(ghostOwner != -1);
assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
privateQLocalVtx.push_back(v + StartIndex); privateQLocalVtx.push_back(v + StartIndex);

@ -7,9 +7,9 @@ void processMatchedVertices(
staticQueue &privateU, staticQueue &privateU,
MilanLongInt StartIndex, MilanLongInt StartIndex,
MilanLongInt EndIndex, MilanLongInt EndIndex,
MilanLongInt *myCardPtr, MilanLongInt *myCard,
MilanLongInt *msgIndPtr, MilanLongInt *msgInd,
MilanLongInt *NumMessagesBundledPtr, MilanLongInt *NumMessagesBundled,
MilanLongInt *SPtr, MilanLongInt *SPtr,
MilanLongInt *verLocPtr, MilanLongInt *verLocPtr,
MilanLongInt *verLocInd, MilanLongInt *verLocInd,
@ -46,7 +46,14 @@ void processMatchedVertices(
#ifdef COUNT_LOCAL_VERTEX #ifdef COUNT_LOCAL_VERTEX
MilanLongInt localVertices = 0; MilanLongInt localVertices = 0;
#endif #endif
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) num_threads(NUM_THREAD) #pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \
num_threads(NUM_THREAD) \
reduction(+ \
: msgInd[:1], PCounter \
[:numProcs], myCard \
[:1], NumMessagesBundled \
[:1])
{ {
while (!U.empty()) while (!U.empty())
@ -171,8 +178,8 @@ void processMatchedVertices(
// Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v // Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v
privateU.push_back(v); privateU.push_back(v);
privateU.push_back(w); privateU.push_back(w);
#pragma omp atomic
(*myCardPtr)++; (*myCard)++;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout); fflush(stdout);
@ -183,14 +190,11 @@ void processMatchedVertices(
// Found a dominating edge, it is a ghost // Found a dominating edge, it is a ghost
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1); // assert(ghostOwner != -1);
assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
#pragma omp atomic
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
#pragma omp atomic (*NumMessagesBundled)++;
(*NumMessagesBundledPtr)++; (*msgInd)++;
#pragma omp atomic
(*msgIndPtr)++;
privateQLocalVtx.push_back(v); privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w); privateQGhostVtx.push_back(w);
@ -200,8 +204,8 @@ void processMatchedVertices(
case 3: case 3:
privateU.push_back(v); privateU.push_back(v);
privateU.push_back(w); privateU.push_back(w);
#pragma omp atomic
(*myCardPtr)++; (*myCard)++;
break; break;
case 4: case 4:
// Could not find a dominating vertex // Could not find a dominating vertex
@ -220,14 +224,12 @@ void processMatchedVertices(
#endif #endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1); // assert(ghostOwner != -1);
assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
#pragma omp atomic
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
#pragma omp atomic (*NumMessagesBundled)++;
(*NumMessagesBundledPtr)++; (*msgInd)++;
#pragma omp atomic
(*msgIndPtr)++;
privateQLocalVtx.push_back(v); privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w); privateQGhostVtx.push_back(w);
@ -247,16 +249,12 @@ void processMatchedVertices(
#endif #endif
ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
assert(ghostOwner != -1); // assert(ghostOwner != -1);
assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
#pragma omp atomic (*NumMessagesBundled)++;
(*NumMessagesBundledPtr)++;
#pragma omp atomic
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
(*msgInd)++;
#pragma omp atomic
(*msgIndPtr)++;
privateQLocalVtx.push_back(u); privateQLocalVtx.push_back(u);
privateQGhostVtx.push_back(v); privateQGhostVtx.push_back(v);

@ -7,9 +7,9 @@ void processMatchedVerticesAndSendMessages(
staticQueue &privateU, staticQueue &privateU,
MilanLongInt StartIndex, MilanLongInt StartIndex,
MilanLongInt EndIndex, MilanLongInt EndIndex,
MilanLongInt *myCardPtr, MilanLongInt *myCard,
MilanLongInt *msgIndPtr, MilanLongInt *msgInd,
MilanLongInt *NumMessagesBundledPtr, MilanLongInt *NumMessagesBundled,
MilanLongInt *SPtr, MilanLongInt *SPtr,
MilanLongInt *verLocPtr, MilanLongInt *verLocPtr,
MilanLongInt *verLocInd, MilanLongInt *verLocInd,
@ -34,7 +34,6 @@ void processMatchedVerticesAndSendMessages(
bool sendMessages, bool sendMessages,
MPI_Comm comm, MPI_Comm comm,
MilanLongInt *msgActual, MilanLongInt *msgActual,
MilanLongInt *msgInd,
vector<MilanLongInt> &Message) vector<MilanLongInt> &Message)
{ {
@ -53,7 +52,16 @@ void processMatchedVerticesAndSendMessages(
#ifdef COUNT_LOCAL_VERTEX #ifdef COUNT_LOCAL_VERTEX
MilanLongInt localVertices = 0; MilanLongInt localVertices = 0;
#endif #endif
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) num_threads(NUM_THREAD) #pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) \
default(shared) \
num_threads(NUM_THREAD) \
reduction(+ \
: msgInd[:1], PCounter \
[:numProcs], myCard \
[:1], NumMessagesBundled \
[:1], msgActual \
[:1])
{ {
while (!U.empty()) while (!U.empty())
@ -178,8 +186,7 @@ void processMatchedVerticesAndSendMessages(
// Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v // Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v
privateU.push_back(v); privateU.push_back(v);
privateU.push_back(w); privateU.push_back(w);
#pragma omp atomic (*myCard)++;
(*myCardPtr)++;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout); fflush(stdout);
@ -190,8 +197,8 @@ void processMatchedVerticesAndSendMessages(
// Found a dominating edge, it is a ghost // Found a dominating edge, it is a ghost
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1); // assert(ghostOwner != -1);
assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
if (sendMessages) if (sendMessages)
{ {
// Build the Message Packet: // Build the Message Packet:
@ -200,8 +207,8 @@ void processMatchedVerticesAndSendMessages(
Message[2] = REQUEST; // TYPE Message[2] = REQUEST; // TYPE
// Send a Request (Asynchronous) // Send a Request (Asynchronous)
//printf("Send case 2: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); // printf("Send case 2: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]);
//fflush(stdout); // fflush(stdout);
#pragma omp critical(sendMessage) #pragma omp critical(sendMessage)
{ {
messagesToSend.push_back(v); messagesToSend.push_back(v);
@ -211,19 +218,15 @@ void processMatchedVerticesAndSendMessages(
} }
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm); // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
#pragma omp atomic
(*msgActual)++; (*msgActual)++;
} }
else else
{ {
#pragma omp atomic
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
#pragma omp atomic (*NumMessagesBundled)++;
(*NumMessagesBundledPtr)++;
} }
#pragma omp atomic (*msgInd)++;
(*msgIndPtr)++;
privateQLocalVtx.push_back(v); privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w); privateQGhostVtx.push_back(w);
@ -233,8 +236,7 @@ void processMatchedVerticesAndSendMessages(
case 3: case 3:
privateU.push_back(v); privateU.push_back(v);
privateU.push_back(w); privateU.push_back(w);
#pragma omp atomic (*myCard)++;
(*myCardPtr)++;
break; break;
case 4: case 4:
// Could not find a dominating vertex // Could not find a dominating vertex
@ -253,8 +255,8 @@ void processMatchedVerticesAndSendMessages(
#endif #endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1); // assert(ghostOwner != -1);
assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
if (sendMessages) if (sendMessages)
{ {
// Build the Message Packet: // Build the Message Packet:
@ -263,8 +265,8 @@ void processMatchedVerticesAndSendMessages(
Message[2] = FAILURE; // TYPE Message[2] = FAILURE; // TYPE
// Send a Request (Asynchronous) // Send a Request (Asynchronous)
//printf("Send case 4: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); // printf("Send case 4: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]);
//fflush(stdout); // fflush(stdout);
#pragma omp critical(sendMessage) #pragma omp critical(sendMessage)
{ {
messagesToSend.push_back(v); messagesToSend.push_back(v);
@ -273,19 +275,15 @@ void processMatchedVerticesAndSendMessages(
messagesToSend.push_back(ghostOwner); messagesToSend.push_back(ghostOwner);
} }
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm); // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
#pragma omp atomic
(*msgActual)++; (*msgActual)++;
} }
else else
{ {
#pragma omp atomic
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
#pragma omp atomic (*NumMessagesBundled)++;
(*NumMessagesBundledPtr)++;
} }
#pragma omp atomic (*msgInd)++;
(*msgIndPtr)++;
privateQLocalVtx.push_back(v); privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w); privateQGhostVtx.push_back(w);
@ -305,8 +303,8 @@ void processMatchedVerticesAndSendMessages(
#endif #endif
ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
assert(ghostOwner != -1); // assert(ghostOwner != -1);
assert(ghostOwner != myRank); // assert(ghostOwner != myRank);
if (sendMessages) if (sendMessages)
{ {
// Build the Message Packet: // Build the Message Packet:
@ -315,8 +313,8 @@ void processMatchedVerticesAndSendMessages(
Message[2] = SUCCESS; // TYPE Message[2] = SUCCESS; // TYPE
// Send a Request (Asynchronous) // Send a Request (Asynchronous)
//printf("Send case 5: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]); // printf("Send case 5: (%ld, %ld, %ld)\n", Message[0], Message[1], Message[2]);
//fflush(stdout); // fflush(stdout);
#pragma omp critical(sendMessage) #pragma omp critical(sendMessage)
{ {
messagesToSend.push_back(u); messagesToSend.push_back(u);
@ -325,19 +323,15 @@ void processMatchedVerticesAndSendMessages(
messagesToSend.push_back(ghostOwner); messagesToSend.push_back(ghostOwner);
} }
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm); // MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
#pragma omp atomic
(*msgActual)++; (*msgActual)++;
} }
else else
{ {
#pragma omp atomic (*NumMessagesBundled)++;
(*NumMessagesBundledPtr)++;
#pragma omp atomic
PCounter[ghostOwner]++; PCounter[ghostOwner]++;
} }
#pragma omp atomic (*msgInd)++;
(*msgIndPtr)++;
privateQLocalVtx.push_back(u); privateQLocalVtx.push_back(u);
privateQGhostVtx.push_back(v); privateQGhostVtx.push_back(v);

@ -243,8 +243,8 @@ void processMessages(
fflush(stdout); fflush(stdout);
#endif #endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1); //assert(ghostOwner != -1);
assert(ghostOwner != myRank); //assert(ghostOwner != myRank);
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm); MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgInd)++; (*msgInd)++;
@ -301,8 +301,8 @@ void processMessages(
fflush(stdout); fflush(stdout);
#endif #endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
assert(ghostOwner != -1); //assert(ghostOwner != -1);
assert(ghostOwner != myRank); //assert(ghostOwner != myRank);
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm); MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgInd)++; (*msgInd)++;
(*msgActual)++; (*msgActual)++;

Loading…
Cancel
Save