Extendend parallel region after SEND PACKET BUNDLE

Nothing parallelizable founded
omp-walther
StefanoPetrilli 3 years ago
parent b079d71f30
commit 532701031e

@ -213,7 +213,15 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
MilanLongInt S; MilanLongInt S;
MilanLongInt privateMyCard = 0; MilanLongInt privateMyCard = 0;
staticQueue U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner; staticQueue U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner;
MilanLongInt myIndex = 0;
vector <MilanLongInt> PCumulative, PMessageBundle, PSizeInfoMessages;
vector <MPI_Request> SRequest; //Requests that are used for each send message
vector <MPI_Status> SStatus; //Status of sent messages, used in MPI_Wait
MilanLongInt MessageIndex = 0; //Pointer for current message
MilanInt OneMessageSize = 0;
MilanLongInt numMessagesToSend;
MilanInt BufferSize;
MilanLongInt *Buffer;
bool isEmpty; bool isEmpty;
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
double Ghost2LocalInitialization = MPI_Wtime(); double Ghost2LocalInitialization = MPI_Wtime();
@ -868,7 +876,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
omp_get_thread_num(), omp_get_thread_num(),
myRank); myRank);
#endif #endif
} // end of parallel region
///////////////////////// END OF PROCESS MATCHED VERTICES ///////////////////////// ///////////////////////// END OF PROCESS MATCHED VERTICES /////////////////////////
@ -878,28 +885,34 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////// SEND BUNDLED MESSAGES ///////////////////////////////////// ///////////////////////////// SEND BUNDLED MESSAGES /////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
#pragma omp barrier
#pragma omp master
{
//Data structures for Bundled Messages: //Data structures for Bundled Messages:
vector<MilanLongInt> PCumulative, PMessageBundle, PSizeInfoMessages;
MilanLongInt myIndex=0;
try { try {
PMessageBundle.reserve(NumMessagesBundled*3); //Three integers per message PMessageBundle.reserve(NumMessagesBundled * 3); //Three integers per message
PCumulative.reserve(numProcs+1); //Similar to Row Pointer vector in CSR data structure PCumulative.reserve(numProcs + 1); //Similar to Row Pointer vector in CSR data structure
PSizeInfoMessages.reserve(numProcs*3); //Buffer to hold the Size info message packets PSizeInfoMessages.reserve(numProcs * 3); //Buffer to hold the Size info message packets
} catch ( length_error ) { } catch (length_error) {
cout<<"Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n";
cout<<"Not enough memory to allocate the internal variables \n"; cout << "Not enough memory to allocate the internal variables \n";
exit(1); exit(1);
} }
PMessageBundle.resize(NumMessagesBundled*3, -1);//Initialize PMessageBundle.resize(NumMessagesBundled * 3, -1);//Initialize
PCumulative.resize(numProcs+1, 0); //Only initialize the counter variable PCumulative.resize(numProcs + 1, 0); //Only initialize the counter variable
PSizeInfoMessages.resize(numProcs*3, 0); PSizeInfoMessages.resize(numProcs * 3, 0);
for (MilanInt i=0; i<numProcs; i++) // Changed by Fabio to be an integer, addresses needs to be integers!
PCumulative[i+1]=PCumulative[i]+PCounter[i]; for (MilanInt i = 0; i < numProcs; i++) // Changed by Fabio to be an integer, addresses needs to be integers!
PCumulative[i + 1] = PCumulative[i] + PCounter[i];
//OMP not worth parallelizing
//Reuse PCounter to keep track of how many messages were inserted: //Reuse PCounter to keep track of how many messages were inserted:
for (MilanInt i=0; i<numProcs; i++) // Changed by Fabio to be an integer, addresses needs to be integers! for (MilanInt i = 0; i < numProcs; i++) // Changed by Fabio to be an integer, addresses needs to be integers!
PCounter[i]=0; PCounter[i] = 0;
//Build the Message Bundle packet: //Build the Message Bundle packet:
//OMP Not parallelizable
for (MilanInt i=0; i<NumMessagesBundled; i++) { // Changed by Fabio to be an integer, addresses needs to be integers! for (MilanInt i=0; i<NumMessagesBundled; i++) { // Changed by Fabio to be an integer, addresses needs to be integers!
myIndex = ( PCumulative[QOwner[i]] + PCounter[QOwner[i]] )*3; myIndex = ( PCumulative[QOwner[i]] + PCounter[QOwner[i]] )*3;
PMessageBundle[myIndex+0] = QLocalVtx[i]; PMessageBundle[myIndex+0] = QLocalVtx[i];
@ -907,38 +920,39 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
PMessageBundle[myIndex+2] = QMsgType[i]; PMessageBundle[myIndex+2] = QMsgType[i];
PCounter[QOwner[i]]++; PCounter[QOwner[i]]++;
} }
//Send the Bundled Messages: Use ISend //Send the Bundled Messages: Use ISend
vector<MPI_Request> SRequest; //Requests that are used for each send message
vector<MPI_Status> SStatus; //Status of sent messages, used in MPI_Wait
MilanLongInt MessageIndex=0; //Pointer for current message
try { try {
SRequest.reserve(numProcs*2); //At most two messages per processor SRequest.reserve(numProcs * 2); //At most two messages per processor
SStatus.reserve(numProcs*2);//At most two messages per processor SStatus.reserve(numProcs * 2);//At most two messages per processor
} catch ( length_error ) { } catch (length_error) {
cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearchImmediateSend: \n"; cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearchImmediateSend: \n";
cout<<"Not enough memory to allocate the internal variables \n"; cout << "Not enough memory to allocate the internal variables \n";
exit(1); exit(1);
} }
MPI_Request myReq; //A sample request MPI_Request myReq; //A sample request
SRequest.resize(numProcs*2,myReq); SRequest.resize(numProcs * 2, myReq);
MPI_Status myStat; //A sample status MPI_Status myStat; //A sample status
SStatus.resize(numProcs*2,myStat); SStatus.resize(numProcs * 2, myStat);
//Send the Messages //Send the Messages
for (MilanInt i=0; i<numProcs; i++) { // Changed by Fabio to be an integer, addresses needs to be integers! for (MilanInt i = 0; i < numProcs; i++) { // Changed by Fabio to be an integer, addresses needs to be integers!
if (i==myRank) //Do not send anything to yourself if (i == myRank) //Do not send anything to yourself
continue; continue;
//Send the Message with information about the size of next message: //Send the Message with information about the size of next message:
//Build the Message Packet: //Build the Message Packet:
PSizeInfoMessages[i*3+0] = (PCumulative[i+1]-PCumulative[i])*3; // # of integers in the next message PSizeInfoMessages[i * 3 + 0] = (PCumulative[i + 1] - PCumulative[i]) * 3; // # of integers in the next message
PSizeInfoMessages[i*3+1] = -1; //Dummy packet PSizeInfoMessages[i * 3 + 1] = -1; //Dummy packet
PSizeInfoMessages[i*3+2] = SIZEINFO; //TYPE PSizeInfoMessages[i * 3 + 2] = SIZEINFO; //TYPE
//Send a Request (Asynchronous) //Send a Request (Asynchronous)
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Sending bundled message to process "<<i<<" size: "<<PSizeInfoMessages[i*3+0]<<endl; cout<<"\n("<<myRank<<")Sending bundled message to process "<<i<<" size: "<<PSizeInfoMessages[i*3+0]<<endl;
fflush(stdout); fflush(stdout);
#endif #endif
if ( PSizeInfoMessages[i*3+0] > 0 ) { //Send only if it is a nonempty packet if (PSizeInfoMessages[i * 3 + 0] > 0) { //Send only if it is a nonempty packet
MPI_Isend(&PSizeInfoMessages[i*3+0], 3, TypeMap<MilanLongInt>(), i, ComputeTag, comm, &SRequest[MessageIndex]); MPI_Isend(&PSizeInfoMessages[i * 3 + 0], 3, TypeMap<MilanLongInt>(), i, ComputeTag, comm,
&SRequest[MessageIndex]);
msgActual++; msgActual++;
MessageIndex++; MessageIndex++;
//Now Send the message with the data packet: //Now Send the message with the data packet:
@ -949,7 +963,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
cout<<endl; cout<<endl;
fflush(stdout); fflush(stdout);
#endif #endif
MPI_Isend(&PMessageBundle[PCumulative[i]*3], PSizeInfoMessages[i*3+0], TypeMap<MilanLongInt>(), i, BundleTag, comm, &SRequest[MessageIndex]); MPI_Isend(&PMessageBundle[PCumulative[i] * 3], PSizeInfoMessages[i * 3 + 0],
TypeMap<MilanLongInt>(), i, BundleTag, comm, &SRequest[MessageIndex]);
MessageIndex++; MessageIndex++;
} //End of if size > 0 } //End of if size > 0
} }
@ -959,6 +974,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
QGhostVtx.clear(); QGhostVtx.clear();
QMsgType.clear(); QMsgType.clear();
QOwner.clear(); QOwner.clear();
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Number of Ghost edges = "<<numGhostEdges; cout<<"\n("<<myRank<<")Number of Ghost edges = "<<numGhostEdges;
cout<<"\n("<<myRank<<")Total number of potential message X 2 = "<<numGhostEdges*2; cout<<"\n("<<myRank<<")Total number of potential message X 2 = "<<numGhostEdges*2;
@ -971,17 +988,17 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
//Allocate memory for MPI Send messages: //Allocate memory for MPI Send messages:
/* WILL COME BACK HERE - NO NEED TO STORE ALL THIS MEMORY !! */ /* WILL COME BACK HERE - NO NEED TO STORE ALL THIS MEMORY !! */
MilanInt OneMessageSize=0; OneMessageSize=0;
MPI_Pack_size(3, TypeMap<MilanLongInt>(), comm, &OneMessageSize); //Size of one message packet MPI_Pack_size(3, TypeMap<MilanLongInt>(), comm, &OneMessageSize); //Size of one message packet
//How many messages to send? //How many messages to send?
//Potentially three kinds of messages will be sent/received: //Potentially three kinds of messages will be sent/received:
//Request, Success, Failure. //Request, Success, Failure.
//But only two will be sent from a given processor. //But only two will be sent from a given processor.
//Substract the number of messages that have already been sent as bundled messages: //Substract the number of messages that have already been sent as bundled messages:
MilanLongInt numMessagesToSend = numGhostEdges*2 - NumMessagesBundled; numMessagesToSend = numGhostEdges*2 - NumMessagesBundled;
MilanInt BufferSize = (OneMessageSize+MPI_BSEND_OVERHEAD)*numMessagesToSend; BufferSize = (OneMessageSize+MPI_BSEND_OVERHEAD)*numMessagesToSend;
MilanLongInt *Buffer=0; Buffer=0;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Size of One Message from PACK= "<<OneMessageSize; cout<<"\n("<<myRank<<")Size of One Message from PACK= "<<OneMessageSize;
cout<<"\n("<<myRank<<")Size of Message overhead = "<<MPI_BSEND_OVERHEAD; cout<<"\n("<<myRank<<")Size of Message overhead = "<<MPI_BSEND_OVERHEAD;
@ -1000,11 +1017,14 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
} }
MPI_Buffer_attach(Buffer, BufferSize); //Attach the Buffer MPI_Buffer_attach(Buffer, BufferSize); //Attach the Buffer
} }
} //End of master
} // end of parallel region
///////////////////////// END OF SEND BUNDLED MESSAGES ////////////////////////////////// ///////////////////////// END OF SEND BUNDLED MESSAGES //////////////////////////////////
finishTime = MPI_Wtime(); finishTime = MPI_Wtime();
*ph1_time = finishTime-startTime; //Time taken for Phase-1 *ph1_time = finishTime-startTime; //Time taken for Phase-1
*ph1_card = myCard ; //Cardinality at the end of Phase-1 *ph1_card = myCard; //Cardinality at the end of Phase-1
startTime = MPI_Wtime(); startTime = MPI_Wtime();
///////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////// MAIN LOOP ////////////////////////////////////// //////////////////////////////////////// MAIN LOOP //////////////////////////////////////

@ -1,6 +1,6 @@
%%%%%%%%%%% General arguments % Lines starting with % are ignored. %%%%%%%%%%% General arguments % Lines starting with % are ignored.
CSR ! Storage format CSR COO JAD CSR ! Storage format CSR COO JAD
00123 ! IDIM; domain size. Linear system size is IDIM**3 0123 ! IDIM; domain size. Linear system size is IDIM**3
CONST ! PDECOEFF: CONST, EXP, GAUSS Coefficients of the PDE CONST ! PDECOEFF: CONST, EXP, GAUSS Coefficients of the PDE
BICGSTAB ! Iterative method: BiCGSTAB BiCGSTABL BiCG CG CGS FCG GCR RGMRES BICGSTAB ! Iterative method: BiCGSTAB BiCGSTABL BiCG CG CGS FCG GCR RGMRES
2 ! ISTOPC 2 ! ISTOPC

Loading…
Cancel
Save