Extended parallel region after SEND PACKET BUNDLE

Nothing parallelizable found
3 years ago · 532701031e
parent b079d71f30
commit 532701031e
2 changed files with 99 additions and 79 deletions
--- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp
+++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp
@ -213,7 +213,15 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
    MilanLongInt S;
    MilanLongInt privateMyCard = 0;
    staticQueue U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner;
-
+    MilanLongInt myIndex = 0;
+    vector <MilanLongInt> PCumulative, PMessageBundle, PSizeInfoMessages;
+    vector <MPI_Request> SRequest; //Requests that are used for each send message
+    vector <MPI_Status> SStatus;   //Status of sent messages, used in MPI_Wait
+    MilanLongInt MessageIndex = 0; //Pointer for current message
+    MilanInt OneMessageSize = 0;
+    MilanLongInt numMessagesToSend;
+    MilanInt BufferSize;
+    MilanLongInt *Buffer;
    bool isEmpty;
 #ifdef TIME_TRACKER
    double Ghost2LocalInitialization = MPI_Wtime();
@ -868,7 +876,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
              omp_get_thread_num(),
              myRank);
 #endif
-    } // end of parallel region


    ///////////////////////// END OF PROCESS MATCHED VERTICES /////////////////////////
@ -878,9 +885,10 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
    /////////////////////////////////////////////////////////////////////////////////////////
    ///////////////////////////// SEND BUNDLED MESSAGES /////////////////////////////////////
    /////////////////////////////////////////////////////////////////////////////////////////
+#pragma omp barrier
+#pragma omp master
+        {
            //Data structures for Bundled Messages:
-    vector<MilanLongInt> PCumulative, PMessageBundle, PSizeInfoMessages;
-    MilanLongInt myIndex=0;
            try {
                PMessageBundle.reserve(NumMessagesBundled * 3); //Three integers per message
                PCumulative.reserve(numProcs + 1); //Similar to Row Pointer vector in CSR data structure
@ -894,12 +902,17 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
            PCumulative.resize(numProcs + 1, 0); //Only initialize the counter variable
            PSizeInfoMessages.resize(numProcs * 3, 0);

+
            for (MilanInt i = 0; i < numProcs; i++) // Changed by Fabio to be an integer, addresses needs to be integers!
                PCumulative[i + 1] = PCumulative[i] + PCounter[i];
+
+            //OMP not worth parallelizing
            //Reuse PCounter to keep track of how many messages were inserted:
            for (MilanInt i = 0; i < numProcs; i++) // Changed by Fabio to be an integer, addresses needs to be integers!
                PCounter[i] = 0;
            //Build the Message Bundle packet:
+
+            //OMP Not parallelizable
    for (MilanInt i=0; i<NumMessagesBundled; i++) { // Changed by Fabio to be an integer, addresses needs to be integers!
        myIndex = ( PCumulative[QOwner[i]] + PCounter[QOwner[i]] )*3;
        PMessageBundle[myIndex+0] = QLocalVtx[i];
@ -907,10 +920,9 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
        PMessageBundle[myIndex+2] = QMsgType[i];
        PCounter[QOwner[i]]++;
    }
+
    //Send the Bundled Messages: Use ISend
-    vector<MPI_Request> SRequest; //Requests that are used for each send message
-    vector<MPI_Status> SStatus;   //Status of sent messages, used in MPI_Wait
-    MilanLongInt MessageIndex=0; //Pointer for current message
+
            try {
                SRequest.reserve(numProcs * 2); //At most two messages per processor
                SStatus.reserve(numProcs * 2);//At most two messages per processor
@ -923,6 +935,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
            SRequest.resize(numProcs * 2, myReq);
            MPI_Status myStat; //A sample status
            SStatus.resize(numProcs * 2, myStat);
+
            //Send the Messages
            for (MilanInt i = 0; i < numProcs; i++) { // Changed by Fabio to be an integer, addresses needs to be integers!
                if (i == myRank) //Do not send anything to yourself
@ -938,7 +951,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
                fflush(stdout);
 #endif
                if (PSizeInfoMessages[i * 3 + 0] > 0) { //Send only if it is a nonempty packet
-            MPI_Isend(&PSizeInfoMessages[i*3+0], 3, TypeMap<MilanLongInt>(), i, ComputeTag, comm, &SRequest[MessageIndex]);
+                    MPI_Isend(&PSizeInfoMessages[i * 3 + 0], 3, TypeMap<MilanLongInt>(), i, ComputeTag, comm,
+                              &SRequest[MessageIndex]);
                    msgActual++;
                    MessageIndex++;
                    //Now Send the message with the data packet:
@ -949,7 +963,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
                    cout<<endl;
                    fflush(stdout);
 #endif
-            MPI_Isend(&PMessageBundle[PCumulative[i]*3], PSizeInfoMessages[i*3+0], TypeMap<MilanLongInt>(), i, BundleTag, comm, &SRequest[MessageIndex]);
+                    MPI_Isend(&PMessageBundle[PCumulative[i] * 3], PSizeInfoMessages[i * 3 + 0],
+                              TypeMap<MilanLongInt>(), i, BundleTag, comm, &SRequest[MessageIndex]);
                    MessageIndex++;
                } //End of if size > 0
            }
@ -959,6 +974,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
            QGhostVtx.clear();
            QMsgType.clear();
            QOwner.clear();
+
+
 #ifdef PRINT_DEBUG_INFO_
    cout<<"\n("<<myRank<<")Number of Ghost edges = "<<numGhostEdges;
    cout<<"\n("<<myRank<<")Total number of potential message X 2 = "<<numGhostEdges*2;
@ -971,17 +988,17 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(

    //Allocate memory for MPI Send messages:
    /* WILL COME BACK HERE - NO NEED TO STORE ALL THIS MEMORY !! */
-    MilanInt OneMessageSize=0;
+    OneMessageSize=0;
    MPI_Pack_size(3, TypeMap<MilanLongInt>(), comm, &OneMessageSize); //Size of one message packet
    //How many messages to send?
    //Potentially three kinds of messages will be sent/received:
    //Request, Success, Failure.
    //But only two will be sent from a given processor.
    //Substract the number of messages that have already been sent as bundled messages:
-    MilanLongInt numMessagesToSend = numGhostEdges*2 - NumMessagesBundled;
-    MilanInt     BufferSize = (OneMessageSize+MPI_BSEND_OVERHEAD)*numMessagesToSend;
+    numMessagesToSend = numGhostEdges*2 - NumMessagesBundled;
+    BufferSize = (OneMessageSize+MPI_BSEND_OVERHEAD)*numMessagesToSend;

-    MilanLongInt *Buffer=0;
+    Buffer=0;
 #ifdef PRINT_DEBUG_INFO_
    cout<<"\n("<<myRank<<")Size of One Message from PACK= "<<OneMessageSize;
    cout<<"\n("<<myRank<<")Size of Message overhead = "<<MPI_BSEND_OVERHEAD;
@ -1000,6 +1017,9 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
        }
        MPI_Buffer_attach(Buffer, BufferSize); //Attach the Buffer
    }
+        } //End of master
+
+    } // end of parallel region
    ///////////////////////// END OF SEND BUNDLED MESSAGES //////////////////////////////////

    finishTime = MPI_Wtime();
--- a/samples/advanced/pdegen/runs/amg_pde3d.inp
+++ b/samples/advanced/pdegen/runs/amg_pde3d.inp
@ -1,6 +1,6 @@
 %%%%%%%%%%%  General  arguments % Lines starting with % are ignored.
 CSR                         ! Storage format CSR COO JAD
-00123                        ! IDIM; domain size. Linear system size is IDIM**3
+0123                        ! IDIM; domain size. Linear system size is IDIM**3
 CONST                       ! PDECOEFF: CONST, EXP, GAUSS Coefficients of the PDE
 BICGSTAB                    ! Iterative method: BiCGSTAB BiCGSTABL BiCG CG CGS FCG GCR RGMRES
 2                           ! ISTOPC