Refactoring private queues, still not working

3 years ago · b5e52d31f5
parent deab695294
commit b5e52d31f5
4 changed files with 48 additions and 56 deletions
--- a/amgprec/impl/aggregator/MatchBoxPC.h
+++ b/amgprec/impl/aggregator/MatchBoxPC.h
@ -179,7 +179,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
                        MilanLongInt StartIndex, MilanLongInt EndIndex,
                        MilanLongInt* numGhostEdgesPtr,
                        MilanLongInt* numGhostVerticesPtr,
-                        MilanLongInt* insertMePtr,
                        MilanLongInt* S,
                        MilanLongInt* verLocInd,
                        MilanLongInt* verLocPtr,
@ -196,7 +195,12 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
                        vector<MilanLongInt>& QMsgType,
                        vector<MilanInt>& QOwner,
                        MilanLongInt* &candidateMate,
-                        staticQueue& U
+                        staticQueue& U,
+                        staticQueue& privateU,
+                        staticQueue& privateQLocalVtx,
+                        staticQueue& privateQGhostVtx,
+                        staticQueue& privateQMsgType,
+                        staticQueue& privateQOwner
                        );

 void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP
--- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp
+++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp
@ -185,7 +185,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
    //Build the Ghost Vertex Set: Vg
    map <MilanLongInt, MilanLongInt> Ghost2LocalMap; //Map each ghost vertex to a local vertex
    vector <MilanLongInt> Counter;  //Store the edge count for each ghost vertex
-    MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe = 0; //Number of Ghost vertices
+    MilanLongInt numGhostVertices = 0, numGhostEdges = 0; //Number of Ghost vertices

 #ifdef PRINT_DEBUG_INFO_
    cout<<"\n("<<myRank<<")About to compute Ghost Vertices..."; fflush(stdout);
@ -218,7 +218,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(

    initialize(NLVer, NLEdge, StartIndex, 
                EndIndex, &numGhostEdges, 
-                &numGhostVertices, &insertMe, &S,
+                &numGhostVertices, &S,
                verLocInd, verLocPtr,
                MateLock, 
                Ghost2LocalMap, Counter,
@ -226,20 +226,27 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
                tempCounter, GMate,
                Message, QLocalVtx,
                QGhostVtx, QMsgType, QOwner, 
-                candidateMate, U);
+                candidateMate, U,
+                privateU,
+                privateQLocalVtx,
+                privateQGhostVtx,
+                privateQMsgType,
+                privateQOwner
+                );
                        
    finishTime = MPI_Wtime();
    *ph0_time = finishTime - startTime; //Time taken for Phase-0: Initialization      

                    
    startTime = MPI_Wtime();
+    

    /////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////// INITIALIZATION /////////////////////////////////////
    /////////////////////////////////////////////////////////////////////////////////////////
    //Compute the Initial Matching Set:

-#pragma omp parallel private(insertMe, k, u, w, v, k1, adj1, adj2, adj11, adj12, heaviestEdgeWt, ghostOwner, privateU, privateMyCard, isEmpty, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4)
+#pragma omp parallel private(k, u, w, v, k1, adj1, adj2, adj11, adj12, heaviestEdgeWt, ghostOwner, privateMyCard, isEmpty) firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(4)
    {
        /*
        * OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B has been split from
@ -272,21 +279,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
         */


-        MilanLongInt size = numGhostVertices; //TODO how can I decide a more meaningfull size?
-        //Fail messages
-        privateQLocalVtx.~staticQueue();
-        privateQGhostVtx.~staticQueue();
-        privateQMsgType.~staticQueue();
-        privateQOwner.~staticQueue();
-        privateU.~staticQueue();
-
-        new(&privateU) staticQueue(NLVer + numGhostVertices); //TODO how can I put a meaningfull size?
-        new(&privateQLocalVtx) staticQueue(size);
-        new(&privateQGhostVtx) staticQueue(size);
-        new(&privateQMsgType) staticQueue(size);
-        new(&privateQOwner) staticQueue(size);
-
-
 #pragma omp for reduction(+: msgInd, NumMessagesBundled, myCard, PCounter[:numProcs]) schedule(static)
        for (v = 0; v < NLVer; v++) {
            //Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
@ -334,8 +326,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
                        assert(ghostOwner != myRank);
                        PCounter[ghostOwner]++;

-
                        //TODO why does it fail if I use a private data structure???
+                        
                        /*
                        privateQLocalVtx.push_back(v + StartIndex);
                        privateQGhostVtx.push_back(w);
@ -351,6 +343,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
                            QMsgType.push_back(REQUEST);
                            QOwner.push_back(ghostOwner);
                        } // end of critical region
+                      

                        if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) {

--- a/amgprec/impl/aggregator/dataStrStaticQueue.h
+++ b/amgprec/impl/aggregator/dataStrStaticQueue.h
@ -80,9 +80,11 @@ class staticQueue
 		MilanLongInt squeueTail;
 		MilanLongInt NumNodes;

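+		//FIXME I had to comment out these declarations in order to make everything work.
+		//		Why? Presumably because the OpenMP firstprivate clause copy-constructs each thread's queue, which needs an accessible copy constructor (to be confirmed).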
 		//Prevent Assignment and Pass by Value:
-		staticQueue(const staticQueue& src);
-		staticQueue& operator=(const staticQueue& rhs);
+		//staticQueue(const staticQueue& src);
+		//staticQueue& operator=(const staticQueue& rhs);

 	public:
 		//Constructors and Destructors
--- a/amgprec/impl/aggregator/initialize.cpp
+++ b/amgprec/impl/aggregator/initialize.cpp
@ -12,7 +12,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
                        MilanLongInt StartIndex, MilanLongInt EndIndex,
                        MilanLongInt* numGhostEdgesPtr,
                        MilanLongInt* numGhostVerticesPtr,
-                        MilanLongInt* insertMePtr,
                        MilanLongInt* S,
                        MilanLongInt* verLocInd,
                        MilanLongInt* verLocPtr,
@ -29,7 +28,12 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
                        vector<MilanLongInt>& QMsgType,
                        vector<MilanInt>& QOwner,
                        MilanLongInt* &candidateMate,
-                        staticQueue& U
+                        staticQueue& U,
+                        staticQueue& privateU,
+                        staticQueue& privateQLocalVtx,
+                        staticQueue& privateQGhostVtx,
+                        staticQueue& privateQMsgType,
+                        staticQueue& privateQOwner
                        )
 {

@ -37,7 +41,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
    MilanLongInt adj1, adj2;
    int i, v, k, w;

-    
    // index that starts with zero to |Vg|  - 1
    map<MilanLongInt, MilanLongInt>::iterator storedAlready;

@ -64,10 +67,9 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
     *
     * Despite the critical region it is still productive to
     * parallelize this for loop because the critical region is executed
-     * only when a ghost edge is found and ghost edges are a minority.
+     * only when a ghost edge is found and ghost edges are a minority,
+     * circa 3.5% during the tests.
     */
-
-        // TODO comments about the reduction
 #pragma omp for reduction(+ : numGhostEdges)
        for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice
            insertMe = verLocInd[i];
@ -90,8 +92,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
            } //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
        } //End of for(ghost vertices)

-
-
        #pragma omp single
        {
            //numGhostEdges = atomicNumGhostEdges;
@ -143,7 +143,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
            /*
             * Not parallelizable
             */
-
            for (i = 0; i < numGhostVertices; i++) { //O(|Ghost Vertices|)
                verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i];
 #ifdef PRINT_DEBUG_INFO_
@ -163,6 +162,10 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
        fflush(stdout);
 #endif

+#ifdef TIME_TRACKER
+        double verGhostIndInitialization = MPI_Wtime();
+#endif
+
        /*
         * OMP verGhostIndInitialization
         *
@ -175,13 +178,8 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
         * Despite the critical region it's still useful to
         * parallelize the for loop because the ghost nodes
         * are a minority hence the critical region is executed
-         * few times.
+         * few times, circa 3.5% of the times in the tests.
         */
-
-#ifdef TIME_TRACKER
-        double verGhostIndInitialization = MPI_Wtime();
-#endif
-
 #pragma omp for nowait schedule(static)
        for (v = 0; v < NLVer; v++) {
            adj1 = verLocPtr[v];   //Vertex Pointer
@ -192,17 +190,14 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
 #pragma omp critical
                    {
                        insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert
-                        verGhostInd[insertMe] = v + StartIndex; //Add the adjacency
                        tempCounter[Ghost2LocalMap[w]]++; //Increment the counter
                    }
+                    verGhostInd[insertMe] = v + StartIndex; //Add the adjacency
                } //End of if((w < StartIndex) || (w > EndIndex))
            } //End of for(k)
        } //End of for (v)
-    
-    }

-    #pragma omp single
-        {
+    } // End of parallel region

 #ifdef TIME_TRACKER
            verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization;
@ -216,11 +211,6 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
            cout<<endl; fflush(stdout);
 #endif

-
-            Message.resize(3, -1);
-            //message_type = 0;
-            //NumMessagesBundled = 0;
-            //ghostOwner = 0;
            try {
                QLocalVtx.reserve(numGhostEdges); //Local Vertex
                QGhostVtx.reserve(numGhostEdges); //Ghost Vertex
@ -232,23 +222,19 @@ inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
                exit(1);
            }

-        } // end of single region
-
 #ifdef PRINT_DEBUG_INFO_
 cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
 #endif

    *numGhostEdgesPtr = numGhostEdges;
    *numGhostVerticesPtr = numGhostVertices;  
-    *insertMePtr = insertMe; 

    //Allocate Data Structures:
    /*
     * candidateMate was a vector and has been replaced with a raw array
-     * there is no point in using the vector (or maybe there is???)
+     * there is no point in using the vector (or maybe there is (???))
     * so I replaced it with an array which is slightly faster
     */
-    //candidateMate = new MilanLongInt[NLVer + numGhostVertices];
    candidateMate = new MilanLongInt[NLVer + numGhostVertices];


@ -267,7 +253,6 @@ cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);

    *S = numGhostVertices; //Initialize S with number of Ghost Vertices

-
    /*
     * Create the Queue Data Structure for the Dominating Set
     *
@ -276,6 +261,14 @@ cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
     * of a staticQueue I had to destroy the previous object and instantiate
     * a new one of the correct size.
     */
-    U.~staticQueue();
    new(&U) staticQueue(NLVer + numGhostVertices);
+
+    //TODO how can I decide a more meaningful size?
+    MilanLongInt size = numGhostVertices;
+
+    new(&privateU) staticQueue(NLVer + numGhostVertices); //TODO how can I put a meaningfull size?
+    new(&privateQLocalVtx) staticQueue(size);
+    new(&privateQGhostVtx) staticQueue(size);
+    new(&privateQMsgType) staticQueue(size);
+    new(&privateQOwner) staticQueue(size);
 }