PARALLEL_COMPUTE_CANDIDATE_MATE_B

omp-walther
StefanoPetrilli 3 years ago
parent a20f0d47e7
commit 25a603debe

@@ -162,7 +162,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
     vector<MilanLongInt> PCounter;
     MilanLongInt NumMessagesBundled;
     MilanInt ghostOwner; // Changed by Fabio to be an integer, addresses needs to be integers!
-    vector<MilanLongInt> candidateMate;
+    //vector<MilanLongInt> candidateMate;
+    MilanLongInt* candidateMate = new MilanLongInt[1];
 #ifdef PRINT_DEBUG_INFO_
     cout<<"\n("<<myRank<<")NV: "<<NLVer<<" Edges: "<<NLEdge; fflush(stdout);
     cout<<"\n("<<myRank<<")StartIndex: "<<StartIndex<<" EndIndex: "<<EndIndex; fflush(stdout);
@@ -210,10 +211,10 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
     double Ghost2LocalInitialization = MPI_Wtime();
 #endif
-#pragma omp parallel private(insertMe, k, adj1, adj2, heaviestEdgeWt) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4)
+#pragma omp parallel private(insertMe, k, adj1, adj2, heaviestEdgeWt, w) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4)
     {
-        // TODO comments about the fking reduction
+        // TODO comments about the reduction
 #pragma omp for reduction(+ : numGhostEdges)
         for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice
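Two things happen in this hunk: w joins the private list, so each thread works on its own copy instead of racing on a shared one, and numGhostEdges is accumulated through a reduction, so each thread sums into a private counter that OpenMP combines when the loop ends. A minimal sketch of both idioms (loop body and sizes are stand-ins):

    #include <omp.h>
    #include <cstdio>

    int main() {
        const long NLEdge = 1000000; // stand-in size
        long numGhostEdges = 0;
        long w = 0; // shared unless listed in private(): a data race waiting to happen

        #pragma omp parallel private(w) default(shared) num_threads(4)
        {
            #pragma omp for reduction(+ : numGhostEdges)
            for (long i = 0; i < NLEdge; i++) {
                w = i % 7;           // safe: each thread writes its own w
                if (w == 0)
                    numGhostEdges++; // per-thread partial sum, combined by OpenMP
            }
        }
        printf("numGhostEdges = %ld\n", numGhostEdges);
        return 0;
    }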
@@ -327,8 +328,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
     double verGhostIndInitialization = MPI_Wtime();
 #endif
-        //TODO why the nowait here fails?
 #pragma omp for nowait
         for (v = 0; v < NLVer; v++) {
             adj1 = verLocPtr[v]; //Vertex Pointer
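The deleted TODO asked why nowait failed here at some point. nowait only removes the implicit barrier at the end of a worksharing loop, so it is safe only when nothing later in the parallel region consumes this loop's writes. A small sketch of the hazard it can introduce (array names are illustrative):

    #include <omp.h>

    int main() {
        const int N = 1024;
        static int a[N], b[N];

        #pragma omp parallel
        {
            #pragma omp for nowait // no barrier: threads proceed immediately
            for (int i = 0; i < N; i++)
                a[i] = i;

            // Hazard: without a barrier a thread may read an element of a[]
            // that another thread has not written yet.
            #pragma omp for
            for (int i = 0; i < N; i++)
                b[i] = a[(i + 1) % N];
        }
        return b[0];
    }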
@@ -383,15 +382,13 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
     cout<<"\n("<<myRank<<")Allocating CandidateMate.. "; fflush(stdout);
 #endif
     //Allocate Data Structures:
-    try {
-        candidateMate.reserve(NLVer + numGhostVertices); //Dominating edge
-    } catch (length_error) {
-        cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n";
-        cout << "Not enough memory to allocate the internal variables \n";
-        exit(1);
-    }
-    //Initialize the Vectors:
-    candidateMate.resize(NLVer + numGhostVertices, -1);
+    /*
+     * candidateMate was a vector and has been replaced with a raw array
+     * there is no point in using the vector (or maybe there is???)
+     * so I replaced it with an array which is slightly faster
+     */
+    delete[] candidateMate;
+    candidateMate = new MilanLongInt[NLVer + numGhostVertices];
     /*
      * Create the Queue Data Structure for the Dominating Set
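One behavioral difference worth noting in this hunk: the old resize(NLVer + numGhostVertices, -1) both sized and initialized the vector, while plain new[] leaves the elements indeterminate. If the algorithm still depends on the -1 sentinel, an explicit fill would be needed after the allocation; a sketch, not part of this commit:

    #include <algorithm>
    #include <cstdint>

    typedef int64_t MilanLongInt; // stand-in typedef, as above

    int main() {
        const long NLVer = 8, numGhostVertices = 2; // stand-in sizes
        MilanLongInt *candidateMate = new MilanLongInt[NLVer + numGhostVertices];

        // new[] does not value-initialize; restore the -1 sentinel that
        // resize(NLVer + numGhostVertices, -1) used to provide.
        std::fill_n(candidateMate, NLVer + numGhostVertices, MilanLongInt(-1));

        delete[] candidateMate;
        return 0;
    }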
@@ -427,13 +424,22 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
         S = numGhostVertices; //Initialize S with number of Ghost Vertices
     } // end of single region
-    } // end of parallel region
-    //#pragma omp for
+    /*
+     * OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B
+     * The next portion of code has been split
+     * to make it 100% parallelized
+     *
+     * TODO: would it make any sense to parallelize also the
+     * inner for?
+     *
+     * TODO: we have false sharing on candidateMate
+     */
+#pragma omp for
     for ( v=0; v < NLVer; v++ ) {
 #ifdef PRINT_DEBUG_INFO_
         cout<<"\n("<<myRank<<")Processing: "<<v+StartIndex<<endl; fflush(stdout);
 #endif
-        //Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
         //Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
         adj1 = verLocPtr[v];
         adj2 = verLocPtr[v + 1];
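On the false-sharing TODO above: adjacent v indices land on the same cache line of candidateMate, so threads writing neighbouring elements keep invalidating each other's lines even though no element is actually shared. A common mitigation is static scheduling with chunks that cover whole cache lines; a sketch assuming a 64-byte line and 8-byte elements:

    #include <omp.h>
    #include <cstdint>

    typedef int64_t MilanLongInt; // stand-in typedef, as above

    int main() {
        const long NLVer = 100000;
        MilanLongInt *candidateMate = new MilanLongInt[NLVer];

        // 64-byte line / 8-byte element = 8 elements per line: chunks that are
        // a multiple of 8 keep each cache line written by exactly one thread.
        #pragma omp parallel for schedule(static, 8)
        for (long v = 0; v < NLVer; v++)
            candidateMate[v] = -1; // stand-in for the real candidate-mate computation

        delete[] candidateMate;
        return 0;
    }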
@@ -452,25 +458,25 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
                 ((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k]))) {
                 heaviestEdgeWt = edgeLocWeight[k];
                 w = verLocInd[k];
             }
         } //End of for loop
         candidateMate[v] = w;
-        // }
-    /*
+        //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
+    }
+    } // end of parallel region
     for ( v=0; v < NLVer; v++ ) {
+        //Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
 #ifdef PRINT_DEBUG_INFO_
         cout<<"\n("<<myRank<<")Processing: "<<v+StartIndex<<endl; fflush(stdout);
 #endif
-        //Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
-        //Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
-        adj1 = verLocPtr[v];
-        adj2 = verLocPtr[v + 1];
         w = candidateMate[v];
-    */
-        //#pragma omp critical
-        // {
-        //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
 #ifdef PRINT_DEBUG_INFO_
         cout<<"\n("<<myRank<<")"<<v+StartIndex<<" Points to: "<<w; fflush(stdout);
 #endif
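For reference, the loop body being parallelized here is PARALLEL_COMPUTE_CANDIDATE_MATE_B: a linear scan for the locally dominating edge, i.e. the heaviest incident edge, with ties broken toward the larger neighbour index. A standalone sketch with simplified types, omitting the already-matched-vertex checks the real code performs:

    #include <vector>

    // Return v's candidate mate: the neighbour reached through the heaviest
    // incident edge in [adj1, adj2), ties broken toward the larger index.
    long computeCandidateMateB(long adj1, long adj2,
                               const std::vector<double> &edgeLocWeight,
                               const std::vector<long> &verLocInd) {
        long w = -1;
        double heaviestEdgeWt = -1.0; // the real code starts from MilanRealMin
        for (long k = adj1; k < adj2; k++) {
            if ((edgeLocWeight[k] > heaviestEdgeWt) ||
                ((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k]))) {
                heaviestEdgeWt = edgeLocWeight[k];
                w = verLocInd[k];
            }
        }
        return w;
    }

    int main() {
        std::vector<double> wts = {1.0, 3.0, 3.0};
        std::vector<long> ind = {5, 2, 7};
        return computeCandidateMateB(0, 3, wts, ind) == 7 ? 0 : 1; // tie goes to index 7
    }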
