#include "MatchBoxPC.h" void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, MilanLongInt StartIndex, MilanLongInt EndIndex, MilanLongInt *numGhostEdges, MilanLongInt *numGhostVertices, MilanLongInt *S, MilanLongInt *verLocInd, MilanLongInt *verLocPtr, map &Ghost2LocalMap, vector &Counter, vector &verGhostPtr, vector &verGhostInd, vector &tempCounter, vector &GMate, vector &Message, vector &QLocalVtx, vector &QGhostVtx, vector &QMsgType, vector &QOwner, MilanLongInt *&candidateMate, vector &U, vector &privateU, vector &privateQLocalVtx, vector &privateQGhostVtx, vector &privateQMsgType, vector &privateQOwner) { MilanLongInt insertMe = 0; MilanLongInt adj1, adj2; int i, v, k, w; // index that starts with zero to |Vg| - 1 map::iterator storedAlready; #pragma omp parallel private(insertMe, k, w, v, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(NUM_THREAD) { #pragma omp single { #ifdef TIME_TRACKER double Ghost2LocalInitialization = MPI_Wtime(); #endif /* * OMP Ghost2LocalInitialization * This loop analyzes all the edges and when finds a ghost edge * puts it in the Ghost2LocalMap. * A critical region is needed when inserting data in the map. * * Despite the critical region it is still productive to * parallelize this cycle because the critical region is exeuted * only when a ghost edge is found and ghost edges are a minority, * circa 3.5% during the tests. */ #pragma omp task depend(out \ : *numGhostEdges, Counter, Ghost2LocalMap, insertMe, storedAlready, *numGhostVertices) { #pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ \ : numGhostEdges[:1]) for (i = 0; i < NLEdge; i++) { // O(m) - Each edge stored twice insertMe = verLocInd[i]; if ((insertMe < StartIndex) || (insertMe > EndIndex)) { // Find a ghost (*numGhostEdges)++; #pragma omp critical { storedAlready = Ghost2LocalMap.find(insertMe); if (storedAlready != Ghost2LocalMap.end()) { // Has already been added Counter[storedAlready->second]++; // Increment the counter } else { // Insert an entry for the ghost: Ghost2LocalMap[insertMe] = *numGhostVertices; // Add a map entry Counter.push_back(1); // Initialize the counter (*numGhostVertices)++; // Increment the number of ghost vertices } // End of else() } } // End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) ) } // End of for(ghost vertices) } // end of task depend // *numGhostEdges = atomicNumGhostEdges; #ifdef TIME_TRACKER Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization; fprintf(stderr, "Ghost2LocalInitialization time: %f\n", Ghost2LocalInitialization); #endif #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")NGhosts:" << *numGhostVertices << " GhostEdges: " << *numGhostEdges; if (!Ghost2LocalMap.empty()) { cout << "\n(" << myRank << ")Final Map : on process "; cout << "\n(" << myRank << ")Key \t Value \t Counter \n"; fflush(stdout); storedAlready = Ghost2LocalMap.begin(); do { cout << storedAlready->second << " - " << storedAlready->first << " : " << Counter[storedAlready->second] << endl; fflush(stdout); storedAlready++; } while (storedAlready != Ghost2LocalMap.end()); } #endif #pragma omp task depend(out \ : verGhostPtr, tempCounter, verGhostInd, GMate) depend(in \ : *numGhostVertices, *numGhostEdges) { // Initialize adjacency Lists for Ghost Vertices: try { verGhostPtr.reserve(*numGhostVertices + 1); // Pointer Vector tempCounter.reserve(*numGhostVertices); // Pointer Vector verGhostInd.reserve(*numGhostEdges); // Index Vector GMate.reserve(*numGhostVertices); // Ghost Mate Vector } catch (length_error) { cout 
<< "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; cout << "Not enough memory to allocate the internal variables \n"; exit(1); } // Initialize the Vectors: verGhostPtr.resize(*numGhostVertices + 1, 0); // Pointer Vector tempCounter.resize(*numGhostVertices, 0); // Temporary Counter verGhostInd.resize(*numGhostEdges, -1); // Index Vector GMate.resize(*numGhostVertices, -1); // Temporary Counter verGhostPtr[0] = 0; // The first value #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")Ghost Vertex Pointer: "; fflush(stdout); #endif } // End of task #pragma omp task depend(out \ : verGhostPtr) depend(in \ : Counter, *numGhostVertices) { #ifdef TIME_TRACKER double verGhostPtrInitialization = MPI_Wtime(); #endif for (i = 0; i < *numGhostVertices; i++) { // O(|Ghost Vertices|) verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i]; #ifdef PRINT_DEBUG_INFO_ cout << verGhostPtr[i] << "\t"; fflush(stdout); #endif } #ifdef TIME_TRACKER verGhostPtrInitialization = MPI_Wtime() - verGhostPtrInitialization; fprintf(stderr, "verGhostPtrInitialization time: %f\n", verGhostPtrInitialization); #endif } // End of task #ifdef PRINT_DEBUG_INFO_ if (*numGhostVertices > 0) cout << verGhostPtr[*numGhostVertices] << "\n"; fflush(stdout); #endif #ifdef TIME_TRACKER double verGhostIndInitialization = MPI_Wtime(); #endif /* * OMP verGhostIndInitialization * * In this cycle the verGhostInd is initialized * with the datas related to ghost edges. * The check to see if a node is a ghost node is * executed in paralle and when a ghost node * is found a critical region is started. * * Despite the critical region it's still useful to * parallelize the for cause the ghost nodes * are a minority hence the critical region is executed * few times, circa 3.5% of the times in the tests. */ #pragma omp task depend(in \ : insertMe, Ghost2LocalMap, tempCounter, verGhostPtr) depend(out \ : verGhostInd) { #pragma omp taskloop num_tasks(NUM_THREAD) for (v = 0; v < NLVer; v++) { adj1 = verLocPtr[v]; // Vertex Pointer adj2 = verLocPtr[v + 1]; for (k = adj1; k < adj2; k++) { w = verLocInd[k]; // Get the adjacent vertex if ((w < StartIndex) || (w > EndIndex)) { // Find a ghost #pragma omp critical { insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; // Where to insert tempCounter[Ghost2LocalMap[w]]++; // Increment the counter } verGhostInd[insertMe] = v + StartIndex; // Add the adjacency } // End of if((w < StartIndex) || (w > EndIndex)) } // End of for(k) } // End of for (v) } // end of tasklopp #ifdef TIME_TRACKER verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization; fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization); #endif #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")Ghost Vertex Index: "; for (v = 0; v < *numGhostEdges; v++) cout << verGhostInd[v] << "\t"; cout << endl; fflush(stdout); #endif #pragma omp task depend(in \ : *numGhostEdges) depend(out \ : QLocalVtx, QGhostVtx, QMsgType, QOwner) { try { QLocalVtx.reserve(*numGhostEdges); // Local Vertex QGhostVtx.reserve(*numGhostEdges); // Ghost Vertex QMsgType.reserve(*numGhostEdges); // Message Type (Request/Failure) QOwner.reserve(*numGhostEdges); // Owner of the ghost: COmpute once and use later } catch (length_error) { cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; cout << "Not enough memory to allocate the internal variables \n"; exit(1); } } // end of task #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")Allocating CandidateMate.. 
"; fflush(stdout); #endif #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << "=========================************===============================" << endl; fflush(stdout); fflush(stdout); #endif #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl; fflush(stdout); fflush(stdout); #endif #ifdef DEBUG_HANG_ if (myRank == 0) cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl; fflush(stdout); #endif #pragma omp task depend(in \ : *numGhostVertices) depend(out \ : candidateMate, S, U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) { // Allocate Data Structures: /* * candidateMate was a vector and has been replaced with an array * there is no point in using the vector (or maybe there is (???)) * so I replaced it with an array wich is slightly faster */ candidateMate = new MilanLongInt[NLVer + (*numGhostVertices)]; *S = (*numGhostVertices); // Initialize S with number of Ghost Vertices /* * Create the Queue Data Structure for the Dominating Set * * I had to declare the staticuQueue U before the parallel region * to have it in the correct scope. Since we can't change the dimension * of a staticQueue I had to destroy the previous object and instantiate * a new one of the correct size. */ //new (&U) staticQueue(NLVer + (*numGhostVertices)); U.reserve(NLVer + (*numGhostVertices)); // Initialize the private vectors privateQLocalVtx.reserve(*numGhostVertices); privateQGhostVtx.reserve(*numGhostVertices); privateQMsgType.reserve(*numGhostVertices); privateQOwner.reserve(*numGhostVertices); privateU.reserve(*numGhostVertices); } // end of task } // End of single region } // End of parallel region }