diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h index ba7cb5c8..9b0218bc 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.h +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -59,7 +59,7 @@ #include #include #include -// #include "matchboxp.h" +#include "omp.h" #include "primitiveDataTypeDefinitions.h" #include "dataStrStaticQueue.h" @@ -175,6 +175,29 @@ inline MilanLongInt computeCandidateMate(MilanLongInt adj1, MilanLongInt* Mate, map &Ghost2LocalMap); +inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt StartIndex, MilanLongInt EndIndex, + MilanLongInt* numGhostEdgesPtr, + MilanLongInt* numGhostVerticesPtr, + MilanLongInt* insertMePtr, + MilanLongInt* verLocInd, + MilanLongInt* verLocPtr, + omp_lock_t* MateLock, + map &Ghost2LocalMap, + vector & Counter, + vector & verGhostPtr, + vector & verGhostInd, + vector & tempCounter, + vector & GMate, + vector& Message, + vector& QLocalVtx, + vector& QGhostVtx, + vector& QMsgType, + vector& QOwner, + MilanLongInt* candidateMate, + staticQueue& U + ); + void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP ( MilanLongInt NLVer, MilanLongInt NLEdge, diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index ea0e460f..dc3606c3 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -4,6 +4,7 @@ #include "isAlreadyMatched.cpp" #include "findOwnerOfGhost.cpp" #include "computeCandidateMate.cpp" +#include "initialize.cpp" // *********************************************************************** // @@ -146,10 +147,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( MilanReal startTime, finishTime; //MilanReal Precision = MPI_Wtick(); //Get the precision of the MPI 
Timer startTime = MPI_Wtime(); - //Get the iterators for the graph: - //vector::iterator verLocPtr = inputSubGraph.getVerPtr_b(); - //vector::iterator verLocInd = inputSubGraph.getVerInd_b(); - //vector::iterator edgeLocWeight = inputSubGraph.getEdgeWt_b(); //Data structures for sending and receiving messages: vector Message; // [ u, v, message_type ] @@ -171,9 +168,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( PCounter[i] = 0; - MilanLongInt NumMessagesBundled; - MilanInt ghostOwner; // Changed by Fabio to be an integer, addresses needs to be integers! - //vector candidateMate; + MilanLongInt NumMessagesBundled = 0; + MilanInt ghostOwner = 0; // Changed by Fabio to be an integer, addresses needs to be integers! MilanLongInt* candidateMate = new MilanLongInt[1]; #ifdef PRINT_DEBUG_INFO_ cout<<"\n("< Ghost2LocalMap; //Map each ghost vertex to a local vertex - // index that starts with zero to |Vg| - 1 - map::iterator storedAlready; vector Counter; //Store the edge count for each ghost vertex MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe = 0; //Number of Ghost vertices @@ -200,17 +194,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( if (myRank == 0) cout<<"\n("< verGhostPtr, verGhostInd, tempCounter; @@ -232,182 +215,37 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( //Declare the locks omp_lock_t MateLock[NLVer]; -#ifdef TIME_TRACKER - double Ghost2LocalInitialization = MPI_Wtime(); -#endif - -#pragma omp parallel private(insertMe, k, u, w, v, k1, adj1, adj2, adj11, adj12, heaviestEdgeWt, ghostOwner, privateU, privateMyCard, isEmpty, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4) - { - - //Initialize the locks - //TODO this can be executed as task in parallel with other unparallelizable tasks - //TODO destroy the locks -//#pragma omp for schedule(static) -// for(int i = 0; i < NLVer; i++) -// 
omp_init_lock(&MateLock[i]); - - // TODO comments about the reduction -#pragma omp for reduction(+ : numGhostEdges) - for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice - insertMe = verLocInd[i]; - //cout<<"InsertMe on Process "< EndIndex)) { //Find a ghost - numGhostEdges++; -#pragma omp critical - { - storedAlready = Ghost2LocalMap.find(insertMe); - if (storedAlready != Ghost2LocalMap.end()) { //Has already been added - //cout<<"Process "<first<<" - "<second<second]++; //Increment the counter - } else { //Insert an entry for the ghost: - //cout<<"Process "<second<<" - "<first<<" : "<second]< 0 ) - cout< EndIndex)) { //Find a ghost -#pragma omp critical - { - insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert - verGhostInd[insertMe] = v + StartIndex; //Add the adjacency - tempCounter[Ghost2LocalMap[w]]++; //Increment the counter - } - } //End of if((w < StartIndex) || (w > EndIndex)) - } //End of for(k) - } //End of for (v) +#pragma omp parallel private(insertMe, k, u, w, v, k1, adj1, adj2, adj11, adj12, heaviestEdgeWt, ghostOwner, privateU, privateMyCard, isEmpty, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4) + { #pragma omp single - { - -#ifdef TIME_TRACKER - verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization; - fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization); -#endif - -#ifdef PRINT_DEBUG_INFO_ - cout<<"\n("< +#include +#include +#include +#include +#include "primitiveDataTypeDefinitions.h" +#include "dataStrStaticQueue.h" +#include "omp.h" + +inline void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt StartIndex, MilanLongInt EndIndex, + MilanLongInt* numGhostEdgesPtr, + MilanLongInt* numGhostVerticesPtr, + MilanLongInt* insertMePtr, + MilanLongInt* verLocInd, + MilanLongInt* verLocPtr, + omp_lock_t* MateLock, + map &Ghost2LocalMap, + vector & 
Counter, + vector & verGhostPtr, + vector & verGhostInd, + vector & tempCounter, + vector & GMate, + vector& Message, + vector& QLocalVtx, + vector& QGhostVtx, + vector& QMsgType, + vector& QOwner, + MilanLongInt* candidateMate, + staticQueue& U + ) +{ + + MilanLongInt insertMe = 0, numGhostEdges = 0, numGhostVertices = 0; + MilanLongInt adj1, adj2; + int i, v, k, w; + + + // index that starts with zero to |Vg| - 1 + map::iterator storedAlready; + +#pragma omp parallel private(insertMe, k, w, v, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4) + { + + //Initialize the locks + //TODO this can be executed as task in parallel with other unparallelizable tasks + //TODO destroy the locks +#pragma omp for schedule(static) + for(i = 0; i < NLVer; i++) + omp_init_lock(&MateLock[i]); + + +#ifdef TIME_TRACKER + double Ghost2LocalInitialization = MPI_Wtime(); +#endif + + /* + * OMP Ghost2LocalInitialization + * The cycle analyzes all the edges and when it finds a ghost edge + * puts it in the Ghost2LocalMap. + * A critical region is needed when inserting data in the map. + * + * Despite the critical region it is still productive to + * parallelize this for because the critical region is executed + * only when a ghost edge is found and ghost edges are a minority. 
+ */ + + // TODO comments about the reduction +#pragma omp for reduction(+ : numGhostEdges) + for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice + insertMe = verLocInd[i]; + //cout<<"InsertMe on Process "< EndIndex)) { //Find a ghost + numGhostEdges++; +#pragma omp critical + { + storedAlready = Ghost2LocalMap.find(insertMe); + if (storedAlready != Ghost2LocalMap.end()) { //Has already been added + //cout<<"Process "<first<<" - "<second<second]++; //Increment the counter + } else { //Insert an entry for the ghost: + //cout<<"Process "<second<<" - "<first<<" : "<second]< 0 ) + cout< EndIndex)) { //Find a ghost +#pragma omp critical + { + insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert + verGhostInd[insertMe] = v + StartIndex; //Add the adjacency + tempCounter[Ghost2LocalMap[w]]++; //Increment the counter + } + } //End of if((w < StartIndex) || (w > EndIndex)) + } //End of for(k) + } //End of for (v) + + } + + #pragma omp single + { + +#ifdef TIME_TRACKER + verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization; + fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization); +#endif + +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<