diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h index 530933e5..88e205ba 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.h +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -64,6 +64,7 @@ #include "dataStrStaticQueue.h" using namespace std; +#define NUM_THREAD 4 #ifdef __cplusplus extern "C" @@ -203,6 +204,13 @@ extern "C" staticQueue &privateQMsgType, staticQueue &privateQOwner); + inline void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanInt myRank, + MilanReal *edgeLocWeight, + MilanLongInt *candidateMate); + void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( MilanLongInt NLVer, MilanLongInt NLEdge, MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanReal *edgeLocWeight, diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index d470b1ab..980824aa 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -5,6 +5,7 @@ #include "findOwnerOfGhost.cpp" #include "computeCandidateMate.cpp" #include "initialize.cpp" +#include "parallelComputeCandidateMateB.cpp" // *********************************************************************** // @@ -258,26 +259,22 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( ///////////////////////////////////////////////////////////////////////////////////////// // Compute the Initial Matching Set: + /* + * OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B has been splitted from + * PARALLEL_PROCESS_EXPOSED_VERTEX_B in order to better parallelize + * the two. + * PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel. 
+ */ + + PARALLEL_COMPUTE_CANDIDATE_MATE_B(NLVer, + verLocPtr, + verLocInd, + myRank, + edgeLocWeight, + candidateMate); + #pragma omp parallel private(k, u, w, v, k1, adj1, adj2, adj11, adj12, heaviestEdgeWt, ghostOwner, privateMyCard, isEmpty) firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) num_threads(4) { - /* - * OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B has been splitted from - * PARALLEL_PROCESS_EXPOSED_VERTEX_B in order to better parallelize - * the two. - * In particular PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel. - */ - -#pragma omp for schedule(static) - for (v = 0; v < NLVer; v++) - { -#ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Processing: " << v + StartIndex << endl; - fflush(stdout); -#endif - // Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) - candidateMate[v] = firstComputeCandidateMate(verLocPtr[v], verLocPtr[v + 1], verLocInd, edgeLocWeight); - // End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) - } /* * PARALLEL_PROCESS_EXPOSED_VERTEX_B @@ -476,7 +473,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( } } -#pragma omp master +#pragma omp single { tempCounter.clear(); // Do not need this any more } @@ -715,11 +712,11 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( candidateMate[NLVer + Ghost2LocalMap[v]] = -1; if (v != Mate[u - StartIndex]) { // u is local - // Build the Message Packet: - // Message[0] = u; //LOCAL - // Message[1] = v; //GHOST - // Message[2] = SUCCESS; //TYPE - // Send a Request (Asynchronous) + // Build the Message Packet: + // Message[0] = u; //LOCAL + // Message[1] = v; //GHOST + // Message[2] = SUCCESS; //TYPE + // Send a Request (Asynchronous) #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")Sending a success message: "; diff --git a/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp b/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp new file mode 100644 index 
00000000..ced93456
--- /dev/null
+++ b/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp
@@ -0,0 +1,53 @@
+#include "MatchBoxPC.h"
+// NOTE(review): the names of the five system headers below were missing from
+// the reviewed patch text; they are restored here on the assumption that they
+// match the set this code needs (cout, fflush) -- confirm against the repository.
+#include <stdio.h>
+#include <iostream>
+#include <assert.h>
+#include <map>
+#include <vector>
+#include "primitiveDataTypeDefinitions.h"
+#include "dataStrStaticQueue.h"
+#include "omp.h"
+
+/**
+ * Fill candidateMate[v] for every local vertex v in [0, NLVer).
+ *
+ * Each iteration calls firstComputeCandidateMate() with the pair
+ * (verLocPtr[v], verLocPtr[v + 1]) and stores the result in candidateMate[v].
+ * Iterations write to distinct slots of candidateMate, so the loop is
+ * distributed over an OpenMP team without further synchronization.
+ * NOTE(review): this presumes firstComputeCandidateMate() only reads its
+ * arguments -- confirm against its definition.
+ *
+ * @param NLVer          number of local vertices (loop trip count)
+ * @param verLocPtr      per-vertex bounds handed to the helper; entries
+ *                       0..NLVer are read
+ * @param verLocInd      forwarded unchanged to firstComputeCandidateMate()
+ * @param myRank         only used in the PRINT_DEBUG_INFO_ output
+ * @param edgeLocWeight  forwarded unchanged to firstComputeCandidateMate()
+ * @param candidateMate  output array; entries 0..NLVer-1 are overwritten
+ */
+inline void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer,
+                                              MilanLongInt *verLocPtr,
+                                              MilanLongInt *verLocInd,
+                                              MilanInt myRank,
+                                              MilanReal *edgeLocWeight,
+                                              MilanLongInt *candidateMate)
+{
+    // The loop index is declared in the for statement, which makes it private
+    // to each thread. NUM_THREAD comes from MatchBoxPC.h so the team size is
+    // configured in one place instead of a literal 4.
+#pragma omp parallel for schedule(static) default(shared) num_threads(NUM_THREAD)
+    for (MilanLongInt v = 0; v < NLVer; v++)
+    {
+#ifdef PRINT_DEBUG_INFO_
+        // StartIndex is not visible in this function, so the local vertex
+        // index is reported (the previous code did not compile here).
+        cout << "\n(" << myRank << ")Processing: " << v << endl;
+        fflush(stdout);
+#endif
+        candidateMate[v] = firstComputeCandidateMate(verLocPtr[v], verLocPtr[v + 1], verLocInd, edgeLocWeight);
+    }
+}