From b2230a6d6d87e7098c59655f435ef2a6c4319751 Mon Sep 17 00:00:00 2001 From: StefanoPetrilli Date: Mon, 13 Jun 2022 16:09:00 -0500 Subject: [PATCH 1/3] Improved critical region U --- ...istEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index dee2a019..966b86a2 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -841,9 +841,13 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( } //End of If( v != Mate[u] ) } //End of critical region } //End of Else //A Ghost Vertex + } //End of For Loop adj(u) + } //End of if ( (u >= StartIndex) && (u <= EndIndex) ) //Process Only If a Local Vertex + //Avoid to ask for the critical section if there is nothing to add + if(privateU.empty()) continue; #pragma omp critical(U) { while(!privateU.empty()) { @@ -852,7 +856,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( myCard += privateMyCard; } //End of critical U - } //End of while ( /*!Q.empty()*/ !U.empty() ) #pragma omp critical(privateMsg) From bf35c1659b4f8aa7b9ebd635a6d39fe19f37654f Mon Sep 17 00:00:00 2001 From: StefanoPetrilli Date: Mon, 13 Jun 2022 16:53:12 -0500 Subject: [PATCH 2/3] Further improved critical region U --- ...mEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 38 +++++++++++++++---- samples/advanced/pdegen/runs/amg_pde3d.inp | 2 +- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index 966b86a2..82ca4c44 100644 --- 
a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -72,6 +72,8 @@ Statistics: ph1_card, ph2_card : Size: |P| number of processes in the comm-world (number of matched edges in Phase 1 and Phase 2) */ +#define UCHUNK 1000 + #ifdef SERIAL_MPI #else //MPI type map @@ -658,23 +660,41 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( MilanLongInt localVertices = 0; #endif - while( true ) - { + //TODO what would be the optimal UCHUNK + vector Us; + Us.reserve(UCHUNK); + + while( true ) { + Us.clear(); #pragma omp critical(U) { - if (U.empty()) isEmpty = true; - else u = U.pop_front(); + //If U is empty and there are no new nodes to add to U + if (U.empty() && privateU.empty()) + isEmpty = true; + else { + if (U.empty() && !privateU.empty()) // If U is empty but there are nodes in private U + while (!privateU.empty()) { + U.push_back(privateU.pop_front()); + myCard += privateMyCard; + } + for (int i = 0; i < UCHUNK; i++) { // Pop the new nodes + if (U.empty()) break; + Us.push_back(U.pop_front()); + } + } } // End of critical U if (isEmpty) break; + for (MilanLongInt u : Us) + { #ifdef PRINT_DEBUG_INFO_ cout<<"\n("<= StartIndex) && (u <= EndIndex) ) { //Process Only the Local Vertices + if ((u >= StartIndex) && (u <= EndIndex)) { //Process Only the Local Vertices #ifdef COUNT_LOCAL_VERTEX - localVertices ++; + localVertices ++; #endif //Get the Adjacency list for u @@ -847,15 +867,17 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( } //End of if ( (u >= StartIndex) && (u <= EndIndex) ) //Process Only If a Local Vertex //Avoid to ask for the critical section if there is nothing to add - if(privateU.empty()) continue; + if (privateU.size() < UCHUNK && !U.empty()) continue; #pragma omp critical(U) { - while(!privateU.empty()) { + while (!privateU.empty()) { U.push_back(privateU.pop_front()); } myCard += 
privateMyCard; } //End of critical U + + } } //End of while ( /*!Q.empty()*/ !U.empty() ) #pragma omp critical(privateMsg) diff --git a/samples/advanced/pdegen/runs/amg_pde3d.inp b/samples/advanced/pdegen/runs/amg_pde3d.inp index bdacc992..eb254780 100644 --- a/samples/advanced/pdegen/runs/amg_pde3d.inp +++ b/samples/advanced/pdegen/runs/amg_pde3d.inp @@ -1,6 +1,6 @@ %%%%%%%%%%% General arguments % Lines starting with % are ignored. CSR ! Storage format CSR COO JAD -0123 ! IDIM; domain size. Linear system size is IDIM**3 +0080 ! IDIM; domain size. Linear system size is IDIM**3 CONST ! PDECOEFF: CONST, EXP, GAUSS Coefficients of the PDE BICGSTAB ! Iterative method: BiCGSTAB BiCGSTABL BiCG CG CGS FCG GCR RGMRES 2 ! ISTOPC From 6fd571ecb2d30a037c668019553e707664e8270e Mon Sep 17 00:00:00 2001 From: StefanoPetrilli Date: Tue, 14 Jun 2022 14:33:31 -0500 Subject: [PATCH 3/3] Lock error --- ...mEdgesLinearSearchMesgBndlSmallMateCMP.cpp | 48 +++++++++++++++---- samples/advanced/pdegen/runs/amg_pde3d.inp | 2 +- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index 82ca4c44..c9568a9f 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -225,6 +225,9 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( MilanInt BufferSize; MilanLongInt *Buffer; bool isEmpty; + + //Declare the locks + omp_lock_t MateLock[NLVer]; #ifdef TIME_TRACKER double Ghost2LocalInitialization = MPI_Wtime(); #endif @@ -232,8 +235,14 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( #pragma omp parallel private(insertMe, k, u, w, v, k1, adj1, adj2, adj11, adj12, heaviestEdgeWt, ghostOwner, privateU, privateMyCard, isEmpty, privateQLocalVtx, 
privateQGhostVtx, privateQMsgType, privateQOwner) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4) { - // TODO comments about the reduction + //Initialize the locks + //TODO this can be executed as task in parallel with other unparallelizable tasks + //TODO destroy the locks +#pragma omp for schedule(static) + for(int i = 0; i < NLVer; i++) + omp_init_lock(&MateLock[i]); + // TODO comments about the reduction #pragma omp for reduction(+ : numGhostEdges) for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice insertMe = verLocInd[i]; @@ -704,7 +713,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( v = verLocInd[k]; if ((v >= StartIndex) && (v <= EndIndex)) { //If Local Vertex: -#pragma omp critical +#pragma omp critical(innerProcessMatched) { #ifdef PRINT_DEBUG_INFO_ @@ -712,11 +721,14 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( fflush(stdout); #endif + //If the current vertex is pointing to a matched vertex and is not matched //FIXME is there a way to make candidateMate private? // for the moment it could generate an error. if (not isAlreadyMatched(v, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap) and candidateMate[v - StartIndex] == u) { + + //Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) //Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) w = computeCandidateMate(verLocPtr[v - StartIndex], @@ -737,6 +749,17 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( #endif //If found a dominating edge: if (w >= 0) { + + //TODO is it possible to lock without a critical region? 
+ //TODO there must be a more elegant and efficient way to do this + while(true) { + if (omp_test_lock(&MateLock[v - StartIndex])) { + if (omp_test_lock(&MateLock[w - StartIndex])) break; + else omp_unset_lock(&MateLock[v - StartIndex]); + } + } + + if ((w < StartIndex) || (w > EndIndex)) { //A ghost #ifdef PRINT_DEBUG_INFO_ cout<<"\n("<=0) else { adj11 = verLocPtr[v - StartIndex]; @@ -798,11 +825,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( for (k1 = adj11; k1 < adj12; k1++) { w = verLocInd[k1]; if ((w < StartIndex) || (w > EndIndex)) { //A ghost - //Build the Message Packet: - //Message[0] = v; //LOCAL - //Message[1] = w; //GHOST - //Message[2] = FAILURE; //TYPE - //Send a Request (Asynchronous) + #ifdef PRINT_DEBUG_INFO_ cout<<"\n("<= StartIndex) && (v <= EndIndex) ) //If Local Vertex: else { //Neighbor is a ghost vertex -#pragma omp critical +#pragma omp critical(innerProcessMatched) { + + while(!omp_test_lock(&MateLock[u - StartIndex])); + if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) candidateMate[NLVer + Ghost2LocalMap[v]] = -1; if (v != Mate[u - StartIndex]) { //u is local @@ -859,6 +886,9 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( NumMessagesBundled++; msgInd++; } //End of If( v != Mate[u] ) + + omp_unset_lock(&MateLock[u - StartIndex]); + } //End of critical region } //End of Else //A Ghost Vertex diff --git a/samples/advanced/pdegen/runs/amg_pde3d.inp b/samples/advanced/pdegen/runs/amg_pde3d.inp index eb254780..904b6551 100644 --- a/samples/advanced/pdegen/runs/amg_pde3d.inp +++ b/samples/advanced/pdegen/runs/amg_pde3d.inp @@ -1,6 +1,6 @@ %%%%%%%%%%% General arguments % Lines starting with % are ignored. CSR ! Storage format CSR COO JAD -0080 ! IDIM; domain size. Linear system size is IDIM**3 +020 ! IDIM; domain size. Linear system size is IDIM**3 CONST ! PDECOEFF: CONST, EXP, GAUSS Coefficients of the PDE BICGSTAB ! Iterative method: BiCGSTAB BiCGSTABL BiCG CG CGS FCG GCR RGMRES 2 ! ISTOPC