diff --git a/Make.inc.in b/Make.inc.in index 3638f486..9ac10ee7 100644 --- a/Make.inc.in +++ b/Make.inc.in @@ -75,7 +75,7 @@ CDEFINES=$(AMGCDEFINES) AMGFDEFINES=@AMGFDEFINES@ $(PSBFDEFINES) FDEFINES=$(AMGFDEFINES) -CXXDEFINES=@AMGCXXDEFINES@ +CXXDEFINES=@AMGCXXDEFINES@ $(PSBCXXDEFINES) @COMPILERULES@ diff --git a/amgprec/impl/aggregator/MatchBoxPC.cpp b/amgprec/impl/aggregator/MatchBoxPC.cpp index 37a879be..a43fb2f5 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.cpp +++ b/amgprec/impl/aggregator/MatchBoxPC.cpp @@ -73,8 +73,8 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, #endif // Rimosso per tornare al vecchio matching #define OMP -#ifdef OMP - fprintf(stderr,"Warning: using buggy OpenMP matching!\n"); +#ifdef OPENMP + //fprintf(stderr,"Warning: using buggy OpenMP matching!\n"); dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(NLVer, NLEdge, verLocPtr, verLocInd, edgeLocWeight, verDistance, Mate, diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h index 35cab21d..24fd3134 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.h +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -59,7 +59,7 @@ #include #include #include -#ifdef OMP +#ifdef OPENMP // OpenMP is included and used if and only if the OpenMP version of the matching // is required #include "omp.h" @@ -178,7 +178,7 @@ extern "C" #define MilanRealMin MINUS_INFINITY #endif -#ifdef OMP +#ifdef OPENMP /* These functions are only used in the experimental OMP implementation, if that is disabled there is no reason to actually compile or reference them. */ @@ -431,7 +431,7 @@ is disabled there is no reason to actually compile or reference them. */ #endif -#ifndef OMP +#ifndef OPENMP //Function of find the owner of a ghost vertex using binary search: inline MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, MilanInt myRank, MilanInt numProcs); diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp index b086edad..668ed626 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP // *********************************************************************** // // MatchboxP: A C++ library for approximate weighted matching @@ -126,8 +126,10 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( fflush(stdout); #endif - MilanLongInt StartIndex = verDistance[myRank]; // The starting vertex owned by the current rank - MilanLongInt EndIndex = verDistance[myRank + 1] - 1; // The ending vertex owned by the current rank + // The starting vertex owned by the current rank + MilanLongInt StartIndex = verDistance[myRank]; + // The ending vertex owned by the current rank + MilanLongInt EndIndex = verDistance[myRank + 1] - 1; MPI_Status computeStatus; @@ -145,7 +147,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( // only one message will be sent in the initialization phase - // one of: REQUEST/FAILURE/SUCCESS vector QLocalVtx, QGhostVtx, QMsgType; - vector QOwner; // Changed by Fabio to be an integer, addresses needs to be integers! + // Changed by Fabio to be an integer, addresses needs to be integers! + vector QOwner; MilanLongInt *PCounter = new MilanLongInt[numProcs]; for (int i = 0; i < numProcs; i++) @@ -153,7 +156,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( MilanLongInt NumMessagesBundled = 0; // TODO when the last computational section will be refactored this could be eliminated - MilanInt ghostOwner = 0; // Changed by Fabio to be an integer, addresses needs to be integers! + // Changed by Fabio to be an integer, addresses needs to be integers! + MilanInt ghostOwner = 0; MilanLongInt *candidateMate = nullptr; #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")NV: " << NLVer << " Edges: " << NLEdge; @@ -168,9 +172,12 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( MilanLongInt myCard = 0; // Build the Ghost Vertex Set: Vg - map Ghost2LocalMap; // Map each ghost vertex to a local vertex - vector Counter; // Store the edge count for each ghost vertex - MilanLongInt numGhostVertices = 0, numGhostEdges = 0; // Number of Ghost vertices + // Map each ghost vertex to a local vertex + map Ghost2LocalMap; + // Store the edge count for each ghost vertex + vector Counter; + // Number of Ghost vertices + MilanLongInt numGhostVertices = 0, numGhostEdges = 0; #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")About to compute Ghost Vertices..."; diff --git a/amgprec/impl/aggregator/amg_c_soc1_map_bld.F90 b/amgprec/impl/aggregator/amg_c_soc1_map_bld.F90 index 53892ebc..24720675 100644 --- a/amgprec/impl/aggregator/amg_c_soc1_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_c_soc1_map_bld.F90 @@ -275,7 +275,8 @@ subroutine amg_c_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0) if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) diff --git a/amgprec/impl/aggregator/amg_c_soc2_map_bld.F90 b/amgprec/impl/aggregator/amg_c_soc2_map_bld.F90 index b250e434..57ed8893 100644 --- a/amgprec/impl/aggregator/amg_c_soc2_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_c_soc2_map_bld.F90 @@ -309,7 +309,8 @@ subroutine amg_c_soc2_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in ! if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) diff --git a/amgprec/impl/aggregator/amg_d_soc1_map_bld.F90 b/amgprec/impl/aggregator/amg_d_soc1_map_bld.F90 index 7b95e5cd..200d630c 100644 --- a/amgprec/impl/aggregator/amg_d_soc1_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_d_soc1_map_bld.F90 @@ -275,7 +275,8 @@ subroutine amg_d_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0) if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1_psb_lpk_+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) diff --git a/amgprec/impl/aggregator/amg_d_soc2_map_bld.F90 b/amgprec/impl/aggregator/amg_d_soc2_map_bld.F90 index ef1c992e..e2b7ea0c 100644 --- a/amgprec/impl/aggregator/amg_d_soc2_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_d_soc2_map_bld.F90 @@ -309,8 +309,9 @@ subroutine amg_d_soc2_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in ! if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk - if (itmp < (bnds(kk)-1_psb_lpk_+locnaggr(kk))) then + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk + if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) !$omp end atomic diff --git a/amgprec/impl/aggregator/amg_s_soc1_map_bld.F90 b/amgprec/impl/aggregator/amg_s_soc1_map_bld.F90 index 857c6ff3..0f8bb7dd 100644 --- a/amgprec/impl/aggregator/amg_s_soc1_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_s_soc1_map_bld.F90 @@ -275,7 +275,8 @@ subroutine amg_s_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0) if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) diff --git a/amgprec/impl/aggregator/amg_s_soc2_map_bld.F90 b/amgprec/impl/aggregator/amg_s_soc2_map_bld.F90 index ef7f5707..99047468 100644 --- a/amgprec/impl/aggregator/amg_s_soc2_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_s_soc2_map_bld.F90 @@ -309,7 +309,8 @@ subroutine amg_s_soc2_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in ! if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) diff --git a/amgprec/impl/aggregator/amg_z_soc1_map_bld.F90 b/amgprec/impl/aggregator/amg_z_soc1_map_bld.F90 index 50fe70a2..7961921a 100644 --- a/amgprec/impl/aggregator/amg_z_soc1_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_z_soc1_map_bld.F90 @@ -275,7 +275,8 @@ subroutine amg_z_soc1_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in disjoint = all(ilaggr(icol(1:ip)) == -(nr+1)).or.(ip==0) if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) diff --git a/amgprec/impl/aggregator/amg_z_soc2_map_bld.F90 b/amgprec/impl/aggregator/amg_z_soc2_map_bld.F90 index c6ac226e..35d02fd0 100644 --- a/amgprec/impl/aggregator/amg_z_soc2_map_bld.F90 +++ b/amgprec/impl/aggregator/amg_z_soc2_map_bld.F90 @@ -309,7 +309,8 @@ subroutine amg_z_soc2_map_bld(iorder,theta,clean_zeros,a,desc_a,nlaggr,ilaggr,in ! if (disjoint) then locnaggr(kk) = locnaggr(kk) + 1 - itmp = (bnds(kk)-1+locnaggr(kk))*nths+kk + itmp = (bnds(kk)-1+locnaggr(kk)) !be careful about overflow + itmp = itmp*nths+kk if (itmp < (bnds(kk)-1+locnaggr(kk))) then !$omp atomic update info = max(12345678,info) diff --git a/amgprec/impl/aggregator/clean.cpp b/amgprec/impl/aggregator/clean.cpp index 018469e4..479dcce3 100644 --- a/amgprec/impl/aggregator/clean.cpp +++ b/amgprec/impl/aggregator/clean.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP // TODO comment void clean(MilanLongInt NLVer, diff --git a/amgprec/impl/aggregator/computeCandidateMate.cpp b/amgprec/impl/aggregator/computeCandidateMate.cpp index 39ce8db1..f70b8866 100644 --- a/amgprec/impl/aggregator/computeCandidateMate.cpp +++ b/amgprec/impl/aggregator/computeCandidateMate.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP /** * Execute the research fr the Candidate Mate without controlling if the vertices are already matched. * Returns the vertices with the highest weight diff --git a/amgprec/impl/aggregator/extractUChunk.cpp b/amgprec/impl/aggregator/extractUChunk.cpp index 0986dfb6..4e50a4f3 100644 --- a/amgprec/impl/aggregator/extractUChunk.cpp +++ b/amgprec/impl/aggregator/extractUChunk.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void extractUChunk( vector &UChunkBeingProcessed, vector &U, diff --git a/amgprec/impl/aggregator/findOwnerOfGhost.cpp b/amgprec/impl/aggregator/findOwnerOfGhost.cpp index 81c18822..2723a7a3 100644 --- a/amgprec/impl/aggregator/findOwnerOfGhost.cpp +++ b/amgprec/impl/aggregator/findOwnerOfGhost.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP /// Find the owner of a ghost node: MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, MilanInt myRank, MilanInt numProcs) diff --git a/amgprec/impl/aggregator/initialize.cpp b/amgprec/impl/aggregator/initialize.cpp index 3f0f1a10..2c8f052d 100644 --- a/amgprec/impl/aggregator/initialize.cpp +++ b/amgprec/impl/aggregator/initialize.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, MilanLongInt StartIndex, MilanLongInt EndIndex, MilanLongInt *numGhostEdges, diff --git a/amgprec/impl/aggregator/isAlreadyMatched.cpp b/amgprec/impl/aggregator/isAlreadyMatched.cpp index de5f2f18..16d47a14 100644 --- a/amgprec/impl/aggregator/isAlreadyMatched.cpp +++ b/amgprec/impl/aggregator/isAlreadyMatched.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP /** * //TODO documentation * @param k diff --git a/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp b/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp index f5429bf4..79f253eb 100644 --- a/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp +++ b/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer, MilanLongInt *verLocPtr, MilanLongInt *verLocInd, diff --git a/amgprec/impl/aggregator/processCrossEdge.cpp b/amgprec/impl/aggregator/processCrossEdge.cpp index d7c72d42..45cddb44 100644 --- a/amgprec/impl/aggregator/processCrossEdge.cpp +++ b/amgprec/impl/aggregator/processCrossEdge.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void PROCESS_CROSS_EDGE(MilanLongInt *edge, MilanLongInt *S) { diff --git a/amgprec/impl/aggregator/processExposedVertex.cpp b/amgprec/impl/aggregator/processExposedVertex.cpp index c7ac4703..f869d69c 100644 --- a/amgprec/impl/aggregator/processExposedVertex.cpp +++ b/amgprec/impl/aggregator/processExposedVertex.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, MilanLongInt *candidateMate, MilanLongInt *verLocInd, @@ -113,32 +113,35 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, } // End of if(w >=0) else { - // This piece of code is executed a really small amount of times - adj11 = verLocPtr[v]; - adj12 = verLocPtr[v + 1]; - for (k1 = adj11; k1 < adj12; k1++) { - w = verLocInd[k1]; - if ((w < StartIndex) || (w > EndIndex)) { // A ghost - +#pragma omp critical(adjuse) + { + // This piece of code is executed a really small number of times + adj11 = verLocPtr[v]; + adj12 = verLocPtr[v + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) { // A ghost + #ifdef PRINT_DEBUG_INFO_ - cout << "\n(" << myRank << ")Sending a failure message: "; - cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); - fflush(stdout); + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + fflush(stdout); #endif - (*msgInd)++; - (*NumMessagesBundled)++; - ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); - // assert(ghostOwner != -1); - // assert(ghostOwner != myRank); - PCounter[ghostOwner]++; - - privateQLocalVtx.push_back(v + StartIndex); - privateQGhostVtx.push_back(w); - privateQMsgType.push_back(FAILURE); - privateQOwner.push_back(ghostOwner); - - } // End of if(GHOST) - } // End of for loop + (*msgInd)++; + (*NumMessagesBundled)++; + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); + PCounter[ghostOwner]++; + + privateQLocalVtx.push_back(v + StartIndex); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(FAILURE); + privateQOwner.push_back(ghostOwner); + + } // End of if(GHOST) + } // End of for loop + } } // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) diff --git a/amgprec/impl/aggregator/processMatchedVertices.cpp b/amgprec/impl/aggregator/processMatchedVertices.cpp index d88199a6..77ec34bb 100644 --- a/amgprec/impl/aggregator/processMatchedVertices.cpp +++ b/amgprec/impl/aggregator/processMatchedVertices.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void processMatchedVertices( MilanLongInt NLVer, vector &UChunkBeingProcessed, diff --git a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp index 4a9cfcba..e02dd9c7 100644 --- a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp +++ b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP //#define DEBUG_HANG_ void processMatchedVerticesAndSendMessages( MilanLongInt NLVer, diff --git a/amgprec/impl/aggregator/processMessages.cpp b/amgprec/impl/aggregator/processMessages.cpp index 6ac3f541..dc09cde1 100644 --- a/amgprec/impl/aggregator/processMessages.cpp +++ b/amgprec/impl/aggregator/processMessages.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP //#define DEBUG_HANG_ void processMessages( diff --git a/amgprec/impl/aggregator/queueTransfer.cpp b/amgprec/impl/aggregator/queueTransfer.cpp index e51095da..6171b86d 100644 --- a/amgprec/impl/aggregator/queueTransfer.cpp +++ b/amgprec/impl/aggregator/queueTransfer.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void queuesTransfer(vector &U, vector &privateU, vector &QLocalVtx, @@ -17,8 +17,6 @@ void queuesTransfer(vector &U, U.insert(U.end(), privateU.begin(), privateU.end()); } - privateU.clear(); - #pragma omp critical(sendMessageTransfer) { @@ -28,6 +26,7 @@ void queuesTransfer(vector &U, QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end()); } + privateU.clear(); privateQLocalVtx.clear(); privateQGhostVtx.clear(); privateQMsgType.clear(); diff --git a/amgprec/impl/aggregator/sendBundledMessages.cpp b/amgprec/impl/aggregator/sendBundledMessages.cpp index 919dc7e9..3349ce86 100644 --- a/amgprec/impl/aggregator/sendBundledMessages.cpp +++ b/amgprec/impl/aggregator/sendBundledMessages.cpp @@ -1,5 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OMP +#ifdef OPENMP void sendBundledMessages(MilanLongInt *numGhostEdges, MilanInt *BufferSize, MilanLongInt *Buffer,