diff --git a/amgprec/amg_d_matchboxp_mod.f90 b/amgprec/amg_d_matchboxp_mod.f90 index e19ce617..5e0151ec 100644 --- a/amgprec/amg_d_matchboxp_mod.f90 +++ b/amgprec/amg_d_matchboxp_mod.f90 @@ -724,6 +724,7 @@ contains & vnl, mate, iam, np,ictxt,& & msgis,msgas,msgprc,ph0t,ph1t,ph2t,ph1crd,ph2crd,info,display_inp) if (do_timings) call psb_toc(idx_cmboxp) + if (iam==0) write(0,*) iam,' buildmatching from PMatchBox:', info,ph0t,ph1t,ph2t if (debug) write(0,*) iam,' buildmatching from PMatchBox:', info if (debug_sync) then call psb_max(ictxt,info) diff --git a/amgprec/amg_s_matchboxp_mod.f90 b/amgprec/amg_s_matchboxp_mod.f90 index a7f41c24..04194836 100644 --- a/amgprec/amg_s_matchboxp_mod.f90 +++ b/amgprec/amg_s_matchboxp_mod.f90 @@ -272,9 +272,7 @@ contains write(0,*) 'Impossible: mate(k) > nc' cycle else - if (ilaggr(k) == ilaggr_neginit) then - wk = w(k) widx = w(idx) wmax = max(abs(wk),abs(widx)) diff --git a/amgprec/impl/aggregator/MatchBoxPC.cpp b/amgprec/impl/aggregator/MatchBoxPC.cpp index aa2658ea..c49ce8d4 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.cpp +++ b/amgprec/impl/aggregator/MatchBoxPC.cpp @@ -68,11 +68,12 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, #define TIME_TRACKER - #ifdef TIME_TRACKER - double tmr = MPI_Wtime(); - #endif +#ifdef TIME_TRACKER + double tmr = MPI_Wtime(); +#endif -#if defined(OPENMP) +#if 1 + // defined(OPENMP) //fprintf(stderr,"Warning: using buggy OpenMP matching!\n"); dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(NLVer, NLEdge, verLocPtr, verLocInd, edgeLocWeight, @@ -92,11 +93,11 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, #endif - #ifdef TIME_TRACKER - tmr = MPI_Wtime() - tmr; - fprintf(stderr, "Elaboration time: %f for %ld nodes\n", tmr, NLVer); - #endif - +#ifdef TIME_TRACKER + tmr = MPI_Wtime() - tmr; + fprintf(stderr, "Elaboration time: %f for %ld nodes\n", tmr, NLVer); +#endif + #endif } @@ -114,13 +115,25 @@ void sMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, fprintf(stderr,"MatchBoxPC: rank %d nlver %ld nledge %ld [ %ld %ld ]\n", myRank,NLVer, NLEdge,verDistance[0],verDistance[1]); #endif +#if 1 + // defined(OPENMP) + //fprintf(stderr,"Warning: using buggy OpenMP matching!\n"); + salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(NLVer, NLEdge, + verLocPtr, verLocInd, edgeLocWeight, + verDistance, Mate, + myRank, numProcs, C_comm, + msgIndSent, msgActualSent, msgPercent, + ph0_time, ph1_time, ph2_time, + ph1_card, ph2_card ); +#else salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(NLVer, NLEdge, - verLocPtr, verLocInd, edgeLocWeight, - verDistance, Mate, - myRank, numProcs, C_comm, - msgIndSent, msgActualSent, msgPercent, - ph0_time, ph1_time, ph2_time, - ph1_card, ph2_card ); + verLocPtr, verLocInd, edgeLocWeight, + verDistance, Mate, + myRank, numProcs, C_comm, + msgIndSent, msgActualSent, msgPercent, + ph0_time, ph1_time, ph2_time, + ph1_card, ph2_card ); +#endif #endif } diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h index 24fd3134..f1a7245b 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.h +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -178,264 +178,416 @@ extern "C" #define MilanRealMin MINUS_INFINITY #endif -#ifdef OPENMP /* These functions are only used in the experimental OMP implementation, if that is disabled there is no reason to actually compile or reference them. */ - // Function of find the owner of a ghost vertex using binary search: - MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, - MilanInt myRank, MilanInt numProcs); - - MilanLongInt firstComputeCandidateMate(MilanLongInt adj1, - MilanLongInt adj2, - MilanLongInt *verLocInd, - MilanReal *edgeLocWeight); - - void queuesTransfer(vector &U, - vector &privateU, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - vector &privateQLocalVtx, - vector &privateQGhostVtx, - vector &privateQMsgType, - vector &privateQOwner); - - bool isAlreadyMatched(MilanLongInt node, - MilanLongInt StartIndex, - MilanLongInt EndIndex, - vector &GMate, - MilanLongInt *Mate, - map &Ghost2LocalMap); - - MilanLongInt computeCandidateMate(MilanLongInt adj1, - MilanLongInt adj2, - MilanReal *edgeLocWeight, - MilanLongInt k, - MilanLongInt *verLocInd, - MilanLongInt StartIndex, - MilanLongInt EndIndex, - vector &GMate, - MilanLongInt *Mate, - map &Ghost2LocalMap); - - void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, - MilanLongInt StartIndex, MilanLongInt EndIndex, - MilanLongInt *numGhostEdgesPtr, - MilanLongInt *numGhostVerticesPtr, - MilanLongInt *S, - MilanLongInt *verLocInd, - MilanLongInt *verLocPtr, - map &Ghost2LocalMap, - vector &Counter, - vector &verGhostPtr, - vector &verGhostInd, - vector &tempCounter, - vector &GMate, - vector &Message, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - MilanLongInt *&candidateMate, - vector &U, - vector &privateU, - vector &privateQLocalVtx, - vector &privateQGhostVtx, - vector &privateQMsgType, - vector &privateQOwner); - - void clean(MilanLongInt NLVer, - MilanInt myRank, - MilanLongInt MessageIndex, - vector &SRequest, - vector &SStatus, - MilanInt BufferSize, - MilanLongInt *Buffer, - MilanLongInt msgActual, - MilanLongInt *msgActualSent, - MilanLongInt msgInd, - MilanLongInt *msgIndSent, - MilanLongInt NumMessagesBundled, - MilanReal *msgPercent); - - void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer, - MilanLongInt *verLocPtr, - MilanLongInt *verLocInd, - MilanInt myRank, - MilanReal *edgeLocWeight, - MilanLongInt *candidateMate); - - void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, - MilanLongInt *candidateMate, - MilanLongInt *verLocInd, - MilanLongInt *verLocPtr, - MilanLongInt StartIndex, - MilanLongInt EndIndex, - MilanLongInt *Mate, - vector &GMate, - map &Ghost2LocalMap, - MilanReal *edgeLocWeight, - MilanLongInt *myCardPtr, - MilanLongInt *msgIndPtr, - MilanLongInt *NumMessagesBundledPtr, - MilanLongInt *SPtr, - MilanLongInt *verDistance, - MilanLongInt *PCounter, - vector &Counter, - MilanInt myRank, - MilanInt numProcs, - vector &U, - vector &privateU, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - vector &privateQLocalVtx, - vector &privateQGhostVtx, - vector &privateQMsgType, - vector &privateQOwner); - - void PROCESS_CROSS_EDGE(MilanLongInt *edge, - MilanLongInt *SPtr); - - void processMatchedVertices( - MilanLongInt NLVer, - vector &UChunkBeingProcessed, - vector &U, - vector &privateU, - MilanLongInt StartIndex, - MilanLongInt EndIndex, - MilanLongInt *myCardPtr, - MilanLongInt *msgIndPtr, - MilanLongInt *NumMessagesBundledPtr, - MilanLongInt *SPtr, - MilanLongInt *verLocPtr, - MilanLongInt *verLocInd, - MilanLongInt *verDistance, - MilanLongInt *PCounter, - vector &Counter, - MilanInt myRank, - MilanInt numProcs, - MilanLongInt *candidateMate, - vector &GMate, - MilanLongInt *Mate, - map &Ghost2LocalMap, - MilanReal *edgeLocWeight, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - vector &privateQLocalVtx, - vector &privateQGhostVtx, - vector &privateQMsgType, - vector &privateQOwner); - - void processMatchedVerticesAndSendMessages( - MilanLongInt NLVer, - vector &UChunkBeingProcessed, - vector &U, - vector &privateU, - MilanLongInt StartIndex, - MilanLongInt EndIndex, - MilanLongInt *myCardPtr, - MilanLongInt *msgIndPtr, - MilanLongInt *NumMessagesBundledPtr, - MilanLongInt *SPtr, - MilanLongInt *verLocPtr, - MilanLongInt *verLocInd, - MilanLongInt *verDistance, - MilanLongInt *PCounter, - vector &Counter, - MilanInt myRank, - MilanInt numProcs, - MilanLongInt *candidateMate, - vector &GMate, - MilanLongInt *Mate, - map &Ghost2LocalMap, - MilanReal *edgeLocWeight, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - vector &privateQLocalVtx, - vector &privateQGhostVtx, - vector &privateQMsgType, - vector &privateQOwner, - MPI_Comm comm, - MilanLongInt *msgActual, - vector &Message); - - void sendBundledMessages(MilanLongInt *numGhostEdgesPtr, - MilanInt *BufferSizePtr, - MilanLongInt *Buffer, - vector &PCumulative, - vector &PMessageBundle, - vector &PSizeInfoMessages, - MilanLongInt *PCounter, - MilanLongInt NumMessagesBundled, - MilanLongInt *msgActualPtr, - MilanLongInt *MessageIndexPtr, - MilanInt numProcs, - MilanInt myRank, - MPI_Comm comm, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - vector &SRequest, - vector &SStatus); - - void processMessages( - MilanLongInt NLVer, - MilanLongInt *Mate, - MilanLongInt *candidateMate, - map &Ghost2LocalMap, - vector &GMate, - vector &Counter, - MilanLongInt StartIndex, - MilanLongInt EndIndex, - MilanLongInt *myCardPtr, - MilanLongInt *msgIndPtr, - MilanLongInt *msgActualPtr, - MilanReal *edgeLocWeight, - MilanLongInt *verDistance, - MilanLongInt *verLocPtr, - MilanLongInt k, - MilanLongInt *verLocInd, - MilanInt numProcs, - MilanInt myRank, - MPI_Comm comm, - vector &Message, - MilanLongInt numGhostEdges, - MilanLongInt u, - MilanLongInt v, - MilanLongInt *SPtr, - vector &U); - - void extractUChunk( - vector &UChunkBeingProcessed, - vector &U, - vector &privateU); - - void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( - MilanLongInt NLVer, MilanLongInt NLEdge, - MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanReal *edgeLocWeight, - MilanLongInt *verDistance, - MilanLongInt *Mate, - MilanInt myRank, MilanInt numProcs, MPI_Comm comm, - MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent, - MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time, - MilanLongInt *ph1_card, MilanLongInt *ph2_card); -#endif + // Function of find the owner of a ghost vertex using binary search: + MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, + MilanInt myRank, MilanInt numProcs); + + MilanLongInt firstComputeCandidateMateD(MilanLongInt adj1, + MilanLongInt adj2, + MilanLongInt *verLocInd, + MilanReal *edgeLocWeight); + + + void queuesTransfer(vector &U, + vector &privateU, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); + + bool isAlreadyMatched(MilanLongInt node, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap); + + MilanLongInt computeCandidateMateD(MilanLongInt adj1, + MilanLongInt adj2, + MilanReal *edgeLocWeight, + MilanLongInt k, + MilanLongInt *verLocInd, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap); + + void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt StartIndex, MilanLongInt EndIndex, + MilanLongInt *numGhostEdgesPtr, + MilanLongInt *numGhostVerticesPtr, + MilanLongInt *S, + MilanLongInt *verLocInd, + MilanLongInt *verLocPtr, + map &Ghost2LocalMap, + vector &Counter, + vector &verGhostPtr, + vector &verGhostInd, + vector &tempCounter, + vector &GMate, + vector &Message, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + MilanLongInt *&candidateMate, + vector &U, + vector &privateU, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); + + void clean(MilanLongInt NLVer, + MilanInt myRank, + MilanLongInt MessageIndex, + vector &SRequest, + vector &SStatus, + MilanInt BufferSize, + MilanLongInt *Buffer, + MilanLongInt msgActual, + MilanLongInt *msgActualSent, + MilanLongInt msgInd, + MilanLongInt *msgIndSent, + MilanLongInt NumMessagesBundled, + MilanReal *msgPercent); + + void PARALLEL_COMPUTE_CANDIDATE_MATE_BD(MilanLongInt NLVer, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanInt myRank, + MilanReal *edgeLocWeight, + MilanLongInt *candidateMate); + + void PARALLEL_PROCESS_EXPOSED_VERTEX_BD(MilanLongInt NLVer, + MilanLongInt *candidateMate, + MilanLongInt *verLocInd, + MilanLongInt *verLocPtr, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *Mate, + vector &GMate, + map &Ghost2LocalMap, + MilanReal *edgeLocWeight, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *NumMessagesBundledPtr, + MilanLongInt *SPtr, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + vector &U, + vector &privateU, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); + + void PROCESS_CROSS_EDGE(MilanLongInt *edge, + MilanLongInt *SPtr); + + void processMatchedVerticesD( + MilanLongInt NLVer, + vector &UChunkBeingProcessed, + vector &U, + vector &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *NumMessagesBundledPtr, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanReal *edgeLocWeight, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); + + void processMatchedVerticesAndSendMessagesD( + MilanLongInt NLVer, + vector &UChunkBeingProcessed, + vector &U, + vector &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *NumMessagesBundledPtr, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanReal *edgeLocWeight, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner, + MPI_Comm comm, + MilanLongInt *msgActual, + vector &Message); + + void sendBundledMessages(MilanLongInt *numGhostEdgesPtr, + MilanInt *BufferSizePtr, + MilanLongInt *Buffer, + vector &PCumulative, + vector &PMessageBundle, + vector &PSizeInfoMessages, + MilanLongInt *PCounter, + MilanLongInt NumMessagesBundled, + MilanLongInt *msgActualPtr, + MilanLongInt *MessageIndexPtr, + MilanInt numProcs, + MilanInt myRank, + MPI_Comm comm, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &SRequest, + vector &SStatus); + + void processMessagesD( + MilanLongInt NLVer, + MilanLongInt *Mate, + MilanLongInt *candidateMate, + map &Ghost2LocalMap, + vector &GMate, + vector &Counter, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *msgActualPtr, + MilanReal *edgeLocWeight, + MilanLongInt *verDistance, + MilanLongInt *verLocPtr, + MilanLongInt k, + MilanLongInt *verLocInd, + MilanInt numProcs, + MilanInt myRank, + MPI_Comm comm, + vector &Message, + MilanLongInt numGhostEdges, + MilanLongInt u, + MilanLongInt v, + MilanLongInt *SPtr, + vector &U); + + void extractUChunk( + vector &UChunkBeingProcessed, + vector &U, + vector &privateU); + + MilanLongInt firstComputeCandidateMateS(MilanLongInt adj1, + MilanLongInt adj2, + MilanLongInt *verLocInd, + MilanFloat *edgeLocWeight); + + MilanLongInt computeCandidateMateS(MilanLongInt adj1, + MilanLongInt adj2, + MilanFloat *edgeLocWeight, + MilanLongInt k, + MilanLongInt *verLocInd, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap); + + void PARALLEL_COMPUTE_CANDIDATE_MATE_BS(MilanLongInt NLVer, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanInt myRank, + MilanFloat *edgeLocWeight, + MilanLongInt *candidateMate); + + void PARALLEL_PROCESS_EXPOSED_VERTEX_BS(MilanLongInt NLVer, + MilanLongInt *candidateMate, + MilanLongInt *verLocInd, + MilanLongInt *verLocPtr, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *Mate, + vector &GMate, + map &Ghost2LocalMap, + MilanFloat *edgeLocWeight, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *NumMessagesBundledPtr, + MilanLongInt *SPtr, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + vector &U, + vector &privateU, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); + void processMatchedVerticesS( + MilanLongInt NLVer, + vector &UChunkBeingProcessed, + vector &U, + vector &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *NumMessagesBundledPtr, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanFloat *edgeLocWeight, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner); + + void processMatchedVerticesAndSendMessagesS( + MilanLongInt NLVer, + vector &UChunkBeingProcessed, + vector &U, + vector &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *NumMessagesBundledPtr, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanFloat *edgeLocWeight, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner, + MPI_Comm comm, + MilanLongInt *msgActual, + vector &Message); + + void processMessagesS( + MilanLongInt NLVer, + MilanLongInt *Mate, + MilanLongInt *candidateMate, + map &Ghost2LocalMap, + vector &GMate, + vector &Counter, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCardPtr, + MilanLongInt *msgIndPtr, + MilanLongInt *msgActualPtr, + MilanFloat *edgeLocWeight, + MilanLongInt *verDistance, + MilanLongInt *verLocPtr, + MilanLongInt k, + MilanLongInt *verLocInd, + MilanInt numProcs, + MilanInt myRank, + MPI_Comm comm, + vector &Message, + MilanLongInt numGhostEdges, + MilanLongInt u, + MilanLongInt v, + MilanLongInt *SPtr, + vector &U); + + + void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( + MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanReal *edgeLocWeight, + MilanLongInt *verDistance, + MilanLongInt *Mate, + MilanInt myRank, MilanInt numProcs, MPI_Comm comm, + MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent, + MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time, + MilanLongInt *ph1_card, MilanLongInt *ph2_card); + + + void salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( + MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanFloat *edgeLocWeight, + MilanLongInt *verDistance, + MilanLongInt *Mate, + MilanInt myRank, MilanInt numProcs, MPI_Comm comm, + MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent, + MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time, + MilanLongInt *ph1_card, MilanLongInt *ph2_card); -#ifndef OPENMP - //Function of find the owner of a ghost vertex using binary search: - inline MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, - MilanInt myRank, MilanInt numProcs); -#endif void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC( MilanLongInt NLVer, MilanLongInt NLEdge, diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp index f03f726f..6ae18ebb 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp @@ -1303,16 +1303,16 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC( // SINGLE PRECISION VERSION void salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC( - MilanLongInt NLVer, MilanLongInt NLEdge, - MilanLongInt* verLocPtr, MilanLongInt* verLocInd, - MilanFloat* edgeLocWeight, - MilanLongInt* verDistance, - MilanLongInt* Mate, - MilanInt myRank, MilanInt numProcs, MPI_Comm comm, - MilanLongInt* msgIndSent, MilanLongInt* msgActualSent, - MilanReal* msgPercent, - MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time, - MilanLongInt* ph1_card, MilanLongInt* ph2_card ) { + MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt* verLocPtr, MilanLongInt* verLocInd, + MilanFloat* edgeLocWeight, + MilanLongInt* verDistance, + MilanLongInt* Mate, + MilanInt myRank, MilanInt numProcs, MPI_Comm comm, + MilanLongInt* msgIndSent, MilanLongInt* msgActualSent, + MilanReal* msgPercent, + MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time, + MilanLongInt* ph1_card, MilanLongInt* ph2_card ) { #if !defined(SERIAL_MPI) #ifdef PRINT_DEBUG_INFO_ cout<<"\n("< UChunkBeingProcessed; UChunkBeingProcessed.reserve(UCHUNK); - processMatchedVertices(NLVer, + processMatchedVerticesD(NLVer, UChunkBeingProcessed, U, privateU, @@ -430,7 +429,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( /////////////////////////// PROCESS MATCHED VERTICES ////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// - processMatchedVerticesAndSendMessages(NLVer, + processMatchedVerticesAndSendMessagesD(NLVer, UChunkBeingProcessed, U, privateU, @@ -490,8 +489,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( /////////////////////////////////////////////////////////////////////////////////// /////////////////////////// PROCESS MESSAGES ////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// - - processMessages(NLVer, + //startTime = MPI_Wtime(); + processMessagesD(NLVer, Mate, candidateMate, Ghost2LocalMap, @@ -556,6 +555,488 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( *ph2_card = myCard; // Cardinality at the end of Phase-2 } // End of algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate + +void salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( + MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt *verLocPtr, MilanLongInt *verLocInd, + MilanFloat *edgeLocWeight, + MilanLongInt *verDistance, + MilanLongInt *Mate, + MilanInt myRank, MilanInt numProcs, MPI_Comm comm, + MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, + MilanReal *msgPercent, + MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time, + MilanLongInt *ph1_card, MilanLongInt *ph2_card) +{ + + /* + * verDistance: it's a vector long as the number of processors. + * verDistance[i] contains the first node index of the i-th processor + * verDistance[i + 1] contains the last node index of the i-th processor + * NLVer: number of elements in the LocPtr + * NLEdge: number of edges assigned to the current processor + * + * Contains the portion of matrix assigned to the processor in + * Yale notation + * verLocInd: contains the positions on row of the matrix + * verLocPtr: i-th value is the position of the first element on the i-th row and + * i+1-th value is the position of the first element on the i+1-th row + */ + +#if !defined(SERIAL_MPI) +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Within algoEdgeApproxDominatingEdgesLinearSearchMessageBundling()"; + fflush(stdout); +#endif + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ") verDistance [" ; + for (int i = 0; i < numProcs; i++) + cout << verDistance[i] << "," << verDistance[i+1]; + cout << "]\n"; + fflush(stdout); +#endif +#ifdef DEBUG_HANG_ + if (myRank == 0) { + cout << "\n(" << myRank << ") verDistance [" ; + for (int i = 0; i < numProcs; i++) + cout << verDistance[i] << "," ; + cout << verDistance[numProcs]<< "]\n"; + } + fflush(stdout); +#endif + + // The starting vertex owned by the current rank + MilanLongInt StartIndex = verDistance[myRank]; + // The ending vertex owned by the current rank + MilanLongInt EndIndex = verDistance[myRank + 1] - 1; + + MPI_Status computeStatus; + + MilanLongInt msgActual = 0, msgInd = 0; + MilanFloat heaviestEdgeWt = 0.0f; // Assumes positive weight + MilanReal startTime, finishTime; + + startTime = MPI_Wtime(); + + // Data structures for sending and receiving messages: + vector Message; // [ u, v, message_type ] + Message.resize(3, -1); + // Data structures for Message Bundling: + // Although up to two messages can be sent along any cross edge, + // only one message will be sent in the initialization phase - + // one of: REQUEST/FAILURE/SUCCESS + vector QLocalVtx, QGhostVtx, QMsgType; + // Changed by Fabio to be an integer, addresses needs to be integers! + vector QOwner; + + MilanLongInt *PCounter = new MilanLongInt[numProcs]; + for (int i = 0; i < numProcs; i++) + PCounter[i] = 0; + + MilanLongInt NumMessagesBundled = 0; + // TODO when the last computational section will be refactored this could be eliminated + // Changed by Fabio to be an integer, addresses needs to be integers! + MilanInt ghostOwner = 0; + MilanLongInt *candidateMate = nullptr; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")NV: " << NLVer << " Edges: " << NLEdge; + fflush(stdout); + cout << "\n(" << myRank << ")StartIndex: " << StartIndex << " EndIndex: " << EndIndex; + fflush(stdout); +#endif + // Other Variables: + MilanLongInt u = -1, v = -1, w = -1, i = 0; + MilanLongInt k = -1, adj1 = -1, adj2 = -1; + MilanLongInt k1 = -1, adj11 = -1, adj12 = -1; + MilanLongInt myCard = 0; + + // Build the Ghost Vertex Set: Vg + // Map each ghost vertex to a local vertex + map Ghost2LocalMap; + // Store the edge count for each ghost vertex + vector Counter; + // Number of Ghost vertices + MilanLongInt numGhostVertices = 0, numGhostEdges = 0; + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")About to compute Ghost Vertices..."; + fflush(stdout); +#endif +#ifdef DEBUG_HANG_ + if (myRank == 0) + cout << "\n(" << myRank << ")About to compute Ghost Vertices..."; + fflush(stdout); +#endif + + // Define Adjacency Lists for Ghost Vertices: + // cout<<"Building Ghost data structures ... \n\n"; + vector verGhostPtr, verGhostInd, tempCounter; + // Mate array for ghost vertices: + vector GMate; // Proportional to the number of ghost vertices + MilanLongInt S; + MilanLongInt privateMyCard = 0; + vector PCumulative, PMessageBundle, PSizeInfoMessages; + vector SRequest; // Requests that are used for each send message + vector SStatus; // Status of sent messages, used in MPI_Wait + MilanLongInt MessageIndex = 0; // Pointer for current message + MilanInt BufferSize; + MilanLongInt *Buffer; + + vector privateQLocalVtx, privateQGhostVtx, privateQMsgType; + vector privateQOwner; + vector U, privateU; + + + initialize(NLVer, NLEdge, StartIndex, + EndIndex, &numGhostEdges, + &numGhostVertices, &S, + verLocInd, verLocPtr, + Ghost2LocalMap, Counter, + verGhostPtr, verGhostInd, + tempCounter, GMate, + Message, QLocalVtx, + QGhostVtx, QMsgType, QOwner, + candidateMate, U, + privateU, + privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner); + + finishTime = MPI_Wtime(); + *ph0_time = finishTime - startTime; // Time taken for Phase-0: Initialization +#ifdef DEBUG_HANG_ + cout << myRank << " Finished initialization" << endl; + fflush(stdout); +#endif + + startTime = MPI_Wtime(); + + ///////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////// INITIALIZATION ///////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////////////// + // Compute the Initial Matching Set: + + /* + * OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B has been splitted from + * PARALLEL_PROCESS_EXPOSED_VERTEX_B in order to better parallelize + * the two. + * PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel. + */ + + PARALLEL_COMPUTE_CANDIDATE_MATE_BS(NLVer, + verLocPtr, + verLocInd, + myRank, + edgeLocWeight, + candidateMate); + +#ifdef DEBUG_HANG_ + cout << myRank << " Finished Exposed Vertex" << endl; + fflush(stdout); +#if 0 + cout << myRank << " candidateMate after parallelCompute " < UChunkBeingProcessed; + UChunkBeingProcessed.reserve(UCHUNK); + + processMatchedVerticesS(NLVer, + UChunkBeingProcessed, + U, + privateU, + StartIndex, + EndIndex, + &myCard, + &msgInd, + &NumMessagesBundled, + &S, + verLocPtr, + verLocInd, + verDistance, + PCounter, + Counter, + myRank, + numProcs, + candidateMate, + GMate, + Mate, + Ghost2LocalMap, + edgeLocWeight, + QLocalVtx, + QGhostVtx, + QMsgType, + QOwner, + privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner); + + +#ifdef DEBUG_HANG_ + cout << myRank << " Finished Process Vertices" << endl; + fflush(stdout); +#if 0 + cout << myRank << " Mate after Matched Vertices " < heaviestEdgeWt) || + ((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k]))) { + heaviestEdgeWt = edgeLocWeight[k]; + w = verLocInd[k]; + finalK = k; + } + } // End of for loop + return finalK; +} + +/** + * //TODO documentation + * @param adj1 + * @param adj2 + * @param edgeLocWeight + * @param k + * @param verLocInd + * @param StartIndex + * @param EndIndex + * @param GMate + * @param Mate + * @param Ghost2LocalMap + * @return + */ +MilanLongInt computeCandidateMateS(MilanLongInt adj1, + MilanLongInt adj2, + MilanFloat *edgeLocWeight, + MilanLongInt k, + MilanLongInt *verLocInd, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap) +{ + // Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) + + MilanInt w = -1; + MilanFloat heaviestEdgeWt = 0.0f; // Assign the smallest Value + for (k = adj1; k < adj2; k++) { + if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) + continue; + + if ((edgeLocWeight[k] > heaviestEdgeWt) || + ((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k]))) { + heaviestEdgeWt = edgeLocWeight[k]; + w = verLocInd[k]; + } + } // End of for loop + // End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) + + return w; +} diff --git a/amgprec/impl/aggregator/extractUChunk.cpp b/amgprec/impl/aggregator/extractUChunk.cpp index 4e50a4f3..9f5bdfe2 100644 --- a/amgprec/impl/aggregator/extractUChunk.cpp +++ b/amgprec/impl/aggregator/extractUChunk.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP void extractUChunk( vector &UChunkBeingProcessed, vector &U, @@ -29,4 +28,3 @@ void extractUChunk( } // End of critical U // End of critical U } -#endif diff --git a/amgprec/impl/aggregator/findOwnerOfGhost.cpp b/amgprec/impl/aggregator/findOwnerOfGhost.cpp index 2723a7a3..779a5e7f 100644 --- a/amgprec/impl/aggregator/findOwnerOfGhost.cpp +++ b/amgprec/impl/aggregator/findOwnerOfGhost.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP /// Find the owner of a ghost node: MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, MilanInt myRank, MilanInt numProcs) @@ -27,4 +26,3 @@ MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, return Current; } // End of findOwnerOfGhost() -#endif diff --git a/amgprec/impl/aggregator/initialize.cpp b/amgprec/impl/aggregator/initialize.cpp index 2c8f052d..baac9e8c 100644 --- a/amgprec/impl/aggregator/initialize.cpp +++ b/amgprec/impl/aggregator/initialize.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, MilanLongInt StartIndex, MilanLongInt EndIndex, MilanLongInt *numGhostEdges, @@ -302,4 +301,3 @@ void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, } // End of single region } // End of parallel region } -#endif diff --git a/amgprec/impl/aggregator/isAlreadyMatched.cpp b/amgprec/impl/aggregator/isAlreadyMatched.cpp index 16d47a14..62cdca4b 100644 --- a/amgprec/impl/aggregator/isAlreadyMatched.cpp +++ b/amgprec/impl/aggregator/isAlreadyMatched.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP /** * //TODO documentation * @param k @@ -44,4 +43,3 @@ bool isAlreadyMatched(MilanLongInt node, return val >= 0; // Already matched } -#endif diff --git a/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp b/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp index 79f253eb..8563c1ff 100644 --- a/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp +++ b/amgprec/impl/aggregator/parallelComputeCandidateMateB.cpp @@ -1,6 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OPENMP -void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer, +void PARALLEL_COMPUTE_CANDIDATE_MATE_BD(MilanLongInt NLVer, MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanInt myRank, @@ -20,9 +19,36 @@ void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer, fflush(stdout); #endif // Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) - candidateMate[v] = firstComputeCandidateMate(verLocPtr[v], verLocPtr[v + 1], verLocInd, edgeLocWeight); + candidateMate[v] = firstComputeCandidateMateD(verLocPtr[v], verLocPtr[v + 1], + verLocInd, edgeLocWeight); // End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) } } } + +void PARALLEL_COMPUTE_CANDIDATE_MATE_BS(MilanLongInt NLVer, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanInt myRank, + MilanFloat *edgeLocWeight, + MilanLongInt *candidateMate) +{ + + MilanLongInt v = -1; + +#pragma omp parallel private(v) default(shared) num_threads(NUM_THREAD) + { + +#pragma omp for schedule(static) + for (v = 0; v < NLVer; v++) { +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Processing: " << v + StartIndex << endl; + fflush(stdout); #endif + // Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) + candidateMate[v] = firstComputeCandidateMateS(verLocPtr[v], verLocPtr[v + 1], + verLocInd, edgeLocWeight); + // End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) + } + } +} diff --git a/amgprec/impl/aggregator/processCrossEdge.cpp b/amgprec/impl/aggregator/processCrossEdge.cpp index 45cddb44..d9d557a6 100644 --- a/amgprec/impl/aggregator/processCrossEdge.cpp +++ b/amgprec/impl/aggregator/processCrossEdge.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP void PROCESS_CROSS_EDGE(MilanLongInt *edge, MilanLongInt *S) { @@ -22,4 +21,3 @@ void PROCESS_CROSS_EDGE(MilanLongInt *edge, // End: PARALLEL_PROCESS_CROSS_EDGE_B } -#endif diff --git a/amgprec/impl/aggregator/processExposedVertex.cpp b/amgprec/impl/aggregator/processExposedVertex.cpp index ecc5cf41..cb3cea65 100644 --- a/amgprec/impl/aggregator/processExposedVertex.cpp +++ b/amgprec/impl/aggregator/processExposedVertex.cpp @@ -1,6 +1,5 @@ -#include "MatchBoxPC.h" -#ifdef OPENMP -void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, +#include "MatchBoxPC.h" +void PARALLEL_PROCESS_EXPOSED_VERTEX_BD(MilanLongInt NLVer, MilanLongInt *candidateMate, MilanLongInt *verLocInd, MilanLongInt *verLocPtr, @@ -31,15 +30,16 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, vector &privateQOwner) { - MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0; + MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0; MilanInt ghostOwner = 0, option, igw; - //#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \ - firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) \ - default(shared) num_threads(NUM_THREAD) +#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \ + firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, \ + privateQGhostVtx, privateQMsgType, privateQOwner) \ + default(shared) num_threads(NUM_THREAD) { - //#pragma omp for reduction(+ \ + #pragma omp for reduction(+ \ : PCounter[:numProcs], myCard \ [:1], msgInd \ [:1], NumMessagesBundled \ @@ -66,7 +66,7 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, #pragma omp critical(Matching) { if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) { - w = computeCandidateMate(verLocPtr[v], verLocPtr[v + 1], edgeLocWeight, 0, + w = computeCandidateMateD(verLocPtr[v], verLocPtr[v + 1], edgeLocWeight, 0, verLocInd, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap); candidateMate[v] = w; @@ -181,4 +181,188 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, } // End of parallel region } + + +void PARALLEL_PROCESS_EXPOSED_VERTEX_BS(MilanLongInt NLVer, + MilanLongInt *candidateMate, + MilanLongInt *verLocInd, + MilanLongInt *verLocPtr, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *Mate, + vector &GMate, + map &Ghost2LocalMap, + MilanFloat *edgeLocWeight, + MilanLongInt *myCard, + MilanLongInt *msgInd, + MilanLongInt *NumMessagesBundled, + MilanLongInt *S, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + vector &U, + vector &privateU, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner) +{ + + MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0; + MilanInt ghostOwner = 0, option, igw; + +#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \ + firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, \ + privateQGhostVtx, privateQMsgType, privateQOwner) \ + default(shared) num_threads(NUM_THREAD) + + { +#pragma omp for reduction(+ \ + : PCounter[:numProcs], myCard \ + [:1], msgInd \ + [:1], NumMessagesBundled \ + [:1]) \ + schedule(static) + for (v = 0; v < NLVer; v++) { + option = -1; + // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + k = candidateMate[v]; + candidateMate[v] = verLocInd[k]; + w = candidateMate[v]; + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Processing: " << v + StartIndex << endl; + fflush(stdout); +#endif + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")" << v + StartIndex << " Points to: " << w; + fflush(stdout); +#endif + // If found a dominating edge: + if (w >= 0) { +#pragma omp critical(Matching) + { + if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) { + w = computeCandidateMateS(verLocPtr[v], verLocPtr[v + 1], edgeLocWeight, 0, + verLocInd, StartIndex, EndIndex, + GMate, Mate, Ghost2LocalMap); + candidateMate[v] = w; + } + if (w >= 0) { + (*myCard)++; + if ((w < StartIndex) || (w > EndIndex)) { // w is a ghost vertex + option = 2; + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) { + option = 1; + Mate[v] = w; + GMate[Ghost2LocalMap[w]] = v + StartIndex; // w is a Ghost + } // End of if CandidateMate[w] = v + } // End of if a Ghost Vertex + else { // w is a local vertex + if (candidateMate[w - StartIndex] == (v + StartIndex)) { + option = 3; + Mate[v] = w; // v is local + Mate[w - StartIndex] = v + StartIndex; // w is local +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ") "; + fflush(stdout); +#endif + } // End of if ( candidateMate[w-StartIndex] == (v+StartIndex) ) + } // End of Else + } // End of second if + } + + } // End of if(w >=0) + else { + //#pragma omp critical(adjuse) + { + // This piece of code is executed a really small number of times + adj11 = verLocPtr[v]; + adj12 = verLocPtr[v + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) { // A ghost + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + fflush(stdout); #endif + (*msgInd)++; + (*NumMessagesBundled)++; + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); +#pragma omp atomic + PCounter[ghostOwner]++; + + privateQLocalVtx.push_back(v + StartIndex); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(FAILURE); + privateQOwner.push_back(ghostOwner); + + } // End of if(GHOST) + } // End of for loop + } + } + // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + + switch (option) + { + case -1: + break; + case 1: + privateU.push_back(v + StartIndex); + privateU.push_back(w); + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ")"; + fflush(stdout); +#endif + + // Decrement the counter: + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S); + case 2: +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a request message (291):"; + cout << "\n(" << myRank << ")Local is: " << v + StartIndex << " Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; + fflush(stdout); +#endif + (*msgInd)++; + (*NumMessagesBundled)++; + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); +#pragma omp atomic + PCounter[ghostOwner]++; + + privateQLocalVtx.push_back(v + StartIndex); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(REQUEST); + privateQOwner.push_back(ghostOwner); + break; + case 3: + default: + privateU.push_back(v + StartIndex); + privateU.push_back(w); + break; + } + + } // End of for ( v=0; v < NLVer; v++ ) + + queuesTransfer(U, privateU, QLocalVtx, + QGhostVtx, + QMsgType, QOwner, privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner); + + } // End of parallel region +} diff --git a/amgprec/impl/aggregator/processMatchedVertices.cpp b/amgprec/impl/aggregator/processMatchedVertices.cpp index 457e0de8..531c9d32 100644 --- a/amgprec/impl/aggregator/processMatchedVertices.cpp +++ b/amgprec/impl/aggregator/processMatchedVertices.cpp @@ -1,6 +1,5 @@ #include "MatchBoxPC.h" -#ifdef OPENMP -void processMatchedVertices( +void processMatchedVerticesD( MilanLongInt NLVer, vector &UChunkBeingProcessed, vector &U, @@ -98,7 +97,7 @@ void processMatchedVertices( if (candidateMate[v - StartIndex] == u) { // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - w = computeCandidateMate(verLocPtr[v - StartIndex], + w = computeCandidateMateD(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], edgeLocWeight, 0, verLocInd, StartIndex, EndIndex, @@ -290,4 +289,296 @@ void processMatchedVertices( #endif } // End of parallel region } + + + +void processMatchedVerticesS( + MilanLongInt NLVer, + vector &UChunkBeingProcessed, + vector &U, + vector &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCard, + MilanLongInt *msgInd, + MilanLongInt *NumMessagesBundled, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanFloat *edgeLocWeight, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner) +{ + + MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner; + int option; + MilanLongInt mateVal; + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << "=========================************===============================" << endl; + fflush(stdout); + fflush(stdout); +#endif + +#ifdef COUNT_LOCAL_VERTEX + MilanLongInt localVertices = 0; +#endif +#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ + firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, \ + privateQMsgType, privateQOwner, UChunkBeingProcessed) \ + default(shared) num_threads(NUM_THREAD) \ + reduction(+ \ + : msgInd[:1], PCounter \ + [:numProcs], myCard \ + [:1], NumMessagesBundled \ + [:1]) + { + + while (!U.empty()) { + + extractUChunk(UChunkBeingProcessed, U, privateU); + + for (MilanLongInt u : UChunkBeingProcessed) { +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")u: " << u; + fflush(stdout); +#endif + if ((u >= StartIndex) && (u <= EndIndex)) { // Process Only the Local Vertices + +#ifdef COUNT_LOCAL_VERTEX + localVertices++; +#endif + + // Get the Adjacency list for u + adj1 = verLocPtr[u - StartIndex]; // Pointer + adj2 = verLocPtr[u - StartIndex + 1]; + for (k = adj1; k < adj2; k++) { + option = -1; + v = verLocInd[k]; + + if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex: + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; + fflush(stdout); +#endif +#pragma omp atomic read + mateVal = Mate[v - StartIndex]; + // If the current vertex is pointing to a matched vertex and is not matched + if (mateVal < 0) { +#pragma omp critical + { +#pragma omp atomic read + mateVal = Mate[v - StartIndex]; + // If the current vertex is pointing to a matched vertex and is not matched + if (mateVal < 0) { + + if (candidateMate[v - StartIndex] == u) { + // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + w = computeCandidateMateS(verLocPtr[v - StartIndex], + verLocPtr[v - StartIndex + 1], + edgeLocWeight, 0, + verLocInd, StartIndex, EndIndex, + GMate, Mate, Ghost2LocalMap); + candidateMate[v - StartIndex] = w; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")" << v << " Points to: " << w; + fflush(stdout); +#endif + // If found a dominating edge: + if (w >= 0) { + if ((w < StartIndex) || (w > EndIndex)) { // A ghost +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a request message:"; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); +#endif + option = 2; + + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) { + option = 1; + Mate[v - StartIndex] = w; // v is a local vertex + GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex + + } // End of if CandidateMate[w] = v + } // End of if a Ghost Vertex + else { // w is a local vertex + if (candidateMate[w - StartIndex] == v) { + option = 3; + Mate[v - StartIndex] = w; // v is a local vertex + Mate[w - StartIndex] = v; // w is a local vertex + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; + fflush(stdout); +#endif + } // End of if(CandidateMate(w) = v + } // End of Else + } // End of if(w >=0) + else + option = 4; // End of Else: w == -1 + // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } // End of If (candidateMate[v-StartIndex] == u + } + } // End of task + } // mateval < 0 + } // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: + else { // Neighbor is a ghost vertex + +#pragma omp critical + { + if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) + candidateMate[NLVer + Ghost2LocalMap[v]] = -1; + if (v != Mate[u - StartIndex]) + option = 5; // u is local + } // End of critical + } // End of Else //A Ghost Vertex + + switch (option) + { + case -1: + // No things to do + break; + case 1: + // Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v + privateU.push_back(v); + privateU.push_back(w); + + (*myCard)++; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; + fflush(stdout); +#endif + // Decrement the counter: + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr); + case 2: + + // Found a dominating edge, it is a ghost + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); +#pragma omp atomic + PCounter[ghostOwner]++; + (*NumMessagesBundled)++; + (*msgInd)++; + + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(REQUEST); + privateQOwner.push_back(ghostOwner); + break; + case 3: + privateU.push_back(v); + privateU.push_back(w); + + (*myCard)++; + break; + case 4: + // Could not find a dominating vertex + adj11 = verLocPtr[v - StartIndex]; + adj12 = verLocPtr[v - StartIndex + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) { // A ghost + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + fflush(stdout); +#endif + + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); +#pragma omp atomic + PCounter[ghostOwner]++; + (*NumMessagesBundled)++; + (*msgInd)++; + + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(FAILURE); + privateQOwner.push_back(ghostOwner); + + } // End of if(GHOST) + } // End of for loop + break; + case 5: + default: + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a success message: "; + cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n"; + fflush(stdout); +#endif + + ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); + // assert(ghostOwner != -1); + // assert(ghostOwner != myRank); + + (*NumMessagesBundled)++; + PCounter[ghostOwner]++; + (*msgInd)++; + + privateQLocalVtx.push_back(u); + privateQGhostVtx.push_back(v); + privateQMsgType.push_back(SUCCESS); + privateQOwner.push_back(ghostOwner); + + break; + } // End of switch + } // End of inner for + } + } // End of outer for + + queuesTransfer(U, privateU, QLocalVtx, + QGhostVtx, + QMsgType, QOwner, privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner); + +#pragma omp critical(U) + { + U.insert(U.end(), privateU.begin(), privateU.end()); + } + +#pragma omp critical(sendMessageTransfer) + { + QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end()); + QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end()); + QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end()); + QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end()); + } + + privateU.clear(); + privateQLocalVtx.clear(); + privateQGhostVtx.clear(); + privateQMsgType.clear(); + privateQOwner.clear(); + + } // End of while ( !U.empty() ) + +#ifdef COUNT_LOCAL_VERTEX + printf("Count local vertexes: %ld for thread %d of processor %d\n", + localVertices, + omp_get_thread_num(), + myRank); + #endif + } // End of parallel region +} diff --git a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp index e75fa8db..d094afaa 100644 --- a/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp +++ b/amgprec/impl/aggregator/processMatchedVerticesAndSendMessages.cpp @@ -1,7 +1,6 @@ #include "MatchBoxPC.h" -#ifdef OPENMP //#define DEBUG_HANG_ -void processMatchedVerticesAndSendMessages( +void processMatchedVerticesAndSendMessagesD( MilanLongInt NLVer, vector &UChunkBeingProcessed, vector &U, @@ -27,7 +26,7 @@ void processMatchedVerticesAndSendMessages( vector &QLocalVtx, vector &QGhostVtx, vector &QMsgType, - vector &QOwner, + vector &QOwner, vector &privateQLocalVtx, vector &privateQGhostVtx, vector &privateQMsgType, @@ -52,6 +51,302 @@ void processMatchedVerticesAndSendMessages( MilanLongInt localVertices = 0; #endif //#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ + firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, \ + privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) \ + default(shared) \ + num_threads(NUM_THREAD) \ + reduction(+ \ + : msgInd[:1], PCounter \ + [:numProcs], myCard \ + [:1], NumMessagesBundled \ + [:1], msgActual \ + [:1]) + { + + while (!U.empty()) { + + extractUChunk(UChunkBeingProcessed, U, privateU); + + for (MilanLongInt u : UChunkBeingProcessed) { +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")u: " << u; + fflush(stdout); +#endif + if ((u >= StartIndex) && (u <= EndIndex)) { // Process Only the Local Vertices + +#ifdef COUNT_LOCAL_VERTEX + localVertices++; +#endif + // Get the Adjacency list for u + adj1 = verLocPtr[u - StartIndex]; // Pointer + adj2 = verLocPtr[u - StartIndex + 1]; + for (k = adj1; k < adj2; k++) { + option = -1; + v = verLocInd[k]; + + if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex: + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v]; + fflush(stdout); +#endif +#pragma omp atomic read + mateVal = Mate[v - StartIndex]; + // If the current vertex is pointing to a matched vertex and is not matched + if (mateVal < 0) { +#pragma omp critical + { +#pragma omp atomic read + mateVal = Mate[v - StartIndex]; + // If the current vertex is pointing to a matched vertex and is not matched + if (mateVal < 0) { + + if (candidateMate[v - StartIndex] == u) { + // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + w = computeCandidateMateD(verLocPtr[v - StartIndex], + verLocPtr[v - StartIndex + 1], + edgeLocWeight, 0, + verLocInd, StartIndex, EndIndex, + GMate, Mate, Ghost2LocalMap); + candidateMate[v - StartIndex] = w; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")" << v << " Points to: " << w; + fflush(stdout); +#endif + // If found a dominating edge: + if (w >= 0) { + + if ((w < StartIndex) || (w > EndIndex)) { // A ghost +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a request message:"; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); +#endif + option = 2; + + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) { + option = 1; + Mate[v - StartIndex] = w; // v is a local vertex + GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex + + } // End of if CandidateMate[w] = v + } // End of if a Ghost Vertex + else { // w is a local vertex + if (candidateMate[w - StartIndex] == v) { + option = 3; + Mate[v - StartIndex] = w; // v is a local vertex + Mate[w - StartIndex] = v; // w is a local vertex +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; + fflush(stdout); +#endif + } // End of if(CandidateMate(w) = v + } // End of Else + } // End of if(w >=0) + else + option = 4; // End of Else: w == -1 + // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } // End of If (candidateMate[v-StartIndex] == u + } + } // End of task + } // mateval < 0 + } // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: + else { // Neighbor is a ghost vertex + +#pragma omp critical + { + if (candidateMate[NLVer + Ghost2LocalMap[v]] == u) + candidateMate[NLVer + Ghost2LocalMap[v]] = -1; + if (v != Mate[u - StartIndex]) + option = 5; // u is local + } // End of critical + } // End of Else //A Ghost Vertex + + switch (option) + { + case -1: + // No things to do + break; + case 1: + // Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v + privateU.push_back(v); + privateU.push_back(w); + (*myCard)++; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") "; + fflush(stdout); +#endif + // Decrement the counter: + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr); + case 2: + + // Found a dominating edge, it is a ghost + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + + // Build the Message Packet: + // Message[0] = v; // LOCAL + // Message[1] = w; // GHOST + // Message[2] = REQUEST; // TYPE + // Send a Request (Asynchronous) + // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); + + (*msgActual)++; + (*msgInd)++; + + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(REQUEST); + privateQOwner.push_back(ghostOwner); + break; + case 3: + privateU.push_back(v); + privateU.push_back(w); + (*myCard)++; + break; + case 4: + // Could not find a dominating vertex + adj11 = verLocPtr[v - StartIndex]; + adj12 = verLocPtr[v - StartIndex + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) { // A ghost +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs); + fflush(stdout); +#endif + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + // Build the Message Packet: + // Message[0] = v; // LOCAL + // Message[1] = w; // GHOST + // Message[2] = FAILURE; // TYPE + // Send a Request (Asynchronous) + // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); + + (*msgActual)++; + (*msgInd)++; + privateQLocalVtx.push_back(v); + privateQGhostVtx.push_back(w); + privateQMsgType.push_back(FAILURE); + privateQOwner.push_back(ghostOwner); + + } // End of if(GHOST) + } // End of for loop + break; + case 5: + default: + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a success message: "; + cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n"; + fflush(stdout); +#endif + + ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs); + // Build the Message Packet: + // Message[0] = u; // LOCAL + // Message[1] = v; // GHOST + // Message[2] = SUCCESS; // TYPE + // Send a Request (Asynchronous) + // MPI_Bsend(&Message[0], 3, TypeMap(), ghostOwner, ComputeTag, comm); + (*msgActual)++; + (*msgInd)++; + privateQLocalVtx.push_back(u); + privateQGhostVtx.push_back(v); + privateQMsgType.push_back(SUCCESS); + privateQOwner.push_back(ghostOwner); + + break; + } // End of switch + } // End of inner for + } + } // End of outer for + + queuesTransfer(U, privateU, QLocalVtx, + QGhostVtx, + QMsgType, QOwner, privateQLocalVtx, + privateQGhostVtx, + privateQMsgType, + privateQOwner); + + } // End of while ( !U.empty() ) + +#ifdef COUNT_LOCAL_VERTEX + printf("Count local vertexes: %ld for thread %d of processor %d\n", + localVertices, mp_get_thread_num(), myRank); +#endif + } // End of parallel region + + // Send the messages +#ifdef DEBUG_HANG_ + cout << myRank<<" Sending: "<(), ghostOwner, ComputeTag, comm); + //cout << myRank<<" Sending to "<(), ghostOwner, ComputeTag, comm); + } +#ifdef DEBUG_HANG_ + cout << myRank<<" Done sending messages"< &UChunkBeingProcessed, + vector &U, + vector &privateU, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCard, + MilanLongInt *msgInd, + MilanLongInt *NumMessagesBundled, + MilanLongInt *SPtr, + MilanLongInt *verLocPtr, + MilanLongInt *verLocInd, + MilanLongInt *verDistance, + MilanLongInt *PCounter, + vector &Counter, + MilanInt myRank, + MilanInt numProcs, + MilanLongInt *candidateMate, + vector &GMate, + MilanLongInt *Mate, + map &Ghost2LocalMap, + MilanFloat *edgeLocWeight, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &privateQLocalVtx, + vector &privateQGhostVtx, + vector &privateQMsgType, + vector &privateQOwner, + MPI_Comm comm, + MilanLongInt *msgActual, + vector &Message) +{ + + MilanLongInt initialSize = QLocalVtx.size(); + MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner; + int option; + MilanLongInt mateVal; + +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << "=========================************===============================" << endl; + fflush(stdout); + fflush(stdout); +#endif + +#ifdef COUNT_LOCAL_VERTEX + MilanLongInt localVertices = 0; +#endif +#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx,\ privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \ num_threads(NUM_THREAD) \ @@ -103,7 +398,7 @@ void processMatchedVerticesAndSendMessages( if (candidateMate[v - StartIndex] == u) { // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) - w = computeCandidateMate(verLocPtr[v - StartIndex], + w = computeCandidateMateS(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], edgeLocWeight, 0, verLocInd, StartIndex, EndIndex, @@ -293,4 +588,3 @@ void processMatchedVerticesAndSendMessages( cout << myRank<<" Done sending messages"< &Ghost2LocalMap, + vector &GMate, + vector &Counter, + MilanLongInt StartIndex, + MilanLongInt EndIndex, + MilanLongInt *myCard, + MilanLongInt *msgInd, + MilanLongInt *msgActual, + MilanFloat *edgeLocWeight, + MilanLongInt *verDistance, + MilanLongInt *verLocPtr, + MilanLongInt k, + MilanLongInt *verLocInd, + MilanInt numProcs, + MilanInt myRank, + MPI_Comm comm, + vector &Message, + MilanLongInt numGhostEdges, + MilanLongInt u, + MilanLongInt v, + MilanLongInt *S, + vector &U) +{ + + //#define PRINT_DEBUG_INFO_ + + MilanInt Sender; + MPI_Status computeStatus; + MilanLongInt bundleSize, w; + MilanLongInt adj11, adj12, k1; + MilanLongInt ghostOwner; + int error_codeC; + error_codeC = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN); + char error_message[MPI_MAX_ERROR_STRING]; + int message_length; + MilanLongInt message_type = 0; + + // Buffer to receive bundled messages + // Maximum messages that can be received from any processor is + // twice the edge cut: REQUEST; REQUEST+(FAILURE/SUCCESS) + vector ReceiveBuffer; + try + { + ReceiveBuffer.reserve(numGhostEdges * 2 * 3); // Three integers per cross edge + } + catch (length_error) + { + cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; + cout << "Not enough memory to allocate the internal variables \n"; + exit(1); + } + +#ifdef PRINT_DEBUG_INFO_ + cout + << "\n(" << myRank << "=========================************===============================" << endl; + fflush(stdout); + fflush(stdout); +#endif +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")About to begin Message processing phase ... *S=" << *S << endl; + fflush(stdout); #endif +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << "=========================************===============================" << endl; + fflush(stdout); + fflush(stdout); +#endif + // BLOCKING RECEIVE: +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << " Waiting for blocking receive..." << endl; + fflush(stdout); + fflush(stdout); +#endif + + //cout << myRank<<" Receiving ..."; + error_codeC = MPI_Recv(&Message[0], 3, TypeMap(), MPI_ANY_SOURCE, ComputeTag, comm, &computeStatus); + if (error_codeC != MPI_SUCCESS) + { + MPI_Error_string(error_codeC, error_message, &message_length); + cout << "\n*Error in call to MPI_Receive on Slave: " << error_message << "\n"; + fflush(stdout); + } + Sender = computeStatus.MPI_SOURCE; + //cout << " ...from "<(), Sender, BundleTag, comm, &computeStatus); + if (error_codeC != MPI_SUCCESS) { + MPI_Error_string(error_codeC, error_message, &message_length); + cout << "\n*Error in call to MPI_Receive on processor " << myRank << " Error: " << error_message << "\n"; + fflush(stdout); + } +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Message Bundle After: " << endl; + for (int i = 0; i < bundleSize; i++) + cout << ReceiveBuffer[i] << ","; + cout << endl; + fflush(stdout); +#endif + } else { // Just a single message: +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Received regular message from Process " << Sender << " u= " << Message[0] << " v= " << Message[1] << endl; + fflush(stdout); +#endif + // Add the current message to Queue: + bundleSize = 3; //#of integers in the message + // Build the Message Buffer: + if (!ReceiveBuffer.empty()) + ReceiveBuffer.clear(); // Empty it out first + ReceiveBuffer.resize(bundleSize, -1); // Initialize + + ReceiveBuffer[0] = Message[0]; // u + ReceiveBuffer[1] = Message[1]; // v + ReceiveBuffer[2] = Message[2]; // message_type + } + +#ifdef DEBUG_GHOST_ + if ((v < StartIndex) || (v > EndIndex)) { + cout << "\n(" << myRank << ") From ReceiveBuffer: This should not happen: u= " << u << " v= " << v << " Type= " << message_type << " StartIndex " << StartIndex << " EndIndex " << EndIndex << endl; + fflush(stdout); + } +#endif +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Processing message: u= " << u << " v= " << v << " Type= " << message_type << endl; + fflush(stdout); +#endif + + // Most of the time bundleSize == 3, thus, it's not worth parallelizing thi loop + for (MilanLongInt bundleCounter = 3; bundleCounter < bundleSize + 3; bundleCounter += 3) { + u = ReceiveBuffer[bundleCounter - 3]; // GHOST + v = ReceiveBuffer[bundleCounter - 2]; // LOCAL + message_type = ReceiveBuffer[bundleCounter - 1]; // TYPE + + // CASE I: REQUEST + if (message_type == REQUEST) { +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Message type is REQUEST" << endl; + fflush(stdout); +#endif +#ifdef DEBUG_GHOST_ + if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) { + cout << "\n(" << myRank << ") case 1 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl; + fflush(stdout); + } + +#endif + + if (Mate[v - StartIndex] == -1) { + // Process only if not already matched (v is local) + candidateMate[NLVer + Ghost2LocalMap[u]] = v; // Set CandidateMate for the ghost + if (candidateMate[v - StartIndex] == u) { + GMate[Ghost2LocalMap[u]] = v; // u is ghost + Mate[v - StartIndex] = u; // v is local + U.push_back(v); + U.push_back(u); + (*myCard)++; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << u << ") " << endl; + fflush(stdout); +#endif + + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); + } // End of if ( candidateMate[v-StartIndex] == u )e + } // End of if ( Mate[v] == -1 ) + } // End of REQUEST + else { // CASE II: SUCCESS + if (message_type == SUCCESS) { +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Message type is SUCCESS" << endl; + fflush(stdout); +#endif + GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process it again + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); +#ifdef DEBUG_GHOST_ + if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) { + cout << "\n(" << myRank << ") case 2 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl; + fflush(stdout); + } +#endif + if (Mate[v - StartIndex] == -1) { + // Process only if not already matched ( v is local) + if (candidateMate[v - StartIndex] == u) { + // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + w = computeCandidateMateS(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], + edgeLocWeight, k,verLocInd, StartIndex, EndIndex, + GMate, Mate, Ghost2LocalMap); + candidateMate[v - StartIndex] = w; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")" << v << " Points to: " << w << endl; + fflush(stdout); +#endif + // If found a dominating edge: + if (w >= 0) { + if ((w < StartIndex) || (w > EndIndex)) { + // w is a ghost + // Build the Message Packet: + Message[0] = v; // LOCAL + Message[1] = w; // GHOST + Message[2] = REQUEST; // TYPE + // Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a request message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; + fflush(stdout); +#endif + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + //assert(ghostOwner != -1); + //assert(ghostOwner != myRank); + //cout << myRank<<" Sending to "<(), ghostOwner, ComputeTag, comm); + (*msgInd)++; + (*msgActual)++; + if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) { + Mate[v - StartIndex] = w; // v is local + GMate[Ghost2LocalMap[w]] = v; // w is ghost + U.push_back(v); + U.push_back(w); + (*myCard)++; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl; + fflush(stdout); +#endif + + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S); + } // End of if CandidateMate[w] = v + } // End of if a Ghost Vertex + else { // w is a local vertex + if (candidateMate[w - StartIndex] == v) { + Mate[v - StartIndex] = w; // v is local + Mate[w - StartIndex] = v; // w is local + // Q.push_back(u); + U.push_back(v); + U.push_back(w); + (*myCard)++; +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl; + fflush(stdout); +#endif + } // End of if(CandidateMate(w) = v + } // End of Else + } // End of if(w >=0) + else { // No dominant edge found + adj11 = verLocPtr[v - StartIndex]; + adj12 = verLocPtr[v - StartIndex + 1]; + for (k1 = adj11; k1 < adj12; k1++) { + w = verLocInd[k1]; + if ((w < StartIndex) || (w > EndIndex)) { + // A ghost + // Build the Message Packet: + Message[0] = v; // LOCAL + Message[1] = w; // GHOST + Message[2] = FAILURE; // TYPE + // Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Sending a failure message: "; + cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl; + fflush(stdout); +#endif + ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs); + //assert(ghostOwner != -1); + //assert(ghostOwner != myRank); + //cout << myRank<<" Sending to "<(), ghostOwner, ComputeTag, comm); + (*msgInd)++; + (*msgActual)++; + } // End of if(GHOST) + } // End of for loop + } // End of Else: w == -1 + // End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } // End of if ( candidateMate[v-StartIndex] == u ) + } // End of if ( Mate[v] == -1 ) + } // End of if ( message_type == SUCCESS ) + else { + // CASE III: FAILURE +#ifdef PRINT_DEBUG_INFO_ + cout << "\n(" << myRank << ")Message type is FAILURE" << endl; + fflush(stdout); +#endif + GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process this anymore + PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); // Decrease the counter + } // End of else: CASE III + } // End of else: CASE I + } + + return; +} diff --git a/amgprec/impl/aggregator/queueTransfer.cpp b/amgprec/impl/aggregator/queueTransfer.cpp index b8b6a4bb..d1391292 100644 --- a/amgprec/impl/aggregator/queueTransfer.cpp +++ b/amgprec/impl/aggregator/queueTransfer.cpp @@ -1,5 +1,4 @@ #include "MatchBoxPC.h" -#ifdef OPENMP void queuesTransfer(vector &U, vector &privateU, vector &QLocalVtx, @@ -31,4 +30,3 @@ void queuesTransfer(vector &U, privateQOwner.clear(); } -#endif diff --git a/amgprec/impl/aggregator/sendBundledMessages.cpp b/amgprec/impl/aggregator/sendBundledMessages.cpp index 3349ce86..debabf7e 100644 --- a/amgprec/impl/aggregator/sendBundledMessages.cpp +++ b/amgprec/impl/aggregator/sendBundledMessages.cpp @@ -1,24 +1,23 @@ #include "MatchBoxPC.h" -#ifdef OPENMP void sendBundledMessages(MilanLongInt *numGhostEdges, - MilanInt *BufferSize, - MilanLongInt *Buffer, - vector &PCumulative, - vector &PMessageBundle, - vector &PSizeInfoMessages, - MilanLongInt *PCounter, - MilanLongInt NumMessagesBundled, - MilanLongInt *msgActual, - MilanLongInt *msgInd, - MilanInt numProcs, - MilanInt myRank, - MPI_Comm comm, - vector &QLocalVtx, - vector &QGhostVtx, - vector &QMsgType, - vector &QOwner, - vector &SRequest, - vector &SStatus) + MilanInt *BufferSize, + MilanLongInt *Buffer, + vector &PCumulative, + vector &PMessageBundle, + vector &PSizeInfoMessages, + MilanLongInt *PCounter, + MilanLongInt NumMessagesBundled, + MilanLongInt *msgActual, + MilanLongInt *msgInd, + MilanInt numProcs, + MilanInt myRank, + MPI_Comm comm, + vector &QLocalVtx, + vector &QGhostVtx, + vector &QMsgType, + vector &QOwner, + vector &SRequest, + vector &SStatus) { MilanLongInt myIndex = 0, numMessagesToSend; @@ -207,4 +206,3 @@ void sendBundledMessages(MilanLongInt *numGhostEdges, } } } -#endif