Merge branch 'mboxomp' into PolySmooth

PolySmooth
sfilippone 6 months ago
commit bfe0a32e09

@ -724,6 +724,7 @@ contains
& vnl, mate, iam, np,ictxt,& & vnl, mate, iam, np,ictxt,&
& msgis,msgas,msgprc,ph0t,ph1t,ph2t,ph1crd,ph2crd,info,display_inp) & msgis,msgas,msgprc,ph0t,ph1t,ph2t,ph1crd,ph2crd,info,display_inp)
if (do_timings) call psb_toc(idx_cmboxp) if (do_timings) call psb_toc(idx_cmboxp)
if (iam==0) write(0,*) iam,' buildmatching from PMatchBox:', info,ph0t,ph1t,ph2t
if (debug) write(0,*) iam,' buildmatching from PMatchBox:', info if (debug) write(0,*) iam,' buildmatching from PMatchBox:', info
if (debug_sync) then if (debug_sync) then
call psb_max(ictxt,info) call psb_max(ictxt,info)

@ -272,9 +272,7 @@ contains
write(0,*) 'Impossible: mate(k) > nc' write(0,*) 'Impossible: mate(k) > nc'
cycle cycle
else else
if (ilaggr(k) == ilaggr_neginit) then if (ilaggr(k) == ilaggr_neginit) then
wk = w(k) wk = w(k)
widx = w(idx) widx = w(idx)
wmax = max(abs(wk),abs(widx)) wmax = max(abs(wk),abs(widx))

@ -72,7 +72,8 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge,
double tmr = MPI_Wtime(); double tmr = MPI_Wtime();
#endif #endif
#if defined(OPENMP) #if 1
// defined(OPENMP)
//fprintf(stderr,"Warning: using buggy OpenMP matching!\n"); //fprintf(stderr,"Warning: using buggy OpenMP matching!\n");
dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(NLVer, NLEdge, dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(NLVer, NLEdge,
verLocPtr, verLocInd, edgeLocWeight, verLocPtr, verLocInd, edgeLocWeight,
@ -114,6 +115,17 @@ void sMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge,
fprintf(stderr,"MatchBoxPC: rank %d nlver %ld nledge %ld [ %ld %ld ]\n", fprintf(stderr,"MatchBoxPC: rank %d nlver %ld nledge %ld [ %ld %ld ]\n",
myRank,NLVer, NLEdge,verDistance[0],verDistance[1]); myRank,NLVer, NLEdge,verDistance[0],verDistance[1]);
#endif #endif
#if 1
// defined(OPENMP)
//fprintf(stderr,"Warning: using buggy OpenMP matching!\n");
salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(NLVer, NLEdge,
verLocPtr, verLocInd, edgeLocWeight,
verDistance, Mate,
myRank, numProcs, C_comm,
msgIndSent, msgActualSent, msgPercent,
ph0_time, ph1_time, ph2_time,
ph1_card, ph2_card );
#else
salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(NLVer, NLEdge, salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(NLVer, NLEdge,
verLocPtr, verLocInd, edgeLocWeight, verLocPtr, verLocInd, edgeLocWeight,
verDistance, Mate, verDistance, Mate,
@ -122,6 +134,7 @@ void sMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge,
ph0_time, ph1_time, ph2_time, ph0_time, ph1_time, ph2_time,
ph1_card, ph2_card ); ph1_card, ph2_card );
#endif #endif
#endif
} }
#ifdef __cplusplus #ifdef __cplusplus

@ -178,7 +178,6 @@ extern "C"
#define MilanRealMin MINUS_INFINITY #define MilanRealMin MINUS_INFINITY
#endif #endif
#ifdef OPENMP
/* These functions are only used in the experimental OMP implementation, if that /* These functions are only used in the experimental OMP implementation, if that
is disabled there is no reason to actually compile or reference them. */ is disabled there is no reason to actually compile or reference them. */
@ -186,11 +185,12 @@ is disabled there is no reason to actually compile or reference them. */
MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance,
MilanInt myRank, MilanInt numProcs); MilanInt myRank, MilanInt numProcs);
MilanLongInt firstComputeCandidateMate(MilanLongInt adj1, MilanLongInt firstComputeCandidateMateD(MilanLongInt adj1,
MilanLongInt adj2, MilanLongInt adj2,
MilanLongInt *verLocInd, MilanLongInt *verLocInd,
MilanReal *edgeLocWeight); MilanReal *edgeLocWeight);
void queuesTransfer(vector<MilanLongInt> &U, void queuesTransfer(vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU, vector<MilanLongInt> &privateU,
vector<MilanLongInt> &QLocalVtx, vector<MilanLongInt> &QLocalVtx,
@ -209,7 +209,7 @@ is disabled there is no reason to actually compile or reference them. */
MilanLongInt *Mate, MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap); map<MilanLongInt, MilanLongInt> &Ghost2LocalMap);
MilanLongInt computeCandidateMate(MilanLongInt adj1, MilanLongInt computeCandidateMateD(MilanLongInt adj1,
MilanLongInt adj2, MilanLongInt adj2,
MilanReal *edgeLocWeight, MilanReal *edgeLocWeight,
MilanLongInt k, MilanLongInt k,
@ -260,14 +260,14 @@ is disabled there is no reason to actually compile or reference them. */
MilanLongInt NumMessagesBundled, MilanLongInt NumMessagesBundled,
MilanReal *msgPercent); MilanReal *msgPercent);
void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer, void PARALLEL_COMPUTE_CANDIDATE_MATE_BD(MilanLongInt NLVer,
MilanLongInt *verLocPtr, MilanLongInt *verLocPtr,
MilanLongInt *verLocInd, MilanLongInt *verLocInd,
MilanInt myRank, MilanInt myRank,
MilanReal *edgeLocWeight, MilanReal *edgeLocWeight,
MilanLongInt *candidateMate); MilanLongInt *candidateMate);
void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer, void PARALLEL_PROCESS_EXPOSED_VERTEX_BD(MilanLongInt NLVer,
MilanLongInt *candidateMate, MilanLongInt *candidateMate,
MilanLongInt *verLocInd, MilanLongInt *verLocInd,
MilanLongInt *verLocPtr, MilanLongInt *verLocPtr,
@ -300,7 +300,7 @@ is disabled there is no reason to actually compile or reference them. */
void PROCESS_CROSS_EDGE(MilanLongInt *edge, void PROCESS_CROSS_EDGE(MilanLongInt *edge,
MilanLongInt *SPtr); MilanLongInt *SPtr);
void processMatchedVertices( void processMatchedVerticesD(
MilanLongInt NLVer, MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed, vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U, vector<MilanLongInt> &U,
@ -332,7 +332,7 @@ is disabled there is no reason to actually compile or reference them. */
vector<MilanLongInt> &privateQMsgType, vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner); vector<MilanInt> &privateQOwner);
void processMatchedVerticesAndSendMessages( void processMatchedVerticesAndSendMessagesD(
MilanLongInt NLVer, MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed, vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U, vector<MilanLongInt> &U,
@ -387,7 +387,7 @@ is disabled there is no reason to actually compile or reference them. */
vector<MPI_Request> &SRequest, vector<MPI_Request> &SRequest,
vector<MPI_Status> &SStatus); vector<MPI_Status> &SStatus);
void processMessages( void processMessagesD(
MilanLongInt NLVer, MilanLongInt NLVer,
MilanLongInt *Mate, MilanLongInt *Mate,
MilanLongInt *candidateMate, MilanLongInt *candidateMate,
@ -419,6 +419,153 @@ is disabled there is no reason to actually compile or reference them. */
vector<MilanLongInt> &U, vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU); vector<MilanLongInt> &privateU);
MilanLongInt firstComputeCandidateMateS(MilanLongInt adj1,
MilanLongInt adj2,
MilanLongInt *verLocInd,
MilanFloat *edgeLocWeight);
MilanLongInt computeCandidateMateS(MilanLongInt adj1,
MilanLongInt adj2,
MilanFloat *edgeLocWeight,
MilanLongInt k,
MilanLongInt *verLocInd,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
vector<MilanLongInt> &GMate,
MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap);
void PARALLEL_COMPUTE_CANDIDATE_MATE_BS(MilanLongInt NLVer,
MilanLongInt *verLocPtr,
MilanLongInt *verLocInd,
MilanInt myRank,
MilanFloat *edgeLocWeight,
MilanLongInt *candidateMate);
void PARALLEL_PROCESS_EXPOSED_VERTEX_BS(MilanLongInt NLVer,
MilanLongInt *candidateMate,
MilanLongInt *verLocInd,
MilanLongInt *verLocPtr,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *Mate,
vector<MilanLongInt> &GMate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
MilanFloat *edgeLocWeight,
MilanLongInt *myCardPtr,
MilanLongInt *msgIndPtr,
MilanLongInt *NumMessagesBundledPtr,
MilanLongInt *SPtr,
MilanLongInt *verDistance,
MilanLongInt *PCounter,
vector<MilanLongInt> &Counter,
MilanInt myRank,
MilanInt numProcs,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner);
void processMatchedVerticesS(
MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *myCardPtr,
MilanLongInt *msgIndPtr,
MilanLongInt *NumMessagesBundledPtr,
MilanLongInt *SPtr,
MilanLongInt *verLocPtr,
MilanLongInt *verLocInd,
MilanLongInt *verDistance,
MilanLongInt *PCounter,
vector<MilanLongInt> &Counter,
MilanInt myRank,
MilanInt numProcs,
MilanLongInt *candidateMate,
vector<MilanLongInt> &GMate,
MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
MilanFloat *edgeLocWeight,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner);
void processMatchedVerticesAndSendMessagesS(
MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *myCardPtr,
MilanLongInt *msgIndPtr,
MilanLongInt *NumMessagesBundledPtr,
MilanLongInt *SPtr,
MilanLongInt *verLocPtr,
MilanLongInt *verLocInd,
MilanLongInt *verDistance,
MilanLongInt *PCounter,
vector<MilanLongInt> &Counter,
MilanInt myRank,
MilanInt numProcs,
MilanLongInt *candidateMate,
vector<MilanLongInt> &GMate,
MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
MilanFloat *edgeLocWeight,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner,
MPI_Comm comm,
MilanLongInt *msgActual,
vector<MilanLongInt> &Message);
void processMessagesS(
MilanLongInt NLVer,
MilanLongInt *Mate,
MilanLongInt *candidateMate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
vector<MilanLongInt> &GMate,
vector<MilanLongInt> &Counter,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *myCardPtr,
MilanLongInt *msgIndPtr,
MilanLongInt *msgActualPtr,
MilanFloat *edgeLocWeight,
MilanLongInt *verDistance,
MilanLongInt *verLocPtr,
MilanLongInt k,
MilanLongInt *verLocInd,
MilanInt numProcs,
MilanInt myRank,
MPI_Comm comm,
vector<MilanLongInt> &Message,
MilanLongInt numGhostEdges,
MilanLongInt u,
MilanLongInt v,
MilanLongInt *SPtr,
vector<MilanLongInt> &U);
void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
MilanLongInt NLVer, MilanLongInt NLEdge, MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanReal *edgeLocWeight, MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanReal *edgeLocWeight,
@ -428,14 +575,19 @@ is disabled there is no reason to actually compile or reference them. */
MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent, MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent,
MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time, MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time,
MilanLongInt *ph1_card, MilanLongInt *ph2_card); MilanLongInt *ph1_card, MilanLongInt *ph2_card);
#endif
#ifndef OPENMP void salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
//Function of find the owner of a ghost vertex using binary search: MilanLongInt NLVer, MilanLongInt NLEdge,
inline MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanFloat *edgeLocWeight,
MilanInt myRank, MilanInt numProcs); MilanLongInt *verDistance,
#endif MilanLongInt *Mate,
MilanInt myRank, MilanInt numProcs, MPI_Comm comm,
MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent,
MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time,
MilanLongInt *ph1_card, MilanLongInt *ph2_card);
void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC( void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
MilanLongInt NLVer, MilanLongInt NLEdge, MilanLongInt NLVer, MilanLongInt NLEdge,

@ -1,5 +1,4 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP
// *********************************************************************** // ***********************************************************************
// //
// MatchboxP: A C++ library for approximate weighted matching // MatchboxP: A C++ library for approximate weighted matching
@ -244,7 +243,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
* PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel. * PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel.
*/ */
PARALLEL_COMPUTE_CANDIDATE_MATE_B(NLVer, PARALLEL_COMPUTE_CANDIDATE_MATE_BD(NLVer,
verLocPtr, verLocPtr,
verLocInd, verLocInd,
myRank, myRank,
@ -269,7 +268,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
* TODO: Test when it's actually more efficient to execute this code * TODO: Test when it's actually more efficient to execute this code
* in parallel. * in parallel.
*/ */
PARALLEL_PROCESS_EXPOSED_VERTEX_B(NLVer, PARALLEL_PROCESS_EXPOSED_VERTEX_BD(NLVer,
candidateMate, candidateMate,
verLocInd, verLocInd,
verLocPtr, verLocPtr,
@ -321,7 +320,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
vector<MilanLongInt> UChunkBeingProcessed; vector<MilanLongInt> UChunkBeingProcessed;
UChunkBeingProcessed.reserve(UCHUNK); UChunkBeingProcessed.reserve(UCHUNK);
processMatchedVertices(NLVer, processMatchedVerticesD(NLVer,
UChunkBeingProcessed, UChunkBeingProcessed,
U, U,
privateU, privateU,
@ -430,7 +429,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
/////////////////////////// PROCESS MATCHED VERTICES ////////////////////////////// /////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
processMatchedVerticesAndSendMessages(NLVer, processMatchedVerticesAndSendMessagesD(NLVer,
UChunkBeingProcessed, UChunkBeingProcessed,
U, U,
privateU, privateU,
@ -490,8 +489,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
/////////////////////////// PROCESS MESSAGES ////////////////////////////////////// /////////////////////////// PROCESS MESSAGES //////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////
//startTime = MPI_Wtime();
processMessages(NLVer, processMessagesD(NLVer,
Mate, Mate,
candidateMate, candidateMate,
Ghost2LocalMap, Ghost2LocalMap,
@ -556,6 +555,488 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
*ph2_card = myCard; // Cardinality at the end of Phase-2 *ph2_card = myCard; // Cardinality at the end of Phase-2
} }
// End of algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate // End of algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate
void salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt *verLocPtr, MilanLongInt *verLocInd,
MilanFloat *edgeLocWeight,
MilanLongInt *verDistance,
MilanLongInt *Mate,
MilanInt myRank, MilanInt numProcs, MPI_Comm comm,
MilanLongInt *msgIndSent, MilanLongInt *msgActualSent,
MilanReal *msgPercent,
MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time,
MilanLongInt *ph1_card, MilanLongInt *ph2_card)
{
/*
* verDistance: it's a vector long as the number of processors.
* verDistance[i] contains the first node index of the i-th processor
* verDistance[i + 1] contains the last node index of the i-th processor
* NLVer: number of elements in the LocPtr
* NLEdge: number of edges assigned to the current processor
*
* Contains the portion of matrix assigned to the processor in
* Yale notation
* verLocInd: contains the positions on row of the matrix
* verLocPtr: i-th value is the position of the first element on the i-th row and
* i+1-th value is the position of the first element on the i+1-th row
*/
#if !defined(SERIAL_MPI)
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Within algoEdgeApproxDominatingEdgesLinearSearchMessageBundling()";
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ") verDistance [" ;
for (int i = 0; i < numProcs; i++)
cout << verDistance[i] << "," << verDistance[i+1];
cout << "]\n";
fflush(stdout);
#endif
#ifdef DEBUG_HANG_
if (myRank == 0) {
cout << "\n(" << myRank << ") verDistance [" ;
for (int i = 0; i < numProcs; i++)
cout << verDistance[i] << "," ;
cout << verDistance[numProcs]<< "]\n";
}
fflush(stdout);
#endif
// The starting vertex owned by the current rank
MilanLongInt StartIndex = verDistance[myRank];
// The ending vertex owned by the current rank
MilanLongInt EndIndex = verDistance[myRank + 1] - 1;
MPI_Status computeStatus;
MilanLongInt msgActual = 0, msgInd = 0;
MilanFloat heaviestEdgeWt = 0.0f; // Assumes positive weight
MilanReal startTime, finishTime;
startTime = MPI_Wtime();
// Data structures for sending and receiving messages:
vector<MilanLongInt> Message; // [ u, v, message_type ]
Message.resize(3, -1);
// Data structures for Message Bundling:
// Although up to two messages can be sent along any cross edge,
// only one message will be sent in the initialization phase -
// one of: REQUEST/FAILURE/SUCCESS
vector<MilanLongInt> QLocalVtx, QGhostVtx, QMsgType;
// Changed by Fabio to be an integer, addresses needs to be integers!
vector<MilanInt> QOwner;
MilanLongInt *PCounter = new MilanLongInt[numProcs];
for (int i = 0; i < numProcs; i++)
PCounter[i] = 0;
MilanLongInt NumMessagesBundled = 0;
// TODO when the last computational section will be refactored this could be eliminated
// Changed by Fabio to be an integer, addresses needs to be integers!
MilanInt ghostOwner = 0;
MilanLongInt *candidateMate = nullptr;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")NV: " << NLVer << " Edges: " << NLEdge;
fflush(stdout);
cout << "\n(" << myRank << ")StartIndex: " << StartIndex << " EndIndex: " << EndIndex;
fflush(stdout);
#endif
// Other Variables:
MilanLongInt u = -1, v = -1, w = -1, i = 0;
MilanLongInt k = -1, adj1 = -1, adj2 = -1;
MilanLongInt k1 = -1, adj11 = -1, adj12 = -1;
MilanLongInt myCard = 0;
// Build the Ghost Vertex Set: Vg
// Map each ghost vertex to a local vertex
map<MilanLongInt, MilanLongInt> Ghost2LocalMap;
// Store the edge count for each ghost vertex
vector<MilanLongInt> Counter;
// Number of Ghost vertices
MilanLongInt numGhostVertices = 0, numGhostEdges = 0;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")About to compute Ghost Vertices...";
fflush(stdout);
#endif
#ifdef DEBUG_HANG_
if (myRank == 0)
cout << "\n(" << myRank << ")About to compute Ghost Vertices...";
fflush(stdout);
#endif
// Define Adjacency Lists for Ghost Vertices:
// cout<<"Building Ghost data structures ... \n\n";
vector<MilanLongInt> verGhostPtr, verGhostInd, tempCounter;
// Mate array for ghost vertices:
vector<MilanLongInt> GMate; // Proportional to the number of ghost vertices
MilanLongInt S;
MilanLongInt privateMyCard = 0;
vector<MilanLongInt> PCumulative, PMessageBundle, PSizeInfoMessages;
vector<MPI_Request> SRequest; // Requests that are used for each send message
vector<MPI_Status> SStatus; // Status of sent messages, used in MPI_Wait
MilanLongInt MessageIndex = 0; // Pointer for current message
MilanInt BufferSize;
MilanLongInt *Buffer;
vector<MilanLongInt> privateQLocalVtx, privateQGhostVtx, privateQMsgType;
vector<MilanInt> privateQOwner;
vector<MilanLongInt> U, privateU;
initialize(NLVer, NLEdge, StartIndex,
EndIndex, &numGhostEdges,
&numGhostVertices, &S,
verLocInd, verLocPtr,
Ghost2LocalMap, Counter,
verGhostPtr, verGhostInd,
tempCounter, GMate,
Message, QLocalVtx,
QGhostVtx, QMsgType, QOwner,
candidateMate, U,
privateU,
privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
finishTime = MPI_Wtime();
*ph0_time = finishTime - startTime; // Time taken for Phase-0: Initialization
#ifdef DEBUG_HANG_
cout << myRank << " Finished initialization" << endl;
fflush(stdout);
#endif
startTime = MPI_Wtime();
/////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////// INITIALIZATION /////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////
// Compute the Initial Matching Set:
/*
* OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B has been splitted from
* PARALLEL_PROCESS_EXPOSED_VERTEX_B in order to better parallelize
* the two.
* PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel.
*/
PARALLEL_COMPUTE_CANDIDATE_MATE_BS(NLVer,
verLocPtr,
verLocInd,
myRank,
edgeLocWeight,
candidateMate);
#ifdef DEBUG_HANG_
cout << myRank << " Finished Exposed Vertex" << endl;
fflush(stdout);
#if 0
cout << myRank << " candidateMate after parallelCompute " <<endl;
for (int i=0; i<NLVer; i++) {
cout << candidateMate[i] << " " ;
}
cout << endl;
#endif
#endif
/*
* PARALLEL_PROCESS_EXPOSED_VERTEX_B
* TODO: write comment
*
* TODO: Test when it's actually more efficient to execute this code
* in parallel.
*/
PARALLEL_PROCESS_EXPOSED_VERTEX_BS(NLVer,
candidateMate,
verLocInd,
verLocPtr,
StartIndex,
EndIndex,
Mate,
GMate,
Ghost2LocalMap,
edgeLocWeight,
&myCard,
&msgInd,
&NumMessagesBundled,
&S,
verDistance,
PCounter,
Counter,
myRank,
numProcs,
U,
privateU,
QLocalVtx,
QGhostVtx,
QMsgType,
QOwner,
privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
tempCounter.clear(); // Do not need this any more
#ifdef DEBUG_HANG_
cout << myRank << " Finished Exposed Vertex" << endl;
fflush(stdout);
#if 0
cout << myRank << " Mate after Exposed Vertices " <<endl;
for (int i=0; i<NLVer; i++) {
cout << Mate[i] << " " ;
}
cout << endl;
#endif
#endif
///////////////////////////////////////////////////////////////////////////////////
/////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
// TODO what would be the optimal UCHUNK
vector<MilanLongInt> UChunkBeingProcessed;
UChunkBeingProcessed.reserve(UCHUNK);
processMatchedVerticesS(NLVer,
UChunkBeingProcessed,
U,
privateU,
StartIndex,
EndIndex,
&myCard,
&msgInd,
&NumMessagesBundled,
&S,
verLocPtr,
verLocInd,
verDistance,
PCounter,
Counter,
myRank,
numProcs,
candidateMate,
GMate,
Mate,
Ghost2LocalMap,
edgeLocWeight,
QLocalVtx,
QGhostVtx,
QMsgType,
QOwner,
privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
#ifdef DEBUG_HANG_
cout << myRank << " Finished Process Vertices" << endl;
fflush(stdout);
#if 0
cout << myRank << " Mate after Matched Vertices " <<endl;
for (int i=0; i<NLVer; i++) {
cout << Mate[i] << " " ;
}
cout << endl;
#endif
#endif
/////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////// SEND BUNDLED MESSAGES /////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////
sendBundledMessages(&numGhostEdges,
&BufferSize,
Buffer,
PCumulative,
PMessageBundle,
PSizeInfoMessages,
PCounter,
NumMessagesBundled,
&msgActual,
&MessageIndex,
numProcs,
myRank,
comm,
QLocalVtx,
QGhostVtx,
QMsgType,
QOwner,
SRequest,
SStatus);
///////////////////////// END OF SEND BUNDLED MESSAGES //////////////////////////////////
finishTime = MPI_Wtime();
*ph1_time = finishTime - startTime; // Time taken for Phase-1
#ifdef DEBUG_HANG_
cout << myRank << " Finished sendBundles" << endl;
fflush(stdout);
#endif
*ph1_card = myCard; // Cardinality at the end of Phase-1
startTime = MPI_Wtime();
/////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////// MAIN LOOP //////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////
// Main While Loop:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Entering While(true) loop..";
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
fflush(stdout);
#endif
while (true) {
#ifdef DEBUG_HANG_
//if (myRank == 0)
cout << "\n(" << myRank << ") Main loop" << endl;
fflush(stdout);
#endif
///////////////////////////////////////////////////////////////////////////////////
/////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
processMatchedVerticesAndSendMessagesS(NLVer,
UChunkBeingProcessed,
U,
privateU,
StartIndex,
EndIndex,
&myCard,
&msgInd,
&NumMessagesBundled,
&S,
verLocPtr,
verLocInd,
verDistance,
PCounter,
Counter,
myRank,
numProcs,
candidateMate,
GMate,
Mate,
Ghost2LocalMap,
edgeLocWeight,
QLocalVtx,
QGhostVtx,
QMsgType,
QOwner,
privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner,
comm,
&msgActual,
Message);
///////////////////////// END OF PROCESS MATCHED VERTICES /////////////////////////
//// BREAK IF NO MESSAGES EXPECTED /////////
#ifdef DEBUG_HANG_
#if 0
cout << myRank << " Mate after ProcessMatchedAndSend phase "<<S <<endl;
for (int i=0; i<NLVer; i++) {
cout << Mate[i] << " " ;
}
cout << endl;
#endif
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Deciding whether to break: S= " << S << endl;
#endif
if (S == 0) {
#ifdef DEBUG_HANG_
cout << "\n(" << myRank << ") Breaking out" << endl;
fflush(stdout);
#endif
break;
}
///////////////////////////////////////////////////////////////////////////////////
/////////////////////////// PROCESS MESSAGES //////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
processMessagesS(NLVer,
Mate,
candidateMate,
Ghost2LocalMap,
GMate,
Counter,
StartIndex,
EndIndex,
&myCard,
&msgInd,
&msgActual,
edgeLocWeight,
verDistance,
verLocPtr,
k,
verLocInd,
numProcs,
myRank,
comm,
Message,
numGhostEdges,
u,
v,
&S,
U);
///////////////////////// END OF PROCESS MESSAGES /////////////////////////////////
#ifdef DEBUG_HANG_
#if 0
cout << myRank << " Mate after ProcessMessages phase "<<S <<endl;
for (int i=0; i<NLVer; i++) {
cout << Mate[i] << " " ;
}
cout << endl;
#endif
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Finished Message processing phase: S= " << S;
fflush(stdout);
cout << "\n(" << myRank << ")** SENT : ACTUAL= " << msgActual;
fflush(stdout);
cout << "\n(" << myRank << ")** SENT : INDIVIDUAL= " << msgInd << endl;
fflush(stdout);
#endif
} // End of while (true)
clean(NLVer,
myRank,
MessageIndex,
SRequest,
SStatus,
BufferSize,
Buffer,
msgActual,
msgActualSent,
msgInd,
msgIndSent,
NumMessagesBundled,
msgPercent);
finishTime = MPI_Wtime();
*ph2_time = finishTime - startTime; // Time taken for Phase-2
*ph2_card = myCard; // Cardinality at the end of Phase-2
}
#endif #endif
#endif #endif

@ -1,5 +1,4 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP
// TODO comment // TODO comment
void clean(MilanLongInt NLVer, void clean(MilanLongInt NLVer,
@ -89,4 +88,3 @@ void clean(MilanLongInt NLVer,
} }
} }
} }
#endif

@ -1,5 +1,4 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP
/** /**
* Execute the research fr the Candidate Mate without controlling if the vertices are already matched. * Execute the research fr the Candidate Mate without controlling if the vertices are already matched.
* Returns the vertices with the highest weight * Returns the vertices with the highest weight
@ -9,7 +8,7 @@
* @param edgeLocWeight * @param edgeLocWeight
* @return * @return
*/ */
MilanLongInt firstComputeCandidateMate(MilanLongInt adj1, MilanLongInt firstComputeCandidateMateD(MilanLongInt adj1,
MilanLongInt adj2, MilanLongInt adj2,
MilanLongInt *verLocInd, MilanLongInt *verLocInd,
MilanReal *edgeLocWeight) MilanReal *edgeLocWeight)
@ -42,7 +41,7 @@ MilanLongInt firstComputeCandidateMate(MilanLongInt adj1,
* @param Ghost2LocalMap * @param Ghost2LocalMap
* @return * @return
*/ */
MilanLongInt computeCandidateMate(MilanLongInt adj1, MilanLongInt computeCandidateMateD(MilanLongInt adj1,
MilanLongInt adj2, MilanLongInt adj2,
MilanReal *edgeLocWeight, MilanReal *edgeLocWeight,
MilanLongInt k, MilanLongInt k,
@ -71,4 +70,67 @@ MilanLongInt computeCandidateMate(MilanLongInt adj1,
return w; return w;
} }
#endif
MilanLongInt firstComputeCandidateMateS(MilanLongInt adj1,
MilanLongInt adj2,
MilanLongInt *verLocInd,
MilanFloat *edgeLocWeight)
{
MilanInt w = -1;
MilanFloat heaviestEdgeWt = 0.0f; // Assign the smallest
int finalK;
for (int k = adj1; k < adj2; k++) {
if ((edgeLocWeight[k] > heaviestEdgeWt) ||
((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k]))) {
heaviestEdgeWt = edgeLocWeight[k];
w = verLocInd[k];
finalK = k;
}
} // End of for loop
return finalK;
}
/**
* //TODO documentation
* @param adj1
* @param adj2
* @param edgeLocWeight
* @param k
* @param verLocInd
* @param StartIndex
* @param EndIndex
* @param GMate
* @param Mate
* @param Ghost2LocalMap
* @return
*/
MilanLongInt computeCandidateMateS(MilanLongInt adj1,
MilanLongInt adj2,
MilanFloat *edgeLocWeight,
MilanLongInt k,
MilanLongInt *verLocInd,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
vector<MilanLongInt> &GMate,
MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap)
{
// Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
MilanInt w = -1;
MilanFloat heaviestEdgeWt = 0.0f; // Assign the smallest Value
for (k = adj1; k < adj2; k++) {
if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap))
continue;
if ((edgeLocWeight[k] > heaviestEdgeWt) ||
((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k]))) {
heaviestEdgeWt = edgeLocWeight[k];
w = verLocInd[k];
}
} // End of for loop
// End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
return w;
}

@ -1,5 +1,4 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP
void extractUChunk( void extractUChunk(
vector<MilanLongInt> &UChunkBeingProcessed, vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U, vector<MilanLongInt> &U,
@ -29,4 +28,3 @@ void extractUChunk(
} // End of critical U // End of critical U } // End of critical U // End of critical U
} }
#endif

@ -1,5 +1,4 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP
/// Find the owner of a ghost node: /// Find the owner of a ghost node:
MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance,
MilanInt myRank, MilanInt numProcs) MilanInt myRank, MilanInt numProcs)
@ -27,4 +26,3 @@ MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance,
return Current; return Current;
} // End of findOwnerOfGhost() } // End of findOwnerOfGhost()
#endif

@ -1,5 +1,4 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP
void initialize(MilanLongInt NLVer, MilanLongInt NLEdge, void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt StartIndex, MilanLongInt EndIndex, MilanLongInt StartIndex, MilanLongInt EndIndex,
MilanLongInt *numGhostEdges, MilanLongInt *numGhostEdges,
@ -302,4 +301,3 @@ void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
} // End of single region } // End of single region
} // End of parallel region } // End of parallel region
} }
#endif

@ -1,5 +1,4 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP
/** /**
* //TODO documentation * //TODO documentation
* @param k * @param k
@ -44,4 +43,3 @@ bool isAlreadyMatched(MilanLongInt node,
return val >= 0; // Already matched return val >= 0; // Already matched
} }
#endif

@ -1,6 +1,5 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP void PARALLEL_COMPUTE_CANDIDATE_MATE_BD(MilanLongInt NLVer,
void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer,
MilanLongInt *verLocPtr, MilanLongInt *verLocPtr,
MilanLongInt *verLocInd, MilanLongInt *verLocInd,
MilanInt myRank, MilanInt myRank,
@ -20,9 +19,36 @@ void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer,
fflush(stdout); fflush(stdout);
#endif #endif
// Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) // Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
candidateMate[v] = firstComputeCandidateMate(verLocPtr[v], verLocPtr[v + 1], verLocInd, edgeLocWeight); candidateMate[v] = firstComputeCandidateMateD(verLocPtr[v], verLocPtr[v + 1],
verLocInd, edgeLocWeight);
// End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) // End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
} }
} }
} }
void PARALLEL_COMPUTE_CANDIDATE_MATE_BS(MilanLongInt NLVer,
MilanLongInt *verLocPtr,
MilanLongInt *verLocInd,
MilanInt myRank,
MilanFloat *edgeLocWeight,
MilanLongInt *candidateMate)
{
MilanLongInt v = -1;
#pragma omp parallel private(v) default(shared) num_threads(NUM_THREAD)
{
#pragma omp for schedule(static)
for (v = 0; v < NLVer; v++) {
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Processing: " << v + StartIndex << endl;
fflush(stdout);
#endif #endif
// Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
candidateMate[v] = firstComputeCandidateMateS(verLocPtr[v], verLocPtr[v + 1],
verLocInd, edgeLocWeight);
// End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
}
}
}

@ -1,5 +1,4 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP
void PROCESS_CROSS_EDGE(MilanLongInt *edge, void PROCESS_CROSS_EDGE(MilanLongInt *edge,
MilanLongInt *S) MilanLongInt *S)
{ {
@ -22,4 +21,3 @@ void PROCESS_CROSS_EDGE(MilanLongInt *edge,
// End: PARALLEL_PROCESS_CROSS_EDGE_B // End: PARALLEL_PROCESS_CROSS_EDGE_B
} }
#endif

@ -1,6 +1,5 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP void PARALLEL_PROCESS_EXPOSED_VERTEX_BD(MilanLongInt NLVer,
void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
MilanLongInt *candidateMate, MilanLongInt *candidateMate,
MilanLongInt *verLocInd, MilanLongInt *verLocInd,
MilanLongInt *verLocPtr, MilanLongInt *verLocPtr,
@ -34,12 +33,13 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0; MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0;
MilanInt ghostOwner = 0, option, igw; MilanInt ghostOwner = 0, option, igw;
//#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \ #pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) \ firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, \
privateQGhostVtx, privateQMsgType, privateQOwner) \
default(shared) num_threads(NUM_THREAD) default(shared) num_threads(NUM_THREAD)
{ {
//#pragma omp for reduction(+ \ #pragma omp for reduction(+ \
: PCounter[:numProcs], myCard \ : PCounter[:numProcs], myCard \
[:1], msgInd \ [:1], msgInd \
[:1], NumMessagesBundled \ [:1], NumMessagesBundled \
@ -66,7 +66,7 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
#pragma omp critical(Matching) #pragma omp critical(Matching)
{ {
if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) { if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) {
w = computeCandidateMate(verLocPtr[v], verLocPtr[v + 1], edgeLocWeight, 0, w = computeCandidateMateD(verLocPtr[v], verLocPtr[v + 1], edgeLocWeight, 0,
verLocInd, StartIndex, EndIndex, verLocInd, StartIndex, EndIndex,
GMate, Mate, Ghost2LocalMap); GMate, Mate, Ghost2LocalMap);
candidateMate[v] = w; candidateMate[v] = w;
@ -181,4 +181,188 @@ void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
} // End of parallel region } // End of parallel region
} }
void PARALLEL_PROCESS_EXPOSED_VERTEX_BS(MilanLongInt NLVer,
MilanLongInt *candidateMate,
MilanLongInt *verLocInd,
MilanLongInt *verLocPtr,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *Mate,
vector<MilanLongInt> &GMate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
MilanFloat *edgeLocWeight,
MilanLongInt *myCard,
MilanLongInt *msgInd,
MilanLongInt *NumMessagesBundled,
MilanLongInt *S,
MilanLongInt *verDistance,
MilanLongInt *PCounter,
vector<MilanLongInt> &Counter,
MilanInt myRank,
MilanInt numProcs,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner)
{
MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0;
MilanInt ghostOwner = 0, option, igw;
#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, \
privateQGhostVtx, privateQMsgType, privateQOwner) \
default(shared) num_threads(NUM_THREAD)
{
#pragma omp for reduction(+ \
: PCounter[:numProcs], myCard \
[:1], msgInd \
[:1], NumMessagesBundled \
[:1]) \
schedule(static)
for (v = 0; v < NLVer; v++) {
option = -1;
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
k = candidateMate[v];
candidateMate[v] = verLocInd[k];
w = candidateMate[v];
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Processing: " << v + StartIndex << endl;
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")" << v + StartIndex << " Points to: " << w;
fflush(stdout);
#endif
// If found a dominating edge:
if (w >= 0) {
#pragma omp critical(Matching)
{
if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) {
w = computeCandidateMateS(verLocPtr[v], verLocPtr[v + 1], edgeLocWeight, 0,
verLocInd, StartIndex, EndIndex,
GMate, Mate, Ghost2LocalMap);
candidateMate[v] = w;
}
if (w >= 0) {
(*myCard)++;
if ((w < StartIndex) || (w > EndIndex)) { // w is a ghost vertex
option = 2;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) {
option = 1;
Mate[v] = w;
GMate[Ghost2LocalMap[w]] = v + StartIndex; // w is a Ghost
} // End of if CandidateMate[w] = v
} // End of if a Ghost Vertex
else { // w is a local vertex
if (candidateMate[w - StartIndex] == (v + StartIndex)) {
option = 3;
Mate[v] = w; // v is local
Mate[w - StartIndex] = v + StartIndex; // w is local
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ") ";
fflush(stdout);
#endif #endif
} // End of if ( candidateMate[w-StartIndex] == (v+StartIndex) )
} // End of Else
} // End of second if
}
} // End of if(w >=0)
else {
//#pragma omp critical(adjuse)
{
// This piece of code is executed a really small number of times
adj11 = verLocPtr[v];
adj12 = verLocPtr[v + 1];
for (k1 = adj11; k1 < adj12; k1++) {
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
fflush(stdout);
#endif
(*msgInd)++;
(*NumMessagesBundled)++;
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// assert(ghostOwner != -1);
// assert(ghostOwner != myRank);
#pragma omp atomic
PCounter[ghostOwner]++;
privateQLocalVtx.push_back(v + StartIndex);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(FAILURE);
privateQOwner.push_back(ghostOwner);
} // End of if(GHOST)
} // End of for loop
}
}
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
switch (option)
{
case -1:
break;
case 1:
privateU.push_back(v + StartIndex);
privateU.push_back(w);
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ")";
fflush(stdout);
#endif
// Decrement the counter:
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S);
case 2:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a request message (291):";
cout << "\n(" << myRank << ")Local is: " << v + StartIndex << " Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl;
fflush(stdout);
#endif
(*msgInd)++;
(*NumMessagesBundled)++;
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// assert(ghostOwner != -1);
// assert(ghostOwner != myRank);
#pragma omp atomic
PCounter[ghostOwner]++;
privateQLocalVtx.push_back(v + StartIndex);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST);
privateQOwner.push_back(ghostOwner);
break;
case 3:
default:
privateU.push_back(v + StartIndex);
privateU.push_back(w);
break;
}
} // End of for ( v=0; v < NLVer; v++ )
queuesTransfer(U, privateU, QLocalVtx,
QGhostVtx,
QMsgType, QOwner, privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
} // End of parallel region
}

@ -1,6 +1,5 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP void processMatchedVerticesD(
void processMatchedVertices(
MilanLongInt NLVer, MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed, vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U, vector<MilanLongInt> &U,
@ -98,7 +97,7 @@ void processMatchedVertices(
if (candidateMate[v - StartIndex] == u) { if (candidateMate[v - StartIndex] == u) {
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
w = computeCandidateMate(verLocPtr[v - StartIndex], w = computeCandidateMateD(verLocPtr[v - StartIndex],
verLocPtr[v - StartIndex + 1], verLocPtr[v - StartIndex + 1],
edgeLocWeight, 0, edgeLocWeight, 0,
verLocInd, StartIndex, EndIndex, verLocInd, StartIndex, EndIndex,
@ -290,4 +289,296 @@ void processMatchedVertices(
#endif #endif
} // End of parallel region } // End of parallel region
} }
void processMatchedVerticesS(
MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *myCard,
MilanLongInt *msgInd,
MilanLongInt *NumMessagesBundled,
MilanLongInt *SPtr,
MilanLongInt *verLocPtr,
MilanLongInt *verLocInd,
MilanLongInt *verDistance,
MilanLongInt *PCounter,
vector<MilanLongInt> &Counter,
MilanInt myRank,
MilanInt numProcs,
MilanLongInt *candidateMate,
vector<MilanLongInt> &GMate,
MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
MilanFloat *edgeLocWeight,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner)
{
MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner;
int option;
MilanLongInt mateVal;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
fflush(stdout);
#endif
#ifdef COUNT_LOCAL_VERTEX
MilanLongInt localVertices = 0;
#endif
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, \
privateQMsgType, privateQOwner, UChunkBeingProcessed) \
default(shared) num_threads(NUM_THREAD) \
reduction(+ \
: msgInd[:1], PCounter \
[:numProcs], myCard \
[:1], NumMessagesBundled \
[:1])
{
while (!U.empty()) {
extractUChunk(UChunkBeingProcessed, U, privateU);
for (MilanLongInt u : UChunkBeingProcessed) {
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")u: " << u;
fflush(stdout);
#endif
if ((u >= StartIndex) && (u <= EndIndex)) { // Process Only the Local Vertices
#ifdef COUNT_LOCAL_VERTEX
localVertices++;
#endif
// Get the Adjacency list for u
adj1 = verLocPtr[u - StartIndex]; // Pointer
adj2 = verLocPtr[u - StartIndex + 1];
for (k = adj1; k < adj2; k++) {
option = -1;
v = verLocInd[k];
if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v];
fflush(stdout);
#endif
#pragma omp atomic read
mateVal = Mate[v - StartIndex];
// If the current vertex is pointing to a matched vertex and is not matched
if (mateVal < 0) {
#pragma omp critical
{
#pragma omp atomic read
mateVal = Mate[v - StartIndex];
// If the current vertex is pointing to a matched vertex and is not matched
if (mateVal < 0) {
if (candidateMate[v - StartIndex] == u) {
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
w = computeCandidateMateS(verLocPtr[v - StartIndex],
verLocPtr[v - StartIndex + 1],
edgeLocWeight, 0,
verLocInd, StartIndex, EndIndex,
GMate, Mate, Ghost2LocalMap);
candidateMate[v - StartIndex] = w;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")" << v << " Points to: " << w;
fflush(stdout);
#endif
// If found a dominating edge:
if (w >= 0) {
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a request message:";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
#endif
option = 2;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) {
option = 1;
Mate[v - StartIndex] = w; // v is a local vertex
GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
} // End of if CandidateMate[w] = v
} // End of if a Ghost Vertex
else { // w is a local vertex
if (candidateMate[w - StartIndex] == v) {
option = 3;
Mate[v - StartIndex] = w; // v is a local vertex
Mate[w - StartIndex] = v; // w is a local vertex
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout);
#endif
} // End of if(CandidateMate(w) = v
} // End of Else
} // End of if(w >=0)
else
option = 4; // End of Else: w == -1
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
} // End of If (candidateMate[v-StartIndex] == u
}
} // End of task
} // mateval < 0
} // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex:
else { // Neighbor is a ghost vertex
#pragma omp critical
{
if (candidateMate[NLVer + Ghost2LocalMap[v]] == u)
candidateMate[NLVer + Ghost2LocalMap[v]] = -1;
if (v != Mate[u - StartIndex])
option = 5; // u is local
} // End of critical
} // End of Else //A Ghost Vertex
switch (option)
{
case -1:
// No things to do
break;
case 1:
// Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v
privateU.push_back(v);
privateU.push_back(w);
(*myCard)++;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout);
#endif
// Decrement the counter:
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr);
case 2:
// Found a dominating edge, it is a ghost
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// assert(ghostOwner != -1);
// assert(ghostOwner != myRank);
#pragma omp atomic
PCounter[ghostOwner]++;
(*NumMessagesBundled)++;
(*msgInd)++;
privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST);
privateQOwner.push_back(ghostOwner);
break;
case 3:
privateU.push_back(v);
privateU.push_back(w);
(*myCard)++;
break;
case 4:
// Could not find a dominating vertex
adj11 = verLocPtr[v - StartIndex];
adj12 = verLocPtr[v - StartIndex + 1];
for (k1 = adj11; k1 < adj12; k1++) {
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
fflush(stdout);
#endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// assert(ghostOwner != -1);
// assert(ghostOwner != myRank);
#pragma omp atomic
PCounter[ghostOwner]++;
(*NumMessagesBundled)++;
(*msgInd)++;
privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(FAILURE);
privateQOwner.push_back(ghostOwner);
} // End of if(GHOST)
} // End of for loop
break;
case 5:
default:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a success message: ";
cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n";
fflush(stdout);
#endif
ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
// assert(ghostOwner != -1);
// assert(ghostOwner != myRank);
(*NumMessagesBundled)++;
PCounter[ghostOwner]++;
(*msgInd)++;
privateQLocalVtx.push_back(u);
privateQGhostVtx.push_back(v);
privateQMsgType.push_back(SUCCESS);
privateQOwner.push_back(ghostOwner);
break;
} // End of switch
} // End of inner for
}
} // End of outer for
queuesTransfer(U, privateU, QLocalVtx,
QGhostVtx,
QMsgType, QOwner, privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
#pragma omp critical(U)
{
U.insert(U.end(), privateU.begin(), privateU.end());
}
#pragma omp critical(sendMessageTransfer)
{
QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end());
QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end());
QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end());
QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end());
}
privateU.clear();
privateQLocalVtx.clear();
privateQGhostVtx.clear();
privateQMsgType.clear();
privateQOwner.clear();
} // End of while ( !U.empty() )
#ifdef COUNT_LOCAL_VERTEX
printf("Count local vertexes: %ld for thread %d of processor %d\n",
localVertices,
omp_get_thread_num(),
myRank);
#endif #endif
} // End of parallel region
}

@ -1,7 +1,6 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP
//#define DEBUG_HANG_ //#define DEBUG_HANG_
void processMatchedVerticesAndSendMessages( void processMatchedVerticesAndSendMessagesD(
MilanLongInt NLVer, MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed, vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U, vector<MilanLongInt> &U,
@ -52,6 +51,302 @@ void processMatchedVerticesAndSendMessages(
MilanLongInt localVertices = 0; MilanLongInt localVertices = 0;
#endif #endif
//#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \ //#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, \
privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) \
default(shared) \
num_threads(NUM_THREAD) \
reduction(+ \
: msgInd[:1], PCounter \
[:numProcs], myCard \
[:1], NumMessagesBundled \
[:1], msgActual \
[:1])
{
while (!U.empty()) {
extractUChunk(UChunkBeingProcessed, U, privateU);
for (MilanLongInt u : UChunkBeingProcessed) {
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")u: " << u;
fflush(stdout);
#endif
if ((u >= StartIndex) && (u <= EndIndex)) { // Process Only the Local Vertices
#ifdef COUNT_LOCAL_VERTEX
localVertices++;
#endif
// Get the Adjacency list for u
adj1 = verLocPtr[u - StartIndex]; // Pointer
adj2 = verLocPtr[u - StartIndex + 1];
for (k = adj1; k < adj2; k++) {
option = -1;
v = verLocInd[k];
if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v];
fflush(stdout);
#endif
#pragma omp atomic read
mateVal = Mate[v - StartIndex];
// If the current vertex is pointing to a matched vertex and is not matched
if (mateVal < 0) {
#pragma omp critical
{
#pragma omp atomic read
mateVal = Mate[v - StartIndex];
// If the current vertex is pointing to a matched vertex and is not matched
if (mateVal < 0) {
if (candidateMate[v - StartIndex] == u) {
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
w = computeCandidateMateD(verLocPtr[v - StartIndex],
verLocPtr[v - StartIndex + 1],
edgeLocWeight, 0,
verLocInd, StartIndex, EndIndex,
GMate, Mate, Ghost2LocalMap);
candidateMate[v - StartIndex] = w;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")" << v << " Points to: " << w;
fflush(stdout);
#endif
// If found a dominating edge:
if (w >= 0) {
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a request message:";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
#endif
option = 2;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) {
option = 1;
Mate[v - StartIndex] = w; // v is a local vertex
GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
} // End of if CandidateMate[w] = v
} // End of if a Ghost Vertex
else { // w is a local vertex
if (candidateMate[w - StartIndex] == v) {
option = 3;
Mate[v - StartIndex] = w; // v is a local vertex
Mate[w - StartIndex] = v; // w is a local vertex
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout);
#endif
} // End of if(CandidateMate(w) = v
} // End of Else
} // End of if(w >=0)
else
option = 4; // End of Else: w == -1
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
} // End of If (candidateMate[v-StartIndex] == u
}
} // End of task
} // mateval < 0
} // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex:
else { // Neighbor is a ghost vertex
#pragma omp critical
{
if (candidateMate[NLVer + Ghost2LocalMap[v]] == u)
candidateMate[NLVer + Ghost2LocalMap[v]] = -1;
if (v != Mate[u - StartIndex])
option = 5; // u is local
} // End of critical
} // End of Else //A Ghost Vertex
switch (option)
{
case -1:
// No things to do
break;
case 1:
// Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v
privateU.push_back(v);
privateU.push_back(w);
(*myCard)++;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout);
#endif
// Decrement the counter:
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr);
case 2:
// Found a dominating edge, it is a ghost
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// Build the Message Packet:
// Message[0] = v; // LOCAL
// Message[1] = w; // GHOST
// Message[2] = REQUEST; // TYPE
// Send a Request (Asynchronous)
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgActual)++;
(*msgInd)++;
privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST);
privateQOwner.push_back(ghostOwner);
break;
case 3:
privateU.push_back(v);
privateU.push_back(w);
(*myCard)++;
break;
case 4:
// Could not find a dominating vertex
adj11 = verLocPtr[v - StartIndex];
adj12 = verLocPtr[v - StartIndex + 1];
for (k1 = adj11; k1 < adj12; k1++) {
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
fflush(stdout);
#endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// Build the Message Packet:
// Message[0] = v; // LOCAL
// Message[1] = w; // GHOST
// Message[2] = FAILURE; // TYPE
// Send a Request (Asynchronous)
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgActual)++;
(*msgInd)++;
privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(FAILURE);
privateQOwner.push_back(ghostOwner);
} // End of if(GHOST)
} // End of for loop
break;
case 5:
default:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a success message: ";
cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n";
fflush(stdout);
#endif
ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
// Build the Message Packet:
// Message[0] = u; // LOCAL
// Message[1] = v; // GHOST
// Message[2] = SUCCESS; // TYPE
// Send a Request (Asynchronous)
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgActual)++;
(*msgInd)++;
privateQLocalVtx.push_back(u);
privateQGhostVtx.push_back(v);
privateQMsgType.push_back(SUCCESS);
privateQOwner.push_back(ghostOwner);
break;
} // End of switch
} // End of inner for
}
} // End of outer for
queuesTransfer(U, privateU, QLocalVtx,
QGhostVtx,
QMsgType, QOwner, privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
} // End of while ( !U.empty() )
#ifdef COUNT_LOCAL_VERTEX
printf("Count local vertexes: %ld for thread %d of processor %d\n",
localVertices, mp_get_thread_num(), myRank);
#endif
} // End of parallel region
// Send the messages
#ifdef DEBUG_HANG_
cout << myRank<<" Sending: "<<QOwner.size()-initialSize<<" messages" <<endl;
#endif
for (int i = initialSize; i < QOwner.size(); i++) {
Message[0] = QLocalVtx[i];
Message[1] = QGhostVtx[i];
Message[2] = QMsgType[i];
ghostOwner = QOwner[i];
//MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
//cout << myRank<<" Sending to "<<ghostOwner<<endl;
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
}
#ifdef DEBUG_HANG_
cout << myRank<<" Done sending messages"<<endl;
#endif
}
void processMatchedVerticesAndSendMessagesS(
MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *myCard,
MilanLongInt *msgInd,
MilanLongInt *NumMessagesBundled,
MilanLongInt *SPtr,
MilanLongInt *verLocPtr,
MilanLongInt *verLocInd,
MilanLongInt *verDistance,
MilanLongInt *PCounter,
vector<MilanLongInt> &Counter,
MilanInt myRank,
MilanInt numProcs,
MilanLongInt *candidateMate,
vector<MilanLongInt> &GMate,
MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
MilanFloat *edgeLocWeight,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner,
MPI_Comm comm,
MilanLongInt *msgActual,
vector<MilanLongInt> &Message)
{
MilanLongInt initialSize = QLocalVtx.size();
MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner;
int option;
MilanLongInt mateVal;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
fflush(stdout);
#endif
#ifdef COUNT_LOCAL_VERTEX
MilanLongInt localVertices = 0;
#endif
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx,\ firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx,\
privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \ privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \
num_threads(NUM_THREAD) \ num_threads(NUM_THREAD) \
@ -103,7 +398,7 @@ void processMatchedVerticesAndSendMessages(
if (candidateMate[v - StartIndex] == u) { if (candidateMate[v - StartIndex] == u) {
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
w = computeCandidateMate(verLocPtr[v - StartIndex], w = computeCandidateMateS(verLocPtr[v - StartIndex],
verLocPtr[v - StartIndex + 1], verLocPtr[v - StartIndex + 1],
edgeLocWeight, 0, edgeLocWeight, 0,
verLocInd, StartIndex, EndIndex, verLocInd, StartIndex, EndIndex,
@ -293,4 +588,3 @@ void processMatchedVerticesAndSendMessages(
cout << myRank<<" Done sending messages"<<endl; cout << myRank<<" Done sending messages"<<endl;
#endif #endif
} }
#endif

@ -1,8 +1,7 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP
//#define DEBUG_HANG_ //#define DEBUG_HANG_
void processMessages( void processMessagesD(
MilanLongInt NLVer, MilanLongInt NLVer,
MilanLongInt *Mate, MilanLongInt *Mate,
MilanLongInt *candidateMate, MilanLongInt *candidateMate,
@ -212,8 +211,9 @@ void processMessages(
// Process only if not already matched ( v is local) // Process only if not already matched ( v is local)
if (candidateMate[v - StartIndex] == u) { if (candidateMate[v - StartIndex] == u) {
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) // Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
w = computeCandidateMate(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], edgeLocWeight, k, w = computeCandidateMateD(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1],
verLocInd, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap); edgeLocWeight, k,verLocInd, StartIndex, EndIndex,
GMate, Mate, Ghost2LocalMap);
candidateMate[v - StartIndex] = w; candidateMate[v - StartIndex] = w;
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")" << v << " Points to: " << w << endl; cout << "\n(" << myRank << ")" << v << " Points to: " << w << endl;
@ -314,4 +314,318 @@ void processMessages(
return; return;
} }
void processMessagesS(
MilanLongInt NLVer,
MilanLongInt *Mate,
MilanLongInt *candidateMate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
vector<MilanLongInt> &GMate,
vector<MilanLongInt> &Counter,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *myCard,
MilanLongInt *msgInd,
MilanLongInt *msgActual,
MilanFloat *edgeLocWeight,
MilanLongInt *verDistance,
MilanLongInt *verLocPtr,
MilanLongInt k,
MilanLongInt *verLocInd,
MilanInt numProcs,
MilanInt myRank,
MPI_Comm comm,
vector<MilanLongInt> &Message,
MilanLongInt numGhostEdges,
MilanLongInt u,
MilanLongInt v,
MilanLongInt *S,
vector<MilanLongInt> &U)
{
//#define PRINT_DEBUG_INFO_
MilanInt Sender;
MPI_Status computeStatus;
MilanLongInt bundleSize, w;
MilanLongInt adj11, adj12, k1;
MilanLongInt ghostOwner;
int error_codeC;
error_codeC = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
char error_message[MPI_MAX_ERROR_STRING];
int message_length;
MilanLongInt message_type = 0;
// Buffer to receive bundled messages
// Maximum messages that can be received from any processor is
// twice the edge cut: REQUEST; REQUEST+(FAILURE/SUCCESS)
vector<MilanLongInt> ReceiveBuffer;
try
{
ReceiveBuffer.reserve(numGhostEdges * 2 * 3); // Three integers per cross edge
}
catch (length_error)
{
cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n";
cout << "Not enough memory to allocate the internal variables \n";
exit(1);
}
#ifdef PRINT_DEBUG_INFO_
cout
<< "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")About to begin Message processing phase ... *S=" << *S << endl;
fflush(stdout);
#endif #endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
fflush(stdout);
#endif
// BLOCKING RECEIVE:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << " Waiting for blocking receive..." << endl;
fflush(stdout);
fflush(stdout);
#endif
//cout << myRank<<" Receiving ...";
error_codeC = MPI_Recv(&Message[0], 3, TypeMap<MilanLongInt>(), MPI_ANY_SOURCE, ComputeTag, comm, &computeStatus);
if (error_codeC != MPI_SUCCESS)
{
MPI_Error_string(error_codeC, error_message, &message_length);
cout << "\n*Error in call to MPI_Receive on Slave: " << error_message << "\n";
fflush(stdout);
}
Sender = computeStatus.MPI_SOURCE;
//cout << " ...from "<<Sender << endl;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Received message from Process " << Sender << " Type= " << Message[2] << endl;
fflush(stdout);
#endif
if (Message[2] == SIZEINFO) {
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Received bundled message from Process " << Sender << " Size= " << Message[0] << endl;
fflush(stdout);
#endif
bundleSize = Message[0]; //#of integers in the message
// Build the Message Buffer:
if (!ReceiveBuffer.empty())
ReceiveBuffer.clear(); // Empty it out first
ReceiveBuffer.resize(bundleSize, -1); // Initialize
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message Bundle Before: " << endl;
for (int i = 0; i < bundleSize; i++)
cout << ReceiveBuffer[i] << ",";
cout << endl;
fflush(stdout);
#endif
// Receive the message
//cout << myRank<<" Receiving from "<<Sender<<endl;
error_codeC = MPI_Recv(&ReceiveBuffer[0], bundleSize, TypeMap<MilanLongInt>(), Sender, BundleTag, comm, &computeStatus);
if (error_codeC != MPI_SUCCESS) {
MPI_Error_string(error_codeC, error_message, &message_length);
cout << "\n*Error in call to MPI_Receive on processor " << myRank << " Error: " << error_message << "\n";
fflush(stdout);
}
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message Bundle After: " << endl;
for (int i = 0; i < bundleSize; i++)
cout << ReceiveBuffer[i] << ",";
cout << endl;
fflush(stdout);
#endif
} else { // Just a single message:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Received regular message from Process " << Sender << " u= " << Message[0] << " v= " << Message[1] << endl;
fflush(stdout);
#endif
// Add the current message to Queue:
bundleSize = 3; //#of integers in the message
// Build the Message Buffer:
if (!ReceiveBuffer.empty())
ReceiveBuffer.clear(); // Empty it out first
ReceiveBuffer.resize(bundleSize, -1); // Initialize
ReceiveBuffer[0] = Message[0]; // u
ReceiveBuffer[1] = Message[1]; // v
ReceiveBuffer[2] = Message[2]; // message_type
}
#ifdef DEBUG_GHOST_
if ((v < StartIndex) || (v > EndIndex)) {
cout << "\n(" << myRank << ") From ReceiveBuffer: This should not happen: u= " << u << " v= " << v << " Type= " << message_type << " StartIndex " << StartIndex << " EndIndex " << EndIndex << endl;
fflush(stdout);
}
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Processing message: u= " << u << " v= " << v << " Type= " << message_type << endl;
fflush(stdout);
#endif
// Most of the time bundleSize == 3, thus, it's not worth parallelizing thi loop
for (MilanLongInt bundleCounter = 3; bundleCounter < bundleSize + 3; bundleCounter += 3) {
u = ReceiveBuffer[bundleCounter - 3]; // GHOST
v = ReceiveBuffer[bundleCounter - 2]; // LOCAL
message_type = ReceiveBuffer[bundleCounter - 1]; // TYPE
// CASE I: REQUEST
if (message_type == REQUEST) {
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message type is REQUEST" << endl;
fflush(stdout);
#endif
#ifdef DEBUG_GHOST_
if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) {
cout << "\n(" << myRank << ") case 1 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl;
fflush(stdout);
}
#endif
if (Mate[v - StartIndex] == -1) {
// Process only if not already matched (v is local)
candidateMate[NLVer + Ghost2LocalMap[u]] = v; // Set CandidateMate for the ghost
if (candidateMate[v - StartIndex] == u) {
GMate[Ghost2LocalMap[u]] = v; // u is ghost
Mate[v - StartIndex] = u; // v is local
U.push_back(v);
U.push_back(u);
(*myCard)++;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << u << ") " << endl;
fflush(stdout);
#endif
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S);
} // End of if ( candidateMate[v-StartIndex] == u )e
} // End of if ( Mate[v] == -1 )
} // End of REQUEST
else { // CASE II: SUCCESS
if (message_type == SUCCESS) {
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message type is SUCCESS" << endl;
fflush(stdout);
#endif
GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process it again
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S);
#ifdef DEBUG_GHOST_
if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) {
cout << "\n(" << myRank << ") case 2 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl;
fflush(stdout);
}
#endif
if (Mate[v - StartIndex] == -1) {
// Process only if not already matched ( v is local)
if (candidateMate[v - StartIndex] == u) {
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
w = computeCandidateMateS(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1],
edgeLocWeight, k,verLocInd, StartIndex, EndIndex,
GMate, Mate, Ghost2LocalMap);
candidateMate[v - StartIndex] = w;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")" << v << " Points to: " << w << endl;
fflush(stdout);
#endif
// If found a dominating edge:
if (w >= 0) {
if ((w < StartIndex) || (w > EndIndex)) {
// w is a ghost
// Build the Message Packet:
Message[0] = v; // LOCAL
Message[1] = w; // GHOST
Message[2] = REQUEST; // TYPE
// Send a Request (Asynchronous)
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a request message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl;
fflush(stdout);
#endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
//assert(ghostOwner != -1);
//assert(ghostOwner != myRank);
//cout << myRank<<" Sending to "<<ghostOwner<<endl;
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgInd)++;
(*msgActual)++;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) {
Mate[v - StartIndex] = w; // v is local
GMate[Ghost2LocalMap[w]] = v; // w is ghost
U.push_back(v);
U.push_back(w);
(*myCard)++;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl;
fflush(stdout);
#endif
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S);
} // End of if CandidateMate[w] = v
} // End of if a Ghost Vertex
else { // w is a local vertex
if (candidateMate[w - StartIndex] == v) {
Mate[v - StartIndex] = w; // v is local
Mate[w - StartIndex] = v; // w is local
// Q.push_back(u);
U.push_back(v);
U.push_back(w);
(*myCard)++;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl;
fflush(stdout);
#endif
} // End of if(CandidateMate(w) = v
} // End of Else
} // End of if(w >=0)
else { // No dominant edge found
adj11 = verLocPtr[v - StartIndex];
adj12 = verLocPtr[v - StartIndex + 1];
for (k1 = adj11; k1 < adj12; k1++) {
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex)) {
// A ghost
// Build the Message Packet:
Message[0] = v; // LOCAL
Message[1] = w; // GHOST
Message[2] = FAILURE; // TYPE
// Send a Request (Asynchronous)
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl;
fflush(stdout);
#endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
//assert(ghostOwner != -1);
//assert(ghostOwner != myRank);
//cout << myRank<<" Sending to "<<ghostOwner<<endl;
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgInd)++;
(*msgActual)++;
} // End of if(GHOST)
} // End of for loop
} // End of Else: w == -1
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
} // End of if ( candidateMate[v-StartIndex] == u )
} // End of if ( Mate[v] == -1 )
} // End of if ( message_type == SUCCESS )
else {
// CASE III: FAILURE
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message type is FAILURE" << endl;
fflush(stdout);
#endif
GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process this anymore
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); // Decrease the counter
} // End of else: CASE III
} // End of else: CASE I
}
return;
}

@ -1,5 +1,4 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP
void queuesTransfer(vector<MilanLongInt> &U, void queuesTransfer(vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU, vector<MilanLongInt> &privateU,
vector<MilanLongInt> &QLocalVtx, vector<MilanLongInt> &QLocalVtx,
@ -31,4 +30,3 @@ void queuesTransfer(vector<MilanLongInt> &U,
privateQOwner.clear(); privateQOwner.clear();
} }
#endif

@ -1,5 +1,4 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#ifdef OPENMP
void sendBundledMessages(MilanLongInt *numGhostEdges, void sendBundledMessages(MilanLongInt *numGhostEdges,
MilanInt *BufferSize, MilanInt *BufferSize,
MilanLongInt *Buffer, MilanLongInt *Buffer,
@ -207,4 +206,3 @@ void sendBundledMessages(MilanLongInt *numGhostEdges,
} }
} }
} }
#endif

Loading…
Cancel
Save