@ -1,5 +1,4 @@
# include "MatchBoxPC.h"
# ifdef OPENMP
// ***********************************************************************
//
// MatchboxP: A C++ library for approximate weighted matching
@ -244,7 +243,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
* PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel .
*/
PARALLEL_COMPUTE_CANDIDATE_MATE_B ( NLVer ,
PARALLEL_COMPUTE_CANDIDATE_MATE_B D ( NLVer ,
verLocPtr ,
verLocInd ,
myRank ,
@ -269,7 +268,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
* TODO : Test when it ' s actually more efficient to execute this code
* in parallel .
*/
PARALLEL_PROCESS_EXPOSED_VERTEX_B ( NLVer ,
PARALLEL_PROCESS_EXPOSED_VERTEX_B D ( NLVer ,
candidateMate ,
verLocInd ,
verLocPtr ,
@ -321,7 +320,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
vector < MilanLongInt > UChunkBeingProcessed ;
UChunkBeingProcessed . reserve ( UCHUNK ) ;
processMatchedVertices ( NLVer ,
processMatchedVertices D ( NLVer ,
UChunkBeingProcessed ,
U ,
privateU ,
@ -430,7 +429,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
/////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
processMatchedVerticesAndSendMessages ( NLVer ,
processMatchedVerticesAndSendMessages D ( NLVer ,
UChunkBeingProcessed ,
U ,
privateU ,
@ -490,8 +489,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
///////////////////////////////////////////////////////////////////////////////////
/////////////////////////// PROCESS MESSAGES //////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
processMessages ( NLVer ,
//startTime = MPI_Wtime();
processMessages D ( NLVer ,
Mate ,
candidateMate ,
Ghost2LocalMap ,
@ -556,6 +555,488 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
* ph2_card = myCard ; // Cardinality at the end of Phase-2
}
// End of algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate
/*
 * salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP
 *
 * Driver of the distributed approximate matching computation for edge
 * weights stored as MilanFloat (presumably the single-precision counterpart
 * of the dalgo... routine defined earlier in this file - confirm).
 * The routine runs three timed phases:
 *   Phase 0: initialize() builds the ghost-vertex data structures.
 *   Phase 1: candidate mates are computed, exposed and matched vertices are
 *            processed, and the bundled messages are sent.
 *   Phase 2: a loop alternates between processing matched vertices and
 *            processing incoming messages until S reaches zero.
 *
 * Input (as described by the original author):
 *   NLVer         : number of elements in verLocPtr
 *   NLEdge        : number of edges assigned to the current processor
 *   verLocPtr     : Yale-notation row pointers; the i-th value is the
 *                   position of the first element of the i-th row
 *   verLocInd     : Yale-notation column positions
 *   edgeLocWeight : edge weights, forwarded to the helper routines
 *   verDistance   : one entry per processor plus a final one;
 *                   verDistance[myRank] is the first vertex owned by this
 *                   rank and verDistance[myRank + 1] - 1 the last one
 *   myRank, numProcs, comm : MPI rank, number of ranks and communicator
 *
 * Output:
 *   Mate          : mate array, updated by the helper routines
 *   msgIndSent, msgActualSent, msgPercent : message statistics, forwarded
 *                   to clean() (presumably filled in there - confirm)
 *   ph0_time, ph1_time, ph2_time : wall-clock time of each phase
 *   ph1_card, ph2_card : value of myCard at the end of Phase 1 / Phase 2
 */
void salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
    MilanLongInt NLVer, MilanLongInt NLEdge,
    MilanLongInt *verLocPtr, MilanLongInt *verLocInd,
    MilanFloat *edgeLocWeight,
    MilanLongInt *verDistance,
    MilanLongInt *Mate,
    MilanInt myRank, MilanInt numProcs, MPI_Comm comm,
    MilanLongInt *msgIndSent, MilanLongInt *msgActualSent,
    MilanReal *msgPercent,
    MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time,
    MilanLongInt *ph1_card, MilanLongInt *ph2_card)
{
// NOTE(review): the #endif matching this #if sits AFTER the closing brace of
// this function, so the brace itself is inside the conditional. Confirm that
// this file is never compiled with SERIAL_MPI defined.
#if !defined(SERIAL_MPI)
#ifdef PRINT_DEBUG_INFO_
    cout << " \n ( " << myRank << " )Within algoEdgeApproxDominatingEdgesLinearSearchMessageBundling() ";
    fflush(stdout);
#endif

#ifdef PRINT_DEBUG_INFO_
    cout << " \n ( " << myRank << " ) verDistance [ ";
    for (int i = 0; i < numProcs; i++)
        cout << verDistance[i] << " , " << verDistance[i + 1];
    cout << " ] \n ";
    fflush(stdout);
#endif

#ifdef DEBUG_HANG_
    if (myRank == 0) {
        cout << " \n ( " << myRank << " ) verDistance [ ";
        for (int i = 0; i < numProcs; i++)
            cout << verDistance[i] << " , ";
        cout << verDistance[numProcs] << " ] \n ";
    }
    fflush(stdout);
#endif

    // The starting vertex owned by the current rank
    MilanLongInt StartIndex = verDistance[myRank];
    // The ending vertex owned by the current rank
    MilanLongInt EndIndex = verDistance[myRank + 1] - 1;

    MilanLongInt msgActual = 0, msgInd = 0;
    MilanReal startTime, finishTime;

    startTime = MPI_Wtime();

    // Data structures for sending and receiving messages:
    vector<MilanLongInt> Message; // [ u, v, message_type ]
    Message.resize(3, -1);

    // Data structures for Message Bundling:
    // Although up to two messages can be sent along any cross edge,
    // only one message will be sent in the initialization phase -
    // one of: REQUEST/FAILURE/SUCCESS
    vector<MilanLongInt> QLocalVtx, QGhostVtx, QMsgType;
    // Changed by Fabio to be an integer, addresses needs to be integers!
    vector<MilanInt> QOwner;

    // Per-rank message counters, zero-initialized. The vector owns the
    // storage so that it is released when the function returns; the helper
    // routines keep receiving a plain pointer to it.
    vector<MilanLongInt> PCounterStorage(numProcs, 0);
    MilanLongInt *PCounter = PCounterStorage.data();

    MilanLongInt NumMessagesBundled = 0;

    // NOTE(review): candidateMate is handed to initialize(), which presumably
    // allocates it; it is not released anywhere in this function - confirm
    // who owns it.
    MilanLongInt *candidateMate = nullptr;

#ifdef PRINT_DEBUG_INFO_
    cout << " \n ( " << myRank << " )NV: " << NLVer << " Edges: " << NLEdge;
    fflush(stdout);
    cout << " \n ( " << myRank << " )StartIndex: " << StartIndex << " EndIndex: " << EndIndex;
    fflush(stdout);
#endif

    // Scratch values forwarded to processMessagesS
    MilanLongInt u = -1, v = -1;
    MilanLongInt k = -1;
    MilanLongInt myCard = 0;

    // Build the Ghost Vertex Set: Vg
    // Map each ghost vertex to a local vertex
    map<MilanLongInt, MilanLongInt> Ghost2LocalMap;
    // Store the edge count for each ghost vertex
    vector<MilanLongInt> Counter;
    // Number of Ghost vertices
    MilanLongInt numGhostVertices = 0, numGhostEdges = 0;

#ifdef PRINT_DEBUG_INFO_
    cout << " \n ( " << myRank << " )About to compute Ghost Vertices... ";
    fflush(stdout);
#endif

#ifdef DEBUG_HANG_
    if (myRank == 0)
        cout << " \n ( " << myRank << " )About to compute Ghost Vertices... ";
    fflush(stdout);
#endif

    // Define Adjacency Lists for Ghost Vertices:
    // cout<<"Building Ghost data structures ... \n\n";
    vector<MilanLongInt> verGhostPtr, verGhostInd, tempCounter;
    // Mate array for ghost vertices:
    vector<MilanLongInt> GMate; // Proportional to the number of ghost vertices

    // Loop-control counter: the main loop below stops once it reaches zero.
    MilanLongInt S = 0;

    vector<MilanLongInt> PCumulative, PMessageBundle, PSizeInfoMessages;
    vector<MPI_Request> SRequest; // Requests that are used for each send message
    vector<MPI_Status> SStatus;   // Status of sent messages, used in MPI_Wait
    MilanLongInt MessageIndex = 0; // Pointer for current message

    // Both are passed on to sendBundledMessages() and clean(); start from a
    // well-defined state instead of indeterminate values.
    MilanInt BufferSize = 0;
    MilanLongInt *Buffer = nullptr;

    vector<MilanLongInt> privateQLocalVtx, privateQGhostVtx, privateQMsgType;
    vector<MilanInt> privateQOwner;
    vector<MilanLongInt> U, privateU;

    initialize(NLVer, NLEdge, StartIndex,
               EndIndex, &numGhostEdges,
               &numGhostVertices, &S,
               verLocInd, verLocPtr,
               Ghost2LocalMap, Counter,
               verGhostPtr, verGhostInd,
               tempCounter, GMate,
               Message, QLocalVtx,
               QGhostVtx, QMsgType, QOwner,
               candidateMate, U,
               privateU,
               privateQLocalVtx,
               privateQGhostVtx,
               privateQMsgType,
               privateQOwner);

    finishTime = MPI_Wtime();
    *ph0_time = finishTime - startTime; // Time taken for Phase-0: Initialization

#ifdef DEBUG_HANG_
    cout << myRank << " Finished initialization " << endl;
    fflush(stdout);
#endif

    startTime = MPI_Wtime();

    /////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////// INITIALIZATION /////////////////////////////////////
    /////////////////////////////////////////////////////////////////////////////////////////
    // Compute the Initial Matching Set:

    /*
     * PARALLEL_COMPUTE_CANDIDATE_MATE_B has been split from
     * PARALLEL_PROCESS_EXPOSED_VERTEX_B in order to better parallelize
     * the two.
     * PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel.
     */
    PARALLEL_COMPUTE_CANDIDATE_MATE_BS(NLVer,
                                       verLocPtr,
                                       verLocInd,
                                       myRank,
                                       edgeLocWeight,
                                       candidateMate);

#ifdef DEBUG_HANG_
    cout << myRank << " Finished Exposed Vertex " << endl;
    fflush(stdout);
#if 0
    cout << myRank << " candidateMate after parallelCompute " << endl;
    for (int i = 0; i < NLVer; i++) {
        cout << candidateMate[i] << " ";
    }
    cout << endl;
#endif
#endif

    /*
     * PARALLEL_PROCESS_EXPOSED_VERTEX_B
     * TODO: write comment
     *
     * TODO: Test when it's actually more efficient to execute this code
     * in parallel.
     */
    PARALLEL_PROCESS_EXPOSED_VERTEX_BS(NLVer,
                                       candidateMate,
                                       verLocInd,
                                       verLocPtr,
                                       StartIndex,
                                       EndIndex,
                                       Mate,
                                       GMate,
                                       Ghost2LocalMap,
                                       edgeLocWeight,
                                       &myCard,
                                       &msgInd,
                                       &NumMessagesBundled,
                                       &S,
                                       verDistance,
                                       PCounter,
                                       Counter,
                                       myRank,
                                       numProcs,
                                       U,
                                       privateU,
                                       QLocalVtx,
                                       QGhostVtx,
                                       QMsgType,
                                       QOwner,
                                       privateQLocalVtx,
                                       privateQGhostVtx,
                                       privateQMsgType,
                                       privateQOwner);

    tempCounter.clear(); // Do not need this any more

#ifdef DEBUG_HANG_
    cout << myRank << " Finished Exposed Vertex " << endl;
    fflush(stdout);
#if 0
    cout << myRank << " Mate after Exposed Vertices " << endl;
    for (int i = 0; i < NLVer; i++) {
        cout << Mate[i] << " ";
    }
    cout << endl;
#endif
#endif

    ///////////////////////////////////////////////////////////////////////////////////
    /////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
    ///////////////////////////////////////////////////////////////////////////////////
    // TODO what would be the optimal UCHUNK
    vector<MilanLongInt> UChunkBeingProcessed;
    UChunkBeingProcessed.reserve(UCHUNK);

    processMatchedVerticesS(NLVer,
                            UChunkBeingProcessed,
                            U,
                            privateU,
                            StartIndex,
                            EndIndex,
                            &myCard,
                            &msgInd,
                            &NumMessagesBundled,
                            &S,
                            verLocPtr,
                            verLocInd,
                            verDistance,
                            PCounter,
                            Counter,
                            myRank,
                            numProcs,
                            candidateMate,
                            GMate,
                            Mate,
                            Ghost2LocalMap,
                            edgeLocWeight,
                            QLocalVtx,
                            QGhostVtx,
                            QMsgType,
                            QOwner,
                            privateQLocalVtx,
                            privateQGhostVtx,
                            privateQMsgType,
                            privateQOwner);

#ifdef DEBUG_HANG_
    cout << myRank << " Finished Process Vertices " << endl;
    fflush(stdout);
#if 0
    cout << myRank << " Mate after Matched Vertices " << endl;
    for (int i = 0; i < NLVer; i++) {
        cout << Mate[i] << " ";
    }
    cout << endl;
#endif
#endif

    /////////////////////////////////////////////////////////////////////////////////////////
    ///////////////////////////// SEND BUNDLED MESSAGES /////////////////////////////////////
    /////////////////////////////////////////////////////////////////////////////////////////
    sendBundledMessages(&numGhostEdges,
                        &BufferSize,
                        Buffer,
                        PCumulative,
                        PMessageBundle,
                        PSizeInfoMessages,
                        PCounter,
                        NumMessagesBundled,
                        &msgActual,
                        &MessageIndex,
                        numProcs,
                        myRank,
                        comm,
                        QLocalVtx,
                        QGhostVtx,
                        QMsgType,
                        QOwner,
                        SRequest,
                        SStatus);
    ///////////////////////// END OF SEND BUNDLED MESSAGES //////////////////////////////////

    finishTime = MPI_Wtime();
    *ph1_time = finishTime - startTime; // Time taken for Phase-1

#ifdef DEBUG_HANG_
    cout << myRank << " Finished sendBundles " << endl;
    fflush(stdout);
#endif

    *ph1_card = myCard; // Cardinality at the end of Phase-1
    startTime = MPI_Wtime();

    /////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////// MAIN LOOP //////////////////////////////////////
    /////////////////////////////////////////////////////////////////////////////////////////
    // Main While Loop:
#ifdef PRINT_DEBUG_INFO_
    cout << " \n ( " << myRank << " =========================************=============================== " << endl;
    fflush(stdout);
    fflush(stdout);
#endif

#ifdef PRINT_DEBUG_INFO_
    cout << " \n ( " << myRank << " )Entering While(true) loop.. ";
    fflush(stdout);
#endif

#ifdef PRINT_DEBUG_INFO_
    cout << " \n ( " << myRank << " =========================************=============================== " << endl;
    fflush(stdout);
    fflush(stdout);
#endif

    while (true) {
#ifdef DEBUG_HANG_
        //if (myRank == 0)
        cout << " \n ( " << myRank << " ) Main loop " << endl;
        fflush(stdout);
#endif

        ///////////////////////////////////////////////////////////////////////////////////
        /////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
        ///////////////////////////////////////////////////////////////////////////////////
        processMatchedVerticesAndSendMessagesS(NLVer,
                                               UChunkBeingProcessed,
                                               U,
                                               privateU,
                                               StartIndex,
                                               EndIndex,
                                               &myCard,
                                               &msgInd,
                                               &NumMessagesBundled,
                                               &S,
                                               verLocPtr,
                                               verLocInd,
                                               verDistance,
                                               PCounter,
                                               Counter,
                                               myRank,
                                               numProcs,
                                               candidateMate,
                                               GMate,
                                               Mate,
                                               Ghost2LocalMap,
                                               edgeLocWeight,
                                               QLocalVtx,
                                               QGhostVtx,
                                               QMsgType,
                                               QOwner,
                                               privateQLocalVtx,
                                               privateQGhostVtx,
                                               privateQMsgType,
                                               privateQOwner,
                                               comm,
                                               &msgActual,
                                               Message);
        ///////////////////////// END OF PROCESS MATCHED VERTICES /////////////////////////

        //// BREAK IF NO MESSAGES EXPECTED /////////
#ifdef DEBUG_HANG_
#if 0
        cout << myRank << " Mate after ProcessMatchedAndSend phase " << S << endl;
        for (int i = 0; i < NLVer; i++) {
            cout << Mate[i] << " ";
        }
        cout << endl;
#endif
#endif

#ifdef PRINT_DEBUG_INFO_
        cout << " \n ( " << myRank << " )Deciding whether to break: S= " << S << endl;
#endif

        // Once S has dropped to zero the loop ends.
        if (S == 0) {
#ifdef DEBUG_HANG_
            cout << " \n ( " << myRank << " ) Breaking out " << endl;
            fflush(stdout);
#endif
            break;
        }

        ///////////////////////////////////////////////////////////////////////////////////
        /////////////////////////// PROCESS MESSAGES //////////////////////////////////////
        ///////////////////////////////////////////////////////////////////////////////////
        processMessagesS(NLVer,
                         Mate,
                         candidateMate,
                         Ghost2LocalMap,
                         GMate,
                         Counter,
                         StartIndex,
                         EndIndex,
                         &myCard,
                         &msgInd,
                         &msgActual,
                         edgeLocWeight,
                         verDistance,
                         verLocPtr,
                         k,
                         verLocInd,
                         numProcs,
                         myRank,
                         comm,
                         Message,
                         numGhostEdges,
                         u,
                         v,
                         &S,
                         U);
        ///////////////////////// END OF PROCESS MESSAGES /////////////////////////////////

#ifdef DEBUG_HANG_
#if 0
        cout << myRank << " Mate after ProcessMessages phase " << S << endl;
        for (int i = 0; i < NLVer; i++) {
            cout << Mate[i] << " ";
        }
        cout << endl;
#endif
#endif

#ifdef PRINT_DEBUG_INFO_
        cout << " \n ( " << myRank << " )Finished Message processing phase: S= " << S;
        fflush(stdout);
        cout << " \n ( " << myRank << " )** SENT : ACTUAL= " << msgActual;
        fflush(stdout);
        cout << " \n ( " << myRank << " )** SENT : INDIVIDUAL= " << msgInd << endl;
        fflush(stdout);
#endif
    } // End of while (true)

    clean(NLVer,
          myRank,
          MessageIndex,
          SRequest,
          SStatus,
          BufferSize,
          Buffer,
          msgActual,
          msgActualSent,
          msgInd,
          msgIndSent,
          NumMessagesBundled,
          msgPercent);

    finishTime = MPI_Wtime();
    *ph2_time = finishTime - startTime; // Time taken for Phase-2
    *ph2_card = myCard;                 // Cardinality at the end of Phase-2
}
# endif
# endif