You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
556 lines
20 KiB
C++
556 lines
20 KiB
C++
#include "MatchBoxPC.h"
|
|
#ifdef OMP
|
|
// ***********************************************************************
|
|
//
|
|
// MatchboxP: A C++ library for approximate weighted matching
|
|
// Mahantesh Halappanavar (hala@pnnl.gov)
|
|
// Pacific Northwest National Laboratory
|
|
//
|
|
// ***********************************************************************
|
|
//
|
|
// Copyright (2021) Battelle Memorial Institute
|
|
// All rights reserved.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions
|
|
// are met:
|
|
//
|
|
// 1. Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
//
|
|
// 2. Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
//
|
|
// 3. Neither the name of the copyright holder nor the names of its
|
|
// contributors may be used to endorse or promote products derived from
|
|
// this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
// POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
// ************************************************************************
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
/////////////////////////// DOMINATING EDGES MODEL ///////////////////////////////////
|
|
//////////////////////////////////////////////////////////////////////////////////////
|
|
/* Function : algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate()
|
|
*
|
|
* Date : New update: Feb 17, 2019, Richland, Washington.
|
|
* Date : Original development: May 17, 2009, E&CS Bldg.
|
|
*
|
|
* Purpose : Compute Approximate Maximum Weight Matching in Linear Time
|
|
*
|
|
* Args : inputMatrix - instance of Compressed-Col format of Matrix
|
|
* Mate - The Mate array
|
|
*
|
|
* Returns : By Value: (void)
|
|
* By Reference: Mate
|
|
*
|
|
* Comments : 1/2 Approx Algorithm. Picks the locally available heaviest edge.
|
|
* Assumption: The Mate Array is empty.
|
|
*/
|
|
|
|
/*
|
|
NLVer = #of vertices, NLEdge = #of edges
|
|
CSR/CSC/Compressed format: verLocPtr = Pointer, verLocInd = Index, edgeLocWeight = edge weights (positive real numbers)
|
|
verDistance = A vector of size |P|+1 containing the cumulative number of vertices per process
|
|
Mate = A vector of size |V_p| (local subgraph) to store the output (matching)
|
|
MPI: myRank, numProcs, comm,
|
|
Statistics: msgIndSent, msgActualSent, msgPercent : Size: |P| number of processes in the comm-world
|
|
Statistics: ph0_time, ph1_time, ph2_time: Runtimes
|
|
Statistics: ph1_card, ph2_card : Size: |P| number of processes in the comm-world (number of matched edges in Phase 1 and Phase 2)
|
|
*/
|
|
//#define DEBUG_HANG_
|
|
#ifdef SERIAL_MPI
|
|
#else
|
|
|
|
// DOUBLE PRECISION VERSION
|
|
// WARNING: The vertex block on a given rank is contiguous
|
|
void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
|
|
MilanLongInt NLVer, MilanLongInt NLEdge,
|
|
MilanLongInt *verLocPtr, MilanLongInt *verLocInd,
|
|
MilanReal *edgeLocWeight,
|
|
MilanLongInt *verDistance,
|
|
MilanLongInt *Mate,
|
|
MilanInt myRank, MilanInt numProcs, MPI_Comm comm,
|
|
MilanLongInt *msgIndSent, MilanLongInt *msgActualSent,
|
|
MilanReal *msgPercent,
|
|
MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time,
|
|
MilanLongInt *ph1_card, MilanLongInt *ph2_card)
|
|
{
|
|
|
|
/*
|
|
* verDistance: it's a vector long as the number of processors.
|
|
* verDistance[i] contains the first node index of the i-th processor
|
|
* verDistance[i + 1] contains the last node index of the i-th processor
|
|
* NLVer: number of elements in the LocPtr
|
|
* NLEdge: number of edges assigned to the current processor
|
|
*
|
|
* Contains the portion of matrix assigned to the processor in
|
|
* Yale notation
|
|
* verLocInd: contains the positions on row of the matrix
|
|
* verLocPtr: i-th value is the position of the first element on the i-th row and
|
|
* i+1-th value is the position of the first element on the i+1-th row
|
|
*/
|
|
|
|
#if !defined(SERIAL_MPI)
|
|
#ifdef PRINT_DEBUG_INFO_
|
|
cout << "\n(" << myRank << ")Within algoEdgeApproxDominatingEdgesLinearSearchMessageBundling()";
|
|
fflush(stdout);
|
|
#endif
|
|
|
|
#ifdef PRINT_DEBUG_INFO_
|
|
cout << "\n(" << myRank << ") verDistance [" ;
|
|
for (int i = 0; i < numProcs; i++)
|
|
cout << verDistance[i] << "," << verDistance[i+1];
|
|
cout << "]\n";
|
|
fflush(stdout);
|
|
#endif
|
|
#ifdef DEBUG_HANG_
|
|
if (myRank == 0) {
|
|
cout << "\n(" << myRank << ") verDistance [" ;
|
|
for (int i = 0; i < numProcs; i++)
|
|
cout << verDistance[i] << "," ;
|
|
cout << verDistance[numProcs]<< "]\n";
|
|
}
|
|
fflush(stdout);
|
|
#endif
|
|
|
|
MilanLongInt StartIndex = verDistance[myRank]; // The starting vertex owned by the current rank
|
|
MilanLongInt EndIndex = verDistance[myRank + 1] - 1; // The ending vertex owned by the current rank
|
|
|
|
MPI_Status computeStatus;
|
|
|
|
MilanLongInt msgActual = 0, msgInd = 0;
|
|
MilanReal heaviestEdgeWt = 0.0f; // Assumes positive weight
|
|
MilanReal startTime, finishTime;
|
|
|
|
startTime = MPI_Wtime();
|
|
|
|
// Data structures for sending and receiving messages:
|
|
vector<MilanLongInt> Message; // [ u, v, message_type ]
|
|
Message.resize(3, -1);
|
|
// Data structures for Message Bundling:
|
|
// Although up to two messages can be sent along any cross edge,
|
|
// only one message will be sent in the initialization phase -
|
|
// one of: REQUEST/FAILURE/SUCCESS
|
|
vector<MilanLongInt> QLocalVtx, QGhostVtx, QMsgType;
|
|
vector<MilanInt> QOwner; // Changed by Fabio to be an integer, addresses needs to be integers!
|
|
|
|
MilanLongInt *PCounter = new MilanLongInt[numProcs];
|
|
for (int i = 0; i < numProcs; i++)
|
|
PCounter[i] = 0;
|
|
|
|
MilanLongInt NumMessagesBundled = 0;
|
|
// TODO when the last computational section will be refactored this could be eliminated
|
|
MilanInt ghostOwner = 0; // Changed by Fabio to be an integer, addresses needs to be integers!
|
|
MilanLongInt *candidateMate = nullptr;
|
|
#ifdef PRINT_DEBUG_INFO_
|
|
cout << "\n(" << myRank << ")NV: " << NLVer << " Edges: " << NLEdge;
|
|
fflush(stdout);
|
|
cout << "\n(" << myRank << ")StartIndex: " << StartIndex << " EndIndex: " << EndIndex;
|
|
fflush(stdout);
|
|
#endif
|
|
// Other Variables:
|
|
MilanLongInt u = -1, v = -1, w = -1, i = 0;
|
|
MilanLongInt k = -1, adj1 = -1, adj2 = -1;
|
|
MilanLongInt k1 = -1, adj11 = -1, adj12 = -1;
|
|
MilanLongInt myCard = 0;
|
|
|
|
// Build the Ghost Vertex Set: Vg
|
|
map<MilanLongInt, MilanLongInt> Ghost2LocalMap; // Map each ghost vertex to a local vertex
|
|
vector<MilanLongInt> Counter; // Store the edge count for each ghost vertex
|
|
MilanLongInt numGhostVertices = 0, numGhostEdges = 0; // Number of Ghost vertices
|
|
|
|
#ifdef PRINT_DEBUG_INFO_
|
|
cout << "\n(" << myRank << ")About to compute Ghost Vertices...";
|
|
fflush(stdout);
|
|
#endif
|
|
#ifdef DEBUG_HANG_
|
|
if (myRank == 0)
|
|
cout << "\n(" << myRank << ")About to compute Ghost Vertices...";
|
|
fflush(stdout);
|
|
#endif
|
|
|
|
// Define Adjacency Lists for Ghost Vertices:
|
|
// cout<<"Building Ghost data structures ... \n\n";
|
|
vector<MilanLongInt> verGhostPtr, verGhostInd, tempCounter;
|
|
// Mate array for ghost vertices:
|
|
vector<MilanLongInt> GMate; // Proportional to the number of ghost vertices
|
|
MilanLongInt S;
|
|
MilanLongInt privateMyCard = 0;
|
|
vector<MilanLongInt> PCumulative, PMessageBundle, PSizeInfoMessages;
|
|
vector<MPI_Request> SRequest; // Requests that are used for each send message
|
|
vector<MPI_Status> SStatus; // Status of sent messages, used in MPI_Wait
|
|
MilanLongInt MessageIndex = 0; // Pointer for current message
|
|
MilanInt BufferSize;
|
|
MilanLongInt *Buffer;
|
|
|
|
vector<MilanLongInt> privateQLocalVtx, privateQGhostVtx, privateQMsgType;
|
|
vector<MilanInt> privateQOwner;
|
|
vector<MilanLongInt> U, privateU;
|
|
|
|
initialize(NLVer, NLEdge, StartIndex,
|
|
EndIndex, &numGhostEdges,
|
|
&numGhostVertices, &S,
|
|
verLocInd, verLocPtr,
|
|
Ghost2LocalMap, Counter,
|
|
verGhostPtr, verGhostInd,
|
|
tempCounter, GMate,
|
|
Message, QLocalVtx,
|
|
QGhostVtx, QMsgType, QOwner,
|
|
candidateMate, U,
|
|
privateU,
|
|
privateQLocalVtx,
|
|
privateQGhostVtx,
|
|
privateQMsgType,
|
|
privateQOwner);
|
|
|
|
finishTime = MPI_Wtime();
|
|
*ph0_time = finishTime - startTime; // Time taken for Phase-0: Initialization
|
|
#ifdef DEBUG_HANG_
|
|
cout << myRank << " Finished initialization" << endl;
|
|
fflush(stdout);
|
|
#endif
|
|
|
|
startTime = MPI_Wtime();
|
|
|
|
/////////////////////////////////////////////////////////////////////////////////////////
|
|
//////////////////////////////////// INITIALIZATION /////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////////////////////////////
|
|
// Compute the Initial Matching Set:
|
|
|
|
/*
|
|
* OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B has been splitted from
|
|
* PARALLEL_PROCESS_EXPOSED_VERTEX_B in order to better parallelize
|
|
* the two.
|
|
* PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel.
|
|
*/
|
|
|
|
PARALLEL_COMPUTE_CANDIDATE_MATE_B(NLVer,
|
|
verLocPtr,
|
|
verLocInd,
|
|
myRank,
|
|
edgeLocWeight,
|
|
candidateMate);
|
|
|
|
#ifdef DEBUG_HANG_
|
|
cout << myRank << " Finished Exposed Vertex" << endl;
|
|
fflush(stdout);
|
|
#if 0
|
|
cout << myRank << " candidateMate after parallelCompute " <<endl;
|
|
for (int i=0; i<NLVer; i++) {
|
|
cout << candidateMate[i] << " " ;
|
|
}
|
|
cout << endl;
|
|
#endif
|
|
#endif
|
|
/*
|
|
* PARALLEL_PROCESS_EXPOSED_VERTEX_B
|
|
* TODO: write comment
|
|
*
|
|
* TODO: Test when it's actually more efficient to execute this code
|
|
* in parallel.
|
|
*/
|
|
PARALLEL_PROCESS_EXPOSED_VERTEX_B(NLVer,
|
|
candidateMate,
|
|
verLocInd,
|
|
verLocPtr,
|
|
StartIndex,
|
|
EndIndex,
|
|
Mate,
|
|
GMate,
|
|
Ghost2LocalMap,
|
|
edgeLocWeight,
|
|
&myCard,
|
|
&msgInd,
|
|
&NumMessagesBundled,
|
|
&S,
|
|
verDistance,
|
|
PCounter,
|
|
Counter,
|
|
myRank,
|
|
numProcs,
|
|
U,
|
|
privateU,
|
|
QLocalVtx,
|
|
QGhostVtx,
|
|
QMsgType,
|
|
QOwner,
|
|
privateQLocalVtx,
|
|
privateQGhostVtx,
|
|
privateQMsgType,
|
|
privateQOwner);
|
|
|
|
tempCounter.clear(); // Do not need this any more
|
|
|
|
#ifdef DEBUG_HANG_
|
|
cout << myRank << " Finished Exposed Vertex" << endl;
|
|
fflush(stdout);
|
|
#if 0
|
|
cout << myRank << " Mate after Exposed Vertices " <<endl;
|
|
for (int i=0; i<NLVer; i++) {
|
|
cout << Mate[i] << " " ;
|
|
}
|
|
cout << endl;
|
|
#endif
|
|
#endif
|
|
|
|
///////////////////////////////////////////////////////////////////////////////////
|
|
/////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
|
|
///////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// TODO what would be the optimal UCHUNK
|
|
vector<MilanLongInt> UChunkBeingProcessed;
|
|
UChunkBeingProcessed.reserve(UCHUNK);
|
|
|
|
processMatchedVertices(NLVer,
|
|
UChunkBeingProcessed,
|
|
U,
|
|
privateU,
|
|
StartIndex,
|
|
EndIndex,
|
|
&myCard,
|
|
&msgInd,
|
|
&NumMessagesBundled,
|
|
&S,
|
|
verLocPtr,
|
|
verLocInd,
|
|
verDistance,
|
|
PCounter,
|
|
Counter,
|
|
myRank,
|
|
numProcs,
|
|
candidateMate,
|
|
GMate,
|
|
Mate,
|
|
Ghost2LocalMap,
|
|
edgeLocWeight,
|
|
QLocalVtx,
|
|
QGhostVtx,
|
|
QMsgType,
|
|
QOwner,
|
|
privateQLocalVtx,
|
|
privateQGhostVtx,
|
|
privateQMsgType,
|
|
privateQOwner);
|
|
|
|
|
|
#ifdef DEBUG_HANG_
|
|
cout << myRank << " Finished Process Vertices" << endl;
|
|
fflush(stdout);
|
|
#if 0
|
|
cout << myRank << " Mate after Matched Vertices " <<endl;
|
|
for (int i=0; i<NLVer; i++) {
|
|
cout << Mate[i] << " " ;
|
|
}
|
|
cout << endl;
|
|
#endif
|
|
#endif
|
|
|
|
/////////////////////////////////////////////////////////////////////////////////////////
|
|
///////////////////////////// SEND BUNDLED MESSAGES /////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
sendBundledMessages(&numGhostEdges,
|
|
&BufferSize,
|
|
Buffer,
|
|
PCumulative,
|
|
PMessageBundle,
|
|
PSizeInfoMessages,
|
|
PCounter,
|
|
NumMessagesBundled,
|
|
&msgActual,
|
|
&MessageIndex,
|
|
numProcs,
|
|
myRank,
|
|
comm,
|
|
QLocalVtx,
|
|
QGhostVtx,
|
|
QMsgType,
|
|
QOwner,
|
|
SRequest,
|
|
SStatus);
|
|
|
|
///////////////////////// END OF SEND BUNDLED MESSAGES //////////////////////////////////
|
|
|
|
finishTime = MPI_Wtime();
|
|
*ph1_time = finishTime - startTime; // Time taken for Phase-1
|
|
|
|
#ifdef DEBUG_HANG_
|
|
cout << myRank << " Finished sendBundles" << endl;
|
|
fflush(stdout);
|
|
#endif
|
|
|
|
*ph1_card = myCard; // Cardinality at the end of Phase-1
|
|
startTime = MPI_Wtime();
|
|
/////////////////////////////////////////////////////////////////////////////////////////
|
|
//////////////////////////////////////// MAIN LOOP //////////////////////////////////////
|
|
/////////////////////////////////////////////////////////////////////////////////////////
|
|
// Main While Loop:
|
|
#ifdef PRINT_DEBUG_INFO_
|
|
cout << "\n(" << myRank << "=========================************===============================" << endl;
|
|
fflush(stdout);
|
|
fflush(stdout);
|
|
#endif
|
|
#ifdef PRINT_DEBUG_INFO_
|
|
cout << "\n(" << myRank << ")Entering While(true) loop..";
|
|
fflush(stdout);
|
|
#endif
|
|
#ifdef PRINT_DEBUG_INFO_
|
|
cout << "\n(" << myRank << "=========================************===============================" << endl;
|
|
fflush(stdout);
|
|
fflush(stdout);
|
|
#endif
|
|
|
|
while (true) {
|
|
#ifdef DEBUG_HANG_
|
|
//if (myRank == 0)
|
|
cout << "\n(" << myRank << ") Main loop" << endl;
|
|
fflush(stdout);
|
|
#endif
|
|
///////////////////////////////////////////////////////////////////////////////////
|
|
/////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
|
|
///////////////////////////////////////////////////////////////////////////////////
|
|
|
|
processMatchedVerticesAndSendMessages(NLVer,
|
|
UChunkBeingProcessed,
|
|
U,
|
|
privateU,
|
|
StartIndex,
|
|
EndIndex,
|
|
&myCard,
|
|
&msgInd,
|
|
&NumMessagesBundled,
|
|
&S,
|
|
verLocPtr,
|
|
verLocInd,
|
|
verDistance,
|
|
PCounter,
|
|
Counter,
|
|
myRank,
|
|
numProcs,
|
|
candidateMate,
|
|
GMate,
|
|
Mate,
|
|
Ghost2LocalMap,
|
|
edgeLocWeight,
|
|
QLocalVtx,
|
|
QGhostVtx,
|
|
QMsgType,
|
|
QOwner,
|
|
privateQLocalVtx,
|
|
privateQGhostVtx,
|
|
privateQMsgType,
|
|
privateQOwner,
|
|
comm,
|
|
&msgActual,
|
|
Message);
|
|
|
|
///////////////////////// END OF PROCESS MATCHED VERTICES /////////////////////////
|
|
|
|
//// BREAK IF NO MESSAGES EXPECTED /////////
|
|
#ifdef DEBUG_HANG_
|
|
#if 0
|
|
cout << myRank << " Mate after ProcessMatchedAndSend phase "<<S <<endl;
|
|
for (int i=0; i<NLVer; i++) {
|
|
cout << Mate[i] << " " ;
|
|
}
|
|
cout << endl;
|
|
#endif
|
|
#endif
|
|
#ifdef PRINT_DEBUG_INFO_
|
|
cout << "\n(" << myRank << ")Deciding whether to break: S= " << S << endl;
|
|
#endif
|
|
|
|
if (S == 0) {
|
|
#ifdef DEBUG_HANG_
|
|
cout << "\n(" << myRank << ") Breaking out" << endl;
|
|
fflush(stdout);
|
|
#endif
|
|
break;
|
|
}
|
|
///////////////////////////////////////////////////////////////////////////////////
|
|
/////////////////////////// PROCESS MESSAGES //////////////////////////////////////
|
|
///////////////////////////////////////////////////////////////////////////////////
|
|
|
|
processMessages(NLVer,
|
|
Mate,
|
|
candidateMate,
|
|
Ghost2LocalMap,
|
|
GMate,
|
|
Counter,
|
|
StartIndex,
|
|
EndIndex,
|
|
&myCard,
|
|
&msgInd,
|
|
&msgActual,
|
|
edgeLocWeight,
|
|
verDistance,
|
|
verLocPtr,
|
|
k,
|
|
verLocInd,
|
|
numProcs,
|
|
myRank,
|
|
comm,
|
|
Message,
|
|
numGhostEdges,
|
|
u,
|
|
v,
|
|
&S,
|
|
U);
|
|
|
|
///////////////////////// END OF PROCESS MESSAGES /////////////////////////////////
|
|
#ifdef DEBUG_HANG_
|
|
#if 0
|
|
cout << myRank << " Mate after ProcessMessages phase "<<S <<endl;
|
|
for (int i=0; i<NLVer; i++) {
|
|
cout << Mate[i] << " " ;
|
|
}
|
|
cout << endl;
|
|
#endif
|
|
#endif
|
|
#ifdef PRINT_DEBUG_INFO_
|
|
cout << "\n(" << myRank << ")Finished Message processing phase: S= " << S;
|
|
fflush(stdout);
|
|
cout << "\n(" << myRank << ")** SENT : ACTUAL= " << msgActual;
|
|
fflush(stdout);
|
|
cout << "\n(" << myRank << ")** SENT : INDIVIDUAL= " << msgInd << endl;
|
|
fflush(stdout);
|
|
#endif
|
|
} // End of while (true)
|
|
|
|
clean(NLVer,
|
|
myRank,
|
|
MessageIndex,
|
|
SRequest,
|
|
SStatus,
|
|
BufferSize,
|
|
Buffer,
|
|
msgActual,
|
|
msgActualSent,
|
|
msgInd,
|
|
msgIndSent,
|
|
NumMessagesBundled,
|
|
msgPercent);
|
|
|
|
finishTime = MPI_Wtime();
|
|
*ph2_time = finishTime - startTime; // Time taken for Phase-2
|
|
*ph2_card = myCard; // Cardinality at the end of Phase-2
|
|
}
|
|
// End of algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate
|
|
#endif
|
|
|
|
#endif
|
|
#endif
|