Merge branch 'dev-openmp' into development
commit
0bcc9d7b55
@ -0,0 +1,554 @@
|
||||
#include "MatchBoxPC.h"
|
||||
|
||||
// ***********************************************************************
|
||||
//
|
||||
// MatchboxP: A C++ library for approximate weighted matching
|
||||
// Mahantesh Halappanavar (hala@pnnl.gov)
|
||||
// Pacific Northwest National Laboratory
|
||||
//
|
||||
// ***********************************************************************
|
||||
//
|
||||
// Copyright (2021) Battelle Memorial Institute
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without
|
||||
// modification, are permitted provided that the following conditions
|
||||
// are met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright
|
||||
// notice, this list of conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright
|
||||
// notice, this list of conditions and the following disclaimer in the
|
||||
// documentation and/or other materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the copyright holder nor the names of its
|
||||
// contributors may be used to endorse or promote products derived from
|
||||
// this software without specific prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
||||
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
// POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// ************************************************************************
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////// DOMINATING EDGES MODEL ///////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
/* Function : algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate()
|
||||
*
|
||||
* Date : New update: Feb 17, 2019, Richland, Washington.
|
||||
* Date : Original development: May 17, 2009, E&CS Bldg.
|
||||
*
|
||||
* Purpose : Compute Approximate Maximum Weight Matching in Linear Time
|
||||
*
|
||||
* Args : inputMatrix - instance of Compressed-Col format of Matrix
|
||||
* Mate - The Mate array
|
||||
*
|
||||
* Returns : By Value: (void)
|
||||
* By Reference: Mate
|
||||
*
|
||||
* Comments : 1/2 Approx Algorithm. Picks the locally available heaviest edge.
|
||||
* Assumption: The Mate Array is empty.
|
||||
*/
|
||||
|
||||
/*
|
||||
NLVer = #of vertices, NLEdge = #of edges
|
||||
CSR/CSC/Compressed format: verLocPtr = Pointer, verLocInd = Index, edgeLocWeight = edge weights (positive real numbers)
|
||||
verDistance = A vector of size |P|+1 containing the cumulative number of vertices per process
|
||||
Mate = A vector of size |V_p| (local subgraph) to store the output (matching)
|
||||
MPI: myRank, numProcs, comm,
|
||||
Statistics: msgIndSent, msgActualSent, msgPercent : Size: |P| number of processes in the comm-world
|
||||
Statistics: ph0_time, ph1_time, ph2_time: Runtimes
|
||||
Statistics: ph1_card, ph2_card : Size: |P| number of processes in the comm-world (number of matched edges in Phase 1 and Phase 2)
|
||||
*/
|
||||
//#define DEBUG_HANG_
|
||||
#ifdef SERIAL_MPI
|
||||
#else
|
||||
|
||||
// DOUBLE PRECISION VERSION
|
||||
// WARNING: The vertex block on a given rank is contiguous
|
||||
// =======================================================================
// Driver for the distributed (MPI + OpenMP) 1/2-approx weighted matching,
// double-precision version.  Phases: (0) build ghost-vertex structures,
// (1) compute candidate mates, process exposed vertices and send the
// bundled messages, (2) message-driven main loop until no more messages
// are expected, then cleanup/statistics.  Results are returned through
// Mate and the ph*_time / ph*_card / msg* output pointers.
// =======================================================================
void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
    MilanLongInt NLVer, MilanLongInt NLEdge,
    MilanLongInt *verLocPtr, MilanLongInt *verLocInd,
    MilanReal *edgeLocWeight,
    MilanLongInt *verDistance,
    MilanLongInt *Mate,
    MilanInt myRank, MilanInt numProcs, MPI_Comm comm,
    MilanLongInt *msgIndSent, MilanLongInt *msgActualSent,
    MilanReal *msgPercent,
    MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time,
    MilanLongInt *ph1_card, MilanLongInt *ph2_card)
{

    /*
     * verDistance: it's a vector long as the number of processors.
     * verDistance[i] contains the first node index of the i-th processor
     * verDistance[i + 1] contains the last node index of the i-th processor
     * NLVer: number of elements in the LocPtr
     * NLEdge: number of edges assigned to the current processor
     *
     * Contains the portion of matrix assigned to the processor in
     * Yale notation
     * verLocInd: contains the positions on row of the matrix
     * verLocPtr: i-th value is the position of the first element on the i-th row and
     *            i+1-th value is the position of the first element on the i+1-th row
     */

#if !defined(SERIAL_MPI)
#ifdef PRINT_DEBUG_INFO_
    cout << "\n(" << myRank << ")Within algoEdgeApproxDominatingEdgesLinearSearchMessageBundling()";
    fflush(stdout);
#endif

#ifdef PRINT_DEBUG_INFO_
    cout << "\n(" << myRank << ") verDistance [";
    for (int i = 0; i < numProcs; i++)
        cout << verDistance[i] << "," << verDistance[i + 1];
    cout << "]\n";
    fflush(stdout);
#endif
#ifdef DEBUG_HANG_
    if (myRank == 0) {
        cout << "\n(" << myRank << ") verDistance [";
        for (int i = 0; i < numProcs; i++)
            cout << verDistance[i] << ",";
        cout << verDistance[numProcs] << "]\n";
    }
    fflush(stdout);
#endif

    MilanLongInt StartIndex = verDistance[myRank];       // The starting vertex owned by the current rank
    MilanLongInt EndIndex = verDistance[myRank + 1] - 1; // The ending vertex owned by the current rank

    MPI_Status computeStatus; // NOTE(review): declared but never used in this function

    MilanLongInt msgActual = 0, msgInd = 0; // Actual / individual message counters (statistics)
    MilanReal heaviestEdgeWt = 0.0f;        // Assumes positive weight
    MilanReal startTime, finishTime;

    startTime = MPI_Wtime(); // Phase-0 timer starts here

    // Data structures for sending and receiving messages:
    vector<MilanLongInt> Message; // [ u, v, message_type ]
    Message.resize(3, -1);
    // Data structures for Message Bundling:
    // Although up to two messages can be sent along any cross edge,
    // only one message will be sent in the initialization phase -
    // one of: REQUEST/FAILURE/SUCCESS
    vector<MilanLongInt> QLocalVtx, QGhostVtx, QMsgType;
    vector<MilanInt> QOwner; // Changed by Fabio to be an integer, addresses needs to be integers!

    // Per-destination-rank counters used to size the bundled sends.
    MilanLongInt *PCounter = new MilanLongInt[numProcs];
    for (int i = 0; i < numProcs; i++)
        PCounter[i] = 0;

    MilanLongInt NumMessagesBundled = 0;
    // TODO when the last computational section will be refactored this could be eliminated
    MilanInt ghostOwner = 0; // Changed by Fabio to be an integer, addresses needs to be integers!
    MilanLongInt *candidateMate = nullptr; // Per-vertex candidate mate; allocated inside initialize()
#ifdef PRINT_DEBUG_INFO_
    cout << "\n(" << myRank << ")NV: " << NLVer << " Edges: " << NLEdge;
    fflush(stdout);
    cout << "\n(" << myRank << ")StartIndex: " << StartIndex << " EndIndex: " << EndIndex;
    fflush(stdout);
#endif
    // Other Variables:
    MilanLongInt u = -1, v = -1, w = -1, i = 0;
    MilanLongInt k = -1, adj1 = -1, adj2 = -1;
    MilanLongInt k1 = -1, adj11 = -1, adj12 = -1;
    MilanLongInt myCard = 0; // Number of locally matched edges

    // Build the Ghost Vertex Set: Vg
    map<MilanLongInt, MilanLongInt> Ghost2LocalMap;       // Map each ghost vertex to a local vertex
    vector<MilanLongInt> Counter;                         // Store the edge count for each ghost vertex
    MilanLongInt numGhostVertices = 0, numGhostEdges = 0; // Number of Ghost vertices

#ifdef PRINT_DEBUG_INFO_
    cout << "\n(" << myRank << ")About to compute Ghost Vertices...";
    fflush(stdout);
#endif
#ifdef DEBUG_HANG_
    if (myRank == 0)
        cout << "\n(" << myRank << ")About to compute Ghost Vertices...";
    fflush(stdout);
#endif

    // Define Adjacency Lists for Ghost Vertices:
    // cout<<"Building Ghost data structures ... \n\n";
    vector<MilanLongInt> verGhostPtr, verGhostInd, tempCounter;
    // Mate array for ghost vertices:
    vector<MilanLongInt> GMate; // Proportional to the number of ghost vertices
    MilanLongInt S;             // Number of messages still expected; main loop exits when it reaches 0
    MilanLongInt privateMyCard = 0;
    vector<MilanLongInt> PCumulative, PMessageBundle, PSizeInfoMessages;
    vector<MPI_Request> SRequest;  // Requests that are used for each send message
    vector<MPI_Status> SStatus;    // Status of sent messages, used in MPI_Wait
    MilanLongInt MessageIndex = 0; // Pointer for current message
    MilanInt BufferSize;           // Size of the attached MPI buffered-send buffer (set in sendBundledMessages)
    MilanLongInt *Buffer;          // MPI buffered-send buffer (detached and freed in clean())

    // Thread-private work queues, merged into the shared ones by the helpers.
    vector<MilanLongInt> privateQLocalVtx, privateQGhostVtx, privateQMsgType;
    vector<MilanInt> privateQOwner;
    vector<MilanLongInt> U, privateU; // Queue of matched vertices still to be processed

    // Phase-0: build ghost maps/adjacency and allocate the work queues.
    initialize(NLVer, NLEdge, StartIndex,
               EndIndex, &numGhostEdges,
               &numGhostVertices, &S,
               verLocInd, verLocPtr,
               Ghost2LocalMap, Counter,
               verGhostPtr, verGhostInd,
               tempCounter, GMate,
               Message, QLocalVtx,
               QGhostVtx, QMsgType, QOwner,
               candidateMate, U,
               privateU,
               privateQLocalVtx,
               privateQGhostVtx,
               privateQMsgType,
               privateQOwner);

    finishTime = MPI_Wtime();
    *ph0_time = finishTime - startTime; // Time taken for Phase-0: Initialization
#ifdef DEBUG_HANG_
    cout << myRank << " Finished initialization" << endl;
    fflush(stdout);
#endif

    startTime = MPI_Wtime(); // Phase-1 timer starts

    /////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////// INITIALIZATION /////////////////////////////////////
    /////////////////////////////////////////////////////////////////////////////////////////
    // Compute the Initial Matching Set:

    /*
     * OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B has been splitted from
     * PARALLEL_PROCESS_EXPOSED_VERTEX_B in order to better parallelize
     * the two.
     * PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel.
     */

    // For every local vertex, find its heaviest incident edge (candidate mate).
    PARALLEL_COMPUTE_CANDIDATE_MATE_B(NLVer,
                                      verLocPtr,
                                      verLocInd,
                                      myRank,
                                      edgeLocWeight,
                                      candidateMate);

#ifdef DEBUG_HANG_
    cout << myRank << " Finished Exposed Vertex" << endl;
    fflush(stdout);
#if 0
    cout << myRank << " candidateMate after parallelCompute " <<endl;
    for (int i=0; i<NLVer; i++) {
        cout << candidateMate[i] << " " ;
    }
    cout << endl;
#endif
#endif
    /*
     * PARALLEL_PROCESS_EXPOSED_VERTEX_B
     * TODO: write comment
     *
     * TODO: Test when it's actually more efficient to execute this code
     * in parallel.
     */
    // Match mutually-dominating local pairs and queue messages for ghost ends.
    PARALLEL_PROCESS_EXPOSED_VERTEX_B(NLVer,
                                      candidateMate,
                                      verLocInd,
                                      verLocPtr,
                                      StartIndex,
                                      EndIndex,
                                      Mate,
                                      GMate,
                                      Ghost2LocalMap,
                                      edgeLocWeight,
                                      &myCard,
                                      &msgInd,
                                      &NumMessagesBundled,
                                      &S,
                                      verDistance,
                                      PCounter,
                                      Counter,
                                      myRank,
                                      numProcs,
                                      U,
                                      privateU,
                                      QLocalVtx,
                                      QGhostVtx,
                                      QMsgType,
                                      QOwner,
                                      privateQLocalVtx,
                                      privateQGhostVtx,
                                      privateQMsgType,
                                      privateQOwner);

    tempCounter.clear(); // Do not need this any more

#ifdef DEBUG_HANG_
    cout << myRank << " Finished Exposed Vertex" << endl;
    fflush(stdout);
#if 0
    cout << myRank << " Mate after Exposed Vertices " <<endl;
    for (int i=0; i<NLVer; i++) {
        cout << Mate[i] << " " ;
    }
    cout << endl;
#endif
#endif

    ///////////////////////////////////////////////////////////////////////////////////
    /////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
    ///////////////////////////////////////////////////////////////////////////////////

    // TODO what would be the optimal UCHUNK
    vector<MilanLongInt> UChunkBeingProcessed;
    UChunkBeingProcessed.reserve(UCHUNK);

    // Propagate matches from the queue U; new messages are only bundled here.
    processMatchedVertices(NLVer,
                           UChunkBeingProcessed,
                           U,
                           privateU,
                           StartIndex,
                           EndIndex,
                           &myCard,
                           &msgInd,
                           &NumMessagesBundled,
                           &S,
                           verLocPtr,
                           verLocInd,
                           verDistance,
                           PCounter,
                           Counter,
                           myRank,
                           numProcs,
                           candidateMate,
                           GMate,
                           Mate,
                           Ghost2LocalMap,
                           edgeLocWeight,
                           QLocalVtx,
                           QGhostVtx,
                           QMsgType,
                           QOwner,
                           privateQLocalVtx,
                           privateQGhostVtx,
                           privateQMsgType,
                           privateQOwner);

#ifdef DEBUG_HANG_
    cout << myRank << " Finished Process Vertices" << endl;
    fflush(stdout);
#if 0
    cout << myRank << " Mate after Matched Vertices " <<endl;
    for (int i=0; i<NLVer; i++) {
        cout << Mate[i] << " " ;
    }
    cout << endl;
#endif
#endif

    /////////////////////////////////////////////////////////////////////////////////////////
    ///////////////////////////// SEND BUNDLED MESSAGES /////////////////////////////////////
    /////////////////////////////////////////////////////////////////////////////////////////

    // Flush everything queued so far as one bundled send per destination rank.
    sendBundledMessages(&numGhostEdges,
                        &BufferSize,
                        Buffer,
                        PCumulative,
                        PMessageBundle,
                        PSizeInfoMessages,
                        PCounter,
                        NumMessagesBundled,
                        &msgActual,
                        &MessageIndex,
                        numProcs,
                        myRank,
                        comm,
                        QLocalVtx,
                        QGhostVtx,
                        QMsgType,
                        QOwner,
                        SRequest,
                        SStatus);

    ///////////////////////// END OF SEND BUNDLED MESSAGES //////////////////////////////////

    finishTime = MPI_Wtime();
    *ph1_time = finishTime - startTime; // Time taken for Phase-1

#ifdef DEBUG_HANG_
    cout << myRank << " Finished sendBundles" << endl;
    fflush(stdout);
#endif

    *ph1_card = myCard; // Cardinality at the end of Phase-1
    startTime = MPI_Wtime(); // Phase-2 timer starts
    /////////////////////////////////////////////////////////////////////////////////////////
    //////////////////////////////////////// MAIN LOOP //////////////////////////////////////
    /////////////////////////////////////////////////////////////////////////////////////////
    // Main While Loop:
#ifdef PRINT_DEBUG_INFO_
    cout << "\n(" << myRank << "=========================************===============================" << endl;
    fflush(stdout);
    fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
    cout << "\n(" << myRank << ")Entering While(true) loop..";
    fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
    cout << "\n(" << myRank << "=========================************===============================" << endl;
    fflush(stdout);
    fflush(stdout);
#endif

    // Alternate processing locally-matched vertices with receiving remote
    // messages, until no further messages are expected (S == 0).
    while (true) {
#ifdef DEBUG_HANG_
        //if (myRank == 0)
        cout << "\n(" << myRank << ") Main loop" << endl;
        fflush(stdout);
#endif
        ///////////////////////////////////////////////////////////////////////////////////
        /////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
        ///////////////////////////////////////////////////////////////////////////////////

        // Same as processMatchedVertices, but messages are sent immediately.
        processMatchedVerticesAndSendMessages(NLVer,
                                              UChunkBeingProcessed,
                                              U,
                                              privateU,
                                              StartIndex,
                                              EndIndex,
                                              &myCard,
                                              &msgInd,
                                              &NumMessagesBundled,
                                              &S,
                                              verLocPtr,
                                              verLocInd,
                                              verDistance,
                                              PCounter,
                                              Counter,
                                              myRank,
                                              numProcs,
                                              candidateMate,
                                              GMate,
                                              Mate,
                                              Ghost2LocalMap,
                                              edgeLocWeight,
                                              QLocalVtx,
                                              QGhostVtx,
                                              QMsgType,
                                              QOwner,
                                              privateQLocalVtx,
                                              privateQGhostVtx,
                                              privateQMsgType,
                                              privateQOwner,
                                              comm,
                                              &msgActual,
                                              Message);

        ///////////////////////// END OF PROCESS MATCHED VERTICES /////////////////////////

        //// BREAK IF NO MESSAGES EXPECTED /////////
#ifdef DEBUG_HANG_
#if 0
        cout << myRank << " Mate after ProcessMatchedAndSend phase "<<S <<endl;
        for (int i=0; i<NLVer; i++) {
            cout << Mate[i] << " " ;
        }
        cout << endl;
#endif
#endif
#ifdef PRINT_DEBUG_INFO_
        cout << "\n(" << myRank << ")Deciding whether to break: S= " << S << endl;
#endif

        if (S == 0) {
#ifdef DEBUG_HANG_
            cout << "\n(" << myRank << ") Breaking out" << endl;
            fflush(stdout);
#endif
            break;
        }
        ///////////////////////////////////////////////////////////////////////////////////
        /////////////////////////// PROCESS MESSAGES //////////////////////////////////////
        ///////////////////////////////////////////////////////////////////////////////////

        // Receive and act on one round of incoming REQUEST/SUCCESS/FAILURE messages.
        processMessages(NLVer,
                        Mate,
                        candidateMate,
                        Ghost2LocalMap,
                        GMate,
                        Counter,
                        StartIndex,
                        EndIndex,
                        &myCard,
                        &msgInd,
                        &msgActual,
                        edgeLocWeight,
                        verDistance,
                        verLocPtr,
                        k,
                        verLocInd,
                        numProcs,
                        myRank,
                        comm,
                        Message,
                        numGhostEdges,
                        u,
                        v,
                        &S,
                        U);

        ///////////////////////// END OF PROCESS MESSAGES /////////////////////////////////
#ifdef DEBUG_HANG_
#if 0
        cout << myRank << " Mate after ProcessMessages phase "<<S <<endl;
        for (int i=0; i<NLVer; i++) {
            cout << Mate[i] << " " ;
        }
        cout << endl;
#endif
#endif
#ifdef PRINT_DEBUG_INFO_
        cout << "\n(" << myRank << ")Finished Message processing phase: S= " << S;
        fflush(stdout);
        cout << "\n(" << myRank << ")** SENT : ACTUAL= " << msgActual;
        fflush(stdout);
        cout << "\n(" << myRank << ")** SENT : INDIVIDUAL= " << msgInd << endl;
        fflush(stdout);
#endif
    } // End of while (true)

    // Wait for outstanding sends, release the MPI buffer, publish statistics.
    clean(NLVer,
          myRank,
          MessageIndex,
          SRequest,
          SStatus,
          BufferSize,
          Buffer,
          msgActual,
          msgActualSent,
          msgInd,
          msgIndSent,
          NumMessagesBundled,
          msgPercent);

    finishTime = MPI_Wtime();
    *ph2_time = finishTime - startTime; // Time taken for Phase-2
    *ph2_card = myCard;                 // Cardinality at the end of Phase-2
}
|
||||
// End of algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate
|
||||
#endif
|
||||
|
||||
#endif
|
@ -0,0 +1,91 @@
|
||||
#include "MatchBoxPC.h"
|
||||
|
||||
// TODO comment
|
||||
|
||||
// Cleanup phase of the distributed matching: wait for all outstanding
// bundled sends, detach and free the MPI buffered-send buffer, and publish
// the messaging statistics through the output pointers.
// NOTE(review): NLVer is accepted but not used in this function body.
void clean(MilanLongInt NLVer,
           MilanInt myRank,
           MilanLongInt MessageIndex,
           vector<MPI_Request> &SRequest,
           vector<MPI_Status> &SStatus,
           MilanInt BufferSize,
           MilanLongInt *Buffer,
           MilanLongInt msgActual,
           MilanLongInt *msgActualSent,
           MilanLongInt msgInd,
           MilanLongInt *msgIndSent,
           MilanLongInt NumMessagesBundled,
           MilanReal *msgPercent)
{
    // Cleanup Phase

#pragma omp parallel
    {
#pragma omp master
        {
            // Task 1: drain pending sends and release the MPI buffer.
#pragma omp task
            {

#ifdef PRINT_DEBUG_INFO_
                cout << "\n(" << myRank << ") Waitall= " << endl;
                fflush(stdout);
#endif
#ifdef DEBUG_HANG_
                cout << "\n(" << myRank << ") Waitall " << endl;
                fflush(stdout);
#endif
                //return;

                // NOTE(review): MessageIndex is MilanLongInt but MPI_Waitall takes an
                // int count; also &SRequest[0] is undefined if SRequest is empty —
                // confirm at least one send is always posted before clean() is called.
                MPI_Waitall(MessageIndex, &SRequest[0], &SStatus[0]);

                // MPI_Buffer_attach(&Buffer, BufferSize); //Attach the Buffer
                if (BufferSize > 0)
                {
                    MPI_Buffer_detach(&Buffer, &BufferSize); // Detach the Buffer
                    free(Buffer); // Free the memory that was allocated
                }
            }

#ifdef PRINT_DEBUG_INFO_
            // NOTE(review): myCard, finishTime and startTime are not visible in this
            // function — this block would not compile with PRINT_DEBUG_INFO_ defined.
            cout << "\n(" << myRank << ")End of function to compute matching: " << endl;
            fflush(stdout);
            cout << "\n(" << myRank << ")myCardinality: " << myCard << endl;
            fflush(stdout);
            cout << "\n(" << myRank << ")Matching took " << finishTime - startTime << "seconds" << endl;
            fflush(stdout);
            cout << "\n(" << myRank << ")** Getting out of the matching function **" << endl;
            fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
            // NOTE(review): numGhostEdges is likewise not in scope here.
            cout << "\n(" << myRank << ") Number of Ghost edges = " << numGhostEdges;
            cout << "\n(" << myRank << ") Total number of potential message X 2 = " << numGhostEdges * 2;
            cout << "\n(" << myRank << ") Number messages bundled = " << NumMessagesBundled;
            cout << "\n(" << myRank << ") Total Individual Messages sent = " << msgInd;
            if (msgInd > 0)
            {
                cout << "\n(" << myRank << ") Percentage of messages bundled = " << ((double)NumMessagesBundled / (double)(msgInd)) * 100.0 << "% \n";
            }
            fflush(stdout);
#endif

            // Task 2: publish messaging statistics to the caller's out-params.
#pragma omp task
            {
                *msgActualSent = msgActual;
                *msgIndSent = msgInd;
                if (msgInd > 0)
                {
                    // Fraction of messages that travelled inside bundles.
                    *msgPercent = ((double)NumMessagesBundled / (double)(msgInd)) * 100.0;
                }
                else
                {
                    *msgPercent = 0;
                }
            }

#ifdef DEBUG_HANG_
            if (myRank == 0)
                cout << "\n(" << myRank << ") Done" << endl;
            fflush(stdout);
#endif
        }
    }
}
|
@ -0,0 +1,73 @@
|
||||
#include "MatchBoxPC.h"
|
||||
|
||||
/**
|
||||
* Execute the research fr the Candidate Mate without controlling if the vertices are already matched.
|
||||
* Returns the vertices with the highest weight
|
||||
* @param adj1
|
||||
* @param adj2
|
||||
* @param verLocInd
|
||||
* @param edgeLocWeight
|
||||
* @return
|
||||
*/
|
||||
MilanLongInt firstComputeCandidateMate(MilanLongInt adj1,
|
||||
MilanLongInt adj2,
|
||||
MilanLongInt *verLocInd,
|
||||
MilanReal *edgeLocWeight)
|
||||
{
|
||||
MilanInt w = -1;
|
||||
MilanReal heaviestEdgeWt = MilanRealMin; // Assign the smallest Value possible first LDBL_MIN
|
||||
int finalK;
|
||||
for (int k = adj1; k < adj2; k++) {
|
||||
if ((edgeLocWeight[k] > heaviestEdgeWt) ||
|
||||
((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k]))) {
|
||||
heaviestEdgeWt = edgeLocWeight[k];
|
||||
w = verLocInd[k];
|
||||
finalK = k;
|
||||
}
|
||||
} // End of for loop
|
||||
return finalK;
|
||||
}
|
||||
|
||||
/**
|
||||
* //TODO documentation
|
||||
* @param adj1
|
||||
* @param adj2
|
||||
* @param edgeLocWeight
|
||||
* @param k
|
||||
* @param verLocInd
|
||||
* @param StartIndex
|
||||
* @param EndIndex
|
||||
* @param GMate
|
||||
* @param Mate
|
||||
* @param Ghost2LocalMap
|
||||
* @return
|
||||
*/
|
||||
MilanLongInt computeCandidateMate(MilanLongInt adj1,
|
||||
MilanLongInt adj2,
|
||||
MilanReal *edgeLocWeight,
|
||||
MilanLongInt k,
|
||||
MilanLongInt *verLocInd,
|
||||
MilanLongInt StartIndex,
|
||||
MilanLongInt EndIndex,
|
||||
vector<MilanLongInt> &GMate,
|
||||
MilanLongInt *Mate,
|
||||
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap)
|
||||
{
|
||||
// Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
|
||||
|
||||
MilanInt w = -1;
|
||||
MilanReal heaviestEdgeWt = MilanRealMin; // Assign the smallest Value possible first LDBL_MIN
|
||||
for (k = adj1; k < adj2; k++) {
|
||||
if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap))
|
||||
continue;
|
||||
|
||||
if ((edgeLocWeight[k] > heaviestEdgeWt) ||
|
||||
((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k]))) {
|
||||
heaviestEdgeWt = edgeLocWeight[k];
|
||||
w = verLocInd[k];
|
||||
}
|
||||
} // End of for loop
|
||||
// End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
|
||||
|
||||
return w;
|
||||
}
|
@ -0,0 +1,31 @@
|
||||
#include "MatchBoxPC.h"
|
||||
|
||||
// Pop the next chunk of matched vertices to process into
// UChunkBeingProcessed.  If the shared queue U is empty but the calling
// thread's private queue is not, the private queue is drained instead.
// Access to U is serialized by the critical(U) region.
void extractUChunk(
    vector<MilanLongInt> &UChunkBeingProcessed,
    vector<MilanLongInt> &U,
    vector<MilanLongInt> &privateU)
{

    UChunkBeingProcessed.clear();
#pragma omp critical(U)
    {

        if (U.empty() && !privateU.empty()) // If U is empty but there are nodes in private U
        {
            // Drain the private queue completely.
            // BUG FIX: the while loop body was unbraced, so privateU.pop_back()
            // executed only once AFTER the loop — the loop pushed the same
            // element forever (infinite loop). The pop must happen on every
            // iteration.
            while (!privateU.empty())
            {
                UChunkBeingProcessed.push_back(privateU.back());
                privateU.pop_back();
            }
        }
        else
        {
            for (int i = 0; i < UCHUNK; i++)
            { // Pop the new nodes
                if (U.empty())
                    break;
                UChunkBeingProcessed.push_back(U.back());
                U.pop_back();
            }
        }

    } // End of critical U
}
|
@ -0,0 +1,29 @@
|
||||
#include "MatchBoxPC.h"
|
||||
|
||||
/// Find the owner of a ghost node:
|
||||
MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance,
|
||||
MilanInt myRank, MilanInt numProcs)
|
||||
{
|
||||
|
||||
MilanLongInt mStartInd = mVerDistance[myRank];
|
||||
MilanInt Start = 0;
|
||||
MilanInt End = numProcs;
|
||||
MilanInt Current = 0;
|
||||
|
||||
while (Start <= End)
|
||||
{
|
||||
Current = (End + Start) / 2;
|
||||
// CASE-1:
|
||||
if (mVerDistance[Current] == vtxIndex) return Current;
|
||||
else // CASE 2:
|
||||
if (mVerDistance[Current] > vtxIndex)
|
||||
End = Current - 1;
|
||||
else // CASE 3:
|
||||
Start = Current + 1;
|
||||
} // End of While()
|
||||
|
||||
if (mVerDistance[Current] > vtxIndex)
|
||||
return (Current - 1);
|
||||
|
||||
return Current;
|
||||
} // End of findOwnerOfGhost()
|
@ -0,0 +1,304 @@
|
||||
#include "MatchBoxPC.h"
|
||||
|
||||
void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
|
||||
MilanLongInt StartIndex, MilanLongInt EndIndex,
|
||||
MilanLongInt *numGhostEdges,
|
||||
MilanLongInt *numGhostVertices,
|
||||
MilanLongInt *S,
|
||||
MilanLongInt *verLocInd,
|
||||
MilanLongInt *verLocPtr,
|
||||
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
|
||||
vector<MilanLongInt> &Counter,
|
||||
vector<MilanLongInt> &verGhostPtr,
|
||||
vector<MilanLongInt> &verGhostInd,
|
||||
vector<MilanLongInt> &tempCounter,
|
||||
vector<MilanLongInt> &GMate,
|
||||
vector<MilanLongInt> &Message,
|
||||
vector<MilanLongInt> &QLocalVtx,
|
||||
vector<MilanLongInt> &QGhostVtx,
|
||||
vector<MilanLongInt> &QMsgType,
|
||||
vector<MilanInt> &QOwner,
|
||||
MilanLongInt *&candidateMate,
|
||||
vector<MilanLongInt> &U,
|
||||
vector<MilanLongInt> &privateU,
|
||||
vector<MilanLongInt> &privateQLocalVtx,
|
||||
vector<MilanLongInt> &privateQGhostVtx,
|
||||
vector<MilanLongInt> &privateQMsgType,
|
||||
vector<MilanInt> &privateQOwner)
|
||||
{
|
||||
|
||||
MilanLongInt insertMe = 0;
|
||||
MilanLongInt adj1, adj2;
|
||||
int i, v, k, w;
|
||||
// index that starts with zero to |Vg| - 1
|
||||
map<MilanLongInt, MilanLongInt>::iterator storedAlready;
|
||||
|
||||
#pragma omp parallel private(insertMe, k, w, v, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(NUM_THREAD)
|
||||
{
|
||||
|
||||
#pragma omp single
|
||||
{
|
||||
|
||||
#ifdef TIME_TRACKER
|
||||
double Ghost2LocalInitialization = MPI_Wtime();
|
||||
#endif
|
||||
|
||||
/*
|
||||
* OMP Ghost2LocalInitialization
|
||||
* This loop analyzes all the edges and when finds a ghost edge
|
||||
* puts it in the Ghost2LocalMap.
|
||||
* A critical region is needed when inserting data in the map.
|
||||
*
|
||||
* Despite the critical region it is still productive to
|
||||
* parallelize this cycle because the critical region is exeuted
|
||||
* only when a ghost edge is found and ghost edges are a minority,
|
||||
* circa 3.5% during the tests.
|
||||
*/
|
||||
#pragma omp task depend(out \
|
||||
: *numGhostEdges, Counter, Ghost2LocalMap, insertMe, storedAlready, *numGhostVertices)
|
||||
{
|
||||
#pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ \
|
||||
: numGhostEdges[:1])
|
||||
for (i = 0; i < NLEdge; i++)
|
||||
{ // O(m) - Each edge stored twice
|
||||
insertMe = verLocInd[i];
|
||||
if ((insertMe < StartIndex) || (insertMe > EndIndex))
|
||||
{ // Find a ghost
|
||||
(*numGhostEdges)++;
|
||||
#pragma omp critical
|
||||
{
|
||||
storedAlready = Ghost2LocalMap.find(insertMe);
|
||||
if (storedAlready != Ghost2LocalMap.end())
|
||||
{ // Has already been added
|
||||
Counter[storedAlready->second]++; // Increment the counter
|
||||
}
|
||||
else
|
||||
{ // Insert an entry for the ghost:
|
||||
Ghost2LocalMap[insertMe] = *numGhostVertices; // Add a map entry
|
||||
Counter.push_back(1); // Initialize the counter
|
||||
(*numGhostVertices)++; // Increment the number of ghost vertices
|
||||
} // End of else()
|
||||
}
|
||||
} // End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
|
||||
} // End of for(ghost vertices)
|
||||
} // end of task depend
|
||||
|
||||
// *numGhostEdges = atomicNumGhostEdges;
|
||||
#ifdef TIME_TRACKER
|
||||
Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization;
|
||||
fprintf(stderr, "Ghost2LocalInitialization time: %f\n", Ghost2LocalInitialization);
|
||||
#endif
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")NGhosts:" << *numGhostVertices << " GhostEdges: " << *numGhostEdges;
|
||||
if (!Ghost2LocalMap.empty())
|
||||
{
|
||||
cout << "\n(" << myRank << ")Final Map : on process ";
|
||||
cout << "\n(" << myRank << ")Key \t Value \t Counter \n";
|
||||
fflush(stdout);
|
||||
storedAlready = Ghost2LocalMap.begin();
|
||||
do
|
||||
{
|
||||
cout << storedAlready->second << " - " << storedAlready->first << " : " << Counter[storedAlready->second] << endl;
|
||||
fflush(stdout);
|
||||
storedAlready++;
|
||||
} while (storedAlready != Ghost2LocalMap.end());
|
||||
}
|
||||
#endif
|
||||
|
||||
#pragma omp task depend(out \
|
||||
: verGhostPtr, tempCounter, verGhostInd, GMate) depend(in \
|
||||
: *numGhostVertices, *numGhostEdges)
|
||||
{
|
||||
|
||||
// Initialize adjacency Lists for Ghost Vertices:
|
||||
try
|
||||
{
|
||||
verGhostPtr.reserve(*numGhostVertices + 1); // Pointer Vector
|
||||
tempCounter.reserve(*numGhostVertices); // Pointer Vector
|
||||
verGhostInd.reserve(*numGhostEdges); // Index Vector
|
||||
GMate.reserve(*numGhostVertices); // Ghost Mate Vector
|
||||
}
|
||||
catch (length_error)
|
||||
{
|
||||
cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n";
|
||||
cout << "Not enough memory to allocate the internal variables \n";
|
||||
exit(1);
|
||||
}
|
||||
// Initialize the Vectors:
|
||||
verGhostPtr.resize(*numGhostVertices + 1, 0); // Pointer Vector
|
||||
tempCounter.resize(*numGhostVertices, 0); // Temporary Counter
|
||||
verGhostInd.resize(*numGhostEdges, -1); // Index Vector
|
||||
GMate.resize(*numGhostVertices, -1); // Temporary Counter
|
||||
verGhostPtr[0] = 0; // The first value
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Ghost Vertex Pointer: ";
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
} // End of task
|
||||
|
||||
#pragma omp task depend(out \
|
||||
: verGhostPtr) depend(in \
|
||||
: Counter, *numGhostVertices)
|
||||
{
|
||||
|
||||
#ifdef TIME_TRACKER
|
||||
double verGhostPtrInitialization = MPI_Wtime();
|
||||
#endif
|
||||
for (i = 0; i < *numGhostVertices; i++)
|
||||
{ // O(|Ghost Vertices|)
|
||||
verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i];
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << verGhostPtr[i] << "\t";
|
||||
fflush(stdout);
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef TIME_TRACKER
|
||||
verGhostPtrInitialization = MPI_Wtime() - verGhostPtrInitialization;
|
||||
fprintf(stderr, "verGhostPtrInitialization time: %f\n", verGhostPtrInitialization);
|
||||
#endif
|
||||
} // End of task
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
if (*numGhostVertices > 0)
|
||||
cout << verGhostPtr[*numGhostVertices] << "\n";
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
#ifdef TIME_TRACKER
|
||||
double verGhostIndInitialization = MPI_Wtime();
|
||||
#endif
|
||||
|
||||
            /*
             * OMP verGhostIndInitialization
             *
             * In this cycle the verGhostInd is initialized
             * with the data related to ghost edges.
             * The check to see if a node is a ghost node is
             * executed in parallel, and when a ghost node
             * is found a critical region is started.
             *
             * Despite the critical region it is still useful to
             * parallelize the for loop, because the ghost nodes
             * are a minority, hence the critical region is executed
             * few times, circa 3.5% of the times in the tests.
             */
|
||||
#pragma omp task depend(in \
|
||||
: insertMe, Ghost2LocalMap, tempCounter, verGhostPtr) depend(out \
|
||||
: verGhostInd)
|
||||
{
|
||||
#pragma omp taskloop num_tasks(NUM_THREAD)
|
||||
for (v = 0; v < NLVer; v++)
|
||||
{
|
||||
adj1 = verLocPtr[v]; // Vertex Pointer
|
||||
adj2 = verLocPtr[v + 1];
|
||||
for (k = adj1; k < adj2; k++)
|
||||
{
|
||||
w = verLocInd[k]; // Get the adjacent vertex
|
||||
if ((w < StartIndex) || (w > EndIndex))
|
||||
{ // Find a ghost
|
||||
#pragma omp critical
|
||||
{
|
||||
insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; // Where to insert
|
||||
tempCounter[Ghost2LocalMap[w]]++; // Increment the counter
|
||||
}
|
||||
verGhostInd[insertMe] = v + StartIndex; // Add the adjacency
|
||||
} // End of if((w < StartIndex) || (w > EndIndex))
|
||||
} // End of for(k)
|
||||
} // End of for (v)
|
||||
} // end of tasklopp
|
||||
|
||||
#ifdef TIME_TRACKER
|
||||
verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization;
|
||||
fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization);
|
||||
#endif
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Ghost Vertex Index: ";
|
||||
for (v = 0; v < *numGhostEdges; v++)
|
||||
cout << verGhostInd[v] << "\t";
|
||||
cout << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
#pragma omp task depend(in \
|
||||
: *numGhostEdges) depend(out \
|
||||
: QLocalVtx, QGhostVtx, QMsgType, QOwner)
|
||||
{
|
||||
try
|
||||
{
|
||||
QLocalVtx.reserve(*numGhostEdges); // Local Vertex
|
||||
QGhostVtx.reserve(*numGhostEdges); // Ghost Vertex
|
||||
QMsgType.reserve(*numGhostEdges); // Message Type (Request/Failure)
|
||||
QOwner.reserve(*numGhostEdges); // Owner of the ghost: COmpute once and use later
|
||||
}
|
||||
catch (length_error)
|
||||
{
|
||||
cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n";
|
||||
cout << "Not enough memory to allocate the internal variables \n";
|
||||
exit(1);
|
||||
}
|
||||
} // end of task
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Allocating CandidateMate.. ";
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << "=========================************===============================" << endl;
|
||||
fflush(stdout);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl;
|
||||
fflush(stdout);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_HANG_
|
||||
if (myRank == 0)
|
||||
cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
#pragma omp task depend(in \
|
||||
: *numGhostVertices) depend(out \
|
||||
: candidateMate, S, U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner)
|
||||
{
|
||||
|
||||
// Allocate Data Structures:
|
||||
            /*
             * candidateMate was a vector and has been replaced with an array;
             * there is no point in using the vector (or maybe there is (???)),
             * so it was replaced with an array, which is slightly faster.
             */
|
||||
candidateMate = new MilanLongInt[NLVer + (*numGhostVertices)];
|
||||
|
||||
*S = (*numGhostVertices); // Initialize S with number of Ghost Vertices
|
||||
|
||||
            /*
             * Create the Queue Data Structure for the Dominating Set.
             *
             * The staticQueue U had to be declared before the parallel region
             * to have it in the correct scope. Since the dimension of a
             * staticQueue cannot be changed, the previous object had to be
             * destroyed and a new one of the correct size instantiated.
             */
|
||||
//new (&U) staticQueue(NLVer + (*numGhostVertices));
|
||||
U.reserve(NLVer + (*numGhostVertices));
|
||||
|
||||
// Initialize the private vectors
|
||||
privateQLocalVtx.reserve(*numGhostVertices);
|
||||
privateQGhostVtx.reserve(*numGhostVertices);
|
||||
privateQMsgType.reserve(*numGhostVertices);
|
||||
privateQOwner.reserve(*numGhostVertices);
|
||||
privateU.reserve(*numGhostVertices);
|
||||
} // end of task
|
||||
|
||||
} // End of single region
|
||||
} // End of parallel region
|
||||
}
|
@ -0,0 +1,46 @@
|
||||
#include "MatchBoxPC.h"
|
||||
|
||||
/**
|
||||
* //TODO documentation
|
||||
* @param k
|
||||
* @param verLocInd
|
||||
* @param StartIndex
|
||||
* @param EndIndex
|
||||
* @param GMate
|
||||
* @param Mate
|
||||
* @param Ghost2LocalMap
|
||||
* @return
|
||||
*/
|
||||
bool isAlreadyMatched(MilanLongInt node,
|
||||
MilanLongInt StartIndex,
|
||||
MilanLongInt EndIndex,
|
||||
vector<MilanLongInt> &GMate,
|
||||
MilanLongInt *Mate,
|
||||
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap)
|
||||
{
|
||||
|
||||
/*
|
||||
#pragma omp critical(Mate)
|
||||
{
|
||||
if ((node < StartIndex) || (node > EndIndex)) { //Is it a ghost vertex?
|
||||
result = GMate[Ghost2LocalMap[node]] >= 0;// Already matched
|
||||
} else { //A local vertex
|
||||
result = (Mate[node - StartIndex] >= 0); // Already matched
|
||||
}
|
||||
|
||||
}
|
||||
*/
|
||||
MilanLongInt val;
|
||||
if ((node < StartIndex) || (node > EndIndex)) // if ghost vertex
|
||||
{
|
||||
#pragma omp atomic read
|
||||
val = GMate[Ghost2LocalMap[node]];
|
||||
return val >= 0; // Already matched
|
||||
}
|
||||
|
||||
// If not ghost vertex
|
||||
#pragma omp atomic read
|
||||
val = Mate[node - StartIndex];
|
||||
|
||||
return val >= 0; // Already matched
|
||||
}
|
@ -0,0 +1,27 @@
|
||||
#include "MatchBoxPC.h"
|
||||
|
||||
void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer,
|
||||
MilanLongInt *verLocPtr,
|
||||
MilanLongInt *verLocInd,
|
||||
MilanInt myRank,
|
||||
MilanReal *edgeLocWeight,
|
||||
MilanLongInt *candidateMate)
|
||||
{
|
||||
|
||||
MilanLongInt v = -1;
|
||||
|
||||
#pragma omp parallel private(v) default(shared) num_threads(NUM_THREAD)
|
||||
{
|
||||
|
||||
#pragma omp for schedule(static)
|
||||
for (v = 0; v < NLVer; v++) {
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Processing: " << v + StartIndex << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
// Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
|
||||
candidateMate[v] = firstComputeCandidateMate(verLocPtr[v], verLocPtr[v + 1], verLocInd, edgeLocWeight);
|
||||
// End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,24 @@
|
||||
#include "MatchBoxPC.h"
|
||||
|
||||
void PROCESS_CROSS_EDGE(MilanLongInt *edge,
|
||||
MilanLongInt *S)
|
||||
{
|
||||
// Start: PARALLEL_PROCESS_CROSS_EDGE_B
|
||||
MilanLongInt captureCounter;
|
||||
|
||||
#pragma omp atomic capture
|
||||
captureCounter = --(*edge); // Decrement
|
||||
|
||||
//assert(captureCounter >= 0);
|
||||
|
||||
if (captureCounter == 0)
|
||||
#pragma omp atomic
|
||||
(*S)--; // Decrement S
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Decrementing S: Ghost vertex " << edge << " has received all its messages";
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
// End: PARALLEL_PROCESS_CROSS_EDGE_B
|
||||
}
|
@ -0,0 +1,195 @@
|
||||
#include "MatchBoxPC.h"
|
||||
|
||||
void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
|
||||
MilanLongInt *candidateMate,
|
||||
MilanLongInt *verLocInd,
|
||||
MilanLongInt *verLocPtr,
|
||||
MilanLongInt StartIndex,
|
||||
MilanLongInt EndIndex,
|
||||
MilanLongInt *Mate,
|
||||
vector<MilanLongInt> &GMate,
|
||||
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
|
||||
MilanReal *edgeLocWeight,
|
||||
MilanLongInt *myCard,
|
||||
MilanLongInt *msgInd,
|
||||
MilanLongInt *NumMessagesBundled,
|
||||
MilanLongInt *S,
|
||||
MilanLongInt *verDistance,
|
||||
MilanLongInt *PCounter,
|
||||
vector<MilanLongInt> &Counter,
|
||||
MilanInt myRank,
|
||||
MilanInt numProcs,
|
||||
vector<MilanLongInt> &U,
|
||||
vector<MilanLongInt> &privateU,
|
||||
vector<MilanLongInt> &QLocalVtx,
|
||||
vector<MilanLongInt> &QGhostVtx,
|
||||
vector<MilanLongInt> &QMsgType,
|
||||
vector<MilanInt> &QOwner,
|
||||
vector<MilanLongInt> &privateQLocalVtx,
|
||||
vector<MilanLongInt> &privateQGhostVtx,
|
||||
vector<MilanLongInt> &privateQMsgType,
|
||||
vector<MilanInt> &privateQOwner)
|
||||
{
|
||||
|
||||
MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0;
|
||||
MilanInt ghostOwner = 0, option, igw;
|
||||
|
||||
#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \
|
||||
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) \
|
||||
default(shared) num_threads(NUM_THREAD)
|
||||
|
||||
{
|
||||
#pragma omp for reduction(+ \
|
||||
: PCounter[:numProcs], myCard \
|
||||
[:1], msgInd \
|
||||
[:1], NumMessagesBundled \
|
||||
[:1]) \
|
||||
schedule(static)
|
||||
for (v = 0; v < NLVer; v++) {
|
||||
option = -1;
|
||||
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
|
||||
k = candidateMate[v];
|
||||
candidateMate[v] = verLocInd[k];
|
||||
w = candidateMate[v];
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Processing: " << v + StartIndex << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")" << v + StartIndex << " Points to: " << w;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
// If found a dominating edge:
|
||||
if (w >= 0)
|
||||
{
|
||||
|
||||
#pragma omp critical(processExposed)
|
||||
{
|
||||
if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap)) {
|
||||
w = computeCandidateMate(verLocPtr[v],
|
||||
verLocPtr[v + 1],
|
||||
edgeLocWeight, 0,
|
||||
verLocInd,
|
||||
StartIndex,
|
||||
EndIndex,
|
||||
GMate,
|
||||
Mate,
|
||||
Ghost2LocalMap);
|
||||
candidateMate[v] = w;
|
||||
}
|
||||
|
||||
if (w >= 0) {
|
||||
(*myCard)++;
|
||||
if ((w < StartIndex) || (w > EndIndex)) { // w is a ghost vertex
|
||||
option = 2;
|
||||
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex) {
|
||||
option = 1;
|
||||
Mate[v] = w;
|
||||
GMate[Ghost2LocalMap[w]] = v + StartIndex; // w is a Ghost
|
||||
|
||||
} // End of if CandidateMate[w] = v
|
||||
|
||||
} // End of if a Ghost Vertex
|
||||
else { // w is a local vertex
|
||||
|
||||
if (candidateMate[w - StartIndex] == (v + StartIndex)) {
|
||||
option = 3;
|
||||
Mate[v] = w; // v is local
|
||||
Mate[w - StartIndex] = v + StartIndex; // w is local
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ") ";
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
} // End of if ( candidateMate[w-StartIndex] == (v+StartIndex) )
|
||||
} // End of Else
|
||||
|
||||
} // End of second if
|
||||
|
||||
} // End critical processExposed
|
||||
|
||||
} // End of if(w >=0)
|
||||
else {
|
||||
// This piece of code is executed a really small amount of times
|
||||
adj11 = verLocPtr[v];
|
||||
adj12 = verLocPtr[v + 1];
|
||||
for (k1 = adj11; k1 < adj12; k1++) {
|
||||
w = verLocInd[k1];
|
||||
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Sending a failure message: ";
|
||||
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
(*msgInd)++;
|
||||
(*NumMessagesBundled)++;
|
||||
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
|
||||
// assert(ghostOwner != -1);
|
||||
// assert(ghostOwner != myRank);
|
||||
PCounter[ghostOwner]++;
|
||||
|
||||
privateQLocalVtx.push_back(v + StartIndex);
|
||||
privateQGhostVtx.push_back(w);
|
||||
privateQMsgType.push_back(FAILURE);
|
||||
privateQOwner.push_back(ghostOwner);
|
||||
|
||||
} // End of if(GHOST)
|
||||
} // End of for loop
|
||||
}
|
||||
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
|
||||
|
||||
switch (option)
|
||||
{
|
||||
case -1:
|
||||
break;
|
||||
case 1:
|
||||
privateU.push_back(v + StartIndex);
|
||||
privateU.push_back(w);
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ")";
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
// Decrement the counter:
|
||||
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S);
|
||||
case 2:
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Sending a request message (291):";
|
||||
cout << "\n(" << myRank << ")Local is: " << v + StartIndex << " Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
(*msgInd)++;
|
||||
(*NumMessagesBundled)++;
|
||||
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
|
||||
// assert(ghostOwner != -1);
|
||||
// assert(ghostOwner != myRank);
|
||||
PCounter[ghostOwner]++;
|
||||
|
||||
privateQLocalVtx.push_back(v + StartIndex);
|
||||
privateQGhostVtx.push_back(w);
|
||||
privateQMsgType.push_back(REQUEST);
|
||||
privateQOwner.push_back(ghostOwner);
|
||||
break;
|
||||
case 3:
|
||||
default:
|
||||
privateU.push_back(v + StartIndex);
|
||||
privateU.push_back(w);
|
||||
break;
|
||||
}
|
||||
|
||||
} // End of for ( v=0; v < NLVer; v++ )
|
||||
|
||||
queuesTransfer(U, privateU, QLocalVtx,
|
||||
QGhostVtx,
|
||||
QMsgType, QOwner, privateQLocalVtx,
|
||||
privateQGhostVtx,
|
||||
privateQMsgType,
|
||||
privateQOwner);
|
||||
|
||||
} // End of parallel region
|
||||
}
|
@ -0,0 +1,294 @@
|
||||
#include "MatchBoxPC.h"
|
||||
|
||||
void processMatchedVertices(
|
||||
MilanLongInt NLVer,
|
||||
vector<MilanLongInt> &UChunkBeingProcessed,
|
||||
vector<MilanLongInt> &U,
|
||||
vector<MilanLongInt> &privateU,
|
||||
MilanLongInt StartIndex,
|
||||
MilanLongInt EndIndex,
|
||||
MilanLongInt *myCard,
|
||||
MilanLongInt *msgInd,
|
||||
MilanLongInt *NumMessagesBundled,
|
||||
MilanLongInt *SPtr,
|
||||
MilanLongInt *verLocPtr,
|
||||
MilanLongInt *verLocInd,
|
||||
MilanLongInt *verDistance,
|
||||
MilanLongInt *PCounter,
|
||||
vector<MilanLongInt> &Counter,
|
||||
MilanInt myRank,
|
||||
MilanInt numProcs,
|
||||
MilanLongInt *candidateMate,
|
||||
vector<MilanLongInt> &GMate,
|
||||
MilanLongInt *Mate,
|
||||
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
|
||||
MilanReal *edgeLocWeight,
|
||||
vector<MilanLongInt> &QLocalVtx,
|
||||
vector<MilanLongInt> &QGhostVtx,
|
||||
vector<MilanLongInt> &QMsgType,
|
||||
vector<MilanInt> &QOwner,
|
||||
vector<MilanLongInt> &privateQLocalVtx,
|
||||
vector<MilanLongInt> &privateQGhostVtx,
|
||||
vector<MilanLongInt> &privateQMsgType,
|
||||
vector<MilanInt> &privateQOwner)
|
||||
{
|
||||
|
||||
MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner;
|
||||
int option;
|
||||
MilanLongInt mateVal;
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << "=========================************===============================" << endl;
|
||||
fflush(stdout);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
#ifdef COUNT_LOCAL_VERTEX
|
||||
MilanLongInt localVertices = 0;
|
||||
#endif
|
||||
//#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
|
||||
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, \
|
||||
privateQMsgType, privateQOwner, UChunkBeingProcessed) \
|
||||
default(shared) num_threads(NUM_THREAD) \
|
||||
reduction(+ \
|
||||
: msgInd[:1], PCounter \
|
||||
[:numProcs], myCard \
|
||||
[:1], NumMessagesBundled \
|
||||
[:1])
|
||||
{
|
||||
|
||||
while (!U.empty()) {
|
||||
|
||||
extractUChunk(UChunkBeingProcessed, U, privateU);
|
||||
|
||||
for (MilanLongInt u : UChunkBeingProcessed) {
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")u: " << u;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
if ((u >= StartIndex) && (u <= EndIndex)) { // Process Only the Local Vertices
|
||||
|
||||
#ifdef COUNT_LOCAL_VERTEX
|
||||
localVertices++;
|
||||
#endif
|
||||
|
||||
// Get the Adjacency list for u
|
||||
adj1 = verLocPtr[u - StartIndex]; // Pointer
|
||||
adj2 = verLocPtr[u - StartIndex + 1];
|
||||
for (k = adj1; k < adj2; k++) {
|
||||
option = -1;
|
||||
v = verLocInd[k];
|
||||
|
||||
if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex:
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v];
|
||||
fflush(stdout);
|
||||
#endif
|
||||
#pragma omp atomic read
|
||||
mateVal = Mate[v - StartIndex];
|
||||
// If the current vertex is pointing to a matched vertex and is not matched
|
||||
if (mateVal < 0) {
|
||||
#pragma omp critical
|
||||
{
|
||||
if (candidateMate[v - StartIndex] == u) {
|
||||
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
|
||||
w = computeCandidateMate(verLocPtr[v - StartIndex],
|
||||
verLocPtr[v - StartIndex + 1],
|
||||
edgeLocWeight, 0,
|
||||
verLocInd,
|
||||
StartIndex,
|
||||
EndIndex,
|
||||
GMate,
|
||||
Mate,
|
||||
Ghost2LocalMap);
|
||||
|
||||
candidateMate[v - StartIndex] = w;
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")" << v << " Points to: " << w;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
// If found a dominating edge:
|
||||
if (w >= 0) {
|
||||
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Sending a request message:";
|
||||
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
|
||||
#endif
|
||||
option = 2;
|
||||
|
||||
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) {
|
||||
option = 1;
|
||||
Mate[v - StartIndex] = w; // v is a local vertex
|
||||
GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
|
||||
|
||||
} // End of if CandidateMate[w] = v
|
||||
} // End of if a Ghost Vertex
|
||||
else { // w is a local vertex
|
||||
if (candidateMate[w - StartIndex] == v) {
|
||||
option = 3;
|
||||
Mate[v - StartIndex] = w; // v is a local vertex
|
||||
Mate[w - StartIndex] = v; // w is a local vertex
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
|
||||
fflush(stdout);
|
||||
#endif
|
||||
} // End of if(CandidateMate(w) = v
|
||||
} // End of Else
|
||||
} // End of if(w >=0)
|
||||
else
|
||||
option = 4; // End of Else: w == -1
|
||||
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
|
||||
} // End of If (candidateMate[v-StartIndex] == u
|
||||
} // End of task
|
||||
} // mateval < 0
|
||||
} // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex:
|
||||
else { // Neighbor is a ghost vertex
|
||||
|
||||
#pragma omp critical
|
||||
{
|
||||
if (candidateMate[NLVer + Ghost2LocalMap[v]] == u)
|
||||
candidateMate[NLVer + Ghost2LocalMap[v]] = -1;
|
||||
if (v != Mate[u - StartIndex])
|
||||
option = 5; // u is local
|
||||
} // End of critical
|
||||
} // End of Else //A Ghost Vertex
|
||||
|
||||
switch (option)
|
||||
{
|
||||
case -1:
|
||||
// No things to do
|
||||
break;
|
||||
case 1:
|
||||
// Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v
|
||||
privateU.push_back(v);
|
||||
privateU.push_back(w);
|
||||
|
||||
(*myCard)++;
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
|
||||
fflush(stdout);
|
||||
#endif
|
||||
// Decrement the counter:
|
||||
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr);
|
||||
case 2:
|
||||
|
||||
// Found a dominating edge, it is a ghost
|
||||
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
|
||||
// assert(ghostOwner != -1);
|
||||
// assert(ghostOwner != myRank);
|
||||
PCounter[ghostOwner]++;
|
||||
(*NumMessagesBundled)++;
|
||||
(*msgInd)++;
|
||||
|
||||
privateQLocalVtx.push_back(v);
|
||||
privateQGhostVtx.push_back(w);
|
||||
privateQMsgType.push_back(REQUEST);
|
||||
privateQOwner.push_back(ghostOwner);
|
||||
break;
|
||||
case 3:
|
||||
privateU.push_back(v);
|
||||
privateU.push_back(w);
|
||||
|
||||
(*myCard)++;
|
||||
break;
|
||||
case 4:
|
||||
// Could not find a dominating vertex
|
||||
adj11 = verLocPtr[v - StartIndex];
|
||||
adj12 = verLocPtr[v - StartIndex + 1];
|
||||
for (k1 = adj11; k1 < adj12; k1++) {
|
||||
w = verLocInd[k1];
|
||||
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Sending a failure message: ";
|
||||
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
|
||||
// assert(ghostOwner != -1);
|
||||
// assert(ghostOwner != myRank);
|
||||
|
||||
PCounter[ghostOwner]++;
|
||||
(*NumMessagesBundled)++;
|
||||
(*msgInd)++;
|
||||
|
||||
privateQLocalVtx.push_back(v);
|
||||
privateQGhostVtx.push_back(w);
|
||||
privateQMsgType.push_back(FAILURE);
|
||||
privateQOwner.push_back(ghostOwner);
|
||||
|
||||
} // End of if(GHOST)
|
||||
} // End of for loop
|
||||
break;
|
||||
case 5:
|
||||
default:
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Sending a success message: ";
|
||||
cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n";
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
|
||||
// assert(ghostOwner != -1);
|
||||
// assert(ghostOwner != myRank);
|
||||
|
||||
(*NumMessagesBundled)++;
|
||||
PCounter[ghostOwner]++;
|
||||
(*msgInd)++;
|
||||
|
||||
privateQLocalVtx.push_back(u);
|
||||
privateQGhostVtx.push_back(v);
|
||||
privateQMsgType.push_back(SUCCESS);
|
||||
privateQOwner.push_back(ghostOwner);
|
||||
|
||||
break;
|
||||
} // End of switch
|
||||
|
||||
} // End of inner for
|
||||
}
|
||||
} // End of outer for
|
||||
|
||||
queuesTransfer(U, privateU, QLocalVtx,
|
||||
QGhostVtx,
|
||||
QMsgType, QOwner, privateQLocalVtx,
|
||||
privateQGhostVtx,
|
||||
privateQMsgType,
|
||||
privateQOwner);
|
||||
|
||||
#pragma omp critical(U)
|
||||
{
|
||||
U.insert(U.end(), privateU.begin(), privateU.end());
|
||||
}
|
||||
|
||||
privateU.clear();
|
||||
|
||||
#pragma omp critical(sendMessageTransfer)
|
||||
{
|
||||
|
||||
QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end());
|
||||
QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end());
|
||||
QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end());
|
||||
QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end());
|
||||
}
|
||||
|
||||
privateQLocalVtx.clear();
|
||||
privateQGhostVtx.clear();
|
||||
privateQMsgType.clear();
|
||||
privateQOwner.clear();
|
||||
|
||||
} // End of while ( !U.empty() )
|
||||
|
||||
#ifdef COUNT_LOCAL_VERTEX
|
||||
printf("Count local vertexes: %ld for thread %d of processor %d\n",
|
||||
localVertices,
|
||||
omp_get_thread_num(),
|
||||
myRank);
|
||||
|
||||
#endif
|
||||
} // End of parallel region
|
||||
}
|
@ -0,0 +1,308 @@
|
||||
#include "MatchBoxPC.h"
|
||||
//#define DEBUG_HANG_
|
||||
void processMatchedVerticesAndSendMessages(
|
||||
MilanLongInt NLVer,
|
||||
vector<MilanLongInt> &UChunkBeingProcessed,
|
||||
vector<MilanLongInt> &U,
|
||||
vector<MilanLongInt> &privateU,
|
||||
MilanLongInt StartIndex,
|
||||
MilanLongInt EndIndex,
|
||||
MilanLongInt *myCard,
|
||||
MilanLongInt *msgInd,
|
||||
MilanLongInt *NumMessagesBundled,
|
||||
MilanLongInt *SPtr,
|
||||
MilanLongInt *verLocPtr,
|
||||
MilanLongInt *verLocInd,
|
||||
MilanLongInt *verDistance,
|
||||
MilanLongInt *PCounter,
|
||||
vector<MilanLongInt> &Counter,
|
||||
MilanInt myRank,
|
||||
MilanInt numProcs,
|
||||
MilanLongInt *candidateMate,
|
||||
vector<MilanLongInt> &GMate,
|
||||
MilanLongInt *Mate,
|
||||
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
|
||||
MilanReal *edgeLocWeight,
|
||||
vector<MilanLongInt> &QLocalVtx,
|
||||
vector<MilanLongInt> &QGhostVtx,
|
||||
vector<MilanLongInt> &QMsgType,
|
||||
vector<MilanInt> &QOwner,
|
||||
vector<MilanLongInt> &privateQLocalVtx,
|
||||
vector<MilanLongInt> &privateQGhostVtx,
|
||||
vector<MilanLongInt> &privateQMsgType,
|
||||
vector<MilanInt> &privateQOwner,
|
||||
MPI_Comm comm,
|
||||
MilanLongInt *msgActual,
|
||||
vector<MilanLongInt> &Message)
|
||||
{
|
||||
|
||||
MilanLongInt initialSize = QLocalVtx.size();
|
||||
MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner;
|
||||
int option;
|
||||
MilanLongInt mateVal;
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << "=========================************===============================" << endl;
|
||||
fflush(stdout);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
#ifdef COUNT_LOCAL_VERTEX
|
||||
MilanLongInt localVertices = 0;
|
||||
#endif
|
||||
//#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
|
||||
firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx,\
|
||||
privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \
|
||||
num_threads(NUM_THREAD) \
|
||||
reduction(+ \
|
||||
: msgInd[:1], PCounter \
|
||||
[:numProcs], myCard \
|
||||
[:1], NumMessagesBundled \
|
||||
[:1], msgActual \
|
||||
[:1])
|
||||
{
|
||||
|
||||
while (!U.empty()) {
|
||||
|
||||
extractUChunk(UChunkBeingProcessed, U, privateU);
|
||||
|
||||
for (MilanLongInt u : UChunkBeingProcessed) {
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")u: " << u;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
if ((u >= StartIndex) && (u <= EndIndex)) { // Process Only the Local Vertices
|
||||
|
||||
#ifdef COUNT_LOCAL_VERTEX
|
||||
localVertices++;
|
||||
#endif
|
||||
|
||||
// Get the Adjacency list for u
|
||||
adj1 = verLocPtr[u - StartIndex]; // Pointer
|
||||
adj2 = verLocPtr[u - StartIndex + 1];
|
||||
for (k = adj1; k < adj2; k++) {
|
||||
option = -1;
|
||||
v = verLocInd[k];
|
||||
|
||||
if ((v >= StartIndex) && (v <= EndIndex)) { // If Local Vertex:
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v];
|
||||
fflush(stdout);
|
||||
#endif
|
||||
#pragma omp atomic read
|
||||
mateVal = Mate[v - StartIndex];
|
||||
// If the current vertex is pointing to a matched vertex and is not matched
|
||||
if (mateVal < 0) {
|
||||
#pragma omp critical
|
||||
{
|
||||
if (candidateMate[v - StartIndex] == u) {
|
||||
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
|
||||
w = computeCandidateMate(verLocPtr[v - StartIndex],
|
||||
verLocPtr[v - StartIndex + 1],
|
||||
edgeLocWeight, 0,
|
||||
verLocInd,
|
||||
StartIndex,
|
||||
EndIndex,
|
||||
GMate,
|
||||
Mate,
|
||||
Ghost2LocalMap);
|
||||
|
||||
candidateMate[v - StartIndex] = w;
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")" << v << " Points to: " << w;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
// If found a dominating edge:
|
||||
if (w >= 0) {
|
||||
|
||||
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Sending a request message:";
|
||||
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
|
||||
#endif
|
||||
option = 2;
|
||||
|
||||
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) {
|
||||
option = 1;
|
||||
Mate[v - StartIndex] = w; // v is a local vertex
|
||||
GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
|
||||
|
||||
} // End of if CandidateMate[w] = v
|
||||
} // End of if a Ghost Vertex
|
||||
else { // w is a local vertex
|
||||
if (candidateMate[w - StartIndex] == v) {
|
||||
option = 3;
|
||||
Mate[v - StartIndex] = w; // v is a local vertex
|
||||
Mate[w - StartIndex] = v; // w is a local vertex
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
|
||||
fflush(stdout);
|
||||
#endif
|
||||
} // End of if(CandidateMate(w) = v
|
||||
} // End of Else
|
||||
} // End of if(w >=0)
|
||||
else
|
||||
option = 4; // End of Else: w == -1
|
||||
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
|
||||
} // End of If (candidateMate[v-StartIndex] == u
|
||||
} // End of task
|
||||
} // mateval < 0
|
||||
} // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex:
|
||||
else { // Neighbor is a ghost vertex
|
||||
|
||||
#pragma omp critical
|
||||
{
|
||||
if (candidateMate[NLVer + Ghost2LocalMap[v]] == u)
|
||||
candidateMate[NLVer + Ghost2LocalMap[v]] = -1;
|
||||
if (v != Mate[u - StartIndex])
|
||||
option = 5; // u is local
|
||||
} // End of critical
|
||||
} // End of Else //A Ghost Vertex
|
||||
|
||||
switch (option)
|
||||
{
|
||||
case -1:
|
||||
// No things to do
|
||||
break;
|
||||
case 1:
|
||||
// Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v
|
||||
privateU.push_back(v);
|
||||
privateU.push_back(w);
|
||||
(*myCard)++;
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
|
||||
fflush(stdout);
|
||||
#endif
|
||||
// Decrement the counter:
|
||||
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr);
|
||||
case 2:
|
||||
|
||||
// Found a dominating edge, it is a ghost
|
||||
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
|
||||
|
||||
// Build the Message Packet:
|
||||
// Message[0] = v; // LOCAL
|
||||
// Message[1] = w; // GHOST
|
||||
// Message[2] = REQUEST; // TYPE
|
||||
// Send a Request (Asynchronous)
|
||||
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
|
||||
|
||||
(*msgActual)++;
|
||||
(*msgInd)++;
|
||||
|
||||
privateQLocalVtx.push_back(v);
|
||||
privateQGhostVtx.push_back(w);
|
||||
privateQMsgType.push_back(REQUEST);
|
||||
privateQOwner.push_back(ghostOwner);
|
||||
break;
|
||||
case 3:
|
||||
privateU.push_back(v);
|
||||
privateU.push_back(w);
|
||||
(*myCard)++;
|
||||
break;
|
||||
case 4:
|
||||
// Could not find a dominating vertex
|
||||
adj11 = verLocPtr[v - StartIndex];
|
||||
adj12 = verLocPtr[v - StartIndex + 1];
|
||||
for (k1 = adj11; k1 < adj12; k1++) {
|
||||
w = verLocInd[k1];
|
||||
if ((w < StartIndex) || (w > EndIndex)) { // A ghost
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Sending a failure message: ";
|
||||
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
|
||||
|
||||
// Build the Message Packet:
|
||||
// Message[0] = v; // LOCAL
|
||||
// Message[1] = w; // GHOST
|
||||
// Message[2] = FAILURE; // TYPE
|
||||
// Send a Request (Asynchronous)
|
||||
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
|
||||
|
||||
(*msgActual)++;
|
||||
(*msgInd)++;
|
||||
|
||||
privateQLocalVtx.push_back(v);
|
||||
privateQGhostVtx.push_back(w);
|
||||
privateQMsgType.push_back(FAILURE);
|
||||
privateQOwner.push_back(ghostOwner);
|
||||
|
||||
} // End of if(GHOST)
|
||||
} // End of for loop
|
||||
break;
|
||||
case 5:
|
||||
default:
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Sending a success message: ";
|
||||
cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n";
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
|
||||
|
||||
// Build the Message Packet:
|
||||
// Message[0] = u; // LOCAL
|
||||
// Message[1] = v; // GHOST
|
||||
// Message[2] = SUCCESS; // TYPE
|
||||
// Send a Request (Asynchronous)
|
||||
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
|
||||
|
||||
(*msgActual)++;
|
||||
(*msgInd)++;
|
||||
|
||||
privateQLocalVtx.push_back(u);
|
||||
privateQGhostVtx.push_back(v);
|
||||
privateQMsgType.push_back(SUCCESS);
|
||||
privateQOwner.push_back(ghostOwner);
|
||||
|
||||
break;
|
||||
} // End of switch
|
||||
} // End of inner for
|
||||
}
|
||||
} // End of outer for
|
||||
|
||||
queuesTransfer(U, privateU, QLocalVtx,
|
||||
QGhostVtx,
|
||||
QMsgType, QOwner, privateQLocalVtx,
|
||||
privateQGhostVtx,
|
||||
privateQMsgType,
|
||||
privateQOwner);
|
||||
|
||||
} // End of while ( !U.empty() )
|
||||
|
||||
#ifdef COUNT_LOCAL_VERTEX
|
||||
printf("Count local vertexes: %ld for thread %d of processor %d\n",
|
||||
localVertices,
|
||||
omp_get_thread_num(),
|
||||
myRank);
|
||||
|
||||
#endif
|
||||
} // End of parallel region
|
||||
|
||||
// Send the messages
|
||||
#ifdef DEBUG_HANG_
|
||||
cout << myRank<<" Sending: "<<QOwner.size()-initialSize<<" messages" <<endl;
|
||||
#endif
|
||||
for (int i = initialSize; i < QOwner.size(); i++) {
|
||||
|
||||
Message[0] = QLocalVtx[i];
|
||||
Message[1] = QGhostVtx[i];
|
||||
Message[2] = QMsgType[i];
|
||||
ghostOwner = QOwner[i];
|
||||
|
||||
//MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
|
||||
//cout << myRank<<" Sending to "<<ghostOwner<<endl;
|
||||
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
|
||||
}
|
||||
#ifdef DEBUG_HANG_
|
||||
cout << myRank<<" Done sending messages"<<endl;
|
||||
#endif
|
||||
}
|
@ -0,0 +1,315 @@
|
||||
#include "MatchBoxPC.h"
|
||||
//#define DEBUG_HANG_
|
||||
|
||||
void processMessages(
|
||||
MilanLongInt NLVer,
|
||||
MilanLongInt *Mate,
|
||||
MilanLongInt *candidateMate,
|
||||
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
|
||||
vector<MilanLongInt> &GMate,
|
||||
vector<MilanLongInt> &Counter,
|
||||
MilanLongInt StartIndex,
|
||||
MilanLongInt EndIndex,
|
||||
MilanLongInt *myCard,
|
||||
MilanLongInt *msgInd,
|
||||
MilanLongInt *msgActual,
|
||||
MilanReal *edgeLocWeight,
|
||||
MilanLongInt *verDistance,
|
||||
MilanLongInt *verLocPtr,
|
||||
MilanLongInt k,
|
||||
MilanLongInt *verLocInd,
|
||||
MilanInt numProcs,
|
||||
MilanInt myRank,
|
||||
MPI_Comm comm,
|
||||
vector<MilanLongInt> &Message,
|
||||
MilanLongInt numGhostEdges,
|
||||
MilanLongInt u,
|
||||
MilanLongInt v,
|
||||
MilanLongInt *S,
|
||||
vector<MilanLongInt> &U)
|
||||
{
|
||||
|
||||
//#define PRINT_DEBUG_INFO_
|
||||
|
||||
MilanInt Sender;
|
||||
MPI_Status computeStatus;
|
||||
MilanLongInt bundleSize, w;
|
||||
MilanLongInt adj11, adj12, k1;
|
||||
MilanLongInt ghostOwner;
|
||||
int error_codeC;
|
||||
error_codeC = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
|
||||
char error_message[MPI_MAX_ERROR_STRING];
|
||||
int message_length;
|
||||
MilanLongInt message_type = 0;
|
||||
|
||||
// Buffer to receive bundled messages
|
||||
// Maximum messages that can be received from any processor is
|
||||
// twice the edge cut: REQUEST; REQUEST+(FAILURE/SUCCESS)
|
||||
vector<MilanLongInt> ReceiveBuffer;
|
||||
try
|
||||
{
|
||||
ReceiveBuffer.reserve(numGhostEdges * 2 * 3); // Three integers per cross edge
|
||||
}
|
||||
catch (length_error)
|
||||
{
|
||||
cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n";
|
||||
cout << "Not enough memory to allocate the internal variables \n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout
|
||||
<< "\n(" << myRank << "=========================************===============================" << endl;
|
||||
fflush(stdout);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")About to begin Message processing phase ... *S=" << *S << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << "=========================************===============================" << endl;
|
||||
fflush(stdout);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
// BLOCKING RECEIVE:
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << " Waiting for blocking receive..." << endl;
|
||||
fflush(stdout);
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
//cout << myRank<<" Receiving ...";
|
||||
error_codeC = MPI_Recv(&Message[0], 3, TypeMap<MilanLongInt>(), MPI_ANY_SOURCE, ComputeTag, comm, &computeStatus);
|
||||
if (error_codeC != MPI_SUCCESS)
|
||||
{
|
||||
MPI_Error_string(error_codeC, error_message, &message_length);
|
||||
cout << "\n*Error in call to MPI_Receive on Slave: " << error_message << "\n";
|
||||
fflush(stdout);
|
||||
}
|
||||
Sender = computeStatus.MPI_SOURCE;
|
||||
//cout << " ...from "<<Sender << endl;
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Received message from Process " << Sender << " Type= " << Message[2] << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
if (Message[2] == SIZEINFO) {
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Received bundled message from Process " << Sender << " Size= " << Message[0] << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
bundleSize = Message[0]; //#of integers in the message
|
||||
// Build the Message Buffer:
|
||||
if (!ReceiveBuffer.empty())
|
||||
ReceiveBuffer.clear(); // Empty it out first
|
||||
ReceiveBuffer.resize(bundleSize, -1); // Initialize
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Message Bundle Before: " << endl;
|
||||
for (int i = 0; i < bundleSize; i++)
|
||||
cout << ReceiveBuffer[i] << ",";
|
||||
cout << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
// Receive the message
|
||||
//cout << myRank<<" Receiving from "<<Sender<<endl;
|
||||
error_codeC = MPI_Recv(&ReceiveBuffer[0], bundleSize, TypeMap<MilanLongInt>(), Sender, BundleTag, comm, &computeStatus);
|
||||
if (error_codeC != MPI_SUCCESS) {
|
||||
MPI_Error_string(error_codeC, error_message, &message_length);
|
||||
cout << "\n*Error in call to MPI_Receive on processor " << myRank << " Error: " << error_message << "\n";
|
||||
fflush(stdout);
|
||||
}
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Message Bundle After: " << endl;
|
||||
for (int i = 0; i < bundleSize; i++)
|
||||
cout << ReceiveBuffer[i] << ",";
|
||||
cout << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
} else { // Just a single message:
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Received regular message from Process " << Sender << " u= " << Message[0] << " v= " << Message[1] << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
// Add the current message to Queue:
|
||||
bundleSize = 3; //#of integers in the message
|
||||
// Build the Message Buffer:
|
||||
if (!ReceiveBuffer.empty())
|
||||
ReceiveBuffer.clear(); // Empty it out first
|
||||
ReceiveBuffer.resize(bundleSize, -1); // Initialize
|
||||
|
||||
ReceiveBuffer[0] = Message[0]; // u
|
||||
ReceiveBuffer[1] = Message[1]; // v
|
||||
ReceiveBuffer[2] = Message[2]; // message_type
|
||||
}
|
||||
|
||||
#ifdef DEBUG_GHOST_
|
||||
if ((v < StartIndex) || (v > EndIndex)) {
|
||||
cout << "\n(" << myRank << ") From ReceiveBuffer: This should not happen: u= " << u << " v= " << v << " Type= " << message_type << " StartIndex " << StartIndex << " EndIndex " << EndIndex << endl;
|
||||
fflush(stdout);
|
||||
}
|
||||
#endif
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Processing message: u= " << u << " v= " << v << " Type= " << message_type << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
// Most of the time bundleSize == 3, thus, it's not worth parallelizing thi loop
|
||||
for (MilanLongInt bundleCounter = 3; bundleCounter < bundleSize + 3; bundleCounter += 3) {
|
||||
u = ReceiveBuffer[bundleCounter - 3]; // GHOST
|
||||
v = ReceiveBuffer[bundleCounter - 2]; // LOCAL
|
||||
message_type = ReceiveBuffer[bundleCounter - 1]; // TYPE
|
||||
|
||||
// CASE I: REQUEST
|
||||
if (message_type == REQUEST) {
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Message type is REQUEST" << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
#ifdef DEBUG_GHOST_
|
||||
if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) {
|
||||
cout << "\n(" << myRank << ") case 1 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl;
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
if (Mate[v - StartIndex] == -1) {
|
||||
// Process only if not already matched (v is local)
|
||||
candidateMate[NLVer + Ghost2LocalMap[u]] = v; // Set CandidateMate for the ghost
|
||||
if (candidateMate[v - StartIndex] == u) {
|
||||
GMate[Ghost2LocalMap[u]] = v; // u is ghost
|
||||
Mate[v - StartIndex] = u; // v is local
|
||||
U.push_back(v);
|
||||
U.push_back(u);
|
||||
(*myCard)++;
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")MATCH: (" << v << "," << u << ") " << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S);
|
||||
} // End of if ( candidateMate[v-StartIndex] == u )e
|
||||
} // End of if ( Mate[v] == -1 )
|
||||
} // End of REQUEST
|
||||
else { // CASE II: SUCCESS
|
||||
if (message_type == SUCCESS) {
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Message type is SUCCESS" << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process it again
|
||||
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S);
|
||||
#ifdef DEBUG_GHOST_
|
||||
if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer)) {
|
||||
cout << "\n(" << myRank << ") case 2 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl;
|
||||
fflush(stdout);
|
||||
}
|
||||
#endif
|
||||
if (Mate[v - StartIndex] == -1) {
|
||||
// Process only if not already matched ( v is local)
|
||||
if (candidateMate[v - StartIndex] == u) {
|
||||
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
|
||||
w = computeCandidateMate(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], edgeLocWeight, k,
|
||||
verLocInd, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap);
|
||||
candidateMate[v - StartIndex] = w;
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")" << v << " Points to: " << w << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
// If found a dominating edge:
|
||||
if (w >= 0) {
|
||||
if ((w < StartIndex) || (w > EndIndex)) {
|
||||
// w is a ghost
|
||||
// Build the Message Packet:
|
||||
Message[0] = v; // LOCAL
|
||||
Message[1] = w; // GHOST
|
||||
Message[2] = REQUEST; // TYPE
|
||||
// Send a Request (Asynchronous)
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Sending a request message: ";
|
||||
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
|
||||
//assert(ghostOwner != -1);
|
||||
//assert(ghostOwner != myRank);
|
||||
//cout << myRank<<" Sending to "<<ghostOwner<<endl;
|
||||
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
|
||||
(*msgInd)++;
|
||||
(*msgActual)++;
|
||||
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v) {
|
||||
Mate[v - StartIndex] = w; // v is local
|
||||
GMate[Ghost2LocalMap[w]] = v; // w is ghost
|
||||
U.push_back(v);
|
||||
U.push_back(w);
|
||||
(*myCard)++;
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S);
|
||||
} // End of if CandidateMate[w] = v
|
||||
} // End of if a Ghost Vertex
|
||||
else { // w is a local vertex
|
||||
if (candidateMate[w - StartIndex] == v) {
|
||||
Mate[v - StartIndex] = w; // v is local
|
||||
Mate[w - StartIndex] = v; // w is local
|
||||
// Q.push_back(u);
|
||||
U.push_back(v);
|
||||
U.push_back(w);
|
||||
(*myCard)++;
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
} // End of if(CandidateMate(w) = v
|
||||
} // End of Else
|
||||
} // End of if(w >=0)
|
||||
else { // No dominant edge found
|
||||
adj11 = verLocPtr[v - StartIndex];
|
||||
adj12 = verLocPtr[v - StartIndex + 1];
|
||||
for (k1 = adj11; k1 < adj12; k1++) {
|
||||
w = verLocInd[k1];
|
||||
if ((w < StartIndex) || (w > EndIndex)) {
|
||||
// A ghost
|
||||
// Build the Message Packet:
|
||||
Message[0] = v; // LOCAL
|
||||
Message[1] = w; // GHOST
|
||||
Message[2] = FAILURE; // TYPE
|
||||
// Send a Request (Asynchronous)
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Sending a failure message: ";
|
||||
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
|
||||
//assert(ghostOwner != -1);
|
||||
//assert(ghostOwner != myRank);
|
||||
//cout << myRank<<" Sending to "<<ghostOwner<<endl;
|
||||
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
|
||||
(*msgInd)++;
|
||||
(*msgActual)++;
|
||||
} // End of if(GHOST)
|
||||
} // End of for loop
|
||||
} // End of Else: w == -1
|
||||
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
|
||||
} // End of if ( candidateMate[v-StartIndex] == u )
|
||||
} // End of if ( Mate[v] == -1 )
|
||||
} // End of if ( message_type == SUCCESS )
|
||||
else {
|
||||
// CASE III: FAILURE
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Message type is FAILURE" << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
GMate[Ghost2LocalMap[u]] = EndIndex + 1; // Set a Dummy Mate to make sure that we do not (u is a ghost) process this anymore
|
||||
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); // Decrease the counter
|
||||
} // End of else: CASE III
|
||||
} // End of else: CASE I
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
@ -0,0 +1,36 @@
|
||||
#include "MatchBoxPC.h"
|
||||
|
||||
void queuesTransfer(vector<MilanLongInt> &U,
|
||||
vector<MilanLongInt> &privateU,
|
||||
vector<MilanLongInt> &QLocalVtx,
|
||||
vector<MilanLongInt> &QGhostVtx,
|
||||
vector<MilanLongInt> &QMsgType,
|
||||
vector<MilanInt> &QOwner,
|
||||
vector<MilanLongInt> &privateQLocalVtx,
|
||||
vector<MilanLongInt> &privateQGhostVtx,
|
||||
vector<MilanLongInt> &privateQMsgType,
|
||||
vector<MilanInt> &privateQOwner)
|
||||
{
|
||||
|
||||
#pragma omp critical(U)
|
||||
{
|
||||
U.insert(U.end(), privateU.begin(), privateU.end());
|
||||
}
|
||||
|
||||
privateU.clear();
|
||||
|
||||
#pragma omp critical(sendMessageTransfer)
|
||||
{
|
||||
|
||||
QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end());
|
||||
QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end());
|
||||
QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end());
|
||||
QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end());
|
||||
}
|
||||
|
||||
privateQLocalVtx.clear();
|
||||
privateQGhostVtx.clear();
|
||||
privateQMsgType.clear();
|
||||
privateQOwner.clear();
|
||||
|
||||
}
|
@ -0,0 +1,209 @@
|
||||
#include "MatchBoxPC.h"
|
||||
|
||||
void sendBundledMessages(MilanLongInt *numGhostEdges,
|
||||
MilanInt *BufferSize,
|
||||
MilanLongInt *Buffer,
|
||||
vector<MilanLongInt> &PCumulative,
|
||||
vector<MilanLongInt> &PMessageBundle,
|
||||
vector<MilanLongInt> &PSizeInfoMessages,
|
||||
MilanLongInt *PCounter,
|
||||
MilanLongInt NumMessagesBundled,
|
||||
MilanLongInt *msgActual,
|
||||
MilanLongInt *msgInd,
|
||||
MilanInt numProcs,
|
||||
MilanInt myRank,
|
||||
MPI_Comm comm,
|
||||
vector<MilanLongInt> &QLocalVtx,
|
||||
vector<MilanLongInt> &QGhostVtx,
|
||||
vector<MilanLongInt> &QMsgType,
|
||||
vector<MilanInt> &QOwner,
|
||||
vector<MPI_Request> &SRequest,
|
||||
vector<MPI_Status> &SStatus)
|
||||
{
|
||||
|
||||
MilanLongInt myIndex = 0, numMessagesToSend;
|
||||
MilanInt i = 0, OneMessageSize = 0;
|
||||
|
||||
#ifdef DEBUG_HANG_
|
||||
if (myRank == 0)
|
||||
cout << "\n(" << myRank << ") Send Bundles" << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
#pragma omp parallel private(i) default(shared) num_threads(NUM_THREAD)
|
||||
{
|
||||
#pragma omp master
|
||||
{
|
||||
// Data structures for Bundled Messages:
|
||||
#pragma omp task depend(inout \
|
||||
: PCumulative, PMessageBundle, PSizeInfoMessages) depend(in \
|
||||
: NumMessagesBundled, numProcs)
|
||||
{
|
||||
try {
|
||||
PMessageBundle.reserve(NumMessagesBundled * 3); // Three integers per message
|
||||
PCumulative.reserve(numProcs + 1); // Similar to Row Pointer vector in CSR data structure
|
||||
PSizeInfoMessages.reserve(numProcs * 3); // Buffer to hold the Size info message packets
|
||||
}
|
||||
catch (length_error)
|
||||
{
|
||||
cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n";
|
||||
cout << "Not enough memory to allocate the internal variables \n";
|
||||
exit(1);
|
||||
}
|
||||
PMessageBundle.resize(NumMessagesBundled * 3, -1); // Initialize
|
||||
PCumulative.resize(numProcs + 1, 0); // Only initialize the counter variable
|
||||
PSizeInfoMessages.resize(numProcs * 3, 0);
|
||||
}
|
||||
|
||||
#pragma omp task depend(inout \
|
||||
: PCumulative) depend(in \
|
||||
: PCounter)
|
||||
{
|
||||
for (i = 0; i < numProcs; i++)
|
||||
PCumulative[i + 1] = PCumulative[i] + PCounter[i];
|
||||
}
|
||||
|
||||
#pragma omp task depend(inout \
|
||||
: PCounter)
|
||||
{
|
||||
// Reuse PCounter to keep track of how many messages were inserted:
|
||||
for (MilanInt i = 0; i < numProcs; i++) // Changed by Fabio to be an integer, addresses needs to be integers!
|
||||
PCounter[i] = 0;
|
||||
}
|
||||
|
||||
// Build the Message Bundle packet:
|
||||
#pragma omp task depend(in \
|
||||
: PCounter, QLocalVtx, QGhostVtx, QMsgType, QOwner, PMessageBundle, PCumulative) depend(out \
|
||||
: myIndex, PMessageBundle, PCounter)
|
||||
{
|
||||
for (i = 0; i < NumMessagesBundled; i++) {
|
||||
myIndex = (PCumulative[QOwner[i]] + PCounter[QOwner[i]]) * 3;
|
||||
PMessageBundle[myIndex + 0] = QLocalVtx[i];
|
||||
PMessageBundle[myIndex + 1] = QGhostVtx[i];
|
||||
PMessageBundle[myIndex + 2] = QMsgType[i];
|
||||
PCounter[QOwner[i]]++;
|
||||
}
|
||||
}
|
||||
|
||||
// Send the Bundled Messages: Use ISend
|
||||
#pragma omp task depend(out \
|
||||
: SRequest, SStatus)
|
||||
{
|
||||
try
|
||||
{
|
||||
SRequest.reserve(numProcs * 2); // At most two messages per processor
|
||||
SStatus.reserve(numProcs * 2); // At most two messages per processor
|
||||
}
|
||||
catch (length_error)
|
||||
{
|
||||
cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearchImmediateSend: \n";
|
||||
cout << "Not enough memory to allocate the internal variables \n";
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Send the Messages
|
||||
#pragma omp task depend(inout \
|
||||
: SRequest, PSizeInfoMessages, PCumulative) depend(out \
|
||||
: *msgActual, *msgInd)
|
||||
{
|
||||
for (i = 0; i < numProcs; i++) { // Changed by Fabio to be an integer, addresses needs to be integers!
|
||||
if (i == myRank) // Do not send anything to yourself
|
||||
continue;
|
||||
// Send the Message with information about the size of next message:
|
||||
// Build the Message Packet:
|
||||
PSizeInfoMessages[i * 3 + 0] = (PCumulative[i + 1] - PCumulative[i]) * 3; // # of integers in the next message
|
||||
PSizeInfoMessages[i * 3 + 1] = -1; // Dummy packet
|
||||
PSizeInfoMessages[i * 3 + 2] = SIZEINFO; // TYPE
|
||||
// Send a Request (Asynchronous)
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Sending bundled message to process " << i << " size: " << PSizeInfoMessages[i * 3 + 0] << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
if (PSizeInfoMessages[i * 3 + 0] > 0)
|
||||
{ // Send only if it is a nonempty packet
|
||||
MPI_Isend(&PSizeInfoMessages[i * 3 + 0], 3, TypeMap<MilanLongInt>(), i, ComputeTag, comm,
|
||||
&SRequest[(*msgInd)]);
|
||||
(*msgActual)++;
|
||||
(*msgInd)++;
|
||||
// Now Send the message with the data packet:
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")SendiFFng Bundle to : " << i << endl;
|
||||
for (k = (PCumulative[i] * 3); k < (PCumulative[i] * 3 + PSizeInfoMessages[i * 3 + 0]); k++)
|
||||
cout << PMessageBundle[k] << ",";
|
||||
cout << endl;
|
||||
fflush(stdout);
|
||||
#endif
|
||||
MPI_Isend(&PMessageBundle[PCumulative[i] * 3], PSizeInfoMessages[i * 3 + 0],
|
||||
TypeMap<MilanLongInt>(), i, BundleTag, comm, &SRequest[(*msgInd)]);
|
||||
(*msgInd)++;
|
||||
} // End of if size > 0
|
||||
}
|
||||
}
|
||||
|
||||
#pragma omp task depend(inout \
|
||||
: PCumulative, QLocalVtx, QGhostVtx, QMsgType, QOwner)
|
||||
{
|
||||
|
||||
// Free up temporary memory:
|
||||
PCumulative.clear();
|
||||
QLocalVtx.clear();
|
||||
QGhostVtx.clear();
|
||||
QMsgType.clear();
|
||||
QOwner.clear();
|
||||
}
|
||||
|
||||
#pragma omp task depend(inout : OneMessageSize, *BufferSize) depend(out : numMessagesToSend) depend(in : *numGhostEdges)
|
||||
{
|
||||
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Number of Ghost edges = " << *numGhostEdges;
|
||||
cout << "\n(" << myRank << ")Total number of potential message X 2 = " << *numGhostEdges * 2;
|
||||
cout << "\n(" << myRank << ")Number messages already sent in bundles = " << NumMessagesBundled;
|
||||
if (*numGhostEdges > 0)
|
||||
{
|
||||
cout << "\n(" << myRank << ")Percentage of total = " << ((double)NumMessagesBundled / (double)(*numGhostEdges * 2)) * 100.0 << "% \n";
|
||||
}
|
||||
fflush(stdout);
|
||||
#endif
|
||||
|
||||
// Allocate memory for MPI Send messages:
|
||||
/* WILL COME BACK HERE - NO NEED TO STORE ALL THIS MEMORY !! */
|
||||
OneMessageSize = 0;
|
||||
MPI_Pack_size(3, TypeMap<MilanLongInt>(), comm, &OneMessageSize); // Size of one message packet
|
||||
// How many messages to send?
|
||||
// Potentially three kinds of messages will be sent/received:
|
||||
// Request, Success, Failure.
|
||||
// But only two will be sent from a given processor.
|
||||
// Substract the number of messages that have already been sent as bundled messages:
|
||||
numMessagesToSend = (*numGhostEdges) * 2 - NumMessagesBundled;
|
||||
*BufferSize = (OneMessageSize + MPI_BSEND_OVERHEAD) * numMessagesToSend;
|
||||
}
|
||||
|
||||
#pragma omp task depend(out : Buffer) depend(in : *BufferSize)
|
||||
{
|
||||
Buffer = 0;
|
||||
#ifdef PRINT_DEBUG_INFO_
|
||||
cout << "\n(" << myRank << ")Size of One Message from PACK= " << OneMessageSize;
|
||||
cout << "\n(" << myRank << ")Size of Message overhead = " << MPI_BSEND_OVERHEAD;
|
||||
cout << "\n(" << myRank << ")Number of Ghost edges = " << *numGhostEdges;
|
||||
cout << "\n(" << myRank << ")Number of remaining message = " << numMessagesToSend;
|
||||
cout << "\n(" << myRank << ")BufferSize = " << (*BufferSize);
|
||||
cout << "\n(" << myRank << ")Attaching Buffer on.. ";
|
||||
fflush(stdout);
|
||||
#endif
|
||||
if ((*BufferSize) > 0)
|
||||
{
|
||||
Buffer = (MilanLongInt *)malloc((*BufferSize)); // Allocate memory
|
||||
if (Buffer == 0)
|
||||
{
|
||||
cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n";
|
||||
cout << "Not enough memory to allocate for send buffer on process " << myRank << "\n";
|
||||
exit(1);
|
||||
}
|
||||
MPI_Buffer_attach(Buffer, *BufferSize); // Attach the Buffer
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,25 @@
|
||||
#!/bin/sh
# Rebuild the MatchBox aggregator objects, relink the library, build the
# 3D PDE sample, and run it on 4 MPI ranks.
set -e  # stop immediately if a cd or make step fails

cd amgprec/impl/aggregator/
# rm -f: do not fail when an object has not been built yet
# NOTE(review): the source defines queuesTransfer(); confirm the object is
# really named queueTransfer.o and not queuesTransfer.o.
rm -f MatchBoxPC.o \
      sendBundledMessages.o \
      initialize.o \
      extractUChunk.o \
      isAlreadyMatched.o \
      findOwnerOfGhost.o \
      computeCandidateMate.o \
      parallelComputeCandidateMateB.o \
      processMatchedVertices.o \
      processCrossEdge.o \
      queueTransfer.o \
      processMessages.o \
      processExposedVertex.o \
      algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.o \
      algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.o
cd ../../../
make all
cd samples/advanced/pdegen
make amg_d_pde3d
cd runs
mpirun -np 4 amg_d_pde3d amg_pde3d.inp
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue