#include "MatchBoxPC.h" // *********************************************************************** // // MatchboxP: A C++ library for approximate weighted matching // Mahantesh Halappanavar (hala@pnnl.gov) // Pacific Northwest National Laboratory // // *********************************************************************** // // Copyright (2021) Battelle Memorial Institute // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the copyright holder nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. 
// // ************************************************************************ ////////////////////////////////////////////////////////////////////////////////////// /////////////////////////// DOMINATING EDGES MODEL /////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////// /* Function : algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate() * * Date : New update: Feb 17, 2019, Richland, Washington. * Date : Original development: May 17, 2009, E&CS Bldg. * * Purpose : Compute Approximate Maximum Weight Matching in Linear Time * * Args : inputMatrix - instance of Compressed-Col format of Matrix * Mate - The Mate array * * Returns : By Value: (void) * By Reference: Mate * * Comments : 1/2 Approx Algorithm. Picks the locally available heaviest edge. * Assumption: The Mate Array is empty. */ /* NLVer = #of vertices, NLEdge = #of edges CSR/CSC/Compressed format: verLocPtr = Pointer, verLocInd = Index, edgeLocWeight = edge weights (positive real numbers) verDistance = A vector of size |P|+1 containing the cumulative number of vertices per process Mate = A vector of size |V_p| (local subgraph) to store the output (matching) MPI: myRank, numProcs, comm, Statistics: msgIndSent, msgActualSent, msgPercent : Size: |P| number of processes in the comm-world Statistics: ph0_time, ph1_time, ph2_time: Runtimes Statistics: ph1_card, ph2_card : Size: |P| number of processes in the comm-world (number of matched edges in Phase 1 and Phase 2) */ //#define DEBUG_HANG_ #ifdef SERIAL_MPI #else // DOUBLE PRECISION VERSION // WARNING: The vertex block on a given rank is contiguous void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( MilanLongInt NLVer, MilanLongInt NLEdge, MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanReal *edgeLocWeight, MilanLongInt *verDistance, MilanLongInt *Mate, MilanInt myRank, MilanInt numProcs, MPI_Comm comm, MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal 
*msgPercent, MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time, MilanLongInt *ph1_card, MilanLongInt *ph2_card) { /* * verDistance: a vector with numProcs + 1 entries (one per processor, plus one). * verDistance[i] contains the first node index of the i-th processor * verDistance[i + 1] - 1 is the last node index of the i-th processor * NLVer: number of elements in the LocPtr * NLEdge: number of edges assigned to the current processor * * Contains the portion of matrix assigned to the processor in * Yale notation * verLocInd: contains the positions on row of the matrix * verLocPtr: i-th value is the position of the first element on the i-th row and * i+1-th value is the position of the first element on the i+1-th row */ #if !defined(SERIAL_MPI) #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")Within algoEdgeApproxDominatingEdgesLinearSearchMessageBundling()"; fflush(stdout); #endif #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ") verDistance [" ; for (int i = 0; i < numProcs; i++) cout << verDistance[i] << "," << verDistance[i+1]; cout << "]\n"; fflush(stdout); #endif #ifdef DEBUG_HANG_ if (myRank == 0) { cout << "\n(" << myRank << ") verDistance [" ; for (int i = 0; i < numProcs; i++) cout << verDistance[i] << "," ; cout << verDistance[numProcs]<< "]\n"; } fflush(stdout); #endif // The starting vertex owned by the current rank MilanLongInt StartIndex = verDistance[myRank]; // The ending vertex owned by the current rank MilanLongInt EndIndex = verDistance[myRank + 1] - 1; MPI_Status computeStatus; MilanLongInt msgActual = 0, msgInd = 0; MilanReal heaviestEdgeWt = 0.0f; // Assumes positive weight MilanReal startTime, finishTime; startTime = MPI_Wtime(); // Data structures for sending and receiving messages: vector Message; // [ u, v, message_type ] Message.resize(3, -1); // Data structures for Message Bundling: // Although up to two messages can be sent along any cross edge, // only one message will be sent in the initialization phase - // one of: 
REQUEST/FAILURE/SUCCESS vector QLocalVtx, QGhostVtx, QMsgType; // Changed by Fabio to be an integer, addresses needs to be integers! vector QOwner; MilanLongInt *PCounter = new MilanLongInt[numProcs]; for (int i = 0; i < numProcs; i++) PCounter[i] = 0; MilanLongInt NumMessagesBundled = 0; // TODO when the last computational section will be refactored this could be eliminated // Changed by Fabio to be an integer, addresses needs to be integers! MilanInt ghostOwner = 0; MilanLongInt *candidateMate = nullptr; #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")NV: " << NLVer << " Edges: " << NLEdge; fflush(stdout); cout << "\n(" << myRank << ")StartIndex: " << StartIndex << " EndIndex: " << EndIndex; fflush(stdout); #endif // Other Variables: MilanLongInt u = -1, v = -1, w = -1, i = 0; MilanLongInt k = -1, adj1 = -1, adj2 = -1; MilanLongInt k1 = -1, adj11 = -1, adj12 = -1; MilanLongInt myCard = 0; // Build the Ghost Vertex Set: Vg // Map each ghost vertex to a local vertex map Ghost2LocalMap; // Store the edge count for each ghost vertex vector Counter; // Number of Ghost vertices MilanLongInt numGhostVertices = 0, numGhostEdges = 0; #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")About to compute Ghost Vertices..."; fflush(stdout); #endif #ifdef DEBUG_HANG_ if (myRank == 0) cout << "\n(" << myRank << ")About to compute Ghost Vertices..."; fflush(stdout); #endif // Define Adjacency Lists for Ghost Vertices: // cout<<"Building Ghost data structures ... 
\n\n"; vector verGhostPtr, verGhostInd, tempCounter; // Mate array for ghost vertices: vector GMate; // Proportional to the number of ghost vertices MilanLongInt S; MilanLongInt privateMyCard = 0; vector PCumulative, PMessageBundle, PSizeInfoMessages; vector SRequest; // Requests that are used for each send message vector SStatus; // Status of sent messages, used in MPI_Wait MilanLongInt MessageIndex = 0; // Pointer for current message MilanInt BufferSize; MilanLongInt *Buffer; vector privateQLocalVtx, privateQGhostVtx, privateQMsgType; vector privateQOwner; vector U, privateU; initialize(NLVer, NLEdge, StartIndex, EndIndex, &numGhostEdges, &numGhostVertices, &S, verLocInd, verLocPtr, Ghost2LocalMap, Counter, verGhostPtr, verGhostInd, tempCounter, GMate, Message, QLocalVtx, QGhostVtx, QMsgType, QOwner, candidateMate, U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner); finishTime = MPI_Wtime(); *ph0_time = finishTime - startTime; // Time taken for Phase-0: Initialization #ifdef DEBUG_HANG_ cout << myRank << " Finished initialization" << endl; fflush(stdout); #endif startTime = MPI_Wtime(); ///////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////// INITIALIZATION ///////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// // Compute the Initial Matching Set: /* * OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B has been split from * PARALLEL_PROCESS_EXPOSED_VERTEX_B in order to better parallelize * the two. * PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel. 
*/ PARALLEL_COMPUTE_CANDIDATE_MATE_BD(NLVer, verLocPtr, verLocInd, myRank, edgeLocWeight, candidateMate); #ifdef DEBUG_HANG_ cout << myRank << " Finished Exposed Vertex" << endl; fflush(stdout); #if 0 cout << myRank << " candidateMate after parallelCompute " < UChunkBeingProcessed; UChunkBeingProcessed.reserve(UCHUNK); processMatchedVerticesD(NLVer, UChunkBeingProcessed, U, privateU, StartIndex, EndIndex, &myCard, &msgInd, &NumMessagesBundled, &S, verLocPtr, verLocInd, verDistance, PCounter, Counter, myRank, numProcs, candidateMate, GMate, Mate, Ghost2LocalMap, edgeLocWeight, QLocalVtx, QGhostVtx, QMsgType, QOwner, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner); #ifdef DEBUG_HANG_ cout << myRank << " Finished Process Vertices" << endl; fflush(stdout); #if 0 cout << myRank << " Mate after Matched Vertices " < Message; // [ u, v, message_type ] Message.resize(3, -1); // Data structures for Message Bundling: // Although up to two messages can be sent along any cross edge, // only one message will be sent in the initialization phase - // one of: REQUEST/FAILURE/SUCCESS vector QLocalVtx, QGhostVtx, QMsgType; // Changed by Fabio to be an integer, addresses needs to be integers! vector QOwner; MilanLongInt *PCounter = new MilanLongInt[numProcs]; for (int i = 0; i < numProcs; i++) PCounter[i] = 0; MilanLongInt NumMessagesBundled = 0; // TODO when the last computational section will be refactored this could be eliminated // Changed by Fabio to be an integer, addresses needs to be integers! 
MilanInt ghostOwner = 0; MilanLongInt *candidateMate = nullptr; #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")NV: " << NLVer << " Edges: " << NLEdge; fflush(stdout); cout << "\n(" << myRank << ")StartIndex: " << StartIndex << " EndIndex: " << EndIndex; fflush(stdout); #endif // Other Variables: MilanLongInt u = -1, v = -1, w = -1, i = 0; MilanLongInt k = -1, adj1 = -1, adj2 = -1; MilanLongInt k1 = -1, adj11 = -1, adj12 = -1; MilanLongInt myCard = 0; // Build the Ghost Vertex Set: Vg // Map each ghost vertex to a local vertex map Ghost2LocalMap; // Store the edge count for each ghost vertex vector Counter; // Number of Ghost vertices MilanLongInt numGhostVertices = 0, numGhostEdges = 0; #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")About to compute Ghost Vertices..."; fflush(stdout); #endif #ifdef DEBUG_HANG_ if (myRank == 0) cout << "\n(" << myRank << ")About to compute Ghost Vertices..."; fflush(stdout); #endif // Define Adjacency Lists for Ghost Vertices: // cout<<"Building Ghost data structures ... 
\n\n"; vector verGhostPtr, verGhostInd, tempCounter; // Mate array for ghost vertices: vector GMate; // Proportional to the number of ghost vertices MilanLongInt S; MilanLongInt privateMyCard = 0; vector PCumulative, PMessageBundle, PSizeInfoMessages; vector SRequest; // Requests that are used for each send message vector SStatus; // Status of sent messages, used in MPI_Wait MilanLongInt MessageIndex = 0; // Pointer for current message MilanInt BufferSize; MilanLongInt *Buffer; vector privateQLocalVtx, privateQGhostVtx, privateQMsgType; vector privateQOwner; vector U, privateU; initialize(NLVer, NLEdge, StartIndex, EndIndex, &numGhostEdges, &numGhostVertices, &S, verLocInd, verLocPtr, Ghost2LocalMap, Counter, verGhostPtr, verGhostInd, tempCounter, GMate, Message, QLocalVtx, QGhostVtx, QMsgType, QOwner, candidateMate, U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner); finishTime = MPI_Wtime(); *ph0_time = finishTime - startTime; // Time taken for Phase-0: Initialization #ifdef DEBUG_HANG_ cout << myRank << " Finished initialization" << endl; fflush(stdout); #endif startTime = MPI_Wtime(); ///////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////// INITIALIZATION ///////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// // Compute the Initial Matching Set: /* * OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B has been split from * PARALLEL_PROCESS_EXPOSED_VERTEX_B in order to better parallelize * the two. * PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel. 
*/ PARALLEL_COMPUTE_CANDIDATE_MATE_BS(NLVer, verLocPtr, verLocInd, myRank, edgeLocWeight, candidateMate); #ifdef DEBUG_HANG_ cout << myRank << " Finished Exposed Vertex" << endl; fflush(stdout); #if 0 cout << myRank << " candidateMate after parallelCompute " < UChunkBeingProcessed; UChunkBeingProcessed.reserve(UCHUNK); processMatchedVerticesS(NLVer, UChunkBeingProcessed, U, privateU, StartIndex, EndIndex, &myCard, &msgInd, &NumMessagesBundled, &S, verLocPtr, verLocInd, verDistance, PCounter, Counter, myRank, numProcs, candidateMate, GMate, Mate, Ghost2LocalMap, edgeLocWeight, QLocalVtx, QGhostVtx, QMsgType, QOwner, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner); #ifdef DEBUG_HANG_ cout << myRank << " Finished Process Vertices" << endl; fflush(stdout); #if 0 cout << myRank << " Mate after Matched Vertices " <