#include "MatchBoxPC.h" // *********************************************************************** // // MatchboxP: A C++ library for approximate weighted matching // Mahantesh Halappanavar (hala@pnnl.gov) // Pacific Northwest National Laboratory // // *********************************************************************** // // Copyright (2021) Battelle Memorial Institute // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions // are met: // // 1. Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // // 2. Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // // 3. Neither the name of the copyright holder nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. // // ************************************************************************ ////////////////////////////////////////////////////////////////////////////////////// /////////////////////////// DOMINATING EDGES MODEL /////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////// /* Function : algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate() * * Date : New update: Feb 17, 2019, Richland, Washington. * Date : Original development: May 17, 2009, E&CS Bldg. * * Purpose : Compute Approximate Maximum Weight Matching in Linear Time * * Args : inputMatrix - instance of Compressed-Col format of Matrix * Mate - The Mate array * * Returns : By Value: (void) * By Reference: Mate * * Comments : 1/2 Approx Algorithm. Picks the locally available heaviest edge. * Assumption: The Mate Array is empty. */ /* NLVer = #of vertices, NLEdge = #of edges CSR/CSC/Compressed format: verLocPtr = Pointer, verLocInd = Index, edgeLocWeight = edge weights (positive real numbers) verDistance = A vector of size |P|+1 containing the cumulative number of vertices per process Mate = A vector of size |V_p| (local subgraph) to store the output (matching) MPI: myRank, numProcs, comm, Statistics: msgIndSent, msgActualSent, msgPercent : Size: |P| number of processes in the comm-world Statistics: ph0_time, ph1_time, ph2_time: Runtimes Statistics: ph1_card, ph2_card : Size: |P| number of processes in the comm-world (number of matched edges in Phase 1 and Phase 2) */ #ifdef SERIAL_MPI #else // DOUBLE PRECISION VERSION // WARNING: The vertex block on a given rank is contiguous void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( MilanLongInt NLVer, MilanLongInt NLEdge, MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanReal *edgeLocWeight, MilanLongInt *verDistance, MilanLongInt *Mate, MilanInt myRank, MilanInt numProcs, MPI_Comm comm, MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent, MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time, MilanLongInt *ph1_card, MilanLongInt *ph2_card) { /* * verDistance: it's a vector long as the number of processors. * verDistance[i] contains the first node index of the i-th processor * verDistance[i + 1] contains the last node index of the i-th processor * NLVer: number of elements in the LocPtr * NLEdge: number of edges assigned to the current processor * * Contains the portion of matrix assigned to the processor in * Yale notation * verLocInd: contains the positions on row of the matrix * verLocPtr: i-th value is the position of the first element on the i-th row and * i+1-th value is the position of the first element on the i+1-th row */ #if !defined(SERIAL_MPI) #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")Within algoEdgeApproxDominatingEdgesLinearSearchMessageBundling()"; fflush(stdout); #endif #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ") verDistance [" << verDistance[0] << "," << verDistance[1] << "," << verDistance[2] << "," << verDistance[3] << "]"; fflush(stdout); #endif #ifdef DEBUG_HANG_ if (myRank == 0) cout << "\n(" << myRank << ") verDistance [" << verDistance[0] << "," << verDistance[1] << "," << verDistance[2] << "," << verDistance[3] << "]"; fflush(stdout); #endif MilanLongInt StartIndex = verDistance[myRank]; // The starting vertex owned by the current rank MilanLongInt EndIndex = verDistance[myRank + 1] - 1; // The ending vertex owned by the current rank MPI_Status computeStatus; MilanLongInt msgActual = 0, msgInd = 0; MilanReal heaviestEdgeWt = 0.0f; // Assumes positive weight MilanReal startTime, finishTime; startTime = MPI_Wtime(); // Data structures for sending and receiving messages: vector Message; // [ u, v, message_type ] Message.resize(3, -1); // Data structures for Message Bundling: // Although up to two messages can be sent along any cross edge, // only one message will be sent in the initialization phase - // one of: REQUEST/FAILURE/SUCCESS vector QLocalVtx, QGhostVtx, QMsgType; vector QOwner; // Changed by Fabio to be an integer, addresses needs to be integers! MilanLongInt *PCounter = new MilanLongInt[numProcs]; for (int i = 0; i < numProcs; i++) PCounter[i] = 0; MilanLongInt NumMessagesBundled = 0; // TODO when the last computational section will be refactored this could be eliminated MilanInt ghostOwner = 0; // Changed by Fabio to be an integer, addresses needs to be integers! MilanLongInt *candidateMate = nullptr; #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")NV: " << NLVer << " Edges: " << NLEdge; fflush(stdout); cout << "\n(" << myRank << ")StartIndex: " << StartIndex << " EndIndex: " << EndIndex; fflush(stdout); #endif // Other Variables: MilanLongInt u = -1, v = -1, w = -1, i = 0; MilanLongInt k = -1, adj1 = -1, adj2 = -1; MilanLongInt k1 = -1, adj11 = -1, adj12 = -1; MilanLongInt myCard = 0; // Build the Ghost Vertex Set: Vg map Ghost2LocalMap; // Map each ghost vertex to a local vertex vector Counter; // Store the edge count for each ghost vertex MilanLongInt numGhostVertices = 0, numGhostEdges = 0; // Number of Ghost vertices #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")About to compute Ghost Vertices..."; fflush(stdout); #endif #ifdef DEBUG_HANG_ if (myRank == 0) cout << "\n(" << myRank << ")About to compute Ghost Vertices..."; fflush(stdout); #endif // Define Adjacency Lists for Ghost Vertices: // cout<<"Building Ghost data structures ... \n\n"; vector verGhostPtr, verGhostInd, tempCounter; // Mate array for ghost vertices: vector GMate; // Proportional to the number of ghost vertices MilanLongInt S; MilanLongInt privateMyCard = 0; vector PCumulative, PMessageBundle, PSizeInfoMessages; vector SRequest; // Requests that are used for each send message vector SStatus; // Status of sent messages, used in MPI_Wait MilanLongInt MessageIndex = 0; // Pointer for current message MilanInt BufferSize; MilanLongInt *Buffer; vector privateQLocalVtx, privateQGhostVtx, privateQMsgType; vector privateQOwner; vector U, privateU; initialize(NLVer, NLEdge, StartIndex, EndIndex, &numGhostEdges, &numGhostVertices, &S, verLocInd, verLocPtr, Ghost2LocalMap, Counter, verGhostPtr, verGhostInd, tempCounter, GMate, Message, QLocalVtx, QGhostVtx, QMsgType, QOwner, candidateMate, U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner); finishTime = MPI_Wtime(); *ph0_time = finishTime - startTime; // Time taken for Phase-0: Initialization startTime = MPI_Wtime(); ///////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////// INITIALIZATION ///////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// // Compute the Initial Matching Set: /* * OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B has been splitted from * PARALLEL_PROCESS_EXPOSED_VERTEX_B in order to better parallelize * the two. * PARALLEL_COMPUTE_CANDIDATE_MATE_B is now totally parallel. */ PARALLEL_COMPUTE_CANDIDATE_MATE_B(NLVer, verLocPtr, verLocInd, myRank, edgeLocWeight, candidateMate); /* * PARALLEL_PROCESS_EXPOSED_VERTEX_B * TODO: write comment * * TODO: Test when it's actually more efficient to execute this code * in parallel. */ PARALLEL_PROCESS_EXPOSED_VERTEX_B(NLVer, candidateMate, verLocInd, verLocPtr, StartIndex, EndIndex, Mate, GMate, Ghost2LocalMap, edgeLocWeight, &myCard, &msgInd, &NumMessagesBundled, &S, verDistance, PCounter, Counter, myRank, numProcs, U, privateU, QLocalVtx, QGhostVtx, QMsgType, QOwner, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner); tempCounter.clear(); // Do not need this any more /////////////////////////////////////////////////////////////////////////////////// /////////////////////////// PROCESS MATCHED VERTICES ////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// // TODO what would be the optimal UCHUNK vector UChunkBeingProcessed; UChunkBeingProcessed.reserve(UCHUNK); processMatchedVertices(NLVer, UChunkBeingProcessed, U, privateU, StartIndex, EndIndex, &myCard, &msgInd, &NumMessagesBundled, &S, verLocPtr, verLocInd, verDistance, PCounter, Counter, myRank, numProcs, candidateMate, GMate, Mate, Ghost2LocalMap, edgeLocWeight, QLocalVtx, QGhostVtx, QMsgType, QOwner, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner); ///////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////// SEND BUNDLED MESSAGES ///////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// sendBundledMessages(&numGhostEdges, &BufferSize, Buffer, PCumulative, PMessageBundle, PSizeInfoMessages, PCounter, NumMessagesBundled, &msgActual, &MessageIndex, numProcs, myRank, comm, QLocalVtx, QGhostVtx, QMsgType, QOwner, SRequest, SStatus); ///////////////////////// END OF SEND BUNDLED MESSAGES ////////////////////////////////// finishTime = MPI_Wtime(); *ph1_time = finishTime - startTime; // Time taken for Phase-1 *ph1_card = myCard; // Cardinality at the end of Phase-1 startTime = MPI_Wtime(); ///////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////// MAIN LOOP ////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// // Main While Loop: #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << "=========================************===============================" << endl; fflush(stdout); fflush(stdout); #endif #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")Entering While(true) loop.."; fflush(stdout); #endif #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << "=========================************===============================" << endl; fflush(stdout); fflush(stdout); #endif while (true) { #ifdef DEBUG_HANG_ if (myRank == 0) cout << "\n(" << myRank << ") Main loop" << endl; fflush(stdout); #endif /////////////////////////////////////////////////////////////////////////////////// /////////////////////////// PROCESS MATCHED VERTICES ////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// processMatchedVerticesAndSendMessages(NLVer, UChunkBeingProcessed, U, privateU, StartIndex, EndIndex, &myCard, &msgInd, &NumMessagesBundled, &S, verLocPtr, verLocInd, verDistance, PCounter, Counter, myRank, numProcs, candidateMate, GMate, Mate, Ghost2LocalMap, edgeLocWeight, QLocalVtx, QGhostVtx, QMsgType, QOwner, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, comm, &msgActual, Message); ///////////////////////// END OF PROCESS MATCHED VERTICES ///////////////////////// //// BREAK IF NO MESSAGES EXPECTED ///////// #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")Deciding whether to break: S= " << S << endl; #endif if (S == 0) { #ifdef DEBUG_HANG_ cout << "\n(" << myRank << ") Breaking out" << endl; fflush(stdout); #endif break; } /////////////////////////////////////////////////////////////////////////////////// /////////////////////////// PROCESS MESSAGES ////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// processMessages(NLVer, Mate, candidateMate, Ghost2LocalMap, GMate, Counter, StartIndex, EndIndex, &myCard, &msgInd, &msgActual, edgeLocWeight, verDistance, verLocPtr, k, verLocInd, numProcs, myRank, comm, Message, numGhostEdges, u, v, &S, U); ///////////////////////// END OF PROCESS MESSAGES ///////////////////////////////// #ifdef PRINT_DEBUG_INFO_ cout << "\n(" << myRank << ")Finished Message processing phase: S= " << S; fflush(stdout); cout << "\n(" << myRank << ")** SENT : ACTUAL= " << msgActual; fflush(stdout); cout << "\n(" << myRank << ")** SENT : INDIVIDUAL= " << msgInd << endl; fflush(stdout); #endif } // End of while (true) clean(NLVer, myRank, MessageIndex, SRequest, SStatus, BufferSize, Buffer, msgActual, msgActualSent, msgInd, msgIndSent, NumMessagesBundled, msgPercent); finishTime = MPI_Wtime(); *ph2_time = finishTime - startTime; // Time taken for Phase-2 *ph2_card = myCard; // Cardinality at the end of Phase-2 } // End of algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate #endif #endif