diff --git a/README.md b/README.md index fbea8c39..1d330385 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ - - AMG4PSBLAS + AMG4PSBLAS Algebraic Multigrid Package based on PSBLAS (Parallel Sparse BLAS version 3.7) Salvatore Filippone (University of Rome Tor Vergata and IAC-CNR) diff --git a/amgprec/impl/aggregator/Makefile b/amgprec/impl/aggregator/Makefile index d857a3b0..1f6f52af 100644 --- a/amgprec/impl/aggregator/Makefile +++ b/amgprec/impl/aggregator/Makefile @@ -62,7 +62,8 @@ amg_s_parmatch_smth_bld.o \ amg_s_parmatch_spmm_bld_inner.o MPCOBJS=MatchBoxPC.o \ -algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.o +algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.o \ +algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.o OBJS = $(FOBJS) $(MPCOBJS) diff --git a/amgprec/impl/aggregator/MatchBoxPC.cpp b/amgprec/impl/aggregator/MatchBoxPC.cpp index 270c6d04..90b448dc 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.cpp +++ b/amgprec/impl/aggregator/MatchBoxPC.cpp @@ -66,22 +66,35 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge, myRank,NLVer, NLEdge,verDistance[0],verDistance[1]); #endif + #define TIME_TRACKER #ifdef TIME_TRACKER double tmr = MPI_Wtime(); #endif - dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(NLVer, NLEdge, +#define OMP +#ifdef OMP + dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(NLVer, NLEdge, verLocPtr, verLocInd, edgeLocWeight, verDistance, Mate, myRank, numProcs, C_comm, msgIndSent, msgActualSent, msgPercent, ph0_time, ph1_time, ph2_time, ph1_card, ph2_card ); +#else + dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(NLVer, NLEdge, + verLocPtr, verLocInd, edgeLocWeight, + verDistance, Mate, + myRank, numProcs, C_comm, + msgIndSent, msgActualSent, msgPercent, + ph0_time, ph1_time, ph2_time, + ph1_card, ph2_card ); +#endif + #ifdef TIME_TRACKER tmr = MPI_Wtime() - tmr; - fprintf(stderr, "Elaboration time: %f for $ld\n", tmr, NLEdge); + fprintf(stderr, "Elaboration time: %f for %ld nodes\n", tmr, NLVer); #endif #endif diff --git a/amgprec/impl/aggregator/MatchBoxPC.h b/amgprec/impl/aggregator/MatchBoxPC.h index 21d0a181..6c3f765f 100644 --- a/amgprec/impl/aggregator/MatchBoxPC.h +++ b/amgprec/impl/aggregator/MatchBoxPC.h @@ -152,6 +152,17 @@ extern "C" { inline MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, MilanInt myRank, MilanInt numProcs); +void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP + ( + MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt* verLocPtr, MilanLongInt* verLocInd, MilanReal* edgeLocWeight, + MilanLongInt* verDistance, + MilanLongInt* Mate, + MilanInt myRank, MilanInt numProcs, MPI_Comm comm, + MilanLongInt* msgIndSent, MilanLongInt* msgActualSent, MilanReal* msgPercent, + MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time, + MilanLongInt* ph1_card, MilanLongInt* ph2_card ); + void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC ( MilanLongInt NLVer, MilanLongInt NLEdge, diff --git a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp index 818c9f07..8be438b6 100644 --- a/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp +++ b/amgprec/impl/aggregator/algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.cpp @@ -1,6 +1,4 @@ #include "MatchBoxPC.h" -#include -#include // *********************************************************************** // // MatchboxP: A C++ library for approximate weighted matching @@ -94,21 +92,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC( MilanReal* msgPercent, MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time, MilanLongInt* ph1_card, MilanLongInt* ph2_card ) { - - /* - * verDistance: it's a vector long as the number of processors. - * verDistance[i] contains the first node index of the i-th processor - * verDistance[i + 1] contains the last node index of the i-th processor - * NLVer: number of elements in the LocPtr - * NLEdge: number of edges assigned to the current processor - * - * Contains the portion of matrix assigned to the processor in - * Yale notation - * verLocInd: contains the positions on row of the matrix - * verLocPtr: i-th value is the position of the first element on the i-th row and - * i+1-th value is the position of the first element on the i+1-th row - */ - #if !defined(SERIAL_MPI) #ifdef PRINT_DEBUG_INFO_ cout<<"\n("< Ghost2LocalMap; //Map each ghost vertex to a local vertex + map Ghost2LocalMap; //Map each ghost vertex to a local vertex // index that starts with zero to |Vg| - 1 map::iterator storedAlready; - vector Counter; //Store the edge count for each ghost vertex - MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe = 0; //Number of Ghost vertices + vector Counter; //Store the edge count for each ghost vertex + MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe=0; //Number of Ghost vertices #ifdef PRINT_DEBUG_INFO_ cout<<"\n("< verGhostPtr, verGhostInd, tempCounter; - //Mate array for ghost vertices: - vector GMate; //Proportional to the number of ghost vertices - -#ifdef TIME_TRACKER - double Ghost2LocalInitialization = MPI_Wtime(); -#endif - -//#define OMP -#ifdef OMP -#pragma omp parallel private(insertMe, k, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4) - { -#endif - //printf("Id %d\n", omp_get_thread_num()); - -#ifdef OMP -#pragma omp for -#endif - for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice - insertMe = verLocInd[i]; - //cout<<"InsertMe on Process "< EndIndex)) { //Find a ghost -#ifdef OMP -#pragma omp critical - { -#endif - numGhostEdges++; - storedAlready = Ghost2LocalMap.find(insertMe); - if (storedAlready != Ghost2LocalMap.end()) { //Has already been added - //cout<<"Process "<first<<" - "<second<second]++; //Increment the counter - } else { //Insert an entry for the ghost: - //cout<<"Process "< EndIndex) ) { //Find a ghost + storedAlready = Ghost2LocalMap.find( insertMe ); + if ( storedAlready != Ghost2LocalMap.end() ) { //Has already been added + //cout<<"Process "<first<<" - "<second<second]++; //Increment the counter + numGhostEdges++; + } else { //Insert an entry for the ghost: + //cout<<"Process "<second<<" - "<first<<" : "<second]<second<<" - "<first<<" : "<second]< verGhostPtr, verGhostInd, tempCounter; + //Mate array for ghost vertices: + vector GMate; //Proportional to the number of ghost vertices + try { + verGhostPtr.reserve(numGhostVertices+1); //Pointer Vector + tempCounter.reserve(numGhostVertices); //Pointer Vector + verGhostInd.reserve(numGhostEdges); //Index Vector + GMate.reserve(numGhostVertices); //Ghost Mate Vector + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + //Initialize the Vectors: + verGhostPtr.resize(numGhostVertices+1, 0); //Pointer Vector + tempCounter.resize(numGhostVertices, 0); //Temporary Counter + verGhostInd.resize(numGhostEdges, -1); //Index Vector + GMate.resize(numGhostVertices, -1); //Temporary Counter + verGhostPtr[0] = 0; //The first value #ifdef PRINT_DEBUG_INFO_ - cout<<"\n("< 0 ) cout< EndIndex) ) { //Find a ghost -#ifdef OMP -#pragma omp critical - { -#endif - insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert - verGhostInd[insertMe] = v + StartIndex; //Add the adjacency - tempCounter[Ghost2LocalMap[w]]++; //Increment the counter -#ifdef OMP - } -#endif + insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert + verGhostInd[insertMe] = v+StartIndex; //Add the adjacency + tempCounter[Ghost2LocalMap[w]]++; //Increment the counter } //End of if((w < StartIndex) || (w > EndIndex)) } //End of for(k) } //End of for (v) tempCounter.clear(); //Do not need this any more - -#ifdef OMP - } //end of parallel region -#endif -#ifdef TIME_TRACKER - verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization; - fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization); -#endif - #ifdef PRINT_DEBUG_INFO_ cout<<"\n("< +#include +// *********************************************************************** +// +// MatchboxP: A C++ library for approximate weighted matching +// Mahantesh Halappanavar (hala@pnnl.gov) +// Pacific Northwest National Laboratory +// +// *********************************************************************** +// +// Copyright (2021) Battelle Memorial Institute +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// ************************************************************************ +////////////////////////////////////////////////////////////////////////////////////// +/////////////////////////// DOMINATING EDGES MODEL /////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////// +/* Function : algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate() + * + * Date : New update: Feb 17, 2019, Richland, Washington. + * Date : Original development: May 17, 2009, E&CS Bldg. + * + * Purpose : Compute Approximate Maximum Weight Matching in Linear Time + * + * Args : inputMatrix - instance of Compressed-Col format of Matrix + * Mate - The Mate array + * + * Returns : By Value: (void) + * By Reference: Mate + * + * Comments : 1/2 Approx Algorithm. Picks the locally available heaviest edge. + * Assumption: The Mate Array is empty. + */ + +/* + NLVer = #of vertices, NLEdge = #of edges + CSR/CSC/Compressed format: verLocPtr = Pointer, verLocInd = Index, edgeLocWeight = edge weights (positive real numbers) + verDistance = A vector of size |P|+1 containing the cumulative number of vertices per process + Mate = A vector of size |V_p| (local subgraph) to store the output (matching) + MPI: myRank, numProcs, comm, + Statistics: msgIndSent, msgActualSent, msgPercent : Size: |P| number of processes in the comm-world + Statistics: ph0_time, ph1_time, ph2_time: Runtimes + Statistics: ph1_card, ph2_card : Size: |P| number of processes in the comm-world (number of matched edges in Phase 1 and Phase 2) + */ + +#ifdef SERIAL_MPI +#else +//MPI type map +template MPI_Datatype TypeMap(); +template<> inline MPI_Datatype TypeMap() { return MPI_LONG_LONG; } +template<> inline MPI_Datatype TypeMap() { return MPI_INT; } +template<> inline MPI_Datatype TypeMap() { return MPI_DOUBLE; } +template<> inline MPI_Datatype TypeMap() { return MPI_FLOAT; } + +// DOUBLE PRECISION VERSION +//WARNING: The vertex block on a given rank is contiguous +void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP( + MilanLongInt NLVer, MilanLongInt NLEdge, + MilanLongInt* verLocPtr, MilanLongInt* verLocInd, + MilanReal* edgeLocWeight, + MilanLongInt* verDistance, + MilanLongInt* Mate, + MilanInt myRank, MilanInt numProcs, MPI_Comm comm, + MilanLongInt* msgIndSent, MilanLongInt* msgActualSent, + MilanReal* msgPercent, + MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time, + MilanLongInt* ph1_card, MilanLongInt* ph2_card ) { + + /* + * verDistance: it's a vector long as the number of processors. + * verDistance[i] contains the first node index of the i-th processor + * verDistance[i + 1] contains the last node index of the i-th processor + * NLVer: number of elements in the LocPtr + * NLEdge: number of edges assigned to the current processor + * + * Contains the portion of matrix assigned to the processor in + * Yale notation + * verLocInd: contains the positions on row of the matrix + * verLocPtr: i-th value is the position of the first element on the i-th row and + * i+1-th value is the position of the first element on the i+1-th row + */ + +#if !defined(SERIAL_MPI) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<::iterator verLocPtr = inputSubGraph.getVerPtr_b(); + //vector::iterator verLocInd = inputSubGraph.getVerInd_b(); + //vector::iterator edgeLocWeight = inputSubGraph.getEdgeWt_b(); + + //Data structures for sending and receiving messages: + vector Message; // [ u, v, message_type ] + Message.resize(3,-1); + const MilanLongInt REQUEST = 1; + const MilanLongInt SUCCESS = 2; + const MilanLongInt FAILURE = 3; + const MilanLongInt SIZEINFO = 4; + MilanLongInt message_type = 0; + //Data structures for Message Bundling: + //Although up to two messages can be sent along any cross edge, + //only one message will be sent in the initialization phase - + //one of: REQUEST/FAILURE/SUCCESS + vector QLocalVtx, QGhostVtx, QMsgType; + vector QOwner; // Changed by Fabio to be an integer, addresses needs to be integers! + vector PCounter; + MilanLongInt NumMessagesBundled; + MilanInt ghostOwner; // Changed by Fabio to be an integer, addresses needs to be integers! + vector candidateMate; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< Ghost2LocalMap; //Map each ghost vertex to a local vertex + // index that starts with zero to |Vg| - 1 + map::iterator storedAlready; + vector Counter; //Store the edge count for each ghost vertex + MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe = 0; //Number of Ghost vertices + +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< verGhostPtr, verGhostInd, tempCounter; + //Mate array for ghost vertices: + vector GMate; //Proportional to the number of ghost vertices + +#ifdef TIME_TRACKER + double Ghost2LocalInitialization = MPI_Wtime(); +#endif + + +#pragma omp parallel private(insertMe, k, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4) + { + +#pragma omp for + for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice + insertMe = verLocInd[i]; + //cout<<"InsertMe on Process "< EndIndex)) { //Find a ghost +#pragma omp critical + { + numGhostEdges++; + storedAlready = Ghost2LocalMap.find(insertMe); + if (storedAlready != Ghost2LocalMap.end()) { //Has already been added + //cout<<"Process "<first<<" - "<second<second]++; //Increment the counter + } else { //Insert an entry for the ghost: + //cout<<"Process "<second<<" - "<first<<" : "<second]< 0 ) + cout< EndIndex) ) { //Find a ghost +#pragma omp critical + { + insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert + verGhostInd[insertMe] = v + StartIndex; //Add the adjacency + tempCounter[Ghost2LocalMap[w]]++; //Increment the counter + } + } //End of if((w < StartIndex) || (w > EndIndex)) + } //End of for(k) + } //End of for (v) + tempCounter.clear(); //Do not need this any more + +#pragma omp single + { + +#ifdef TIME_TRACKER + verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization; + fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization); +#endif + +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<EndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[verLocInd[k]]] >= 0 )// Already matched + continue; + } else { //A local vertex + if( Mate[verLocInd[k]-StartIndex] >= 0 ) // Already matched + continue; + } + + if( (edgeLocWeight[k] > heaviestEdgeWt) || + ((edgeLocWeight[k] == heaviestEdgeWt)&&(w < verLocInd[k])) ) { + heaviestEdgeWt = edgeLocWeight[k]; + w = verLocInd[k]; + } + } //End of for loop + candidateMate[v] = w; + + //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= 0 ) { + if ( (w < StartIndex) || (w > EndIndex) ) { //w is a ghost vertex + //Build the Message Packet: + //Message[0] = v+StartIndex; //LOCAL + //Message[1] = w; //GHOST + //Message[2] = REQUEST; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[w]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v) + } //End of if CandidateMate[w] = v + } //End of if a Ghost Vertex + else { // w is a local vertex + if ( candidateMate[w-StartIndex] == (v+StartIndex) ) { + Mate[v] = w; //v is local + Mate[w-StartIndex] = v+StartIndex; //w is local + //Q.push_back(u); + U.push_back(v+StartIndex); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<=0) + else { + adj11 = verLocPtr[v]; + adj12 = verLocPtr[v+1]; + for( k1 = adj11; k1 < adj12; k1++ ) { + w = verLocInd[k1]; + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + //Message[0] = v+StartIndex; //LOCAL + //Message[1] = w; //GHOST + //Message[2] = FAILURE; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= StartIndex) && (u <= EndIndex) ) { //Process Only the Local Vertices + //Get the Adjacency list for u + adj1 = verLocPtr[u-StartIndex]; //Pointer + adj2 = verLocPtr[u-StartIndex+1]; + for( k = adj1; k < adj2; k++ ) { + v = verLocInd[k]; + if ( (v >= StartIndex) && (v <= EndIndex) ) { //If Local Vertex: + if ( (vEndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[v]] >= 0 )// Already matched + continue; + } else { //A local vertex + if( Mate[v-StartIndex] >= 0 ) // Already matched + continue; + } //End of else + +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<EndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[verLocInd[k1]]] >= 0 )// Already matched + continue; + } else { //A local vertex + if( Mate[verLocInd[k1]-StartIndex] >= 0 ) // Already matched + continue; + } + if( (edgeLocWeight[k1] > heaviestEdgeWt) || + ((edgeLocWeight[k1] == heaviestEdgeWt)&&(w < verLocInd[k1])) ) { + heaviestEdgeWt = edgeLocWeight[k1]; + w = verLocInd[k1]; + } + } //End of for loop + candidateMate[v-StartIndex] = w; + //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= 0 ) { + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + //Message[0] = v; //LOCAL + //Message[1] = w; //GHOST + //Message[2] = REQUEST; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[w]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,w) + } //End of if CandidateMate[w] = v + } //End of if a Ghost Vertex + else { //w is a local vertex + if ( candidateMate[w-StartIndex] == v ) { + Mate[v-StartIndex] = w; //v is a local vertex + Mate[w-StartIndex] = v; //w is a local vertex + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<=0) + else { + adj11 = verLocPtr[v-StartIndex]; + adj12 = verLocPtr[v-StartIndex+1]; + for( k1 = adj11; k1 < adj12; k1++ ) { + w = verLocInd[k1]; + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + //Message[0] = v; //LOCAL + //Message[1] = w; //GHOST + //Message[2] = FAILURE; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= StartIndex) && (v <= EndIndex) ) //If Local Vertex: + else { //Neighbor is a ghost vertex + if ( candidateMate[NLVer+Ghost2LocalMap[v]] == u ) + candidateMate[NLVer+Ghost2LocalMap[v]] = -1; + if ( v != Mate[u-StartIndex] ) { //u is local + //Build the Message Packet: + //Message[0] = u; //LOCAL + //Message[1] = v; //GHOST + //Message[2] = SUCCESS; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= StartIndex) && (u <= EndIndex) ) //Process Only If a Local Vertex + } //End of while ( /*!Q.empty()*/ !U.empty() ) + ///////////////////////// END OF PROCESS MATCHED VERTICES ///////////////////////// +#ifdef DEBUG_HANG_ + if (myRank == 0) cout<<"\n("< PCumulative, PMessageBundle, PSizeInfoMessages; + MilanLongInt myIndex=0; + try { + PMessageBundle.reserve(NumMessagesBundled*3); //Three integers per message + PCumulative.reserve(numProcs+1); //Similar to Row Pointer vector in CSR data structure + PSizeInfoMessages.reserve(numProcs*3); //Buffer to hold the Size info message packets + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + PMessageBundle.resize(NumMessagesBundled*3, -1);//Initialize + PCumulative.resize(numProcs+1, 0); //Only initialize the counter variable + PSizeInfoMessages.resize(numProcs*3, 0); + + for (MilanInt i=0; i SRequest; //Requests that are used for each send message + vector SStatus; //Status of sent messages, used in MPI_Wait + MilanLongInt MessageIndex=0; //Pointer for current message + try { + SRequest.reserve(numProcs*2); //At most two messages per processor + SStatus.reserve(numProcs*2);//At most two messages per processor + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearchImmediateSend: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + MPI_Request myReq; //A sample request + SRequest.resize(numProcs*2,myReq); + MPI_Status myStat; //A sample status + SStatus.resize(numProcs*2,myStat); + //Send the Messages + for (MilanInt i=0; i 0 ) { //Send only if it is a nonempty packet + MPI_Isend(&PSizeInfoMessages[i*3+0], 3, TypeMap(), i, ComputeTag, comm, &SRequest[MessageIndex]); + msgActual++; + MessageIndex++; + //Now Send the message with the data packet: +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), i, BundleTag, comm, &SRequest[MessageIndex]); + MessageIndex++; + } //End of if size > 0 + } + //Free up temporary memory: + PCumulative.clear(); + QLocalVtx.clear(); + QGhostVtx.clear(); + QMsgType.clear(); + QOwner.clear(); + PCounter.clear(); +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), comm, &OneMessageSize); //Size of one message packet + //How many messages to send? + //Potentially three kinds of messages will be sent/received: + //Request, Success, Failure. + //But only two will be sent from a given processor. + //Substract the number of messages that have already been sent as bundled messages: + MilanLongInt numMessagesToSend = numGhostEdges*2 - NumMessagesBundled; + MilanInt BufferSize = (OneMessageSize+MPI_BSEND_OVERHEAD)*numMessagesToSend; + + MilanLongInt *Buffer=0; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Buffer = (MilanLongInt *) malloc(BufferSize); //Allocate memory + if ( Buffer == 0 ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; + cout<<"Not enough memory to allocate for send buffer on process "< ReceiveBuffer; + MilanLongInt bundleSize=0, bundleCounter=0; + try { + ReceiveBuffer.reserve(numGhostEdges*2*3); //Three integers per cross edge + } catch ( length_error ) { + cout<<"Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n"; + cout<<"Not enough memory to allocate the internal variables \n"; + exit(1); + } + while ( true ) { +#ifdef DEBUG_HANG_ + if (myRank == 0) cout<<"\n("<= StartIndex) && (u <= EndIndex) ) { //Process Only If a Local Vertex + //Get the Adjacency list for u + adj1 = verLocPtr[u-StartIndex]; //Pointer + adj2 = verLocPtr[u-StartIndex+1]; + for( k = adj1; k < adj2; k++ ) { + v = verLocInd[k]; + if ( (v >= StartIndex) && (v <= EndIndex) ) { //v is a Local Vertex: + if ( Mate[v-StartIndex] >= 0 ) // v is already matched + continue; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<EndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[verLocInd[k1]]] >= 0 )// Already matched + continue; + } + else { //A local vertex + if( Mate[verLocInd[k1]-StartIndex] >= 0 ) // Already matched + continue; + } + + if( (edgeLocWeight[k1] > heaviestEdgeWt) || + ((edgeLocWeight[k1] == heaviestEdgeWt)&&(w < verLocInd[k1])) ) { + heaviestEdgeWt = edgeLocWeight[k1]; + w = verLocInd[k1]; + } + } //End of for loop + candidateMate[v-StartIndex] = w; + //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= 0 ) { + if ( (w < StartIndex) || (w > EndIndex) ) { //w is a ghost + //Build the Message Packet: + Message[0] = v; //LOCAL + Message[1] = w; //GHOST + Message[2] = REQUEST; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; + if ( candidateMate[NLVer+Ghost2LocalMap[w]] == v ) { + Mate[v-StartIndex] = w; //v is local + GMate[Ghost2LocalMap[w]] = v; //w is ghost + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[w]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,w) + } //End of if CandidateMate[w] = v + } //End of if a Ghost Vertex + else { //w is a local vertex + if ( candidateMate[w-StartIndex] == v ) { + Mate[v-StartIndex] = w; //v is local + Mate[w-StartIndex] = v; //w is local + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<=0) + else { //no dominating edge found: w == -1 + adj11 = verLocPtr[v-StartIndex]; + adj12 = verLocPtr[v-StartIndex+1]; + for( k1 = adj11; k1 < adj12; k1++ ) { + w = verLocInd[k1]; + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + Message[0] = v; //LOCAL + Message[1] = w; //GHOST + Message[2] = FAILURE; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; + } //End of if(GHOST) + } //End of for loop + } // End of Else: w == -1 + //End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } //End of If (candidateMate[v-StartIndex] == u) + } //End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex: + else { //Neighbor v is a ghost vertex + if ( candidateMate[NLVer+Ghost2LocalMap[v]] == u ) + candidateMate[NLVer+Ghost2LocalMap[v]] = -1; + if ( v != Mate[u-StartIndex] ) { //u is a local vertex + //Build the Message Packet: + Message[0] = u; //LOCAL + Message[1] = v; //GHOST + Message[2] = SUCCESS; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; +#ifdef DEBUG_GHOST_ + if ((uEndIndex)) { + cout<<"\n("<= StartIndex) && (u <= EndIndex) ) //Process Only If a Local Vertex + } //End of while ( /*!Q.empty()*/ !U.empty() ) + ///////////////////////// END OF PROCESS MATCHED VERTICES ///////////////////////// + + //// BREAK IF NO MESSAGES EXPECTED ///////// +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), MPI_ANY_SOURCE, ComputeTag, comm, &computeStatus); + if (error_codeC != MPI_SUCCESS ) { + MPI_Error_string(error_codeC, error_message, &message_length); + cout<<"\n*Error in call to MPI_Receive on Slave: "<(), Sender, BundleTag, comm, &computeStatus); + if (error_codeC != MPI_SUCCESS ) { + MPI_Error_string(error_codeC, error_message, &message_length); + cout<<"\n*Error in call to MPI_Receive on processor "<NLVer)) { + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[u]] = Counter[Ghost2LocalMap[u]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[u]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,u) + } //End of if ( candidateMate[v-StartIndex] == u )e + } //End of if ( Mate[v] == -1 ) + } //End of REQUEST + else { //CASE II: SUCCESS + if ( message_type == SUCCESS ) { +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[u]] = Counter[Ghost2LocalMap[u]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[u]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,u) +#ifdef DEBUG_GHOST_ + if ((v<0)||(vNLVer)) { + cout<<"\n("<EndIndex) ) { //Is it a ghost vertex? + if(GMate[Ghost2LocalMap[verLocInd[k1]]] >= 0 )// Already matched + continue; + } + else { //A local vertex + if( Mate[verLocInd[k1]-StartIndex] >= 0 ) // Already matched + continue; + } + + if( (edgeLocWeight[k1] > heaviestEdgeWt) || + ((edgeLocWeight[k1] == heaviestEdgeWt)&&(w < verLocInd[k1])) ) { + heaviestEdgeWt = edgeLocWeight[k1]; + w = verLocInd[k1]; + } + } //End of for loop + candidateMate[v-StartIndex] = w; + //End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<= 0 ) { + if ( (w < StartIndex) || (w > EndIndex) ) { //w is a ghost + //Build the Message Packet: + Message[0] = v; //LOCAL + Message[1] = w; //GHOST + Message[2] = REQUEST; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; + if ( candidateMate[NLVer+Ghost2LocalMap[w]] == v ) { + Mate[v-StartIndex] = w; //v is local + GMate[Ghost2LocalMap[w]] = v; //w is ghost + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[w]] = Counter[Ghost2LocalMap[w]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[w]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,w) + } //End of if CandidateMate[w] = v + } //End of if a Ghost Vertex + else { //w is a local vertex + if ( candidateMate[w-StartIndex] == v ) { + Mate[v-StartIndex] = w; //v is local + Mate[w-StartIndex] = v; //w is local + //Q.push_back(u); + U.push_back(v); + U.push_back(w); + myCard++; +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<=0) + else { //No dominant edge found + adj11 = verLocPtr[v-StartIndex]; + adj12 = verLocPtr[v-StartIndex+1]; + for( k1 = adj11; k1 < adj12; k1++ ) { + w = verLocInd[k1]; + if ( (w < StartIndex) || (w > EndIndex) ) { //A ghost + //Build the Message Packet: + Message[0] = v; //LOCAL + Message[1] = w; //GHOST + Message[2] = FAILURE; //TYPE + //Send a Request (Asynchronous) +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("<(), ghostOwner, ComputeTag, comm); + msgInd++; msgActual++; + } //End of if(GHOST) + } //End of for loop + } // End of Else: w == -1 + //End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v) + } //End of if ( candidateMate[v-StartIndex] == u ) + } //End of if ( Mate[v] == -1 ) + } //End of if ( message_type == SUCCESS ) + else { //CASE III: FAILURE +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + Counter[Ghost2LocalMap[u]] = Counter[Ghost2LocalMap[u]] - 1; //Decrement + if ( Counter[Ghost2LocalMap[u]] == 0 ) { + S--; //Decrement S +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 + //End: PARALLEL_PROCESS_CROSS_EDGE_B(v,u) + } //End of else: CASE III + } //End of else: CASE I + } //End of if (!MsgQ.empty()) + ///////////////////////// END OF PROCESS MESSAGES ///////////////////////////////// +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0 ) { + MPI_Buffer_detach(&Buffer, &BufferSize); //Detach the Buffer + free(Buffer); //Free the memory that was allocated + } + finishTime = MPI_Wtime(); + *ph2_time = finishTime-startTime; //Time taken for Phase-2 + *ph2_card = myCard ; //Cardinality at the end of Phase-2 + +#ifdef PRINT_DEBUG_INFO_ + cout<<"\n("< 0) { + *msgPercent = ((double)NumMessagesBundled/(double)(msgInd))*100.0; + } else { + *msgPercent = 0; + } + + } //end single + + } //end of parallel region + +#ifdef DEBUG_HANG_ + if (myRank == 0) cout<<"\n("< vtxIndex ) + End = Current - 1; + else //CASE 3: + Start = Current + 1; + } + } //End of While() + if ( Current == 0 ) + return (Current); + else { + if ( mVerDistance[Current] > vtxIndex ) + return (Current-1); + else + return (Current); + } //End of else + return (-1); //It should not reach here! +} //End of findOwnerOfGhost() +#endif + +#endif \ No newline at end of file diff --git a/exec.sh b/exec.sh index d6e77a21..3174e0a5 100755 --- a/exec.sh +++ b/exec.sh @@ -2,6 +2,6 @@ make all cd samples/advanced/pdegen make amg_d_pde3d cd runs -mpirun -np 2 amg_d_pde3d amg_pde3d.inp +mpirun -np 4 amg_d_pde3d amg_pde3d.inp