Merge branch 'petrilli-m' into openmp-match

omp-walther
Salvatore Filippone 2 years ago
commit 0d624df346

@@ -1,4 +1,3 @@
AMG4PSBLAS
Algebraic Multigrid Package based on PSBLAS (Parallel Sparse BLAS version 3.8)

@@ -62,7 +62,21 @@ amg_s_parmatch_smth_bld.o \
amg_s_parmatch_spmm_bld_inner.o
MPCOBJS=MatchBoxPC.o \
sendBundledMessages.o \
initialize.o \
extractUChunk.o \
isAlreadyMatched.o \
findOwnerOfGhost.o \
computeCandidateMate.o \
parallelComputeCandidateMateB.o \
processMatchedVertices.o \
processMatchedVerticesAndSendMessages.o \
processCrossEdge.o \
queueTransfer.o \
processMessages.o \
processExposedVertex.o \
algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.o \
algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.o
OBJS = $(FOBJS) $(MPCOBJS)

@@ -60,17 +60,43 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt* ph1_card, MilanLongInt* ph2_card ) {
#if !defined(SERIAL_MPI)
MPI_Comm C_comm=MPI_Comm_f2c(icomm);
#ifdef DEBUG
fprintf(stderr,"MatchBoxPC: rank %d nlver %ld nledge %ld [ %ld %ld ]\n",
myRank,NLVer, NLEdge,verDistance[0],verDistance[1]);
#endif
#define TIME_TRACKER
#ifdef TIME_TRACKER
double tmr = MPI_Wtime();
#endif
#define OMP
#ifdef OMP
dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(NLVer, NLEdge,
verLocPtr, verLocInd, edgeLocWeight,
verDistance, Mate,
myRank, numProcs, C_comm,
msgIndSent, msgActualSent, msgPercent,
ph0_time, ph1_time, ph2_time,
ph1_card, ph2_card );
#else
dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(NLVer, NLEdge,
verLocPtr, verLocInd, edgeLocWeight,
verDistance, Mate,
myRank, numProcs, C_comm,
msgIndSent, msgActualSent, msgPercent,
ph0_time, ph1_time, ph2_time,
ph1_card, ph2_card );
#endif
#ifdef TIME_TRACKER
tmr = MPI_Wtime() - tmr;
fprintf(stderr, "Elaboration time: %f for %ld nodes\n", tmr, NLVer);
#endif
#endif
}

@@ -52,145 +52,412 @@
#ifndef _matchboxpC_H_
#define _matchboxpC_H_
// Turn on a lot of debugging information with this switch:
//#define PRINT_DEBUG_INFO_
#include <stdio.h>
#include <iostream>
#include <assert.h>
#include <map>
#include <vector>
// #include "matchboxp.h"
#include "omp.h"
#include "primitiveDataTypeDefinitions.h"
#include "dataStrStaticQueue.h"
using namespace std;
const int NUM_THREAD = 4;
const int UCHUNK = 10;
const MilanLongInt REQUEST = 1;
const MilanLongInt SUCCESS = 2;
const MilanLongInt FAILURE = 3;
const MilanLongInt SIZEINFO = 4;
const int ComputeTag = 7; // Predefined tag
const int BundleTag = 9; // Predefined tag
static vector<MilanLongInt> DEFAULT_VECTOR;
// MPI type map
template <typename T>
MPI_Datatype TypeMap();
template <>
inline MPI_Datatype TypeMap<int64_t>() { return MPI_LONG_LONG; }
template <>
inline MPI_Datatype TypeMap<int>() { return MPI_INT; }
template <>
inline MPI_Datatype TypeMap<double>() { return MPI_DOUBLE; }
template <>
inline MPI_Datatype TypeMap<float>() { return MPI_FLOAT; }
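// Illustrative note (not in the original source): these specializations let
// templated communication code select the matching MPI datatype at compile
// time, e.g. MPI_Send(buf, count, TypeMap<MilanReal>(), dest, tag, comm);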
#ifdef __cplusplus
extern "C" {
extern "C"
{
#endif
#if !defined(SERIAL_MPI)
#define MilanMpiLongInt MPI_LONG_LONG
#ifndef _primitiveDataType_Definition_
#define _primitiveDataType_Definition_
// Regular integer:
#ifndef INTEGER_H
#define INTEGER_H
typedef int32_t MilanInt;
#endif
// Regular long integer:
#ifndef LONG_INT_H
#define LONG_INT_H
#ifdef BIT64
typedef int64_t MilanLongInt;
typedef MPI_LONG MilanMpiLongInt;
#else
typedef int32_t MilanLongInt;
typedef MPI_INT MilanMpiLongInt;
#endif
#endif
// Regular boolean
#ifndef BOOL_H
#define BOOL_H
typedef bool MilanBool;
#endif
// Regular double and absolute value computation:
#ifndef REAL_H
#define REAL_H
typedef double MilanReal;
typedef MPI_DOUBLE MilanMpiReal;
inline MilanReal MilanAbs(MilanReal value)
{
return fabs(value);
}
#endif
// Regular float and absolute value computation:
#ifndef FLOAT_H
#define FLOAT_H
typedef float MilanFloat;
typedef MPI_FLOAT MilanMpiFloat;
inline MilanFloat MilanAbsFloat(MilanFloat value)
{
return fabs(value);
}
#endif
//// Define the limits:
#ifndef LIMITS_H
#define LIMITS_H
// Integer Maximum and Minimum:
// #define MilanIntMax INT_MAX
// #define MilanIntMin INT_MIN
#define MilanIntMax INT32_MAX
#define MilanIntMin INT32_MIN
#ifdef BIT64
#define MilanLongIntMax INT64_MAX
#define MilanLongIntMin -INT64_MAX
#else
#define MilanLongIntMax INT32_MAX
#define MilanLongIntMin -INT32_MAX
#endif
#endif
// +INFINITY
const double PLUS_INFINITY = numeric_limits<double>::infinity();
const double MINUS_INFINITY = -PLUS_INFINITY;
//#define MilanRealMax LDBL_MAX
#define MilanRealMax PLUS_INFINITY
#define MilanRealMin MINUS_INFINITY
#endif
// Function to find the owner of a ghost vertex using binary search:
MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance,
MilanInt myRank, MilanInt numProcs);
MilanLongInt firstComputeCandidateMate(MilanLongInt adj1,
MilanLongInt adj2,
MilanLongInt *verLocInd,
MilanReal *edgeLocWeight);
void queuesTransfer(vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner);
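// Design note (inferred from the call sites below; implementation not shown
// here): each thread accumulates work in its private* vectors, and
// queuesTransfer merges them into the shared queues, so threads touch the
// shared state far less often than with per-push locking.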
bool isAlreadyMatched(MilanLongInt node,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
vector<MilanLongInt> &GMate,
MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap);
MilanLongInt computeCandidateMate(MilanLongInt adj1,
MilanLongInt adj2,
MilanReal *edgeLocWeight,
MilanLongInt k,
MilanLongInt *verLocInd,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
vector<MilanLongInt> &GMate,
MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap);
void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt StartIndex, MilanLongInt EndIndex,
MilanLongInt *numGhostEdgesPtr,
MilanLongInt *numGhostVerticesPtr,
MilanLongInt *S,
MilanLongInt *verLocInd,
MilanLongInt *verLocPtr,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
vector<MilanLongInt> &Counter,
vector<MilanLongInt> &verGhostPtr,
vector<MilanLongInt> &verGhostInd,
vector<MilanLongInt> &tempCounter,
vector<MilanLongInt> &GMate,
vector<MilanLongInt> &Message,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
MilanLongInt *&candidateMate,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner);
void clean(MilanLongInt NLVer,
MilanInt myRank,
MilanLongInt MessageIndex,
vector<MPI_Request> &SRequest,
vector<MPI_Status> &SStatus,
MilanInt BufferSize,
MilanLongInt *Buffer,
MilanLongInt msgActual,
MilanLongInt *msgActualSent,
MilanLongInt msgInd,
MilanLongInt *msgIndSent,
MilanLongInt NumMessagesBundled,
MilanReal *msgPercent);
void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer,
MilanLongInt *verLocPtr,
MilanLongInt *verLocInd,
MilanInt myRank,
MilanReal *edgeLocWeight,
MilanLongInt *candidateMate);
void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
MilanLongInt *candidateMate,
MilanLongInt *verLocInd,
MilanLongInt *verLocPtr,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *Mate,
vector<MilanLongInt> &GMate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
MilanReal *edgeLocWeight,
MilanLongInt *myCardPtr,
MilanLongInt *msgIndPtr,
MilanLongInt *NumMessagesBundledPtr,
MilanLongInt *SPtr,
MilanLongInt *verDistance,
MilanLongInt *PCounter,
vector<MilanLongInt> &Counter,
MilanInt myRank,
MilanInt numProcs,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner);
void PROCESS_CROSS_EDGE(MilanLongInt *edge,
MilanLongInt *SPtr);
void processMatchedVertices(
MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *myCardPtr,
MilanLongInt *msgIndPtr,
MilanLongInt *NumMessagesBundledPtr,
MilanLongInt *SPtr,
MilanLongInt *verLocPtr,
MilanLongInt *verLocInd,
MilanLongInt *verDistance,
MilanLongInt *PCounter,
vector<MilanLongInt> &Counter,
MilanInt myRank,
MilanInt numProcs,
MilanLongInt *candidateMate,
vector<MilanLongInt> &GMate,
MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
MilanReal *edgeLocWeight,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner);
void processMatchedVerticesAndSendMessages(
MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *myCardPtr,
MilanLongInt *msgIndPtr,
MilanLongInt *NumMessagesBundledPtr,
MilanLongInt *SPtr,
MilanLongInt *verLocPtr,
MilanLongInt *verLocInd,
MilanLongInt *verDistance,
MilanLongInt *PCounter,
vector<MilanLongInt> &Counter,
MilanInt myRank,
MilanInt numProcs,
MilanLongInt *candidateMate,
vector<MilanLongInt> &GMate,
MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
MilanReal *edgeLocWeight,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner,
MPI_Comm comm,
MilanLongInt *msgActual,
vector<MilanLongInt> &Message);
void sendBundledMessages(MilanLongInt *numGhostEdgesPtr,
MilanInt *BufferSizePtr,
MilanLongInt *Buffer,
vector<MilanLongInt> &PCumulative,
vector<MilanLongInt> &PMessageBundle,
vector<MilanLongInt> &PSizeInfoMessages,
MilanLongInt *PCounter,
MilanLongInt NumMessagesBundled,
MilanLongInt *msgActualPtr,
MilanLongInt *MessageIndexPtr,
MilanInt numProcs,
MilanInt myRank,
MPI_Comm comm,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MPI_Request> &SRequest,
vector<MPI_Status> &SStatus);
void processMessages(
MilanLongInt NLVer,
MilanLongInt *Mate,
MilanLongInt *candidateMate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
vector<MilanLongInt> &GMate,
vector<MilanLongInt> &Counter,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *myCardPtr,
MilanLongInt *msgIndPtr,
MilanLongInt *msgActualPtr,
MilanReal *edgeLocWeight,
MilanLongInt *verDistance,
MilanLongInt *verLocPtr,
MilanLongInt k,
MilanLongInt *verLocInd,
MilanInt numProcs,
MilanInt myRank,
MPI_Comm comm,
vector<MilanLongInt> &Message,
MilanLongInt numGhostEdges,
MilanLongInt u,
MilanLongInt v,
MilanLongInt *SPtr,
vector<MilanLongInt> &U);
void extractUChunk(
vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU);
void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanReal *edgeLocWeight,
MilanLongInt *verDistance,
MilanLongInt *Mate,
MilanInt myRank, MilanInt numProcs, MPI_Comm comm,
MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent,
MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time,
MilanLongInt *ph1_card, MilanLongInt *ph2_card);
void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanReal *edgeLocWeight,
MilanLongInt *verDistance,
MilanLongInt *Mate,
MilanInt myRank, MilanInt numProcs, MPI_Comm comm,
MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent,
MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time,
MilanLongInt *ph1_card, MilanLongInt *ph2_card);
void salgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanFloat *edgeLocWeight,
MilanLongInt *verDistance,
MilanLongInt *Mate,
MilanInt myRank, MilanInt numProcs, MPI_Comm comm,
MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent,
MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time,
MilanLongInt *ph1_card, MilanLongInt *ph2_card);
void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanReal *edgeLocWeight,
MilanLongInt *verDistance,
MilanLongInt *Mate,
MilanInt myRank, MilanInt numProcs, MilanInt icomm,
MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent,
MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time,
MilanLongInt *ph1_card, MilanLongInt *ph2_card);
void sMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt *verLocPtr, MilanLongInt *verLocInd, MilanFloat *edgeLocWeight,
MilanLongInt *verDistance,
MilanLongInt *Mate,
MilanInt myRank, MilanInt numProcs, MilanInt icomm,
MilanLongInt *msgIndSent, MilanLongInt *msgActualSent, MilanReal *msgPercent,
MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time,
MilanLongInt *ph1_card, MilanLongInt *ph2_card);
#endif
#ifdef __cplusplus

@@ -72,12 +72,6 @@
#ifdef SERIAL_MPI
#else
// DOUBLE PRECISION VERSION
//WARNING: The vertex block on a given rank is contiguous

@@ -0,0 +1,485 @@
#include "MatchBoxPC.h"
// ***********************************************************************
//
// MatchboxP: A C++ library for approximate weighted matching
// Mahantesh Halappanavar (hala@pnnl.gov)
// Pacific Northwest National Laboratory
//
// ***********************************************************************
//
// Copyright (2021) Battelle Memorial Institute
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// ************************************************************************
//////////////////////////////////////////////////////////////////////////////////////
/////////////////////////// DOMINATING EDGES MODEL ///////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////
/* Function : algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate()
*
* Date : New update: Feb 17, 2019, Richland, Washington.
* Date : Original development: May 17, 2009, E&CS Bldg.
*
* Purpose : Compute Approximate Maximum Weight Matching in Linear Time
*
* Args : inputMatrix - instance of Compressed-Col format of Matrix
* Mate - The Mate array
*
* Returns : By Value: (void)
* By Reference: Mate
*
* Comments : 1/2 Approx Algorithm. Picks the locally available heaviest edge.
* Assumption: The Mate Array is empty.
*/
/*
NLVer = #of vertices, NLEdge = #of edges
CSR/CSC/Compressed format: verLocPtr = Pointer, verLocInd = Index, edgeLocWeight = edge weights (positive real numbers)
verDistance = A vector of size |P|+1 containing the cumulative number of vertices per process
Mate = A vector of size |V_p| (local subgraph) to store the output (matching)
MPI: myRank, numProcs, comm,
Statistics: msgIndSent, msgActualSent, msgPercent : Size: |P| number of processes in the comm-world
Statistics: ph0_time, ph1_time, ph2_time: Runtimes
Statistics: ph1_card, ph2_card : Size: |P| number of processes in the comm-world (number of matched edges in Phase 1 and Phase 2)
*/
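/*
Illustrative example (assumed data, not from the original source):
with numProcs = 2 and verDistance = {0, 4, 9}, rank 0 owns vertices 0..3
and rank 1 owns vertices 4..8. On rank 0, a triangle 0-1-2 plus the cross
edge 0-4 is stored as
  verLocPtr = {0, 3, 5, 7, 7}          (vertex 3 has no edges)
  verLocInd = {1, 2, 4, 0, 2, 0, 1}    (NLVer = 4, NLEdge = 7)
with edgeLocWeight holding the corresponding positive weights;
vertex 4 is a ghost vertex for rank 0.
*/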
#ifdef SERIAL_MPI
#else
// DOUBLE PRECISION VERSION
// WARNING: The vertex block on a given rank is contiguous
void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt *verLocPtr, MilanLongInt *verLocInd,
MilanReal *edgeLocWeight,
MilanLongInt *verDistance,
MilanLongInt *Mate,
MilanInt myRank, MilanInt numProcs, MPI_Comm comm,
MilanLongInt *msgIndSent, MilanLongInt *msgActualSent,
MilanReal *msgPercent,
MilanReal *ph0_time, MilanReal *ph1_time, MilanReal *ph2_time,
MilanLongInt *ph1_card, MilanLongInt *ph2_card)
{
/*
* verDistance: a vector of size |P| + 1.
* verDistance[i] is the index of the first vertex owned by process i;
* verDistance[i + 1] - 1 is the index of the last vertex owned by process i.
* NLVer: number of local vertices (entries of verLocPtr minus one)
* NLEdge: number of edge entries assigned to the current process
*
* The portion of the matrix assigned to the process is stored in
* Yale (CSR) notation:
* verLocInd: column positions of the entries within each row
* verLocPtr: the i-th value is the position of the first element of row i,
* and the (i+1)-th value is the position of the first element of row i+1
*/
#if !defined(SERIAL_MPI)
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Within algoEdgeApproxDominatingEdgesLinearSearchMessageBundling()";
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ") verDistance [" << verDistance[0] << "," << verDistance[1] << "," << verDistance[2] << "," << verDistance[3] << "]";
fflush(stdout);
#endif
#ifdef DEBUG_HANG_
if (myRank == 0)
cout << "\n(" << myRank << ") verDistance [" << verDistance[0] << "," << verDistance[1] << "," << verDistance[2] << "," << verDistance[3] << "]";
fflush(stdout);
#endif
MilanLongInt StartIndex = verDistance[myRank]; // The starting vertex owned by the current rank
MilanLongInt EndIndex = verDistance[myRank + 1] - 1; // The ending vertex owned by the current rank
MPI_Status computeStatus;
MilanLongInt msgActual = 0, msgInd = 0;
MilanReal heaviestEdgeWt = 0.0f; // Assumes positive weight
MilanReal startTime, finishTime;
startTime = MPI_Wtime();
// Data structures for sending and receiving messages:
vector<MilanLongInt> Message; // [ u, v, message_type ]
Message.resize(3, -1);
// Data structures for Message Bundling:
// Although up to two messages can be sent along any cross edge,
// only one message will be sent in the initialization phase -
// one of: REQUEST/FAILURE/SUCCESS
vector<MilanLongInt> QLocalVtx, QGhostVtx, QMsgType;
vector<MilanInt> QOwner; // Changed by Fabio to be an integer; addresses need to be integers!
MilanLongInt *PCounter = new MilanLongInt[numProcs];
for (int i = 0; i < numProcs; i++)
PCounter[i] = 0;
MilanLongInt NumMessagesBundled = 0;
// TODO: once the last computational section is refactored, this could be eliminated
MilanInt ghostOwner = 0; // Changed by Fabio to be an integer; addresses need to be integers!
MilanLongInt *candidateMate = nullptr;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")NV: " << NLVer << " Edges: " << NLEdge;
fflush(stdout);
cout << "\n(" << myRank << ")StartIndex: " << StartIndex << " EndIndex: " << EndIndex;
fflush(stdout);
#endif
// Other Variables:
MilanLongInt u = -1, v = -1, w = -1, i = 0;
MilanLongInt k = -1, adj1 = -1, adj2 = -1;
MilanLongInt k1 = -1, adj11 = -1, adj12 = -1;
MilanLongInt myCard = 0;
// Build the Ghost Vertex Set: Vg
map<MilanLongInt, MilanLongInt> Ghost2LocalMap; // Map each ghost vertex to a local vertex
vector<MilanLongInt> Counter; // Store the edge count for each ghost vertex
MilanLongInt numGhostVertices = 0, numGhostEdges = 0; // Number of Ghost vertices
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")About to compute Ghost Vertices...";
fflush(stdout);
#endif
#ifdef DEBUG_HANG_
if (myRank == 0)
cout << "\n(" << myRank << ")About to compute Ghost Vertices...";
fflush(stdout);
#endif
// Define Adjacency Lists for Ghost Vertices:
// cout<<"Building Ghost data structures ... \n\n";
vector<MilanLongInt> verGhostPtr, verGhostInd, tempCounter;
// Mate array for ghost vertices:
vector<MilanLongInt> GMate; // Proportional to the number of ghost vertices
MilanLongInt S;
MilanLongInt privateMyCard = 0;
vector<MilanLongInt> PCumulative, PMessageBundle, PSizeInfoMessages;
vector<MPI_Request> SRequest; // Requests that are used for each send message
vector<MPI_Status> SStatus; // Status of sent messages, used in MPI_Wait
MilanLongInt MessageIndex = 0; // Pointer for current message
MilanInt BufferSize;
MilanLongInt *Buffer;
vector<MilanLongInt> privateQLocalVtx, privateQGhostVtx, privateQMsgType;
vector<MilanInt> privateQOwner;
vector<MilanLongInt> U, privateU;
initialize(NLVer, NLEdge, StartIndex,
EndIndex, &numGhostEdges,
&numGhostVertices, &S,
verLocInd, verLocPtr,
Ghost2LocalMap, Counter,
verGhostPtr, verGhostInd,
tempCounter, GMate,
Message, QLocalVtx,
QGhostVtx, QMsgType, QOwner,
candidateMate, U,
privateU,
privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
finishTime = MPI_Wtime();
*ph0_time = finishTime - startTime; // Time taken for Phase-0: Initialization
startTime = MPI_Wtime();
/////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////// INITIALIZATION /////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////
// Compute the Initial Matching Set:
/*
* OMP: PARALLEL_COMPUTE_CANDIDATE_MATE_B has been split from
* PARALLEL_PROCESS_EXPOSED_VERTEX_B in order to parallelize
* the two more effectively.
* PARALLEL_COMPUTE_CANDIDATE_MATE_B is now fully parallel.
*/
PARALLEL_COMPUTE_CANDIDATE_MATE_B(NLVer,
verLocPtr,
verLocInd,
myRank,
edgeLocWeight,
candidateMate);
/*
* PARALLEL_PROCESS_EXPOSED_VERTEX_B
* TODO: write comment
*
* TODO: Test when it's actually more efficient to execute this code
* in parallel.
*/
PARALLEL_PROCESS_EXPOSED_VERTEX_B(NLVer,
candidateMate,
verLocInd,
verLocPtr,
StartIndex,
EndIndex,
Mate,
GMate,
Ghost2LocalMap,
edgeLocWeight,
&myCard,
&msgInd,
&NumMessagesBundled,
&S,
verDistance,
PCounter,
Counter,
myRank,
numProcs,
U,
privateU,
QLocalVtx,
QGhostVtx,
QMsgType,
QOwner,
privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
tempCounter.clear(); // Do not need this any more
///////////////////////////////////////////////////////////////////////////////////
/////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
// TODO what would be the optimal UCHUNK
vector<MilanLongInt> UChunkBeingProcessed;
UChunkBeingProcessed.reserve(UCHUNK);
processMatchedVertices(NLVer,
UChunkBeingProcessed,
U,
privateU,
StartIndex,
EndIndex,
&myCard,
&msgInd,
&NumMessagesBundled,
&S,
verLocPtr,
verLocInd,
verDistance,
PCounter,
Counter,
myRank,
numProcs,
candidateMate,
GMate,
Mate,
Ghost2LocalMap,
edgeLocWeight,
QLocalVtx,
QGhostVtx,
QMsgType,
QOwner,
privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
/////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////// SEND BUNDLED MESSAGES /////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////
sendBundledMessages(&numGhostEdges,
&BufferSize,
Buffer,
PCumulative,
PMessageBundle,
PSizeInfoMessages,
PCounter,
NumMessagesBundled,
&msgActual,
&MessageIndex,
numProcs,
myRank,
comm,
QLocalVtx,
QGhostVtx,
QMsgType,
QOwner,
SRequest,
SStatus);
///////////////////////// END OF SEND BUNDLED MESSAGES //////////////////////////////////
finishTime = MPI_Wtime();
*ph1_time = finishTime - startTime; // Time taken for Phase-1
*ph1_card = myCard; // Cardinality at the end of Phase-1
startTime = MPI_Wtime();
/////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////// MAIN LOOP //////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////
// Main While Loop:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Entering While(true) loop..";
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
fflush(stdout);
#endif
while (true)
{
#ifdef DEBUG_HANG_
if (myRank == 0)
cout << "\n(" << myRank << ") Main loop" << endl;
fflush(stdout);
#endif
///////////////////////////////////////////////////////////////////////////////////
/////////////////////////// PROCESS MATCHED VERTICES //////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
processMatchedVerticesAndSendMessages(NLVer,
UChunkBeingProcessed,
U,
privateU,
StartIndex,
EndIndex,
&myCard,
&msgInd,
&NumMessagesBundled,
&S,
verLocPtr,
verLocInd,
verDistance,
PCounter,
Counter,
myRank,
numProcs,
candidateMate,
GMate,
Mate,
Ghost2LocalMap,
edgeLocWeight,
QLocalVtx,
QGhostVtx,
QMsgType,
QOwner,
privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner,
comm,
&msgActual,
Message);
///////////////////////// END OF PROCESS MATCHED VERTICES /////////////////////////
//// BREAK IF NO MESSAGES EXPECTED /////////
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Deciding whether to break: S= " << S << endl;
#endif
if (S == 0)
{
#ifdef DEBUG_HANG_
cout << "\n(" << myRank << ") Breaking out" << endl;
fflush(stdout);
#endif
break;
}
///////////////////////////////////////////////////////////////////////////////////
/////////////////////////// PROCESS MESSAGES //////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
processMessages(NLVer,
Mate,
candidateMate,
Ghost2LocalMap,
GMate,
Counter,
StartIndex,
EndIndex,
&myCard,
&msgInd,
&msgActual,
edgeLocWeight,
verDistance,
verLocPtr,
k,
verLocInd,
numProcs,
myRank,
comm,
Message,
numGhostEdges,
u,
v,
&S,
U);
///////////////////////// END OF PROCESS MESSAGES /////////////////////////////////
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Finished Message processing phase: S= " << S;
fflush(stdout);
cout << "\n(" << myRank << ")** SENT : ACTUAL= " << msgActual;
fflush(stdout);
cout << "\n(" << myRank << ")** SENT : INDIVIDUAL= " << msgInd << endl;
fflush(stdout);
#endif
} // End of while (true)
clean(NLVer,
myRank,
MessageIndex,
SRequest,
SStatus,
BufferSize,
Buffer,
msgActual,
msgActualSent,
msgInd,
msgIndSent,
NumMessagesBundled,
msgPercent);
finishTime = MPI_Wtime();
*ph2_time = finishTime - startTime; // Time taken for Phase-2
*ph2_card = myCard; // Cardinality at the end of Phase-2
}
// End of algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMate
#endif
#endif

@@ -0,0 +1,91 @@
#include "MatchBoxPC.h"
// TODO comment
void clean(MilanLongInt NLVer,
MilanInt myRank,
MilanLongInt MessageIndex,
vector<MPI_Request> &SRequest,
vector<MPI_Status> &SStatus,
MilanInt BufferSize,
MilanLongInt *Buffer,
MilanLongInt msgActual,
MilanLongInt *msgActualSent,
MilanLongInt msgInd,
MilanLongInt *msgIndSent,
MilanLongInt NumMessagesBundled,
MilanReal *msgPercent)
{
// Cleanup Phase
#pragma omp parallel
{
#pragma omp master
{
#pragma omp task
{
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ") Waitall= " << endl;
fflush(stdout);
#endif
#ifdef DEBUG_HANG_
cout << "\n(" << myRank << ") Waitall " << endl;
fflush(stdout);
#endif
MPI_Waitall(MessageIndex, &SRequest[0], &SStatus[0]);
// MPI_Buffer_attach(&Buffer, BufferSize); //Attach the Buffer
if (BufferSize > 0)
{
MPI_Buffer_detach(&Buffer, &BufferSize); // Detach the Buffer
free(Buffer); // Free the memory that was allocated
}
}
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")End of function to compute matching: " << endl;
fflush(stdout);
cout << "\n(" << myRank << ")myCardinality: " << myCard << endl;
fflush(stdout);
cout << "\n(" << myRank << ")Matching took " << finishTime - startTime << "seconds" << endl;
fflush(stdout);
cout << "\n(" << myRank << ")** Getting out of the matching function **" << endl;
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ") Number of Ghost edges = " << numGhostEdges;
cout << "\n(" << myRank << ") Total number of potential message X 2 = " << numGhostEdges * 2;
cout << "\n(" << myRank << ") Number messages bundled = " << NumMessagesBundled;
cout << "\n(" << myRank << ") Total Individual Messages sent = " << msgInd;
if (msgInd > 0)
{
cout << "\n(" << myRank << ") Percentage of messages bundled = " << ((double)NumMessagesBundled / (double)(msgInd)) * 100.0 << "% \n";
}
fflush(stdout);
#endif
#pragma omp task
{
*msgActualSent = msgActual;
*msgIndSent = msgInd;
if (msgInd > 0)
{
*msgPercent = ((double)NumMessagesBundled / (double)(msgInd)) * 100.0;
}
else
{
*msgPercent = 0;
}
}
#ifdef DEBUG_HANG_
if (myRank == 0)
cout << "\n(" << myRank << ") Done" << endl;
fflush(stdout);
#endif
}
}
}

@@ -0,0 +1,78 @@
#include "MatchBoxPC.h"
/**
* Performs the search for the candidate mate without checking whether the vertices are already matched.
* Returns the index (in verLocInd) of the locally heaviest edge.
* @param adj1
* @param adj2
* @param verLocInd
* @param edgeLocWeight
* @return
*/
MilanLongInt firstComputeCandidateMate(MilanLongInt adj1,
MilanLongInt adj2,
MilanLongInt *verLocInd,
MilanReal *edgeLocWeight)
{
MilanInt w = -1;
MilanReal heaviestEdgeWt = MilanRealMin; // Assign the smallest Value possible first LDBL_MIN
int finalK = -1; // Index of the heaviest edge seen so far
for (int k = adj1; k < adj2; k++)
{
if ((edgeLocWeight[k] > heaviestEdgeWt) ||
((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k])))
{
heaviestEdgeWt = edgeLocWeight[k];
w = verLocInd[k];
finalK = k;
}
} // End of for loop
return finalK;
}
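// Illustrative example (assumed data, not from the original source): over
// [adj1, adj2) = [0, 3) with verLocInd = {5, 7, 9} and
// edgeLocWeight = {2.0, 3.0, 3.0}, the weights of neighbours 7 and 9 tie at
// 3.0 and the larger vertex index wins, so the function returns finalK = 2.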
/**
* //TODO documentation
* @param adj1
* @param adj2
* @param edgeLocWeight
* @param k
* @param verLocInd
* @param StartIndex
* @param EndIndex
* @param GMate
* @param Mate
* @param Ghost2LocalMap
* @return
*/
MilanLongInt computeCandidateMate(MilanLongInt adj1,
MilanLongInt adj2,
MilanReal *edgeLocWeight,
MilanLongInt k,
MilanLongInt *verLocInd,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
vector<MilanLongInt> &GMate,
MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap)
{
// Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
MilanInt w = -1;
MilanReal heaviestEdgeWt = MilanRealMin; // Assign the smallest Value possible first LDBL_MIN
for (k = adj1; k < adj2; k++)
{
if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap))
continue;
if ((edgeLocWeight[k] > heaviestEdgeWt) ||
((edgeLocWeight[k] == heaviestEdgeWt) && (w < verLocInd[k])))
{
heaviestEdgeWt = edgeLocWeight[k];
w = verLocInd[k];
}
} // End of for loop
// End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
return w;
}

@@ -80,9 +80,11 @@ class staticQueue
MilanLongInt squeueTail;
MilanLongInt NumNodes;
//FIXME I had to comment this piece of code in order to make everything work.
// why?
//Prevent Assignment and Pass by Value:
//staticQueue(const staticQueue& src);
//staticQueue& operator=(const staticQueue& rhs);
public:
//Constructors and Destructors

@@ -0,0 +1,31 @@
#include "MatchBoxPC.h"
void extractUChunk(
vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU)
{
UChunkBeingProcessed.clear();
#pragma omp critical(U)
{
if (U.empty() && !privateU.empty()) // If U is empty but there are nodes in private U
{
while (!privateU.empty())
{
UChunkBeingProcessed.push_back(privateU.back());
privateU.pop_back();
}
}
else
{
for (int i = 0; i < UCHUNK; i++)
{ // Pop the new nodes
if (U.empty())
break;
UChunkBeingProcessed.push_back(U.back());
U.pop_back();
}
}
} // End of critical U
}
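// Design note: pulling up to UCHUNK vertices out of U per critical section
// amortizes the locking cost; when the shared queue is empty a thread first
// drains its own privateU instead.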

@@ -0,0 +1,29 @@
#include "MatchBoxPC.h"
/// Find the owner of a ghost node:
MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance,
MilanInt myRank, MilanInt numProcs)
{
MilanLongInt mStartInd = mVerDistance[myRank];
MilanInt Start = 0;
MilanInt End = numProcs;
MilanInt Current = 0;
while (Start <= End)
{
Current = (End + Start) / 2;
// CASE-1:
if (mVerDistance[Current] == vtxIndex) return Current;
else // CASE 2:
if (mVerDistance[Current] > vtxIndex)
End = Current - 1;
else // CASE 3:
Start = Current + 1;
} // End of While()
if (mVerDistance[Current] > vtxIndex)
return (Current - 1);
return Current;
} // End of findOwnerOfGhost()
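// Illustrative example (assumed data, not from the original source): with
// numProcs = 2 and mVerDistance = {0, 4, 9}, findOwnerOfGhost(5, ...) probes
// mVerDistance[1] = 4 (not greater than 5), then mVerDistance[2] = 9 (> 5),
// and returns 1: vertex 5 is owned by rank 1.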

@@ -0,0 +1,304 @@
#include "MatchBoxPC.h"
void initialize(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt StartIndex, MilanLongInt EndIndex,
MilanLongInt *numGhostEdges,
MilanLongInt *numGhostVertices,
MilanLongInt *S,
MilanLongInt *verLocInd,
MilanLongInt *verLocPtr,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
vector<MilanLongInt> &Counter,
vector<MilanLongInt> &verGhostPtr,
vector<MilanLongInt> &verGhostInd,
vector<MilanLongInt> &tempCounter,
vector<MilanLongInt> &GMate,
vector<MilanLongInt> &Message,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
MilanLongInt *&candidateMate,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner)
{
MilanLongInt insertMe = 0;
MilanLongInt adj1, adj2;
MilanLongInt i, v, k, w;
// index that starts with zero to |Vg| - 1
map<MilanLongInt, MilanLongInt>::iterator storedAlready;
#pragma omp parallel private(insertMe, k, w, v, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(NUM_THREAD)
{
#pragma omp single
{
#ifdef TIME_TRACKER
double Ghost2LocalInitialization = MPI_Wtime();
#endif
/*
* OMP Ghost2LocalInitialization
* This loop analyzes all the edges and, when it finds a ghost edge,
* puts it in the Ghost2LocalMap.
* A critical region is needed when inserting data into the map.
*
* Despite the critical region it is still profitable to
* parallelize this loop, because the critical region is executed
* only when a ghost edge is found, and ghost edges are a minority,
* circa 3.5% during the tests.
*/
#pragma omp task depend(out \
: *numGhostEdges, Counter, Ghost2LocalMap, insertMe, storedAlready, *numGhostVertices)
{
#pragma omp taskloop num_tasks(NUM_THREAD) reduction(+ \
: numGhostEdges[:1])
for (i = 0; i < NLEdge; i++)
{ // O(m) - Each edge stored twice
insertMe = verLocInd[i];
if ((insertMe < StartIndex) || (insertMe > EndIndex))
{ // Find a ghost
(*numGhostEdges)++;
#pragma omp critical
{
storedAlready = Ghost2LocalMap.find(insertMe);
if (storedAlready != Ghost2LocalMap.end())
{ // Has already been added
Counter[storedAlready->second]++; // Increment the counter
}
else
{ // Insert an entry for the ghost:
Ghost2LocalMap[insertMe] = *numGhostVertices; // Add a map entry
Counter.push_back(1); // Initialize the counter
(*numGhostVertices)++; // Increment the number of ghost vertices
} // End of else()
}
} // End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
} // End of for(ghost vertices)
} // end of task depend
// *numGhostEdges = atomicNumGhostEdges;
#ifdef TIME_TRACKER
Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization;
fprintf(stderr, "Ghost2LocalInitialization time: %f\n", Ghost2LocalInitialization);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")NGhosts:" << *numGhostVertices << " GhostEdges: " << *numGhostEdges;
if (!Ghost2LocalMap.empty())
{
cout << "\n(" << myRank << ")Final Map : on process ";
cout << "\n(" << myRank << ")Key \t Value \t Counter \n";
fflush(stdout);
storedAlready = Ghost2LocalMap.begin();
do
{
cout << storedAlready->second << " - " << storedAlready->first << " : " << Counter[storedAlready->second] << endl;
fflush(stdout);
storedAlready++;
} while (storedAlready != Ghost2LocalMap.end());
}
#endif
#pragma omp task depend(out \
: verGhostPtr, tempCounter, verGhostInd, GMate) depend(in \
: *numGhostVertices, *numGhostEdges)
{
// Initialize adjacency Lists for Ghost Vertices:
try
{
verGhostPtr.reserve(*numGhostVertices + 1); // Pointer Vector
tempCounter.reserve(*numGhostVertices); // Pointer Vector
verGhostInd.reserve(*numGhostEdges); // Index Vector
GMate.reserve(*numGhostVertices); // Ghost Mate Vector
}
catch (const length_error &)
{
cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n";
cout << "Not enough memory to allocate the internal variables \n";
exit(1);
}
// Initialize the Vectors:
verGhostPtr.resize(*numGhostVertices + 1, 0); // Pointer Vector
tempCounter.resize(*numGhostVertices, 0); // Temporary Counter
verGhostInd.resize(*numGhostEdges, -1); // Index Vector
GMate.resize(*numGhostVertices, -1); // Ghost Mate Vector
verGhostPtr[0] = 0; // The first value
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Ghost Vertex Pointer: ";
fflush(stdout);
#endif
} // End of task
#pragma omp task depend(out \
: verGhostPtr) depend(in \
: Counter, *numGhostVertices)
{
#ifdef TIME_TRACKER
double verGhostPtrInitialization = MPI_Wtime();
#endif
for (i = 0; i < *numGhostVertices; i++)
{ // O(|Ghost Vertices|)
verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i];
#ifdef PRINT_DEBUG_INFO_
cout << verGhostPtr[i] << "\t";
fflush(stdout);
#endif
}
#ifdef TIME_TRACKER
verGhostPtrInitialization = MPI_Wtime() - verGhostPtrInitialization;
fprintf(stderr, "verGhostPtrInitialization time: %f\n", verGhostPtrInitialization);
#endif
} // End of task
#ifdef PRINT_DEBUG_INFO_
if (*numGhostVertices > 0)
cout << verGhostPtr[*numGhostVertices] << "\n";
fflush(stdout);
#endif
#ifdef TIME_TRACKER
double verGhostIndInitialization = MPI_Wtime();
#endif
/*
* OMP verGhostIndInitialization
*
* In this loop verGhostInd is initialized
* with the data related to ghost edges.
* The check to see whether a node is a ghost node is
* executed in parallel, and when a ghost node
* is found a critical region is entered.
*
* Despite the critical region it is still useful to
* parallelize the loop, because the ghost nodes
* are a minority, hence the critical region is executed
* only a few times, circa 3.5% of the times in the tests.
*/
#pragma omp task depend(in \
: insertMe, Ghost2LocalMap, tempCounter, verGhostPtr) depend(out \
: verGhostInd)
{
#pragma omp taskloop num_tasks(NUM_THREAD)
for (v = 0; v < NLVer; v++)
{
adj1 = verLocPtr[v]; // Vertex Pointer
adj2 = verLocPtr[v + 1];
for (k = adj1; k < adj2; k++)
{
w = verLocInd[k]; // Get the adjacent vertex
if ((w < StartIndex) || (w > EndIndex))
{ // Find a ghost
#pragma omp critical
{
insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; // Where to insert
tempCounter[Ghost2LocalMap[w]]++; // Increment the counter
}
verGhostInd[insertMe] = v + StartIndex; // Add the adjacency
} // End of if((w < StartIndex) || (w > EndIndex))
} // End of for(k)
} // End of for (v)
} // End of task
#ifdef TIME_TRACKER
verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization;
fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Ghost Vertex Index: ";
for (v = 0; v < *numGhostEdges; v++)
cout << verGhostInd[v] << "\t";
cout << endl;
fflush(stdout);
#endif
#pragma omp task depend(in \
: *numGhostEdges) depend(out \
: QLocalVtx, QGhostVtx, QMsgType, QOwner)
{
try
{
QLocalVtx.reserve(*numGhostEdges); // Local Vertex
QGhostVtx.reserve(*numGhostEdges); // Ghost Vertex
QMsgType.reserve(*numGhostEdges); // Message Type (Request/Failure)
QOwner.reserve(*numGhostEdges); // Owner of the ghost: compute once and use later
}
catch (const length_error &)
{
cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n";
cout << "Not enough memory to allocate the internal variables \n";
exit(1);
}
} // end of task
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Allocating CandidateMate.. ";
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl;
fflush(stdout);
fflush(stdout);
#endif
#ifdef DEBUG_HANG_
if (myRank == 0)
cout << "\n(" << myRank << ") Setup Time :" << *ph0_time << endl;
fflush(stdout);
#endif
#pragma omp task depend(in \
: *numGhostVertices) depend(out \
: candidateMate, S, U, privateU, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner)
{
// Allocate Data Structures:
/*
* candidateMate was a vector and has been replaced with an array;
* there is no point in using the vector (or maybe there is (???)),
* so I replaced it with an array, which is slightly faster.
*/
candidateMate = new MilanLongInt[NLVer + (*numGhostVertices)];
*S = (*numGhostVertices); // Initialize S with number of Ghost Vertices
/*
* Create the Queue Data Structure for the Dominating Set
*
* I had to declare the staticQueue U before the parallel region
* to have it in the correct scope. Since we can't change the dimension
* of a staticQueue I had to destroy the previous object and instantiate
* a new one of the correct size.
*/
//new (&U) staticQueue(NLVer + (*numGhostVertices));
U.reserve(NLVer + (*numGhostVertices));
// Initialize the private vectors
privateQLocalVtx.reserve(*numGhostVertices);
privateQGhostVtx.reserve(*numGhostVertices);
privateQMsgType.reserve(*numGhostVertices);
privateQOwner.reserve(*numGhostVertices);
privateU.reserve(*numGhostVertices);
} // end of task
} // End of single region
} // End of parallel region
}

@@ -0,0 +1,46 @@
#include "MatchBoxPC.h"
/**
* //TODO documentation
* @param k
* @param verLocInd
* @param StartIndex
* @param EndIndex
* @param GMate
* @param Mate
* @param Ghost2LocalMap
* @return
*/
bool isAlreadyMatched(MilanLongInt node,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
vector<MilanLongInt> &GMate,
MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap)
{
/*
#pragma omp critical(Mate)
{
if ((node < StartIndex) || (node > EndIndex)) { //Is it a ghost vertex?
result = GMate[Ghost2LocalMap[node]] >= 0;// Already matched
} else { //A local vertex
result = (Mate[node - StartIndex] >= 0); // Already matched
}
}
*/
MilanLongInt val;
if ((node < StartIndex) || (node > EndIndex)) // if ghost vertex
{
#pragma omp atomic read
val = GMate[Ghost2LocalMap[node]];
return val >= 0; // Already matched
}
// If not ghost vertex
#pragma omp atomic read
val = Mate[node - StartIndex];
return val >= 0; // Already matched
}
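// Note: the critical section kept above for reference has been replaced by
// the two atomic reads; a single aligned load is enough to test the
// matched/unmatched status without taking a lock.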

@@ -0,0 +1,28 @@
#include "MatchBoxPC.h"
void PARALLEL_COMPUTE_CANDIDATE_MATE_B(MilanLongInt NLVer,
MilanLongInt *verLocPtr,
MilanLongInt *verLocInd,
MilanInt myRank,
MilanReal *edgeLocWeight,
MilanLongInt *candidateMate)
{
MilanLongInt v = -1;
#pragma omp parallel private(v) default(shared) num_threads(NUM_THREAD)
{
#pragma omp for schedule(static)
for (v = 0; v < NLVer; v++)
{
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Processing: " << v + StartIndex << endl;
fflush(stdout);
#endif
// Start: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
candidateMate[v] = firstComputeCandidateMate(verLocPtr[v], verLocPtr[v + 1], verLocInd, edgeLocWeight);
// End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
}
}
}

@@ -0,0 +1,24 @@
#include "MatchBoxPC.h"
void PROCESS_CROSS_EDGE(MilanLongInt *edge,
MilanLongInt *S)
{
// Start: PARALLEL_PROCESS_CROSS_EDGE_B
MilanLongInt captureCounter;
#pragma omp atomic capture
captureCounter = --(*edge); // Decrement
//assert(captureCounter >= 0);
if (captureCounter == 0)
#pragma omp atomic
(*S)--; // Decrement S
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Decrementing S: Ghost vertex " << edge << " has received all its messages";
fflush(stdout);
#endif
// End: PARALLEL_PROCESS_CROSS_EDGE_B
}
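// Usage note (from the call sites in this changeset): Counter[ghost] holds
// the number of messages still expected across that cross edge, and S counts
// the ghost vertices with pending messages; the main loop of the matching
// driver terminates when S reaches 0.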

@@ -0,0 +1,205 @@
#include "MatchBoxPC.h"
void PARALLEL_PROCESS_EXPOSED_VERTEX_B(MilanLongInt NLVer,
MilanLongInt *candidateMate,
MilanLongInt *verLocInd,
MilanLongInt *verLocPtr,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *Mate,
vector<MilanLongInt> &GMate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
MilanReal *edgeLocWeight,
MilanLongInt *myCard,
MilanLongInt *msgInd,
MilanLongInt *NumMessagesBundled,
MilanLongInt *S,
MilanLongInt *verDistance,
MilanLongInt *PCounter,
vector<MilanLongInt> &Counter,
MilanInt myRank,
MilanInt numProcs,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner)
{
MilanLongInt v = -1, k = -1, w = -1, adj11 = 0, adj12 = 0, k1 = 0;
MilanInt ghostOwner = 0, option;
#pragma omp parallel private(option, k, w, v, k1, adj11, adj12, ghostOwner) \
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner) default(shared) \
num_threads(NUM_THREAD)
{
#pragma omp for reduction(+ \
: PCounter[:numProcs], myCard \
[:1], msgInd \
[:1], NumMessagesBundled \
[:1]) schedule(static)
for (v = 0; v < NLVer; v++)
{
option = -1;
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
k = candidateMate[v];
candidateMate[v] = verLocInd[k];
w = candidateMate[v];
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Processing: " << v + StartIndex << endl;
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")" << v + StartIndex << " Points to: " << w;
fflush(stdout);
#endif
// If found a dominating edge:
if (w >= 0)
{
#pragma omp critical(processExposed)
{
if (isAlreadyMatched(verLocInd[k], StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap))
{
w = computeCandidateMate(verLocPtr[v],
verLocPtr[v + 1],
edgeLocWeight, 0,
verLocInd,
StartIndex,
EndIndex,
GMate,
Mate,
Ghost2LocalMap);
candidateMate[v] = w;
}
if (w >= 0)
{
(*myCard)++;
if ((w < StartIndex) || (w > EndIndex))
{ // w is a ghost vertex
option = 2;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v + StartIndex)
{
option = 1;
Mate[v] = w;
GMate[Ghost2LocalMap[w]] = v + StartIndex; // w is a Ghost
} // End of if CandidateMate[w] = v
} // End of if a Ghost Vertex
else
{ // w is a local vertex
if (candidateMate[w - StartIndex] == (v + StartIndex))
{
option = 3;
Mate[v] = w; // v is local
Mate[w - StartIndex] = v + StartIndex; // w is local
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ") ";
fflush(stdout);
#endif
} // End of if ( candidateMate[w-StartIndex] == (v+StartIndex) )
} // End of Else
} // End of second if
} // End critical processExposed
} // End of if(w >=0)
else
{
// This piece of code is executed only a very small number of times
adj11 = verLocPtr[v];
adj12 = verLocPtr[v + 1];
for (k1 = adj11; k1 < adj12; k1++)
{
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
fflush(stdout);
#endif
(*msgInd)++;
(*NumMessagesBundled)++;
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// assert(ghostOwner != -1);
// assert(ghostOwner != myRank);
PCounter[ghostOwner]++;
privateQLocalVtx.push_back(v + StartIndex);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(FAILURE);
privateQOwner.push_back(ghostOwner);
} // End of if(GHOST)
} // End of for loop
}
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
switch (option)
{
case -1:
break;
case 1:
privateU.push_back(v + StartIndex);
privateU.push_back(w);
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v + StartIndex << "," << w << ")";
fflush(stdout);
#endif
// Decrement the counter:
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S);
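// NOTE: no break here; execution falls through into case 2 (as in
// processMatchedVertices) so that the REQUEST message to the owner of the
// ghost w is still sent.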
case 2:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a request message (291):";
cout << "\n(" << myRank << ")Local is: " << v + StartIndex << " Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl;
fflush(stdout);
#endif
(*msgInd)++;
(*NumMessagesBundled)++;
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// assert(ghostOwner != -1);
// assert(ghostOwner != myRank);
PCounter[ghostOwner]++;
privateQLocalVtx.push_back(v + StartIndex);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST);
privateQOwner.push_back(ghostOwner);
break;
case 3:
default:
privateU.push_back(v + StartIndex);
privateU.push_back(w);
break;
}
} // End of for ( v=0; v < NLVer; v++ )
queuesTransfer(U, privateU, QLocalVtx,
QGhostVtx,
QMsgType, QOwner, privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
} // End of parallel region
}

@@ -0,0 +1,309 @@
#include "MatchBoxPC.h"
void processMatchedVertices(
MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *myCard,
MilanLongInt *msgInd,
MilanLongInt *NumMessagesBundled,
MilanLongInt *SPtr,
MilanLongInt *verLocPtr,
MilanLongInt *verLocInd,
MilanLongInt *verDistance,
MilanLongInt *PCounter,
vector<MilanLongInt> &Counter,
MilanInt myRank,
MilanInt numProcs,
MilanLongInt *candidateMate,
vector<MilanLongInt> &GMate,
MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
MilanReal *edgeLocWeight,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner)
{
MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner;
int option;
MilanLongInt mateVal;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
fflush(stdout);
#endif
#ifdef COUNT_LOCAL_VERTEX
MilanLongInt localVertices = 0;
#endif
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
firstprivate(privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \
num_threads(NUM_THREAD) \
reduction(+ \
: msgInd[:1], PCounter \
[:numProcs], myCard \
[:1], NumMessagesBundled \
[:1])
{
while (!U.empty())
{
extractUChunk(UChunkBeingProcessed, U, privateU);
for (MilanLongInt u : UChunkBeingProcessed)
{
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")u: " << u;
fflush(stdout);
#endif
if ((u >= StartIndex) && (u <= EndIndex))
{ // Process Only the Local Vertices
#ifdef COUNT_LOCAL_VERTEX
localVertices++;
#endif
// Get the Adjacency list for u
adj1 = verLocPtr[u - StartIndex]; // Pointer
adj2 = verLocPtr[u - StartIndex + 1];
for (k = adj1; k < adj2; k++)
{
option = -1;
v = verLocInd[k];
if ((v >= StartIndex) && (v <= EndIndex))
{ // If Local Vertex:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v];
fflush(stdout);
#endif
#pragma omp atomic read
mateVal = Mate[v - StartIndex];
// If the current vertex is pointing to a matched vertex and is not matched
if (mateVal < 0)
{
#pragma omp critical
{
if (candidateMate[v - StartIndex] == u)
{
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
w = computeCandidateMate(verLocPtr[v - StartIndex],
verLocPtr[v - StartIndex + 1],
edgeLocWeight, 0,
verLocInd,
StartIndex,
EndIndex,
GMate,
Mate,
Ghost2LocalMap);
candidateMate[v - StartIndex] = w;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")" << v << " Points to: " << w;
fflush(stdout);
#endif
// If found a dominating edge:
if (w >= 0)
{
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a request message:";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
#endif
option = 2;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v)
{
option = 1;
Mate[v - StartIndex] = w; // v is a local vertex
GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
} // End of if CandidateMate[w] = v
} // End of if a Ghost Vertex
else
{ // w is a local vertex
if (candidateMate[w - StartIndex] == v)
{
option = 3;
Mate[v - StartIndex] = w; // v is a local vertex
Mate[w - StartIndex] = v; // w is a local vertex
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout);
#endif
} // End of if(CandidateMate(w) = v
} // End of Else
} // End of if(w >=0)
else
option = 4; // End of Else: w == -1
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
} // End of If (candidateMate[v-StartIndex] == u
} // End of task
} // mateval < 0
} // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex:
else
{ // Neighbor is a ghost vertex
#pragma omp critical
{
if (candidateMate[NLVer + Ghost2LocalMap[v]] == u)
candidateMate[NLVer + Ghost2LocalMap[v]] = -1;
if (v != Mate[u - StartIndex])
option = 5; // u is local
} // End of critical
} // End of Else //A Ghost Vertex
switch (option)
{
case -1:
// Nothing to do
break;
case 1:
// Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v
privateU.push_back(v);
privateU.push_back(w);
(*myCard)++;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout);
#endif
// Decrement the counter:
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr);
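// NOTE: no break here; case 1 deliberately falls through into case 2 so
// the REQUEST message is still queued for the owner of the ghost w.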
case 2:
// Found a dominating edge, it is a ghost
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// assert(ghostOwner != -1);
// assert(ghostOwner != myRank);
PCounter[ghostOwner]++;
(*NumMessagesBundled)++;
(*msgInd)++;
privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST);
privateQOwner.push_back(ghostOwner);
break;
case 3:
privateU.push_back(v);
privateU.push_back(w);
(*myCard)++;
break;
case 4:
// Could not find a dominating vertex
adj11 = verLocPtr[v - StartIndex];
adj12 = verLocPtr[v - StartIndex + 1];
for (k1 = adj11; k1 < adj12; k1++)
{
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
fflush(stdout);
#endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// assert(ghostOwner != -1);
// assert(ghostOwner != myRank);
PCounter[ghostOwner]++;
(*NumMessagesBundled)++;
(*msgInd)++;
privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(FAILURE);
privateQOwner.push_back(ghostOwner);
} // End of if(GHOST)
} // End of for loop
break;
case 5:
default:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a success message: ";
cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n";
fflush(stdout);
#endif
ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
// assert(ghostOwner != -1);
// assert(ghostOwner != myRank);
(*NumMessagesBundled)++;
PCounter[ghostOwner]++;
(*msgInd)++;
privateQLocalVtx.push_back(u);
privateQGhostVtx.push_back(v);
privateQMsgType.push_back(SUCCESS);
privateQOwner.push_back(ghostOwner);
break;
} // End of switch
} // End of inner for
}
} // End of outer for
queuesTransfer(U, privateU, QLocalVtx,
QGhostVtx,
QMsgType, QOwner, privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
} // End of while ( !U.empty() )
#ifdef COUNT_LOCAL_VERTEX
printf("Count local vertexes: %ld for thread %d of processor %d\n",
localVertices,
omp_get_thread_num(),
myRank);
#endif
} // End of parallel region
}

@ -0,0 +1,315 @@
#include "MatchBoxPC.h"
void processMatchedVerticesAndSendMessages(
MilanLongInt NLVer,
vector<MilanLongInt> &UChunkBeingProcessed,
vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *myCard,
MilanLongInt *msgInd,
MilanLongInt *NumMessagesBundled,
MilanLongInt *SPtr,
MilanLongInt *verLocPtr,
MilanLongInt *verLocInd,
MilanLongInt *verDistance,
MilanLongInt *PCounter,
vector<MilanLongInt> &Counter,
MilanInt myRank,
MilanInt numProcs,
MilanLongInt *candidateMate,
vector<MilanLongInt> &GMate,
MilanLongInt *Mate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
MilanReal *edgeLocWeight,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner,
MPI_Comm comm,
MilanLongInt *msgActual,
vector<MilanLongInt> &Message)
{
MilanLongInt initialSize = QLocalVtx.size();
MilanLongInt adj1, adj2, adj11, adj12, k, k1, v = -1, w = -1, ghostOwner;
int option;
MilanLongInt mateVal;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
fflush(stdout);
#endif
#ifdef COUNT_LOCAL_VERTEX
MilanLongInt localVertices = 0;
#endif
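// Unlike processMatchedVertices, this variant also accumulates msgActual
// in the reduction and performs the actual MPI_Bsend calls after the
// parallel region (see the send loop at the bottom of this function).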
#pragma omp parallel private(k, w, v, k1, adj1, adj2, adj11, adj12, ghostOwner, option) \
firstprivate(Message, privateU, StartIndex, EndIndex, privateQLocalVtx, privateQGhostVtx, privateQMsgType, privateQOwner, UChunkBeingProcessed) default(shared) \
num_threads(NUM_THREAD) \
reduction(+ : msgInd[:1], PCounter[:numProcs], myCard[:1], NumMessagesBundled[:1], msgActual[:1])
{
while (!U.empty())
{
extractUChunk(UChunkBeingProcessed, U, privateU);
for (MilanLongInt u : UChunkBeingProcessed)
{
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")u: " << u;
fflush(stdout);
#endif
if ((u >= StartIndex) && (u <= EndIndex))
{ // Process Only the Local Vertices
#ifdef COUNT_LOCAL_VERTEX
localVertices++;
#endif
// Get the Adjacency list for u
adj1 = verLocPtr[u - StartIndex]; // Pointer
adj2 = verLocPtr[u - StartIndex + 1];
for (k = adj1; k < adj2; k++)
{
option = -1;
v = verLocInd[k];
if ((v >= StartIndex) && (v <= EndIndex))
{ // If Local Vertex:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")v: " << v << " c(v)= " << candidateMate[v - StartIndex] << " Mate[v]: " << Mate[v];
fflush(stdout);
#endif
#pragma omp atomic read
mateVal = Mate[v - StartIndex];
// Process v only if it is not yet matched
if (mateVal < 0)
{
#pragma omp critical
{
if (candidateMate[v - StartIndex] == u)
{
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
w = computeCandidateMate(verLocPtr[v - StartIndex],
verLocPtr[v - StartIndex + 1],
edgeLocWeight, 0,
verLocInd,
StartIndex,
EndIndex,
GMate,
Mate,
Ghost2LocalMap);
candidateMate[v - StartIndex] = w;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")" << v << " Points to: " << w;
fflush(stdout);
#endif
// If found a dominating edge:
if (w >= 0)
{
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a request message:";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
#endif
option = 2;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v)
{
option = 1;
Mate[v - StartIndex] = w; // v is a local vertex
GMate[Ghost2LocalMap[w]] = v; // w is a ghost vertex
} // End of if CandidateMate[w] = v
} // End of if a Ghost Vertex
else
{ // w is a local vertex
if (candidateMate[w - StartIndex] == v)
{
option = 3;
Mate[v - StartIndex] = w; // v is a local vertex
Mate[w - StartIndex] = v; // w is a local vertex
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout);
#endif
} // End of if(CandidateMate(w) = v
} // End of Else
} // End of if(w >=0)
else
option = 4; // End of Else: w == -1
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
} // End of If (candidateMate[v-StartIndex] == u
} // End of task
} // mateval < 0
} // End of if ( (v >= StartIndex) && (v <= EndIndex) ) //If Local Vertex:
else
{ // Neighbor is a ghost vertex
#pragma omp critical
{
if (candidateMate[NLVer + Ghost2LocalMap[v]] == u)
candidateMate[NLVer + Ghost2LocalMap[v]] = -1;
if (v != Mate[u - StartIndex])
option = 5; // u is local
} // End of critical
} // End of Else //A Ghost Vertex
switch (option)
{
case -1:
// Nothing to do
break;
case 1:
// Found a dominating edge, it is a ghost and candidateMate[NLVer + Ghost2LocalMap[w]] == v
privateU.push_back(v);
privateU.push_back(w);
(*myCard)++;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") ";
fflush(stdout);
#endif
// Decrement the counter:
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], SPtr);
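// NOTE: no break here; case 1 deliberately falls through into case 2 so
// the REQUEST message is still counted and queued for the owner of w.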
case 2:
// Found a dominating edge, it is a ghost
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// Build the Message Packet:
// Message[0] = v; // LOCAL
// Message[1] = w; // GHOST
// Message[2] = REQUEST; // TYPE
// Send a Request (Asynchronous)
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgActual)++;
(*msgInd)++;
privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(REQUEST);
privateQOwner.push_back(ghostOwner);
break;
case 3:
privateU.push_back(v);
privateU.push_back(w);
(*myCard)++;
break;
case 4:
// Could not find a dominating vertex
adj11 = verLocPtr[v - StartIndex];
adj12 = verLocPtr[v - StartIndex + 1];
for (k1 = adj11; k1 < adj12; k1++)
{
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs);
fflush(stdout);
#endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
// Build the Message Packet:
// Message[0] = v; // LOCAL
// Message[1] = w; // GHOST
// Message[2] = FAILURE; // TYPE
// Send a Request (Asynchronous)
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgActual)++;
(*msgInd)++;
privateQLocalVtx.push_back(v);
privateQGhostVtx.push_back(w);
privateQMsgType.push_back(FAILURE);
privateQOwner.push_back(ghostOwner);
} // End of if(GHOST)
} // End of for loop
break;
case 5:
default:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a success message: ";
cout << "\n(" << myRank << ")Ghost is " << v << " Owner is: " << findOwnerOfGhost(v, verDistance, myRank, numProcs) << "\n";
fflush(stdout);
#endif
ghostOwner = findOwnerOfGhost(v, verDistance, myRank, numProcs);
// Build the Message Packet:
// Message[0] = u; // LOCAL
// Message[1] = v; // GHOST
// Message[2] = SUCCESS; // TYPE
// Send a Request (Asynchronous)
// MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgActual)++;
(*msgInd)++;
privateQLocalVtx.push_back(u);
privateQGhostVtx.push_back(v);
privateQMsgType.push_back(SUCCESS);
privateQOwner.push_back(ghostOwner);
break;
} // End of switch
} // End of inner for
}
} // End of outer for
queuesTransfer(U, privateU, QLocalVtx,
QGhostVtx,
QMsgType, QOwner, privateQLocalVtx,
privateQGhostVtx,
privateQMsgType,
privateQOwner);
} // End of while ( !U.empty() )
#ifdef COUNT_LOCAL_VERTEX
printf("Count local vertexes: %ld for thread %d of processor %d\n",
localVertices,
omp_get_thread_num(),
myRank);
#endif
} // End of parallel region
// Send the messages
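// Only the entries appended during this call (from initialSize onward) are
// flushed here; entries already present on entry are assumed to have been
// sent earlier (e.g., as bundled messages).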
for (MilanLongInt i = initialSize; i < (MilanLongInt)QOwner.size(); i++)
{
Message[0] = QLocalVtx[i];
Message[1] = QGhostVtx[i];
Message[2] = QMsgType[i];
ghostOwner = QOwner[i];
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
}
}

@ -0,0 +1,329 @@
#include "MatchBoxPC.h"
void processMessages(
MilanLongInt NLVer,
MilanLongInt *Mate,
MilanLongInt *candidateMate,
map<MilanLongInt, MilanLongInt> &Ghost2LocalMap,
vector<MilanLongInt> &GMate,
vector<MilanLongInt> &Counter,
MilanLongInt StartIndex,
MilanLongInt EndIndex,
MilanLongInt *myCard,
MilanLongInt *msgInd,
MilanLongInt *msgActual,
MilanReal *edgeLocWeight,
MilanLongInt *verDistance,
MilanLongInt *verLocPtr,
MilanLongInt k,
MilanLongInt *verLocInd,
MilanInt numProcs,
MilanInt myRank,
MPI_Comm comm,
vector<MilanLongInt> &Message,
MilanLongInt numGhostEdges,
MilanLongInt u,
MilanLongInt v,
MilanLongInt *S,
vector<MilanLongInt> &U)
{
//#define PRINT_DEBUG_INFO_
MilanInt Sender;
MPI_Status computeStatus;
MilanLongInt bundleSize, w;
MilanLongInt adj11, adj12, k1;
MilanLongInt ghostOwner;
int error_codeC;
error_codeC = MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
char error_message[MPI_MAX_ERROR_STRING];
int message_length;
MilanLongInt message_type = 0;
// Buffer to receive bundled messages
// Maximum messages that can be received from any processor is
// twice the edge cut: REQUEST; REQUEST+(FAILURE/SUCCESS)
vector<MilanLongInt> ReceiveBuffer;
try
{
ReceiveBuffer.reserve(numGhostEdges * 2 * 3); // Three integers per cross edge
}
catch (const length_error &)
{
cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n";
cout << "Not enough memory to allocate the internal variables \n";
exit(1);
}
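// Sizing example: with numGhostEdges = 1000, at most 2 * 1000 messages of
// 3 integers each can arrive, so 6000 entries are reserved up front.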
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")=========================************===============================" << endl;
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")About to begin Message processing phase ... *S=" << *S << endl;
fflush(stdout);
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << "=========================************===============================" << endl;
fflush(stdout);
fflush(stdout);
#endif
// BLOCKING RECEIVE:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << " Waiting for blocking receive..." << endl;
fflush(stdout);
fflush(stdout);
#endif
error_codeC = MPI_Recv(&Message[0], 3, TypeMap<MilanLongInt>(), MPI_ANY_SOURCE, ComputeTag, comm, &computeStatus);
if (error_codeC != MPI_SUCCESS)
{
MPI_Error_string(error_codeC, error_message, &message_length);
cout << "\n*Error in call to MPI_Receive on Slave: " << error_message << "\n";
fflush(stdout);
}
Sender = computeStatus.MPI_SOURCE;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Received message from Process " << Sender << " Type= " << Message[2] << endl;
fflush(stdout);
#endif
if (Message[2] == SIZEINFO)
{
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Received bundled message from Process " << Sender << " Size= " << Message[0] << endl;
fflush(stdout);
#endif
bundleSize = Message[0]; //#of integers in the message
// Build the Message Buffer:
if (!ReceiveBuffer.empty())
ReceiveBuffer.clear(); // Empty it out first
ReceiveBuffer.resize(bundleSize, -1); // Initialize
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message Bundle Before: " << endl;
for (int i = 0; i < bundleSize; i++)
cout << ReceiveBuffer[i] << ",";
cout << endl;
fflush(stdout);
#endif
// Receive the message
error_codeC = MPI_Recv(&ReceiveBuffer[0], bundleSize, TypeMap<MilanLongInt>(), Sender, BundleTag, comm, &computeStatus);
if (error_codeC != MPI_SUCCESS)
{
MPI_Error_string(error_codeC, error_message, &message_length);
cout << "\n*Error in call to MPI_Receive on processor " << myRank << " Error: " << error_message << "\n";
fflush(stdout);
}
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message Bundle After: " << endl;
for (int i = 0; i < bundleSize; i++)
cout << ReceiveBuffer[i] << ",";
cout << endl;
fflush(stdout);
#endif
}
else
{ // Just a single message:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Received regular message from Process " << Sender << " u= " << Message[0] << " v= " << Message[1] << endl;
fflush(stdout);
#endif
// Add the current message to Queue:
bundleSize = 3; //#of integers in the message
// Build the Message Buffer:
if (!ReceiveBuffer.empty())
ReceiveBuffer.clear(); // Empty it out first
ReceiveBuffer.resize(bundleSize, -1); // Initialize
ReceiveBuffer[0] = Message[0]; // u
ReceiveBuffer[1] = Message[1]; // v
ReceiveBuffer[2] = Message[2]; // message_type
}
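// From here on, bundled and single messages are handled uniformly:
// ReceiveBuffer holds (ghost vertex u, local vertex v, message type)
// triples, consumed by the loop below.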
#ifdef DEBUG_GHOST_
if ((v < StartIndex) || (v > EndIndex))
{
cout << "\n(" << myRank << ") From ReceiveBuffer: This should not happen: u= " << u << " v= " << v << " Type= " << message_type << " StartIndex " << StartIndex << " EndIndex " << EndIndex << endl;
fflush(stdout);
}
#endif
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Processing message: u= " << u << " v= " << v << " Type= " << message_type << endl;
fflush(stdout);
#endif
// Most of the time bundleSize == 3, so it is not worth parallelizing this loop
for (MilanLongInt bundleCounter = 3; bundleCounter < bundleSize + 3; bundleCounter += 3)
{
u = ReceiveBuffer[bundleCounter - 3]; // GHOST
v = ReceiveBuffer[bundleCounter - 2]; // LOCAL
message_type = ReceiveBuffer[bundleCounter - 1]; // TYPE
// CASE I: REQUEST
if (message_type == REQUEST)
{
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message type is REQUEST" << endl;
fflush(stdout);
#endif
#ifdef DEBUG_GHOST_
if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer))
{
cout << "\n(" << myRank << ") case 1 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl;
fflush(stdout);
}
#endif
if (Mate[v - StartIndex] == -1)
{ // Process only if not already matched (v is local)
candidateMate[NLVer + Ghost2LocalMap[u]] = v; // Set CandidateMate for the ghost
if (candidateMate[v - StartIndex] == u)
{
GMate[Ghost2LocalMap[u]] = v; // u is ghost
Mate[v - StartIndex] = u; // v is local
U.push_back(v);
U.push_back(u);
(*myCard)++;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << u << ") " << endl;
fflush(stdout);
#endif
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S);
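// PROCESS_CROSS_EDGE (defined elsewhere) is assumed to decrement the
// cross-edge counter of this ghost and to update *S when it reaches zero.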
} // End of if ( candidateMate[v-StartIndex] == u )
} // End of if ( Mate[v] == -1 )
} // End of REQUEST
else
{ // CASE II: SUCCESS
if (message_type == SUCCESS)
{
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message type is SUCCESS" << endl;
fflush(stdout);
#endif
GMate[Ghost2LocalMap[u]] = EndIndex + 1; // u is a ghost: set a dummy mate so it is not processed again
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S);
#ifdef DEBUG_GHOST_
if ((v < 0) || (v < StartIndex) || ((v - StartIndex) > NLVer))
{
cout << "\n(" << myRank << ") case 2 Bad address " << v << " " << StartIndex << " " << v - StartIndex << " " << NLVer << endl;
fflush(stdout);
}
#endif
if (Mate[v - StartIndex] == -1)
{ // Process only if not already matched ( v is local)
if (candidateMate[v - StartIndex] == u)
{
// Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
w = computeCandidateMate(verLocPtr[v - StartIndex], verLocPtr[v - StartIndex + 1], edgeLocWeight, k, verLocInd, StartIndex, EndIndex, GMate, Mate, Ghost2LocalMap);
candidateMate[v - StartIndex] = w;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")" << v << " Points to: " << w << endl;
fflush(stdout);
#endif
// If found a dominating edge:
if (w >= 0)
{
if ((w < StartIndex) || (w > EndIndex))
{ // w is a ghost
// Build the Message Packet:
Message[0] = v; // LOCAL
Message[1] = w; // GHOST
Message[2] = REQUEST; // TYPE
// Send a Request (Asynchronous)
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a request message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl;
fflush(stdout);
#endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
//assert(ghostOwner != -1);
//assert(ghostOwner != myRank);
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgInd)++;
(*msgActual)++;
if (candidateMate[NLVer + Ghost2LocalMap[w]] == v)
{
Mate[v - StartIndex] = w; // v is local
GMate[Ghost2LocalMap[w]] = v; // w is ghost
U.push_back(v);
U.push_back(w);
(*myCard)++;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl;
fflush(stdout);
#endif
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[w]], S);
} // End of if CandidateMate[w] = v
} // End of if a Ghost Vertex
else
{ // w is a local vertex
if (candidateMate[w - StartIndex] == v)
{
Mate[v - StartIndex] = w; // v is local
Mate[w - StartIndex] = v; // w is local
// Q.push_back(u);
U.push_back(v);
U.push_back(w);
(*myCard)++;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")MATCH: (" << v << "," << w << ") " << endl;
fflush(stdout);
#endif
} // End of if(CandidateMate(w) = v
} // End of Else
} // End of if(w >=0)
else
{ // No dominant edge found
adj11 = verLocPtr[v - StartIndex];
adj12 = verLocPtr[v - StartIndex + 1];
for (k1 = adj11; k1 < adj12; k1++)
{
w = verLocInd[k1];
if ((w < StartIndex) || (w > EndIndex))
{ // A ghost
// Build the Message Packet:
Message[0] = v; // LOCAL
Message[1] = w; // GHOST
Message[2] = FAILURE; // TYPE
// Send a Request (Asynchronous)
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending a failure message: ";
cout << "\n(" << myRank << ")Ghost is " << w << " Owner is: " << findOwnerOfGhost(w, verDistance, myRank, numProcs) << endl;
fflush(stdout);
#endif
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
//assert(ghostOwner != -1);
//assert(ghostOwner != myRank);
MPI_Bsend(&Message[0], 3, TypeMap<MilanLongInt>(), ghostOwner, ComputeTag, comm);
(*msgInd)++;
(*msgActual)++;
} // End of if(GHOST)
} // End of for loop
} // End of Else: w == -1
// End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
} // End of if ( candidateMate[v-StartIndex] == u )
} // End of if ( Mate[v] == -1 )
} // End of if ( message_type == SUCCESS )
else
{ // CASE III: FAILURE
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Message type is FAILURE" << endl;
fflush(stdout);
#endif
GMate[Ghost2LocalMap[u]] = EndIndex + 1; // u is a ghost: set a dummy mate so it is not processed again
PROCESS_CROSS_EDGE(&Counter[Ghost2LocalMap[u]], S); // Decrease the counter
} // End of else: CASE III
} // End of else: CASE I
}
return;
}

@ -0,0 +1,35 @@
#include "MatchBoxPC.h"
void queuesTransfer(vector<MilanLongInt> &U,
vector<MilanLongInt> &privateU,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MilanLongInt> &privateQLocalVtx,
vector<MilanLongInt> &privateQGhostVtx,
vector<MilanLongInt> &privateQMsgType,
vector<MilanInt> &privateQOwner)
{
#pragma omp critical(U)
{
U.insert(U.end(), privateU.begin(), privateU.end());
}
privateU.clear();
#pragma omp critical(sendMessageTransfer)
{
QLocalVtx.insert(QLocalVtx.end(), privateQLocalVtx.begin(), privateQLocalVtx.end());
QGhostVtx.insert(QGhostVtx.end(), privateQGhostVtx.begin(), privateQGhostVtx.end());
QMsgType.insert(QMsgType.end(), privateQMsgType.begin(), privateQMsgType.end());
QOwner.insert(QOwner.end(), privateQOwner.begin(), privateQOwner.end());
}
privateQLocalVtx.clear();
privateQGhostVtx.clear();
privateQMsgType.clear();
privateQOwner.clear();
}

@ -0,0 +1,210 @@
#include "MatchBoxPC.h"
void sendBundledMessages(MilanLongInt *numGhostEdges,
MilanInt *BufferSize,
MilanLongInt *Buffer,
vector<MilanLongInt> &PCumulative,
vector<MilanLongInt> &PMessageBundle,
vector<MilanLongInt> &PSizeInfoMessages,
MilanLongInt *PCounter,
MilanLongInt NumMessagesBundled,
MilanLongInt *msgActual,
MilanLongInt *msgInd,
MilanInt numProcs,
MilanInt myRank,
MPI_Comm comm,
vector<MilanLongInt> &QLocalVtx,
vector<MilanLongInt> &QGhostVtx,
vector<MilanLongInt> &QMsgType,
vector<MilanInt> &QOwner,
vector<MPI_Request> &SRequest,
vector<MPI_Status> &SStatus)
{
MilanLongInt myIndex = 0, numMessagesToSend;
MilanInt i = 0, OneMessageSize = 0;
#ifdef DEBUG_HANG_
if (myRank == 0)
cout << "\n(" << myRank << ") Send Bundles" << endl;
fflush(stdout);
#endif
#pragma omp parallel private(i) default(shared) num_threads(NUM_THREAD)
{
#pragma omp master
{
// Data structures for Bundled Messages:
#pragma omp task depend(inout : PCumulative, PMessageBundle, PSizeInfoMessages) depend(in : NumMessagesBundled, numProcs)
{
try
{
PMessageBundle.reserve(NumMessagesBundled * 3); // Three integers per message
PCumulative.reserve(numProcs + 1); // Similar to Row Pointer vector in CSR data structure
PSizeInfoMessages.reserve(numProcs * 3); // Buffer to hold the Size info message packets
}
catch (const length_error &)
{
cout << "Error in function algoDistEdgeApproxDominatingEdgesMessageBundling: \n";
cout << "Not enough memory to allocate the internal variables \n";
exit(1);
}
PMessageBundle.resize(NumMessagesBundled * 3, -1); // Initialize
PCumulative.resize(numProcs + 1, 0); // Only initialize the counter variable
PSizeInfoMessages.resize(numProcs * 3, 0);
}
#pragma omp task depend(inout : PCumulative) depend(in : PCounter)
{
for (i = 0; i < numProcs; i++)
PCumulative[i + 1] = PCumulative[i] + PCounter[i];
}
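// Illustrative example: with numProcs = 3 and PCounter = {2, 0, 3}, the
// exclusive prefix sum gives PCumulative = {0, 2, 2, 5}, a CSR-style row pointer.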
#pragma omp task depend(inout : PCounter)
{
// Reuse PCounter to keep track of how many messages were inserted:
for (MilanInt i = 0; i < numProcs; i++) // Changed by Fabio to be an integer; addresses need to be integers!
PCounter[i] = 0;
}
// Build the Message Bundle packet:
#pragma omp task depend(in : QLocalVtx, QGhostVtx, QMsgType, QOwner, PCumulative) depend(inout : PMessageBundle, PCounter) depend(out : myIndex)
{
for (i = 0; i < NumMessagesBundled; i++)
{
myIndex = (PCumulative[QOwner[i]] + PCounter[QOwner[i]]) * 3;
PMessageBundle[myIndex + 0] = QLocalVtx[i];
PMessageBundle[myIndex + 1] = QGhostVtx[i];
PMessageBundle[myIndex + 2] = QMsgType[i];
PCounter[QOwner[i]]++;
}
}
// Send the Bundled Messages: Use ISend
#pragma omp task depend(out : SRequest, SStatus)
{
try
{
SRequest.reserve(numProcs * 2); // At most two messages per processor
SStatus.reserve(numProcs * 2); // At most two messages per processor
}
catch (const length_error &)
{
cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearchImmediateSend: \n";
cout << "Not enough memory to allocate the internal variables \n";
exit(1);
}
}
// Send the Messages
#pragma omp task depend(inout : SRequest, PSizeInfoMessages, PCumulative) depend(out : *msgActual, *msgInd)
{
for (i = 0; i < numProcs; i++)
{ // Changed by Fabio to be an integer; addresses need to be integers!
if (i == myRank) // Do not send anything to yourself
continue;
// Send the Message with information about the size of next message:
// Build the Message Packet:
PSizeInfoMessages[i * 3 + 0] = (PCumulative[i + 1] - PCumulative[i]) * 3; // # of integers in the next message
PSizeInfoMessages[i * 3 + 1] = -1; // Dummy packet
PSizeInfoMessages[i * 3 + 2] = SIZEINFO; // TYPE
// Send a Request (Asynchronous)
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Sending bundled message to process " << i << " size: " << PSizeInfoMessages[i * 3 + 0] << endl;
fflush(stdout);
#endif
if (PSizeInfoMessages[i * 3 + 0] > 0)
{ // Send only if it is a nonempty packet
MPI_Isend(&PSizeInfoMessages[i * 3 + 0], 3, TypeMap<MilanLongInt>(), i, ComputeTag, comm,
&SRequest[(*msgInd)]);
(*msgActual)++;
(*msgInd)++;
// Now Send the message with the data packet:
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")SendiFFng Bundle to : " << i << endl;
for (k = (PCumulative[i] * 3); k < (PCumulative[i] * 3 + PSizeInfoMessages[i * 3 + 0]); k++)
cout << PMessageBundle[k] << ",";
cout << endl;
fflush(stdout);
#endif
MPI_Isend(&PMessageBundle[PCumulative[i] * 3], PSizeInfoMessages[i * 3 + 0],
TypeMap<MilanLongInt>(), i, BundleTag, comm, &SRequest[(*msgInd)]);
(*msgInd)++;
} // End of if size > 0
}
}
#pragma omp task depend(inout : PCumulative, QLocalVtx, QGhostVtx, QMsgType, QOwner)
{
// Free up temporary memory:
PCumulative.clear();
QLocalVtx.clear();
QGhostVtx.clear();
QMsgType.clear();
QOwner.clear();
}
#pragma omp task depend(inout : OneMessageSize, *BufferSize) depend(out : numMessagesToSend) depend(in : *numGhostEdges)
{
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Number of Ghost edges = " << *numGhostEdges;
cout << "\n(" << myRank << ")Total number of potential message X 2 = " << *numGhostEdges * 2;
cout << "\n(" << myRank << ")Number messages already sent in bundles = " << NumMessagesBundled;
if (*numGhostEdges > 0)
{
cout << "\n(" << myRank << ")Percentage of total = " << ((double)NumMessagesBundled / (double)(*numGhostEdges * 2)) * 100.0 << "% \n";
}
fflush(stdout);
#endif
// Allocate memory for MPI Send messages:
/* WILL COME BACK HERE - NO NEED TO STORE ALL THIS MEMORY !! */
OneMessageSize = 0;
MPI_Pack_size(3, TypeMap<MilanLongInt>(), comm, &OneMessageSize); // Size of one message packet
// How many messages to send?
// Potentially three kinds of messages will be sent/received:
// Request, Success, Failure.
// But only two will be sent from a given processor.
// Subtract the number of messages that have already been sent as bundled messages:
numMessagesToSend = (*numGhostEdges) * 2 - NumMessagesBundled;
*BufferSize = (OneMessageSize + MPI_BSEND_OVERHEAD) * numMessagesToSend;
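// Illustrative example: *numGhostEdges = 100 and NumMessagesBundled = 150
// give numMessagesToSend = 2 * 100 - 150 = 50 remaining buffered sends.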
}
#pragma omp task depend(out : Buffer) depend(in : *BufferSize)
{
Buffer = 0;
#ifdef PRINT_DEBUG_INFO_
cout << "\n(" << myRank << ")Size of One Message from PACK= " << OneMessageSize;
cout << "\n(" << myRank << ")Size of Message overhead = " << MPI_BSEND_OVERHEAD;
cout << "\n(" << myRank << ")Number of Ghost edges = " << *numGhostEdges;
cout << "\n(" << myRank << ")Number of remaining message = " << numMessagesToSend;
cout << "\n(" << myRank << ")BufferSize = " << (*BufferSize);
cout << "\n(" << myRank << ")Attaching Buffer on.. ";
fflush(stdout);
#endif
if ((*BufferSize) > 0)
{
Buffer = (MilanLongInt *)malloc((*BufferSize)); // Allocate memory
if (Buffer == 0)
{
cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n";
cout << "Not enough memory to allocate for send buffer on process " << myRank << "\n";
exit(1);
}
MPI_Buffer_attach(Buffer, *BufferSize); // Attach the Buffer
}
}
}
}
}


@ -0,0 +1,25 @@
cd amgprec/impl/aggregator/
rm MatchBoxPC.o
rm sendBundledMessages.o
rm initialize.o
rm extractUChunk.o
rm isAlreadyMatched.o
rm findOwnerOfGhost.o
rm computeCandidateMate.o
rm parallelComputeCandidateMateB.o
rm processMatchedVertices.o
rm processMatchedVerticesAndSendMessages.o
rm processCrossEdge.o
rm queueTransfer.o
rm processMessages.o
rm processExposedVertex.o
rm algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.o
rm algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.o
cd ../../../
make all
cd samples/advanced/pdegen
make amg_d_pde3d
cd runs
mpirun -np 4 amg_d_pde3d amg_pde3d.inp

@ -3,7 +3,7 @@ AMGINCDIR=$(AMGDIR)/include
include $(AMGINCDIR)/Make.inc.amg4psblas
AMGMODDIR=$(AMGDIR)/modules
AMGLIBDIR=$(AMGDIR)/lib
AMG_LIBS=-L$(AMGLIBDIR) -lpsb_krylov -lamg_prec -lpsb_prec
AMG_LIBS=-L$(AMGLIBDIR) -lpsb_krylov -lamg_prec -lpsb_prec -llapack -lblas
FINCLUDES=$(FMFLAG). $(FMFLAG)$(AMGMODDIR) $(FMFLAG)$(AMGINCDIR) $(PSBLAS_INCLUDES) $(FIFLAG).
LINKOPT=

@ -1,6 +1,6 @@
%%%%%%%%%%% General arguments % Lines starting with % are ignored.
CSR ! Storage format CSR COO JAD
0080 ! IDIM; domain size. Linear system size is IDIM**3
0020 ! IDIM; domain size. Linear system size is IDIM**3
CONST ! PDECOEFF: CONST, EXP, GAUSS Coefficients of the PDE
BICGSTAB ! Iterative method: BiCGSTAB BiCGSTABL BiCG CG CGS FCG GCR RGMRES
2 ! ISTOPC
