The OMP and MPI versions are now separated into two different files

omp-walther
StefanoPetrilli 3 years ago
parent 0a8debe43a
commit 76e04ee997

@ -1,5 +1,4 @@
AMG4PSBLAS
AMG4PSBLAS
Algebraic Multigrid Package based on PSBLAS (Parallel Sparse BLAS version 3.7) Algebraic Multigrid Package based on PSBLAS (Parallel Sparse BLAS version 3.7)
Salvatore Filippone (University of Rome Tor Vergata and IAC-CNR) Salvatore Filippone (University of Rome Tor Vergata and IAC-CNR)

@ -62,7 +62,8 @@ amg_s_parmatch_smth_bld.o \
amg_s_parmatch_spmm_bld_inner.o amg_s_parmatch_spmm_bld_inner.o
MPCOBJS=MatchBoxPC.o \ MPCOBJS=MatchBoxPC.o \
algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.o algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.o \
algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.o
OBJS = $(FOBJS) $(MPCOBJS) OBJS = $(FOBJS) $(MPCOBJS)

@ -66,22 +66,35 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge,
myRank,NLVer, NLEdge,verDistance[0],verDistance[1]); myRank,NLVer, NLEdge,verDistance[0],verDistance[1]);
#endif #endif
#define TIME_TRACKER #define TIME_TRACKER
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
double tmr = MPI_Wtime(); double tmr = MPI_Wtime();
#endif #endif
dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(NLVer, NLEdge, #define OMP
#ifdef OMP
dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(NLVer, NLEdge,
verLocPtr, verLocInd, edgeLocWeight, verLocPtr, verLocInd, edgeLocWeight,
verDistance, Mate, verDistance, Mate,
myRank, numProcs, C_comm, myRank, numProcs, C_comm,
msgIndSent, msgActualSent, msgPercent, msgIndSent, msgActualSent, msgPercent,
ph0_time, ph1_time, ph2_time, ph0_time, ph1_time, ph2_time,
ph1_card, ph2_card ); ph1_card, ph2_card );
#else
dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(NLVer, NLEdge,
verLocPtr, verLocInd, edgeLocWeight,
verDistance, Mate,
myRank, numProcs, C_comm,
msgIndSent, msgActualSent, msgPercent,
ph0_time, ph1_time, ph2_time,
ph1_card, ph2_card );
#endif
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
tmr = MPI_Wtime() - tmr; tmr = MPI_Wtime() - tmr;
fprintf(stderr, "Elaboration time: %f for $ld\n", tmr, NLEdge); fprintf(stderr, "Elaboration time: %f for %ld nodes\n", tmr, NLVer);
#endif #endif
#endif #endif

@ -152,6 +152,17 @@ extern "C" {
inline MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, inline MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance,
MilanInt myRank, MilanInt numProcs); MilanInt myRank, MilanInt numProcs);
// Counterpart of dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC:
// dMatchBoxPC calls this routine, with the same arguments, when the OMP macro
// is defined, and the MateC routine otherwise.
// NOTE(review): presumably this is the OpenMP-parallel implementation of the
// same matching algorithm -- confirm against its definition.
void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP
(
// NOTE(review): by their names, the local vertex count and local edge count -- confirm.
MilanLongInt NLVer, MilanLongInt NLEdge,
// NOTE(review): presumably the local adjacency structure (row pointers, indices, edge weights) -- confirm.
MilanLongInt* verLocPtr, MilanLongInt* verLocInd, MilanReal* edgeLocWeight,
// NOTE(review): presumably the per-rank vertex ownership boundaries -- confirm.
MilanLongInt* verDistance,
// NOTE(review): presumably receives the computed mate of each local vertex -- confirm.
MilanLongInt* Mate,
// Rank of the caller, number of processes, and the MPI communicator to use.
MilanInt myRank, MilanInt numProcs, MPI_Comm comm,
// NOTE(review): presumably output message statistics -- confirm.
MilanLongInt* msgIndSent, MilanLongInt* msgActualSent, MilanReal* msgPercent,
// NOTE(review): presumably output per-phase timings -- confirm.
MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time,
// NOTE(review): presumably output per-phase matching cardinalities -- confirm.
MilanLongInt* ph1_card, MilanLongInt* ph2_card );
void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC
( (
MilanLongInt NLVer, MilanLongInt NLEdge, MilanLongInt NLVer, MilanLongInt NLEdge,

@ -1,6 +1,4 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#include <omp.h>
#include <stdio.h>
// *********************************************************************** // ***********************************************************************
// //
// MatchboxP: A C++ library for approximate weighted matching // MatchboxP: A C++ library for approximate weighted matching
@ -94,21 +92,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
MilanReal* msgPercent, MilanReal* msgPercent,
MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time, MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time,
MilanLongInt* ph1_card, MilanLongInt* ph2_card ) { MilanLongInt* ph1_card, MilanLongInt* ph2_card ) {
/*
* verDistance: a vector with one entry per processor plus one.
* verDistance[i] is the first node index owned by the i-th processor
* verDistance[i + 1] - 1 is the last node index owned by the i-th processor
* NLVer: number of local vertices (rows addressed through verLocPtr)
* NLEdge: number of edges assigned to the current processor
*
* The portion of the matrix assigned to the processor is stored in
* Yale (compressed sparse row) notation:
* verLocInd: column indices of the stored entries, row by row
* verLocPtr: i-th value is the position of the first element of the i-th row and
* i+1-th value is the position of the first element of the i+1-th row
*/
#if !defined(SERIAL_MPI) #if !defined(SERIAL_MPI)
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Within algoEdgeApproxDominatingEdgesLinearSearchMessageBundling()"; fflush(stdout); cout<<"\n("<<myRank<<")Within algoEdgeApproxDominatingEdgesLinearSearchMessageBundling()"; fflush(stdout);
@ -124,7 +107,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
//inputSubGraph.getStartEndIndices(StartIndex, EndIndex); //inputSubGraph.getStartEndIndices(StartIndex, EndIndex);
MilanLongInt StartIndex = verDistance[myRank]; //The starting vertex owned by the current rank MilanLongInt StartIndex = verDistance[myRank]; //The starting vertex owned by the current rank
//MilanLongInt EndIndex = verDistance[myRank+1]; //The ending vertex owned by the current rank //MilanLongInt EndIndex = verDistance[myRank+1]; //The ending vertex owned by the current rank
MilanLongInt EndIndex = verDistance[myRank + 1] - 1; //The ending vertex owned by the current rank MilanLongInt EndIndex = verDistance[myRank+1]-1; //The ending vertex owned by the current rank
MPI_Status computeStatus; MPI_Status computeStatus;
const int ComputeTag = 7; //Predefined tag const int ComputeTag = 7; //Predefined tag
@ -135,8 +118,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
int message_length; int message_length;
//MilanLongInt NLVer=0, NLEdge=0, StartIndex=0, EndIndex=0; //MilanLongInt NLVer=0, NLEdge=0, StartIndex=0, EndIndex=0;
MilanLongInt msgActual = 0, msgInd = 0; MilanLongInt msgActual=0, msgInd=0;
MilanReal heaviestEdgeWt = 0.0f; //Assumes positive weight MilanReal heaviestEdgeWt=0.0f; //Assumes positive weight
MilanReal startTime, finishTime; MilanReal startTime, finishTime;
//MilanReal Precision = MPI_Wtick(); //Get the precision of the MPI Timer //MilanReal Precision = MPI_Wtick(); //Get the precision of the MPI Timer
startTime = MPI_Wtime(); startTime = MPI_Wtime();
@ -150,18 +133,18 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
cout<<"\n("<<myRank<<")StartIndex: "<<StartIndex<<" EndIndex: "<<EndIndex; fflush(stdout); cout<<"\n("<<myRank<<")StartIndex: "<<StartIndex<<" EndIndex: "<<EndIndex; fflush(stdout);
#endif #endif
//Other Variables: //Other Variables:
MilanLongInt u = -1, v = -1, w = -1, i = 0; MilanLongInt u=-1, v=-1, w=-1, i=0;
MilanLongInt k = -1, adj1 = -1, adj2 = -1; MilanLongInt k=-1, adj1=-1, adj2=-1;
MilanLongInt k1 = -1, adj11 = -1, adj12 = -1; MilanLongInt k1=-1, adj11=-1, adj12=-1;
MilanLongInt myCard = 0; MilanLongInt myCard = 0;
MilanInt Sender = 0; // This is the rank of the sending nodes, it has to be an integer! Fabio MilanInt Sender=0; // This is the rank of the sending nodes, it has to be an integer! Fabio
//Build the Ghost Vertex Set: Vg //Build the Ghost Vertex Set: Vg
map <MilanLongInt, MilanLongInt> Ghost2LocalMap; //Map each ghost vertex to a local vertex map<MilanLongInt, MilanLongInt> Ghost2LocalMap; //Map each ghost vertex to a local vertex
// index that starts with zero to |Vg| - 1 // index that starts with zero to |Vg| - 1
map<MilanLongInt, MilanLongInt>::iterator storedAlready; map<MilanLongInt, MilanLongInt>::iterator storedAlready;
vector <MilanLongInt> Counter; //Store the edge count for each ghost vertex vector<MilanLongInt> Counter; //Store the edge count for each ghost vertex
MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe = 0; //Number of Ghost vertices MilanLongInt numGhostVertices = 0, numGhostEdges = 0, insertMe=0; //Number of Ghost vertices
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")About to compute Ghost Vertices..."; fflush(stdout); cout<<"\n("<<myRank<<")About to compute Ghost Vertices..."; fflush(stdout);
#endif #endif
@ -169,160 +152,71 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
if (myRank == 0) cout<<"\n("<<myRank<<")About to compute Ghost Vertices..."; fflush(stdout); if (myRank == 0) cout<<"\n("<<myRank<<")About to compute Ghost Vertices..."; fflush(stdout);
#endif #endif
/* for ( i=0; i<NLEdge; i++ ) { //O(m) - Each edge stored twice
* OMP Ghost2LocalInitialization insertMe = verLocInd[i];
* The cycle analyzes all the edges and when finds a ghost edge //cout<<"InsertMe on Process "<<myRank<<" is: "<<insertMe<<endl;
* puts it in the Ghost2LocalMap. if ( (insertMe < StartIndex) || (insertMe > EndIndex) ) { //Find a ghost
* A critical region is needed when inserting data in the map. storedAlready = Ghost2LocalMap.find( insertMe );
* if ( storedAlready != Ghost2LocalMap.end() ) { //Has already been added
* Despite the critical region it is still productive to //cout<<"Process "<<myRank<<" found: "<<storedAlready->first<<" - "<<storedAlready->second<<endl;
* parallelize this for because the critical region is executed Counter[storedAlready->second]++; //Increment the counter
* only when a ghost edge is found and ghost edges are a minority. numGhostEdges++;
*/ } else { //Insert an entry for the ghost:
//cout<<"Process "<<myRank<<" * New insert: Key="<<insertMe<< " : Value="<<numGhostVertices<<endl;
//Define Adjacency Lists for Ghost Vertices: Ghost2LocalMap[insertMe] = numGhostVertices; //Add a map entry
//cout<<"Building Ghost data structures ... \n\n"; Counter.push_back(1); //Initialize the counter
vector <MilanLongInt> verGhostPtr, verGhostInd, tempCounter; numGhostEdges++;
//Mate array for ghost vertices: numGhostVertices++; //Increment the number of ghost vertices
vector <MilanLongInt> GMate; //Proportional to the number of ghost vertices } //End of else()
} //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
#ifdef TIME_TRACKER } //End of for(ghost vertices)
double Ghost2LocalInitialization = MPI_Wtime();
#endif
//#define OMP
#ifdef OMP
#pragma omp parallel private(insertMe, k, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4)
{
#endif
//printf("Id %d\n", omp_get_thread_num());
#ifdef OMP
#pragma omp for
#endif
for (i = 0; i < NLEdge; i++) { //O(m) - Each edge stored twice
insertMe = verLocInd[i];
//cout<<"InsertMe on Process "<<myRank<<" is: "<<insertMe<<endl;
if ((insertMe < StartIndex) || (insertMe > EndIndex)) { //Find a ghost
#ifdef OMP
#pragma omp critical
{
#endif
numGhostEdges++;
storedAlready = Ghost2LocalMap.find(insertMe);
if (storedAlready != Ghost2LocalMap.end()) { //Has already been added
//cout<<"Process "<<myRank<<" found: "<<storedAlready->first<<" - "<<storedAlready->second<<endl;
Counter[storedAlready->second]++; //Increment the counter
} else { //Insert an entry for the ghost:
//cout<<"Process "<<myRank<<" * New insert: Key="<<insertMe<< " : Value="<<numGhostVertices<<endl;
Ghost2LocalMap[insertMe] = numGhostVertices; //Add a map entry
Counter.push_back(1); //Initialize the counter
numGhostVertices++; //Increment the number of ghost vertices
} //End of else()
#ifdef OMP
}
#endif
} //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
} //End of for(ghost vertices)
#ifdef OMP
#pragma omp single
{
#endif
#ifdef TIME_TRACKER
Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization;
fprintf(stderr, "Ghost2LocalInitialization time: %f\n", Ghost2LocalInitialization);
#endif
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")NGhosts:" << numGhostVertices << " GhostEdges: "<<numGhostEdges; cout<<"\n("<<myRank<<")NGhosts:" << numGhostVertices << " GhostEdges: "<<numGhostEdges;
if (!Ghost2LocalMap.empty()) { if (!Ghost2LocalMap.empty()) {
cout<<"\n("<<myRank<<")Final Map : on process "; cout<<"\n("<<myRank<<")Final Map : on process ";
cout<<"\n("<<myRank<<")Key \t Value \t Counter \n"; fflush(stdout); cout<<"\n("<<myRank<<")Key \t Value \t Counter \n"; fflush(stdout);
storedAlready = Ghost2LocalMap.begin(); storedAlready = Ghost2LocalMap.begin();
do { do {
cout<<storedAlready->second<<" - "<<storedAlready->first<<" : "<<Counter[storedAlready->second]<<endl; cout<<storedAlready->second<<" - "<<storedAlready->first<<" : "<<Counter[storedAlready->second]<<endl;
fflush(stdout); fflush(stdout);
storedAlready++; storedAlready++;
} while ( storedAlready != Ghost2LocalMap.end() ); } while ( storedAlready != Ghost2LocalMap.end() );
} }
#endif #endif
//Build Adjacency Lists for Ghost Vertices:
//Initialize adjacency Lists for Ghost Vertices: //cout<<"Building Ghost data structures ... \n\n";
try { vector<MilanLongInt> verGhostPtr, verGhostInd, tempCounter;
verGhostPtr.reserve(numGhostVertices + 1); //Pointer Vector //Mate array for ghost vertices:
tempCounter.reserve(numGhostVertices); //Pointer Vector vector<MilanLongInt> GMate; //Proportional to the number of ghost vertices
verGhostInd.reserve(numGhostEdges); //Index Vector try {
GMate.reserve(numGhostVertices); //Ghost Mate Vector verGhostPtr.reserve(numGhostVertices+1); //Pointer Vector
} catch (length_error) { tempCounter.reserve(numGhostVertices); //Pointer Vector
cout << "Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n"; verGhostInd.reserve(numGhostEdges); //Index Vector
cout << "Not enough memory to allocate the internal variables \n"; GMate.reserve(numGhostVertices); //Ghost Mate Vector
exit(1); } catch ( length_error ) {
} cout<<"Error in function algoDistEdgeApproxDominatingEdgesLinearSearch: \n";
//Initialize the Vectors: cout<<"Not enough memory to allocate the internal variables \n";
verGhostPtr.resize(numGhostVertices + 1, 0); //Pointer Vector exit(1);
tempCounter.resize(numGhostVertices, 0); //Temporary Counter }
verGhostInd.resize(numGhostEdges, -1); //Index Vector //Initialize the Vectors:
GMate.resize(numGhostVertices, -1); //Temporary Counter verGhostPtr.resize(numGhostVertices+1, 0); //Pointer Vector
verGhostPtr[0] = 0; //The first value tempCounter.resize(numGhostVertices, 0); //Temporary Counter
verGhostInd.resize(numGhostEdges, -1); //Index Vector
GMate.resize(numGhostVertices, -1); //Temporary Counter
verGhostPtr[0] = 0; //The first value
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Ghost Vertex Pointer: "; fflush(stdout); cout<<"\n("<<myRank<<")Ghost Vertex Pointer: "; fflush(stdout);
#endif
#ifdef TIME_TRACKER
double verGhostPtrInitialization = MPI_Wtime();
#endif
#ifdef OMP
}
#endif
/*
* OMP verGhostPtrInitialization
*
*/
#ifdef OMP
#pragma omp for nowait
#endif #endif
for (i = 0; i < numGhostVertices; i++) { //O(|Ghost Vertices|) for ( i=0; i<numGhostVertices; i++ ) { //O(|Ghost Vertices|)
verGhostPtr[i + 1] = verGhostPtr[i] + Counter[i]; verGhostPtr[i+1] = verGhostPtr[i] + Counter[i];
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<verGhostPtr[i]<<"\t"; fflush(stdout); cout<<verGhostPtr[i]<<"\t"; fflush(stdout);
#endif
}
#ifdef TIME_TRACKER
verGhostPtrInitialization = MPI_Wtime() - verGhostPtrInitialization;
fprintf(stderr, "verGhostPtrInitialization time: %f\n", verGhostPtrInitialization);
#endif #endif
}
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
if ( numGhostVertices > 0 ) if ( numGhostVertices > 0 )
cout<<verGhostPtr[numGhostVertices]<<"\n"; cout<<verGhostPtr[numGhostVertices]<<"\n";
fflush(stdout); fflush(stdout);
#endif
/*
* OMP verGhostIndInitialization
*
* In this loop verGhostInd is initialized
* with the data related to ghost edges.
* The check to see if a node is a ghost node is
* executed in parallel, and when a ghost node
* is found a critical region is entered.
*
* Despite the critical region it's still useful to
* parallelize the loop because the ghost nodes
* are a minority, hence the critical region is executed
* only a few times.
*/
#ifdef TIME_TRACKER
double verGhostIndInitialization = MPI_Wtime();
#endif
#ifdef OMP
#pragma omp for
#endif #endif
for ( v=0; v < NLVer; v++ ) { for ( v=0; v < NLVer; v++ ) {
adj1 = verLocPtr[v]; //Vertex Pointer adj1 = verLocPtr[v]; //Vertex Pointer
@ -330,29 +224,13 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
for( k = adj1; k < adj2; k++ ) { for( k = adj1; k < adj2; k++ ) {
w = verLocInd[k]; //Get the adjacent vertex w = verLocInd[k]; //Get the adjacent vertex
if ( (w < StartIndex) || (w > EndIndex) ) { //Find a ghost if ( (w < StartIndex) || (w > EndIndex) ) { //Find a ghost
#ifdef OMP insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert
#pragma omp critical verGhostInd[insertMe] = v+StartIndex; //Add the adjacency
{ tempCounter[Ghost2LocalMap[w]]++; //Increment the counter
#endif
insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert
verGhostInd[insertMe] = v + StartIndex; //Add the adjacency
tempCounter[Ghost2LocalMap[w]]++; //Increment the counter
#ifdef OMP
}
#endif
} //End of if((w < StartIndex) || (w > EndIndex)) } //End of if((w < StartIndex) || (w > EndIndex))
} //End of for(k) } //End of for(k)
} //End of for (v) } //End of for (v)
tempCounter.clear(); //Do not need this any more tempCounter.clear(); //Do not need this any more
#ifdef OMP
} //end of parallel region
#endif
#ifdef TIME_TRACKER
verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization;
fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization);
#endif
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Ghost Vertex Index: "; cout<<"\n("<<myRank<<")Ghost Vertex Index: ";
for ( v=0; v < numGhostEdges; v++ ) for ( v=0; v < numGhostEdges; v++ )

@ -2,6 +2,6 @@ make all
cd samples/advanced/pdegen cd samples/advanced/pdegen
make amg_d_pde3d make amg_d_pde3d
cd runs cd runs
mpirun -np 2 amg_d_pde3d amg_pde3d.inp mpirun -np 4 amg_d_pde3d amg_pde3d.inp

Loading…
Cancel
Save