The OMP and MPI versions are now separated into two different files

omp-walther
StefanoPetrilli 3 years ago
parent 0a8debe43a
commit 76e04ee997

@ -1,4 +1,3 @@
AMG4PSBLAS AMG4PSBLAS
Algebraic Multigrid Package based on PSBLAS (Parallel Sparse BLAS version 3.7) Algebraic Multigrid Package based on PSBLAS (Parallel Sparse BLAS version 3.7)

@ -62,7 +62,8 @@ amg_s_parmatch_smth_bld.o \
amg_s_parmatch_spmm_bld_inner.o amg_s_parmatch_spmm_bld_inner.o
MPCOBJS=MatchBoxPC.o \ MPCOBJS=MatchBoxPC.o \
algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.o algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC.o \
algoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP.o
OBJS = $(FOBJS) $(MPCOBJS) OBJS = $(FOBJS) $(MPCOBJS)

@ -66,11 +66,22 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge,
myRank,NLVer, NLEdge,verDistance[0],verDistance[1]); myRank,NLVer, NLEdge,verDistance[0],verDistance[1]);
#endif #endif
#define TIME_TRACKER #define TIME_TRACKER
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
double tmr = MPI_Wtime(); double tmr = MPI_Wtime();
#endif #endif
#define OMP
#ifdef OMP
dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(NLVer, NLEdge,
verLocPtr, verLocInd, edgeLocWeight,
verDistance, Mate,
myRank, numProcs, C_comm,
msgIndSent, msgActualSent, msgPercent,
ph0_time, ph1_time, ph2_time,
ph1_card, ph2_card );
#else
dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(NLVer, NLEdge, dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(NLVer, NLEdge,
verLocPtr, verLocInd, edgeLocWeight, verLocPtr, verLocInd, edgeLocWeight,
verDistance, Mate, verDistance, Mate,
@ -78,10 +89,12 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge,
msgIndSent, msgActualSent, msgPercent, msgIndSent, msgActualSent, msgPercent,
ph0_time, ph1_time, ph2_time, ph0_time, ph1_time, ph2_time,
ph1_card, ph2_card ); ph1_card, ph2_card );
#endif
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
tmr = MPI_Wtime() - tmr; tmr = MPI_Wtime() - tmr;
fprintf(stderr, "Elaboration time: %f for $ld\n", tmr, NLEdge); fprintf(stderr, "Elaboration time: %f for %ld nodes\n", tmr, NLVer);
#endif #endif
#endif #endif

@ -152,6 +152,17 @@ extern "C" {
inline MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance, inline MilanInt findOwnerOfGhost(MilanLongInt vtxIndex, MilanLongInt *mVerDistance,
MilanInt myRank, MilanInt numProcs); MilanInt myRank, MilanInt numProcs);
// Variant of dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC that
// dMatchBoxPC calls, with the same argument list, when OMP is defined.
// NOTE(review): the meaning of each parameter is presumably identical to the
// non-OMP routine -- confirm against that routine's documentation.
void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP
(
MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt* verLocPtr, MilanLongInt* verLocInd, MilanReal* edgeLocWeight,
MilanLongInt* verDistance,
MilanLongInt* Mate,
MilanInt myRank, MilanInt numProcs, MPI_Comm comm,
MilanLongInt* msgIndSent, MilanLongInt* msgActualSent, MilanReal* msgPercent,
MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time,
MilanLongInt* ph1_card, MilanLongInt* ph2_card );
void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC
( (
MilanLongInt NLVer, MilanLongInt NLEdge, MilanLongInt NLVer, MilanLongInt NLEdge,

@ -1,6 +1,4 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#include <omp.h>
#include <stdio.h>
// *********************************************************************** // ***********************************************************************
// //
// MatchboxP: A C++ library for approximate weighted matching // MatchboxP: A C++ library for approximate weighted matching
@ -94,21 +92,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
MilanReal* msgPercent, MilanReal* msgPercent,
MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time, MilanReal* ph0_time, MilanReal* ph1_time, MilanReal* ph2_time,
MilanLongInt* ph1_card, MilanLongInt* ph2_card ) { MilanLongInt* ph1_card, MilanLongInt* ph2_card ) {
/*
* verDistance: a vector as long as the number of processors.
* verDistance[i] contains the first node index of the i-th processor
* verDistance[i + 1] contains the last node index of the i-th processor
* NLVer: number of elements in the LocPtr
* NLEdge: number of edges assigned to the current processor
*
* verLocInd and verLocPtr together contain the portion of the matrix
* assigned to the processor, in Yale notation:
* verLocInd: contains the positions of the elements within each row of the matrix
* verLocPtr: the i-th value is the position of the first element of the i-th row and
* the (i+1)-th value is the position of the first element of the (i+1)-th row
*/
#if !defined(SERIAL_MPI) #if !defined(SERIAL_MPI)
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Within algoEdgeApproxDominatingEdgesLinearSearchMessageBundling()"; fflush(stdout); cout<<"\n("<<myRank<<")Within algoEdgeApproxDominatingEdgesLinearSearchMessageBundling()"; fflush(stdout);
@ -169,72 +152,24 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
if (myRank == 0) cout<<"\n("<<myRank<<")About to compute Ghost Vertices..."; fflush(stdout); if (myRank == 0) cout<<"\n("<<myRank<<")About to compute Ghost Vertices..."; fflush(stdout);
#endif #endif
/*
* OMP Ghost2LocalInitialization
* The loop scans all the edges and, when it finds a ghost edge,
* puts it in the Ghost2LocalMap.
* A critical region is needed when inserting data in the map.
*
* Despite the critical region, it is still worthwhile to
* parallelize this for loop, because the critical region is executed
* only when a ghost edge is found, and ghost edges are a minority.
*/
//Define Adjacency Lists for Ghost Vertices:
//cout<<"Building Ghost data structures ... \n\n";
vector <MilanLongInt> verGhostPtr, verGhostInd, tempCounter;
//Mate array for ghost vertices:
vector <MilanLongInt> GMate; //Proportional to the number of ghost vertices
#ifdef TIME_TRACKER
double Ghost2LocalInitialization = MPI_Wtime();
#endif
//#define OMP
#ifdef OMP
#pragma omp parallel private(insertMe, k, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared) num_threads(4)
{
#endif
//printf("Id %d\n", omp_get_thread_num());
#ifdef OMP
#pragma omp for
#endif
for ( i=0; i<NLEdge; i++ ) { //O(m) - Each edge stored twice for ( i=0; i<NLEdge; i++ ) { //O(m) - Each edge stored twice
insertMe = verLocInd[i]; insertMe = verLocInd[i];
//cout<<"InsertMe on Process "<<myRank<<" is: "<<insertMe<<endl; //cout<<"InsertMe on Process "<<myRank<<" is: "<<insertMe<<endl;
if ( (insertMe < StartIndex) || (insertMe > EndIndex) ) { //Find a ghost if ( (insertMe < StartIndex) || (insertMe > EndIndex) ) { //Find a ghost
#ifdef OMP
#pragma omp critical
{
#endif
numGhostEdges++;
storedAlready = Ghost2LocalMap.find( insertMe ); storedAlready = Ghost2LocalMap.find( insertMe );
if ( storedAlready != Ghost2LocalMap.end() ) { //Has already been added if ( storedAlready != Ghost2LocalMap.end() ) { //Has already been added
//cout<<"Process "<<myRank<<" found: "<<storedAlready->first<<" - "<<storedAlready->second<<endl; //cout<<"Process "<<myRank<<" found: "<<storedAlready->first<<" - "<<storedAlready->second<<endl;
Counter[storedAlready->second]++; //Increment the counter Counter[storedAlready->second]++; //Increment the counter
numGhostEdges++;
} else { //Insert an entry for the ghost: } else { //Insert an entry for the ghost:
//cout<<"Process "<<myRank<<" * New insert: Key="<<insertMe<< " : Value="<<numGhostVertices<<endl; //cout<<"Process "<<myRank<<" * New insert: Key="<<insertMe<< " : Value="<<numGhostVertices<<endl;
Ghost2LocalMap[insertMe] = numGhostVertices; //Add a map entry Ghost2LocalMap[insertMe] = numGhostVertices; //Add a map entry
Counter.push_back(1); //Initialize the counter Counter.push_back(1); //Initialize the counter
numGhostEdges++;
numGhostVertices++; //Increment the number of ghost vertices numGhostVertices++; //Increment the number of ghost vertices
} //End of else() } //End of else()
#ifdef OMP
}
#endif
} //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) ) } //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
} //End of for(ghost vertices) } //End of for(ghost vertices)
#ifdef OMP
#pragma omp single
{
#endif
#ifdef TIME_TRACKER
Ghost2LocalInitialization = MPI_Wtime() - Ghost2LocalInitialization;
fprintf(stderr, "Ghost2LocalInitialization time: %f\n", Ghost2LocalInitialization);
#endif
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")NGhosts:" << numGhostVertices << " GhostEdges: "<<numGhostEdges; cout<<"\n("<<myRank<<")NGhosts:" << numGhostVertices << " GhostEdges: "<<numGhostEdges;
if (!Ghost2LocalMap.empty()) { if (!Ghost2LocalMap.empty()) {
@ -248,8 +183,11 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
} while ( storedAlready != Ghost2LocalMap.end() ); } while ( storedAlready != Ghost2LocalMap.end() );
} }
#endif #endif
//Build Adjacency Lists for Ghost Vertices:
//Initialize adjacency Lists for Ghost Vertices: //cout<<"Building Ghost data structures ... \n\n";
vector<MilanLongInt> verGhostPtr, verGhostInd, tempCounter;
//Mate array for ghost vertices:
vector<MilanLongInt> GMate; //Proportional to the number of ghost vertices
try { try {
verGhostPtr.reserve(numGhostVertices+1); //Pointer Vector verGhostPtr.reserve(numGhostVertices+1); //Pointer Vector
tempCounter.reserve(numGhostVertices); //Pointer Vector tempCounter.reserve(numGhostVertices); //Pointer Vector
@ -268,22 +206,6 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
verGhostPtr[0] = 0; //The first value verGhostPtr[0] = 0; //The first value
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Ghost Vertex Pointer: "; fflush(stdout); cout<<"\n("<<myRank<<")Ghost Vertex Pointer: "; fflush(stdout);
#endif
#ifdef TIME_TRACKER
double verGhostPtrInitialization = MPI_Wtime();
#endif
#ifdef OMP
}
#endif
/*
* OMP verGhostPtrInitialization
*
*/
#ifdef OMP
#pragma omp for nowait
#endif #endif
for ( i=0; i<numGhostVertices; i++ ) { //O(|Ghost Vertices|) for ( i=0; i<numGhostVertices; i++ ) { //O(|Ghost Vertices|)
verGhostPtr[i+1] = verGhostPtr[i] + Counter[i]; verGhostPtr[i+1] = verGhostPtr[i] + Counter[i];
@ -291,38 +213,10 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
cout<<verGhostPtr[i]<<"\t"; fflush(stdout); cout<<verGhostPtr[i]<<"\t"; fflush(stdout);
#endif #endif
} }
#ifdef TIME_TRACKER
verGhostPtrInitialization = MPI_Wtime() - verGhostPtrInitialization;
fprintf(stderr, "verGhostPtrInitialization time: %f\n", verGhostPtrInitialization);
#endif
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
if ( numGhostVertices > 0 ) if ( numGhostVertices > 0 )
cout<<verGhostPtr[numGhostVertices]<<"\n"; cout<<verGhostPtr[numGhostVertices]<<"\n";
fflush(stdout); fflush(stdout);
#endif
/*
* OMP verGhostIndInitialization
*
* In this loop verGhostInd is initialized
* with the data related to ghost edges.
* The check to see whether a node is a ghost node is
* executed in parallel, and when a ghost node
* is found a critical region is entered.
*
* Despite the critical region, it is still useful to
* parallelize the for loop, because the ghost nodes
* are a minority and hence the critical region is executed
* only a few times.
*/
#ifdef TIME_TRACKER
double verGhostIndInitialization = MPI_Wtime();
#endif
#ifdef OMP
#pragma omp for
#endif #endif
for ( v=0; v < NLVer; v++ ) { for ( v=0; v < NLVer; v++ ) {
adj1 = verLocPtr[v]; //Vertex Pointer adj1 = verLocPtr[v]; //Vertex Pointer
@ -330,29 +224,13 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
for( k = adj1; k < adj2; k++ ) { for( k = adj1; k < adj2; k++ ) {
w = verLocInd[k]; //Get the adjacent vertex w = verLocInd[k]; //Get the adjacent vertex
if ( (w < StartIndex) || (w > EndIndex) ) { //Find a ghost if ( (w < StartIndex) || (w > EndIndex) ) { //Find a ghost
#ifdef OMP
#pragma omp critical
{
#endif
insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert
verGhostInd[insertMe] = v+StartIndex; //Add the adjacency verGhostInd[insertMe] = v+StartIndex; //Add the adjacency
tempCounter[Ghost2LocalMap[w]]++; //Increment the counter tempCounter[Ghost2LocalMap[w]]++; //Increment the counter
#ifdef OMP
}
#endif
} //End of if((w < StartIndex) || (w > EndIndex)) } //End of if((w < StartIndex) || (w > EndIndex))
} //End of for(k) } //End of for(k)
} //End of for (v) } //End of for (v)
tempCounter.clear(); //Do not need this any more tempCounter.clear(); //Do not need this any more
#ifdef OMP
} //end of parallel region
#endif
#ifdef TIME_TRACKER
verGhostIndInitialization = MPI_Wtime() - verGhostIndInitialization;
fprintf(stderr, "verGhostIndInitialization time: %f\n", verGhostIndInitialization);
#endif
#ifdef PRINT_DEBUG_INFO_ #ifdef PRINT_DEBUG_INFO_
cout<<"\n("<<myRank<<")Ghost Vertex Index: "; cout<<"\n("<<myRank<<")Ghost Vertex Index: ";
for ( v=0; v < numGhostEdges; v++ ) for ( v=0; v < numGhostEdges; v++ )

@ -2,6 +2,6 @@ make all
cd samples/advanced/pdegen cd samples/advanced/pdegen
make amg_d_pde3d make amg_d_pde3d
cd runs cd runs
mpirun -np 2 amg_d_pde3d amg_pde3d.inp mpirun -np 4 amg_d_pde3d amg_pde3d.inp

Loading…
Cancel
Save