verGhostIndInitialization and Ghost2LocalInitialization cycles parallelization

omp-walther
StefanoPetrilli 3 years ago
parent 1760afbe97
commit 7d40fde21d

@ -4,7 +4,7 @@ INCDIR=../../../include
MODDIR=../../../modules MODDIR=../../../modules
HERE=../.. HERE=../..
FINCLUDES=$(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(PSBLAS_INCLUDES) FINCLUDES=$(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(PSBLAS_INCLUDES) -fopenmp
CXXINCLUDES=$(FMFLAG)$(HERE) $(FMFLAG)$(INCDIR) $(FMFLAG)/. CXXINCLUDES=$(FMFLAG)$(HERE) $(FMFLAG)$(INCDIR) $(FMFLAG)/.
#CINCLUDES= -I${SUPERLU_INCDIR} -I${HSL_INCDIR} -I${SPRAL_INCDIR} -I/home/users/pasqua/Ambra/BootCMatch/include -lBCM -L/home/users/pasqua/Ambra/BootCMatch/lib -lm #CINCLUDES= -I${SUPERLU_INCDIR} -I${HSL_INCDIR} -I${SPRAL_INCDIR} -I/home/users/pasqua/Ambra/BootCMatch/include -lBCM -L/home/users/pasqua/Ambra/BootCMatch/lib -lm

@ -60,13 +60,12 @@ void dMatchBoxPC(MilanLongInt NLVer, MilanLongInt NLEdge,
MilanLongInt* ph1_card, MilanLongInt* ph2_card ) { MilanLongInt* ph1_card, MilanLongInt* ph2_card ) {
#if !defined(SERIAL_MPI) #if !defined(SERIAL_MPI)
MPI_Comm C_comm=MPI_Comm_f2c(icomm); MPI_Comm C_comm=MPI_Comm_f2c(icomm);
#ifdef DEBUG #ifdef DEBUG
fprintf(stderr,"MatchBoxPC: rank %d nlver %ld nledge %ld [ %ld %ld ]\n", fprintf(stderr,"MatchBoxPC: rank %d nlver %ld nledge %ld [ %ld %ld ]\n",
myRank,NLVer, NLEdge,verDistance[0],verDistance[1]); myRank,NLVer, NLEdge,verDistance[0],verDistance[1]);
#endif #endif
#ifdef #IE
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
double tmr = MPI_Wtime(); double tmr = MPI_Wtime();
#endif #endif

@ -1,4 +1,6 @@
#include "MatchBoxPC.h" #include "MatchBoxPC.h"
#include <omp.h>
#include <stdio.h>
// *********************************************************************** // ***********************************************************************
// //
// MatchboxP: A C++ library for approximate weighted matching // MatchboxP: A C++ library for approximate weighted matching
@ -167,25 +169,40 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
if (myRank == 0) cout<<"\n("<<myRank<<")About to compute Ghost Vertices..."; fflush(stdout); if (myRank == 0) cout<<"\n("<<myRank<<")About to compute Ghost Vertices..."; fflush(stdout);
#endif #endif
/*
* OMP Ghost2LocalInitialization
* The loop scans all the edges and, when it finds a ghost edge,
* puts it in the Ghost2LocalMap.
* A critical region is needed when inserting data in the map.
*
* Despite the critical region it is still productive to
* parallelize this loop because the critical region is executed
* only when a ghost edge is found and ghost edges are a minority.
*/
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
double Ghost2LocalInitialization = MPI_Wtime(); double Ghost2LocalInitialization = MPI_Wtime();
#endif #endif
#pragma omp parallel for private(insertMe) firstprivate(StartIndex, EndIndex) default(shared)
for ( i=0; i<NLEdge; i++ ) { //O(m) - Each edge stored twice for ( i=0; i<NLEdge; i++ ) { //O(m) - Each edge stored twice
insertMe = verLocInd[i]; insertMe = verLocInd[i];
//cout<<"InsertMe on Process "<<myRank<<" is: "<<insertMe<<endl; //cout<<"InsertMe on Process "<<myRank<<" is: "<<insertMe<<endl;
if ( (insertMe < StartIndex) || (insertMe > EndIndex) ) { //Find a ghost if ( (insertMe < StartIndex) || (insertMe > EndIndex) ) { //Find a ghost
#pragma omp critical
{
numGhostEdges++;
storedAlready = Ghost2LocalMap.find(insertMe); storedAlready = Ghost2LocalMap.find(insertMe);
if (storedAlready != Ghost2LocalMap.end()) { //Has already been added if (storedAlready != Ghost2LocalMap.end()) { //Has already been added
//cout<<"Process "<<myRank<<" found: "<<storedAlready->first<<" - "<<storedAlready->second<<endl; //cout<<"Process "<<myRank<<" found: "<<storedAlready->first<<" - "<<storedAlready->second<<endl;
Counter[storedAlready->second]++; //Increment the counter Counter[storedAlready->second]++; //Increment the counter
numGhostEdges++;
} else { //Insert an entry for the ghost: } else { //Insert an entry for the ghost:
//cout<<"Process "<<myRank<<" * New insert: Key="<<insertMe<< " : Value="<<numGhostVertices<<endl; //cout<<"Process "<<myRank<<" * New insert: Key="<<insertMe<< " : Value="<<numGhostVertices<<endl;
Ghost2LocalMap[insertMe] = numGhostVertices; //Add a map entry Ghost2LocalMap[insertMe] = numGhostVertices; //Add a map entry
Counter.push_back(1); //Initialize the counter Counter.push_back(1); //Initialize the counter
numGhostEdges++;
numGhostVertices++; //Increment the number of ghost vertices numGhostVertices++; //Increment the number of ghost vertices
} //End of else() } //End of else()
}
} //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) ) } //End of if ( (insertMe < StartIndex) || (insertMe > EndIndex) )
} //End of for(ghost vertices) } //End of for(ghost vertices)
@ -243,19 +260,37 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateC(
fflush(stdout); fflush(stdout);
#endif #endif
/*
* OMP verGhostIndInitialization
*
* In this loop verGhostInd is initialized
* with the data related to ghost edges.
* The check to see if a node is a ghost node is
* executed in parallel, and when a ghost node
* is found a critical region is entered.
*
* Despite the critical region it's still useful to
* parallelize the loop because the ghost nodes
* are a minority, hence the critical region is executed
* only a few times.
*/
#ifdef TIME_TRACKER #ifdef TIME_TRACKER
double verGhostIndInitialization = MPI_Wtime(); double verGhostIndInitialization = MPI_Wtime();
#endif #endif
#pragma omp parallel for private(insertMe, k, adj1, adj2) firstprivate(StartIndex, EndIndex) default(shared)
for ( v=0; v < NLVer; v++ ) { for ( v=0; v < NLVer; v++ ) {
adj1 = verLocPtr[v]; //Vertex Pointer adj1 = verLocPtr[v]; //Vertex Pointer
adj2 = verLocPtr[v+1]; adj2 = verLocPtr[v+1];
for( k = adj1; k < adj2; k++ ) { for( k = adj1; k < adj2; k++ ) {
w = verLocInd[k]; //Get the adjacent vertex w = verLocInd[k]; //Get the adjacent vertex
if ( (w < StartIndex) || (w > EndIndex) ) { //Find a ghost if ( (w < StartIndex) || (w > EndIndex) ) { //Find a ghost
#pragma omp critical
{
insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert insertMe = verGhostPtr[Ghost2LocalMap[w]] + tempCounter[Ghost2LocalMap[w]]; //Where to insert
verGhostInd[insertMe] = v + StartIndex; //Add the adjacency verGhostInd[insertMe] = v + StartIndex; //Add the adjacency
tempCounter[Ghost2LocalMap[w]]++; //Increment the counter tempCounter[Ghost2LocalMap[w]]++; //Increment the counter
}
} //End of if((w < StartIndex) || (w > EndIndex)) } //End of if((w < StartIndex) || (w > EndIndex))
} //End of for(k) } //End of for(k)
} //End of for (v) } //End of for (v)

@ -0,0 +1,7 @@
make all
cd samples/advanced/pdegen
make amg_d_pde3d
cd runs
mpirun -np 8 amg_d_pde3d amg_pde3d.inp

@ -3,7 +3,7 @@ AMGINCDIR=$(AMGDIR)/include
include $(AMGINCDIR)/Make.inc.amg4psblas include $(AMGINCDIR)/Make.inc.amg4psblas
AMGMODDIR=$(AMGDIR)/modules AMGMODDIR=$(AMGDIR)/modules
AMGLIBDIR=$(AMGDIR)/lib AMGLIBDIR=$(AMGDIR)/lib
AMG_LIBS=-L$(AMGLIBDIR) -lpsb_krylov -lamg_prec -lpsb_prec AMG_LIBS=-L$(AMGLIBDIR) -lpsb_krylov -lamg_prec -lpsb_prec -llapack -lblas
FINCLUDES=$(FMFLAG). $(FMFLAG)$(AMGMODDIR) $(FMFLAG)$(AMGINCDIR) $(PSBLAS_INCLUDES) $(FIFLAG). FINCLUDES=$(FMFLAG). $(FMFLAG)$(AMGMODDIR) $(FMFLAG)$(AMGINCDIR) $(PSBLAS_INCLUDES) $(FIFLAG).
LINKOPT= LINKOPT=

Loading…
Cancel
Save