|
|
|
@ -424,22 +424,15 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
|
|
|
|
|
//Compute the Initial Matching Set:
|
|
|
|
|
|
|
|
|
|
S = numGhostVertices; //Initialize S with number of Ghost Vertices
|
|
|
|
|
} // end of single region
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* OMP PARALLEL_COMPUTE_CANDIDATE_MATE_B
|
|
|
|
|
* The next portion of code has been split
|
|
|
|
|
* to make it 100% parallelized
|
|
|
|
|
*
|
|
|
|
|
* TODO: I think it diminishes the cache update, does it?
|
|
|
|
|
*
|
|
|
|
|
* TODO: would it make any sense to parallelize also the
|
|
|
|
|
* inner for?
|
|
|
|
|
* It is actually not possible to parallelize this cycle
|
|
|
|
|
* as it is.
|
|
|
|
|
*
|
|
|
|
|
* TODO: we have false sharing on candidateMate
|
|
|
|
|
* TODO: think about how it could be parallelized
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#pragma omp for
|
|
|
|
|
for ( v=0; v < NLVer; v++ ) {
|
|
|
|
|
#ifdef PRINT_DEBUG_INFO_
|
|
|
|
|
cout<<"\n("<<myRank<<")Processing: "<<v+StartIndex<<endl; fflush(stdout);
|
|
|
|
@ -468,30 +461,12 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
|
|
|
|
|
candidateMate[v] = w;
|
|
|
|
|
//End: PARALLEL_COMPUTE_CANDIDATE_MATE_B(v)
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
TODO this cycle has a lot of margin of improvement!!!!
|
|
|
|
|
This current version introduces some errors.
|
|
|
|
|
1 - roll back to the previous version and check if it is
|
|
|
|
|
100% stable
|
|
|
|
|
2 - if the previous version was stable, all right; if not,
|
|
|
|
|
that's a big deal
|
|
|
|
|
3 - reimplement step by step to check from where the instability
|
|
|
|
|
comes from
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#pragma omp for reduction(+: msgInd, NumMessagesBundled, myCard, PCounter[:numProcs])
|
|
|
|
|
for ( v=0; v < NLVer; v++ ) {
|
|
|
|
|
|
|
|
|
|
//Start: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
|
|
|
|
|
|
|
|
|
|
#ifdef PRINT_DEBUG_INFO_
|
|
|
|
|
cout<<"\n("<<myRank<<")Processing: "<<v+StartIndex<<endl; fflush(stdout);
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
w = candidateMate[v];
|
|
|
|
|
|
|
|
|
|
#ifdef PRINT_DEBUG_INFO_
|
|
|
|
|
cout<<"\n("<<myRank<<")"<<v+StartIndex<<" Points to: "<<w; fflush(stdout);
|
|
|
|
|
#endif
|
|
|
|
@ -514,9 +489,8 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
|
|
|
|
|
msgInd++;
|
|
|
|
|
NumMessagesBundled++;
|
|
|
|
|
ghostOwner = findOwnerOfGhost(w, verDistance, myRank, numProcs);
|
|
|
|
|
PCounter[ghostOwner]++; //TODO maybe reduction?
|
|
|
|
|
#pragma omp critical
|
|
|
|
|
{
|
|
|
|
|
PCounter[ghostOwner]++;
|
|
|
|
|
|
|
|
|
|
QLocalVtx.push_back(v + StartIndex);
|
|
|
|
|
QGhostVtx.push_back(w);
|
|
|
|
|
QMsgType.push_back(REQUEST);
|
|
|
|
@ -550,13 +524,11 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
|
|
|
|
|
} //End of if Counter[w] > 0
|
|
|
|
|
//End: PARALLEL_PROCESS_CROSS_EDGE_B(v)
|
|
|
|
|
} //End of if CandidateMate[w] = v
|
|
|
|
|
} // end of critical region
|
|
|
|
|
} //End of if a Ghost Vertex
|
|
|
|
|
else { // w is a local vertex
|
|
|
|
|
|
|
|
|
|
if (candidateMate[w - StartIndex] == (v + StartIndex)) {
|
|
|
|
|
#pragma omp critical
|
|
|
|
|
{
|
|
|
|
|
|
|
|
|
|
Mate[v] = w; //v is local
|
|
|
|
|
Mate[w - StartIndex] = v + StartIndex; //w is local
|
|
|
|
|
//Q.push_back(u);
|
|
|
|
@ -566,7 +538,7 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
|
|
|
|
|
#ifdef PRINT_DEBUG_INFO_
|
|
|
|
|
cout<<"\n("<<myRank<<")MATCH: ("<<v+StartIndex<<","<<w<<") "; fflush(stdout);
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
} //End of if ( candidateMate[w-StartIndex] == (v+StartIndex) )
|
|
|
|
|
} //End of Else
|
|
|
|
|
} //End of if(w >=0)
|
|
|
|
@ -607,12 +579,12 @@ void dalgoDistEdgeApproxDomEdgesLinearSearchMesgBndlSmallMateCMP(
|
|
|
|
|
} //End of for loop
|
|
|
|
|
} // End of Else: w == -1
|
|
|
|
|
//End: PARALLEL_PROCESS_EXPOSED_VERTEX_B(v)
|
|
|
|
|
//} // end of critical
|
|
|
|
|
} //End of for ( v=0; v < NLVer; v++ )
|
|
|
|
|
|
|
|
|
|
} // end of single region
|
|
|
|
|
} // end of parallel region
|
|
|
|
|
|
|
|
|
|
tempCounter.clear(); //Do not need this any more
|
|
|
|
|
//} // end of parallel region
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef PRINT_DEBUG_INFO_
|
|
|
|
|