@ -1,4 +1,4 @@
// g++ -Wall -pedantic -std=c++17 -pthread kenobi.cpp -o kenobi
// g++ -Wall -pedantic -std=c++17 kenobi.cpp -o kenobi
# include <iostream>
# include <iostream>
# include <iomanip>
# include <iomanip>
# include <vector>
# include <vector>
@ -8,7 +8,6 @@
# include <list>
# include <list>
# include <stack>
# include <stack>
# include <set>
# include <set>
# include <thread>
# include <fstream> // getline
# include <fstream> // getline
# include <algorithm> // find
# include <algorithm> // find
# include <math.h> // ceil
# include <math.h> // ceil
@ -131,3 +130,162 @@ int FindActor(string name)
return id ;
return id ;
return - 1 ;
return - 1 ;
}
}
/*
 * Top-k closeness centrality on the actor graph (two actors are adjacent when
 * they share a film).
 *
 * Reference algorithm
 * -------------------
 *   Input : a graph G = (V, E)
 *   Output: the k vertices with the highest closeness (i.e. the smallest
 *           farness) together with their values
 *
 *     global L, Q <- computeBounds(G);
 *     global Top  <- [];
 *     global Farn;
 *     for v in V do Farn[v] = +inf;
 *     while Q is not empty do
 *         v <- Q.extractMin();
 *         if |Top| >= k and L[v] > Farn[Top[k]] then return Top;
 *         Farn[v] <- updateBounds(v);   // may also modify L
 *         add v to Top, and sort Top according to Farn;
 *         update Q according to the new bounds;
 *
 *   - Top lists the analysed vertices in increasing order of farness.
 *   - Q is a priority queue of the vertices not analysed yet, ordered by
 *     their lower bound L, so its head always has the smallest L.
 *   - computeBounds() provides a first bound L(v) for every vertex.
 *   - At each step the head v of Q is extracted. If L(v) is larger than the
 *     k-th smallest farness found so far we can stop, because every x in Q
 *     satisfies f(x) >= L(x) >= L(v) > f(Top[k]) and cannot enter the top k.
 *   - Otherwise updateBounds(v) runs a BFS from v, returns its farness and
 *     may improve the bounds of the other vertices.
 *
 * Conservative variant implemented here (chosen to fit a 16 GB laptop)
 * --------------------------------------------------------------------
 *   - computeBoundsDeg: L(v) = 0 for every v. Since L is never improved it is
 *     not stored, Q is not needed, and the vertices are simply visited in
 *     the iteration order of A.
 *   - updateBoundsBFSCut: L is left untouched; the BFS from a vertex is cut
 *     as soon as a lower bound on its farness reaches the k-th smallest
 *     farness found so far, because the vertex can then no longer improve
 *     the result. Cut vertices are reported as SKIPPED.
 *
 * Farness of a vertex v, with n = A.size(), r = number of vertices reachable
 * from v (v included) and S = sum of the BFS distances from v:
 *
 *     farness(v) = (n - 1) / (r - 1)^2 * S
 *
 * Lower bound used for the cut: r <= n gives (n - 1) / (r - 1)^2 >= 1 / (n - 1),
 * and the vertex just dequeued plus every vertex still in the queue lie at
 * distance >= the current one, so
 *
 *     farness(v) >= (S_so_far + (|queue| + 1) * distance) / (n - 1)
 *
 * Parameters
 *   k  number of actors to return; 0 yields an empty result.
 *
 * Returns
 *   At most k pairs (actor_id, farness) sorted by increasing farness.
 *   Vertices that reach nobody (r == 1) have no defined farness and are
 *   never part of the result.
 *
 * Side effects
 *   Prints one progress line per analysed actor on standard output.
 *
 * Throws
 *   std::out_of_range if the graph references an actor or film id that is
 *   missing from A / F (the lookups are read-only and never insert).
 */
vector<pair<int, double>> closeness(const size_t k) {
    vector<pair<int, double>> top_actors; // Each pair is (actor_id, farness), kept sorted by farness.

    // With k == 0 there is nothing to rank; returning here also keeps the
    // "k-th best" accesses below from indexing an empty vector.
    if (k == 0)
        return top_actors;

    // At most k + 1 entries exist at any time (one extra right before pop_back).
    top_actors.reserve(min<size_t>(k, A.size()) + 1);

    // Visited flags, indexed directly by actor id.
    // NOTE(review): MAX_ACTOR_ID is presumably the largest id in use (see the
    // info.txt comment in main), hence the + 1 so that id is a valid index -- confirm.
    vector<bool> enqueued(static_cast<size_t>(MAX_ACTOR_ID) + 1, false);

    // Ids whose flag was set by the current BFS. Clearing only these keeps a
    // cut BFS cheap instead of paying O(MAX_ACTOR_ID) for every source vertex.
    vector<int> touched;

    // n - 1: number of vertices other than the BFS source.
    const double other_vertices = static_cast<double>(A.size()) - 1.0;

    for (const auto& [actor_id, actor] : A) {
        // Undo the marks left by the previous BFS.
        for (const int id : touched)
            enqueued[id] = false;
        touched.clear();

        queue<pair<int, int>> q;     // FIFO of pairs (actor_id, distance from the source).
        int r = 0;                   // |R|, R being the set of vertices reached so far (source included).
        long long sum_distances = 0; // Sum of the distances of the vertices dequeued so far.
        bool skip = false;           // Set when the BFS is cut by the lower bound.

        q.push(make_pair(actor_id, 0));
        enqueued[actor_id] = true;
        touched.push_back(actor_id);

        while (!q.empty()) {
            const auto [bfs_actor_id, distance] = q.front();
            q.pop();

            // Once k candidates exist, try to prove that the source cannot beat
            // the current k-th best. The test runs for every vertex past the
            // source; the bound stays valid mid-level because everything still
            // queued is at distance >= `distance`.
            if (top_actors.size() == k && distance > 0) {
                const double pending_distances = static_cast<double>(q.size() + 1) * distance;
                const double farness_lower_bound =
                    (static_cast<double>(sum_distances) + pending_distances) / other_vertices;
                if (top_actors.back().second <= farness_lower_bound) { // Stop the BFS
                    skip = true;
                    break;
                }
            }

            // Account for the dequeued vertex.
            ++r;
            sum_distances += distance;

            // Enqueue every not-yet-seen co-star of bfs_actor_id, one level deeper.
            // .at() keeps the lookups read-only: operator[] would insert a
            // default entry into the container being iterated.
            for (const int bfs_film_id : A.at(bfs_actor_id).film_indices) {
                for (const int adj_actor_id : F.at(bfs_film_id).actor_indicies) {
                    if (!enqueued[adj_actor_id]) {
                        q.push(make_pair(adj_actor_id, distance + 1));
                        enqueued[adj_actor_id] = true;
                        touched.push_back(adj_actor_id);
                    }
                }
            }
        }

        if (skip) {
            cout << actor_id << " " << actor.name << " SKIPPED " << '\n';
            continue;
        }

        // An isolated vertex reaches only itself: the formula would be 0/0.
        if (r <= 1)
            continue;

        // BFS is over, we compute the farness.
        const double reachable_others = static_cast<double>(r) - 1.0;
        const double farness = other_vertices / (reachable_others * reachable_others) * sum_distances;

        // Sorted insert: place the actor before the first entry whose farness is >= its own,
        // then drop the worst entry if the list grew past k.
        const auto insert_pos = find_if(top_actors.begin(), top_actors.end(),
            [farness](const pair<int, double>& p) { return p.second >= farness; });
        if (top_actors.size() < k || insert_pos != top_actors.end()) {
            top_actors.insert(insert_pos, make_pair(actor_id, farness));
            if (top_actors.size() > k)
                top_actors.pop_back();
        }

        cout << actor_id << " " << actor.name << " " << farness << '\n';
    }

    return top_actors;
}
// Entry point: loads the data set, builds and prints the actor/film graph,
// then prints the three actors returned by closeness(3) with their farness.
int main()
{
    // Seed the C pseudo-random generator with the current time.
    srand(time(nullptr));

    // info.txt holds the largest actor identifier found in Relazioni.txt.
    // I do not know how to produce it in Python, so here it is in bash:
    //   echo "$(cut -f2 -d' ' data/Relazioni.txt | sort --numeric-sort | tail -1)" > data/info.txt

    DataRead();
    BuildGraph();

    cout << " Numero film: " << F.size() << endl;
    cout << " Numero attori: " << A.size() << endl;

    PrintGraph();

    // ------------------------------------------------------------- //
    // Disabled interactive look-ups, kept for reference.
    //
    // // FIND FILM
    // cout << "Cerca film: ";
    // string titolo;
    // getline(cin, titolo);
    // int id_film = FindFilm(titolo);
    // cout << id_film << "(" << F[id_film].name << ")";
    // if (!F[id_film].actor_indicies.empty()) {
    //     cout << ":";
    //     for (int id_attore : F[id_film].actor_indicies)
    //         cout << " " << id_attore << "(" << A[id_attore].name << ")";
    // }
    // cout << endl;
    //
    // // FIND ACTOR
    // // NOTE(review): the inner loop walks film_indices but names the loop
    // // variable id_attore and prints A[...].name; it looks like it should
    // // print film names from F instead -- confirm before re-enabling.
    // cout << "Cerca attore: ";
    // string attore;
    // getline(cin, attore);
    // int id_attore = FindActor(attore);
    // cout << id_attore << "(" << A[id_attore].name << ")";
    // if (!A[id_attore].film_indices.empty()) {
    //     cout << ":";
    //     for (int id_attore : A[id_attore].film_indices)
    //         cout << " " << id_attore << "(" << A[id_attore].name << ")";
    // }
    // cout << endl;
    // ------------------------------------------------------------- //

    cout << " Grafo, grafo delle mie brame... chi è il più centrale del reame? " << endl;

    // Rank the actors and print one "name farness" line per result.
    const vector<pair<int, double>> most_central = closeness(3);
    for (const auto& entry : most_central) {
        cout << A[entry.first].name << " " << entry.second << endl;
    }

    return 0;
}