closeness and harmonic works

main
Luca Lombardo 3 years ago
parent 6cd192bcaf
commit b53f8514b0

@ -5,7 +5,7 @@ import numpy as np
import os
import csv
MIN_MOVIES = 42 # Only keep relations for actors that have made more than this many movies
MIN_MOVIES = 72 # Only keep relations for actors that have made more than this many movies
#-----------------DOWNLOAD .GZ FILES FROM IMDB DATABASE-----------------#

@ -13,7 +13,7 @@
#include <fstream> // getline
#include <algorithm> // find
#include <math.h> // ceil
#include <sys/time.h> // per gettimeofday
#include <sys/time.h>
using namespace std;
@ -197,8 +197,11 @@ vector<pair<int, double>> closeness(const size_t k) {
the conservative strategy updateBoundsBFSCut(w) does not improve L, and it cuts the BFS as soon as it is sure that the farness of w is smaller than the k-th biggest farness found until now, that is, Farn[Top[k]]. If the BFS is cut, the function returns +∞; otherwise, at the end of the BFS we have computed the farness of v, and we can return it. The running time of this procedure is O(m) in the worst case, but
it can be much better in practice. It remains to define how the procedure can be sure that the farness of v is at least x: to this purpose, during the BFS, we update a lower bound on the farness of v. The idea behind this bound is that, if we have already visited all nodes up to distance d, we can upper bound the closeness centrality of v by setting distance d + 1 to a number of vertices equal to the number of edges leaving level d, and distance d + 2 to all the remaining vertices.
*/
// L = 0 for all vertices and is never updated, so we do not need to define it. We will just loop over each vertex, in the order the map prefers.
// We do not need to define Q either, as we will loop over each vertex anyway, and the order does not matter.
vector<pair<int, double>> top_actors; // Each pair is (actor_index, farness).
top_actors.reserve(k+1); // We need exactly k items, no more and no less.
@ -258,19 +261,104 @@ vector<pair<int, double>> closeness(const size_t k) {
continue;
}
// BFS is over, we compute the farness
double farness = (A.size()-1) / pow((double)r-1, 2) * sum_distances;
if (isnan(farness)) // This happens when r = 1
continue;
double farness = numeric_limits<double>::infinity();
if (r > 1)
farness = (double)(A.size()-1) / pow((double)r-1, 2) * (double)sum_distances;
// Insert the actor in top_actors, before the first element with farness >= than our actor's (i.e. sorted insert)
const lock_guard<mutex> top_actors_lock(top_actors_mutex); // Acquire ownership of the mutex, wait if another thread already owns it. Release the mutex when destroyed.
auto idx = find_if(top_actors.begin(), top_actors.end(),
[&farness](const pair<int, double>& p) { return p.second >= farness; });
[&farness](const pair<int, double>& p) { return p.second > farness; });
if (top_actors.size() < k || idx != top_actors.end()) {
top_actors.insert(idx, make_pair(actor_id, farness));
if (top_actors.size() > k)
top_actors.pop_back();
}
cout << actor_id << " " << A[actor_id].name << " " << farness << endl;
cout << actor_id << " " << A[actor_id].name << "\n\tCC: " << 1.0/farness << endl;
// top_actors_lock gets destroyed after this line, releasing the mutex
}
}, i));
}
for (auto& thread : threads)
thread.join();
return top_actors;
}
// Computes, for every actor present in A, its harmonic centrality: the sum of 1/distance over the
// vertices reached by a BFS through the actor-film adjacency (A[...].film_indices, F[...].actor_indicies).
// The k actors with the highest values are collected in top_actors, kept sorted in descending order.
// The work is split across N_THREADS threads: thread i handles actor ids i, i+N_THREADS, i+2*N_THREADS, ...
// A BFS is abandoned early ("SKIPPED") when an upper bound on the centrality shows the actor cannot enter the top k.
// k: number of actors to keep.
// Author's note (translated from Italian): "I cannot manage to invert the argument of the sum".
vector<pair<int, double>> harmonic(const size_t k) {
vector<pair<int, double>> top_actors; // Each pair is (actor_index, harmonic centrality).
top_actors.reserve(k+1); // Room for the k items we keep, plus the one temporarily inserted before pop_back().
vector<thread> threads;
mutex top_actors_mutex; // To prevent simultaneous accesses to top_actors
threads.reserve(N_THREADS);
for (int i = 0; i < N_THREADS; i++) {
// start is the first actor id handled by this thread (the loop index i is passed in below)
threads.push_back(thread([&top_actors,&top_actors_mutex,&k](int start) {
// Vector to see which vertices were put in the queue during the BFS
// NOTE(review): this vector has MAX_ACTOR_ID elements (valid indices 0..MAX_ACTOR_ID-1), but the loop below
// runs up to actor_id == MAX_ACTOR_ID inclusive and indexes enqueued[actor_id] — possible out-of-bounds access; confirm.
vector<bool> enqueued(MAX_ACTOR_ID, false);
// Each thread visits the ids start, start+N_THREADS, start+2*N_THREADS, ...
for (int actor_id = start; actor_id <= MAX_ACTOR_ID; actor_id += N_THREADS) {
if (!A.count(actor_id)) // The actor must exist, otherwise A[actor_id] would attempt to write A, and this may produce a race condition if multiple threads do it at the same time
continue;
// The closeness pseudo-code step "if |Top| ≥ k and L[v] > Farn[Top[k]] then return Top" has no counterpart here: no per-vertex bounds L are maintained, so the outer loop cannot stop early.
// We just compute the harmonic centrality of our vertex using a BFS
queue<pair<int,int>> q; // FIFO of pairs (actor_index, distance from our vertex).
// Reset the flags left over from the previous BFS
for (size_t i = 0; i < enqueued.size(); i++)
enqueued[i] = false;
int r = 0; // Number of vertices dequeued so far; at the end of a complete BFS this is |R|, where R is the set of vertices reachable from our vertex
double sum_reverse_distances = 0; // Sum of the reciprocals of the distances of the vertices dequeued so far
// NOTE(review): prev_distance is never assigned inside the BFS loop below, so the bound check (and its mutex lock)
// runs for every dequeued vertex with distance > 0, not only for the first vertex of each level — confirm this is intended.
int prev_distance = 0; // Previous distance, to see when we get to a deeper level of the BFS
q.push(make_pair(actor_id, 0));
enqueued[actor_id] = true;
bool skip = false; // Set when the BFS is cut because the actor cannot enter the top k
while (!q.empty()) {
auto [bfs_actor_id, distance] = q.front();
q.pop();
// Try to set an upper bound on the centrality
if (distance > prev_distance) {
const lock_guard<mutex> top_actors_lock(top_actors_mutex); // Acquire ownership of the mutex, wait if another thread already owns it. Release the mutex when destroyed.
if (top_actors.size() == k) { // Pruning is only possible once the top-k list is full
// Bound = reciprocal distances summed so far
//       + the q.size() vertices still in the queue, counted at `distance`
//       + every other vertex of A not yet dequeued, counted at `distance + 1`.
// NOTE(review): the vertex just popped is counted neither in r (incremented later) nor in q.size(), so it falls
// in the `distance + 1` term although its own distance is `distance` — confirm the result is still a valid upper bound.
double harmonic_centrality_upper_bound = sum_reverse_distances + q.size() / (double)distance + (A.size() - r - q.size()) / (double)(distance + 1);
if (top_actors[k-1].second >= harmonic_centrality_upper_bound) { // The k-th best centrality already reaches the bound: stop the BFS
skip = true;
break; // top_actors_lock gets destroyed also if we do this break
}
}
// top_actors_lock gets destroyed after this line, releasing the mutex
}
// Account for the dequeued vertex in the centrality of our vertex actor_id
r++;
if (distance != 0) // The source vertex itself (distance 0) contributes nothing to the sum
sum_reverse_distances += 1.0/distance;
// We loop on the adjacencies of bfs_actor_id and add them to the queue
for (int bfs_film_id : A[bfs_actor_id].film_indices) {
for (int adj_actor_id : F[bfs_film_id].actor_indicies) {
if (!enqueued[adj_actor_id]) {
// The adjacent vertices have distance +1 w.r.t. the current vertex
q.push(make_pair(adj_actor_id, distance+1));
enqueued[adj_actor_id] = true;
}
}
}
}
if (skip) {
cout << actor_id << " " << A[actor_id].name << " SKIPPED" << endl;
continue;
}
// BFS is over: the accumulated sum of reciprocal distances is the harmonic centrality
double harmonic_centrality = sum_reverse_distances;
if (!isfinite(harmonic_centrality))
continue;
// Insert the actor in top_actors, before the first element with centrality < our actor's (i.e. sorted insert, descending order)
const lock_guard<mutex> top_actors_lock(top_actors_mutex); // Acquire ownership of the mutex, wait if another thread already owns it. Release the mutex when destroyed.
auto idx = find_if(top_actors.begin(), top_actors.end(),
[&harmonic_centrality](const pair<int, double>& p) { return p.second < harmonic_centrality; });
// Insert if the list is not full yet, or if our actor beats at least one listed actor
if (top_actors.size() < k || idx != top_actors.end()) {
top_actors.insert(idx, make_pair(actor_id, harmonic_centrality));
if (top_actors.size() > k)
top_actors.pop_back(); // Drop the lowest centrality to get back to k items
}
cout << actor_id << " " << A[actor_id].name << "\n\tHC: " << harmonic_centrality << endl;
// top_actors_lock gets destroyed after this line, releasing the mutex
}
}, i));
@ -328,9 +416,22 @@ int main()
// ------------------------------------------------------------- //
cout << "Grafo, grafo delle mie brame... chi è il più centrale del reame?" << endl;
for (const auto& [actor_id, farness] : closeness(100)) {
cout << A[actor_id].name << " " << farness << endl;
cout << "Grafo, grafo delle mie brame... chi è il più centrale del reame?\n" <<endl;
const size_t k = 10;
auto top_by_closeness = closeness(k);
auto top_by_harmonic = harmonic(k);
printf("\n%36s %36s\n", "CLOSENESS CENTRALITY", "HARMONIC CENTRALITY");
for (size_t i = 0; i < k; i++) {
const auto& [closeness_actor_id, farness] = top_by_closeness[i];
const auto& [centrality_actor_id, centrality] = top_by_harmonic[i];
printf("%25s : %8lg %25s : %8lg\n",
A[closeness_actor_id].name.c_str(), 1.0/farness,
A[centrality_actor_id].name.c_str(), centrality);
}
// for (const auto& [actor_id, farness] : top_by_closeness) {
// cout << A[actor_id].name << "\n\tCloseness Centrality: " << 1.0/farness << endl;
// }
// for (const auto& [actor_id, centrality] : top_by_harmonic) {
// cout << A[actor_id].name << "\n\tHarmonic Centrality: " << centrality << endl;
// }
}

Loading…
Cancel
Save