#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <numeric>
#include <queue>
#include <random>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

// Undirected graph over integer node ids, stored as one neighbor set per node.
//
// Invariant kept by every mutator: a node id is in nodes_ exactly when it has
// an entry (possibly an empty set) in adjacency_list_. This is what makes
// degree()/neighbors() safe to call on isolated nodes.
class Graph {
public:
    // Default constructor
    Graph() {}

    // Add a node to the graph. Adding an existing node is a no-op.
    void add_node(int node) {
        nodes_.insert(node);
        // Create the (empty) neighbor set so isolated nodes can be queried.
        adjacency_list_[node];
    }

    // Add the undirected edge (u, v). Both endpoints are registered as nodes,
    // so callers do not have to call add_node() first.
    void add_edge(int u, int v) {
        nodes_.insert(u);
        nodes_.insert(v);
        adjacency_list_[u].insert(v);
        adjacency_list_[v].insert(u);
    }

    // Remove a node and every edge that touches it.
    void remove_node(int node) {
        nodes_.erase(node);
        adjacency_list_.erase(node);
        for (auto& entry : adjacency_list_) {
            entry.second.erase(node);
        }
    }

    // Remove the undirected edge (u, v). Unknown endpoints are ignored;
    // find() is used instead of operator[] so no phantom entries are created.
    void remove_edge(int u, int v) {
        auto from_u = adjacency_list_.find(u);
        if (from_u != adjacency_list_.end()) {
            from_u->second.erase(v);
        }
        auto from_v = adjacency_list_.find(v);
        if (from_v != adjacency_list_.end()) {
            from_v->second.erase(u);
        }
    }

    // Return the number of nodes in the graph.
    unsigned int num_nodes() const {
        return static_cast<unsigned int>(nodes_.size());
    }

    // Return the number of edges in the graph.
    // An ordinary edge appears in two neighbor sets, a self-loop in only one,
    // so self-loops are counted separately instead of being halved away.
    unsigned int num_edges() const {
        std::size_t endpoints = 0;
        std::size_t self_loops = 0;
        for (const auto& entry : adjacency_list_) {
            endpoints += entry.second.size();
            self_loops += entry.second.count(entry.first);
        }
        return static_cast<unsigned int>((endpoints - self_loops) / 2 + self_loops);
    }

    // Return the degree of a node (0 for an isolated node).
    // Throws std::out_of_range if the node is not in the graph.
    int degree(int node) const {
        return static_cast<int>(adjacency_list_.at(node).size());
    }

    // Return a copy of the neighbors of a node (empty for an isolated node).
    // Throws std::out_of_range if the node is not in the graph.
    std::vector<int> neighbors(int node) const {
        const std::unordered_set<int>& adjacent = adjacency_list_.at(node);
        return std::vector<int>(adjacent.begin(), adjacent.end());
    }

    // Check if a node is in the graph
    bool has_node(int node) const {
        return nodes_.count(node) > 0;
    }

    // Check if the edge (u, v) exists. Never throws: unknown nodes give false.
    bool has_edge(int u, int v) const {
        if (!has_node(u) || !has_node(v)) {
            return false;
        }
        const auto from_u = adjacency_list_.find(u);
        return from_u != adjacency_list_.end() && from_u->second.count(v) > 0;
    }

    // Check if the graph is connected. The empty graph counts as connected.
    bool is_connected() const {
        if (nodes_.empty()) {
            return true;
        }
        std::unordered_set<int> visited;
        std::queue<int> frontier;
        const int start = *nodes_.begin();
        visited.insert(start);
        frontier.push(start);
        while (!frontier.empty()) {
            const int node = frontier.front();
            frontier.pop();
            const auto adjacent = adjacency_list_.find(node);
            if (adjacent == adjacency_list_.end()) {
                continue;
            }
            for (int neighbor : adjacent->second) {
                // Mark on enqueue so every node enters the queue at most once.
                if (visited.insert(neighbor).second) {
                    frontier.push(neighbor);
                }
            }
        }
        return visited.size() == nodes_.size();
    }

    // Return the set of nodes in the graph
    const std::unordered_set<int>& nodes() const {
        return nodes_;
    }

    // Return every edge once as a (smaller id, larger id) pair.
    // Self-loops are reported as (u, u) so that edges().size() == num_edges().
    std::vector<std::pair<int, int>> edges() const {
        std::vector<std::pair<int, int>> result;
        for (const auto& entry : adjacency_list_) {
            const int u = entry.first;
            for (int v : entry.second) {
                if (u <= v) {
                    result.push_back({u, v});
                }
            }
        }
        return result;
    }

    // Return the adjacency list representation of the graph
    const std::unordered_map<int, std::unordered_set<int>>& adjacency_list() const {
        return adjacency_list_;
    }

    // Info function that prints number of nodes and edges
    void info() const {
        std::cout << "Graph with " << num_nodes() << " nodes and " << num_edges() << " edges" << std::endl;
    }

    // Density 2m / (n (n - 1)) of the graph; 0 when there are fewer than two
    // nodes. Computed in double because n * (n - 1) overflows unsigned int
    // once n exceeds 65536.
    double density() const {
        const double n = static_cast<double>(num_nodes());
        if (n < 2.0) {
            return 0.0;
        }
        return 2.0 * num_edges() / (n * (n - 1.0));
    }

private:
    // Set of nodes in the graph
    std::unordered_set<int> nodes_;
    // Adjacency list representation of the graph
    std::unordered_map<int, std::unordered_set<int>> adjacency_list_;
};

// Read a graph from an edge list file with one "u v" pair of integer node ids
// per line. Lines that do not start with two integers (blank lines, headers)
// are skipped. If the file cannot be opened a warning is written to std::cerr
// and an empty graph is returned. The graph summary is printed before
// returning.
Graph read_graph(const std::string& filename) {
    Graph G;
    std::ifstream file(filename);
    if (!file) {
        std::cerr << "Warning: could not open edge list file '" << filename << "'" << std::endl;
    }
    std::string line;
    while (std::getline(file, line)) {
        std::stringstream ss(line);
        int u = 0;
        int v = 0;
        if (!(ss >> u >> v)) {
            continue;  // malformed line: do not insert unparsed values
        }
        G.add_node(u);
        G.add_node(v);
        G.add_edge(u, v);
    }
    G.info();
    return G;
}

// Return true if there is a path between two nodes of G.
// Returns false when either node is missing; a node always reaches itself.
bool has_path(const Graph& G, int u, int v) {
    if (!G.has_node(u) || !G.has_node(v)) {
        return false;
    }
    if (u == v) {
        return true;
    }
    const auto& adjacency = G.adjacency_list();
    std::unordered_set<int> visited;
    std::queue<int> frontier;
    visited.insert(u);
    frontier.push(u);
    while (!frontier.empty()) {
        const int node = frontier.front();
        frontier.pop();
        const auto adjacent = adjacency.find(node);
        if (adjacent == adjacency.end()) {
            continue;
        }
        for (int neighbor : adjacent->second) {
            if (neighbor == v) {
                return true;
            }
            if (visited.insert(neighbor).second) {
                frontier.push(neighbor);
            }
        }
    }
    return false;
}
If not, return a list of connected components std::vector> connected_components(const Graph& G) { std::vector> components; std::unordered_set visited; for (int u : G.nodes()) { if (visited.count(u) == 0) { std::vector component; std::queue q; q.push(u); visited.insert(u); while (!q.empty()) { int v = q.front(); q.pop(); component.push_back(v); for (int w : G.neighbors(v)) { if (visited.count(w) == 0) { visited.insert(w); q.push(w); } } } components.push_back(component); } } return components; } // Return the largest connected component of a graph, use the connected_components function Graph largest_component(const Graph& G) { std::vector> components = connected_components(G); std::vector largest_component; unsigned int largest_size = 0; for (const std::vector& component : components) { if (component.size() > largest_size) { largest_size = component.size(); largest_component = component; } } Graph H; for (int u : largest_component) { for (int v : G.neighbors(u)) { if (std::find(largest_component.begin(), largest_component.end(), v) != largest_component.end()) { H.add_edge(u, v); } } } return H; } // Randomly rewire an edge with probability `p` void rewire(Graph& G, int u, int v, double p) { std::random_device rd; std::mt19937 gen(rd()); std::uniform_real_distribution<> dis(0, 1); if (dis(gen) < p) { // Choose a new node randomly std::uniform_int_distribution<> node_dis(0, G.nodes().size() - 1); int w = *std::next(G.nodes().begin(), node_dis(gen)); // Remove the edge (u, v) and add the edge (u, w) G.add_edge(u, w); G.add_edge(v, w); } } // Compute a random graph by swapping edges of a given graph. 
Graph random_reference(const Graph& G, int k) { Graph H; // Add the nodes to the new graph for (int u : G.nodes()) { H.add_node(u); } // Choose the probability of rewiring an edge double p = k / (G.nodes().size() - 1.0); // Add edges to the new graph for (int u : G.nodes()) { for (int v : G.neighbors(u)) { if (u < v) { H.add_edge(u, v); rewire(H, u, v, p); } } } return H; } // Latticize the given graph by swapping edges. Graph lattice_reference(const Graph& G, int n) { Graph H; // Add the nodes to the new graph for (int u : G.nodes()) { H.add_node(u); } // Add edges to the new graph for (int u : G.nodes()) { for (int v : G.neighbors(u)) { if (u < v && has_path(G, u, v)) { // Check if u and v are neighbors in the lattice int u_x = u / n; int u_y = u % n; int v_x = v / n; int v_y = v % n; if ((u_x == v_x && std::abs(u_y - v_y) == 1) || (u_y == v_y && std::abs(u_x - v_x) == 1)) { H.add_edge(u, v); } } } } return H; } std::vector cumulative_distribution(const Graph& G) { std::vector cdf; int sum = 0; for (int u : G.nodes()) { sum += G.degree(u); cdf.push_back(sum); } return cdf; } // Calculate the D matrix for the given number of nodes. std::vector> D_matrix(int n) { std::vector> D(n, std::vector(n)); std::vector un(n - 1), um(n - 1); std::iota(un.begin(), un.end(), 1); std::iota(um.rbegin(), um.rend(), 1); std::vector u(n); u[0] = 0; for (int i = 1; i < n; i++) { u[i] = (un[i - 1] < um[i - 1]) ? un[i - 1] : um[i - 1]; } for (int v = 0; v < std::ceil(n / 2.0); v++) { std::vector d(u.begin() + v + 1, u.end()); d.insert(d.end(), u.begin(), u.begin() + v + 1); D[n - v - 1] = d; D[v] = std::vector(d.rbegin(), d.rend()); } return D; } // Choose a value from the given cumulative distribution. 
int discrete_sequence(int n, const std::vector& cdf, std::mt19937& rng) { std::uniform_int_distribution dist(0, cdf.back()); int value = dist(rng); return std::lower_bound(cdf.begin(), cdf.end(), value) - cdf.begin(); } int random_choice(const std::vector& cdf, std::mt19937& rng) { return discrete_sequence(1, cdf, rng); } // takes as input a graph, an int, an array of distance to the diagonal matrix (default None), a bool for connected (default True) Graph lattice_reference2(const Graph& G, int niter, std::vector> distance_to_diagonal = {}, bool connected = true) { Graph H; // Add the nodes to the new graph for (int u : G.nodes()) { H.add_node(u); } // if there are less then 4 nodes, return an error if (G.nodes().size() < 4) { std::cout << "Error: Graph must have at least 4 nodes" << std::endl; return H; } // Calculate the cumulative distribution of node degree std::vector cdf = cumulative_distribution(G); // Calculate the D matrix std::vector> D = D_matrix(G.num_nodes()); // niter = niter * nedges niter = niter * G.num_edges(); // # maximal number of rewiring attempts per 'niter' int max_attempts = G.num_nodes() * G.num_edges() / (G.num_nodes() * (G.num_nodes() - 1) / 2); // For loop in range niter for (int i = 0; i < niter; i++) { int n = 0; while (n < max_attempts) { // pick two random edges without creating edge list // choose source node indices from discrete distribution std::mt19937 rng; rng.seed(std::random_device{}()); int u = random_choice(cdf, rng); int v = random_choice(cdf, rng); // choose target node indices from discrete distribution int w = random_choice(cdf, rng); int x = random_choice(cdf, rng); // check if the edges are distinct if (u == v || u == w || u == x || v == w || v == x || w == x) { n++; continue; } // check if the edges are already present if (H.has_edge(u, v) || H.has_edge(u, w) || H.has_edge(u, x) || H.has_edge(v, w) || H.has_edge(v, x) || H.has_edge(w, x)) { n++; continue; } // check if the edges are neighbors if (G.has_edge(u, v) || 
G.has_edge(u, w) || G.has_edge(u, x) || G.has_edge(v, w) || G.has_edge(v, x) || G.has_edge(w, x)) { n++; continue; } // check if the edges are in the same distance to the diagonal. Do not create parallel edges if (distance_to_diagonal.size() > 0) { int u_x = u / n; int u_y = u % n; int v_x = v / n; int v_y = v % n; int w_x = w / n; int w_y = w % n; int x_x = x / n; int x_y = x % n; int d_uv = distance_to_diagonal[u_x][u_y] + distance_to_diagonal[v_x][v_y]; int d_ux = distance_to_diagonal[u_x][u_y] + distance_to_diagonal[x_x][x_y]; int d_uw = distance_to_diagonal[u_x][u_y] + distance_to_diagonal[w_x][w_y]; int d_vx = distance_to_diagonal[v_x][v_y] + distance_to_diagonal[x_x][x_y]; int d_vw = distance_to_diagonal[v_x][v_y] + distance_to_diagonal[w_x][w_y]; int d_wx = distance_to_diagonal[w_x][w_y] + distance_to_diagonal[x_x][x_y]; if (d_uv == d_ux || d_uv == d_uw || d_uv == d_vx || d_uv == d_vw || d_uv == d_wx) { n++; continue; } } // check if the edges are connected if (connected) { if (has_path(H, u, v) || has_path(H, u, w) || has_path(H, u, x) || has_path(H, v, w) || has_path(H, v, x) || has_path(H, w, x)) { n++; continue; } } // add the edges to the new graph H.add_edge(u, v); H.add_edge(u, w); H.add_edge(u, x); H.add_edge(v, w); H.add_edge(v, x); H.add_edge(w, x); break; } } return H; } // Latticize the given graph by swapping edges and return the modified graph. 
Graph latticize(const Graph& G, int niter) { Graph H = G; // Create a random number generator std::mt19937 rng; rng.seed(std::random_device{}()); std::vector degrees; for (int u : H.nodes()) { degrees.push_back(H.degree(u)); } // Create a cumulative distribution of the node degrees std::vector keys; for (int u : H.nodes()) { keys.push_back(u); degrees.push_back(H.degree(u)); } std::vector cdf = cumulative_distribution(H); for (int i = 0; i < niter; i++) { // Choose two random nodes int u = keys[random_choice(cdf, rng)]; int v = keys[random_choice(cdf, rng)]; // Choose two random neighbors of the nodes std::vector neighbors_u = H.neighbors(u); int w = neighbors_u[rng() % neighbors_u.size()]; std::vector neighbors_v = H.neighbors(v); int x = neighbors_v[rng() % neighbors_v.size()]; // Swap the edges H.add_edge(u, x); H.add_edge(v, w); H.remove_edge(u, w); H.remove_edge(v, x); } // Return the modified graph return H; } // Calculate the clustering coefficient of a node double clustering_coefficient(const Graph& G, int u) { std::unordered_set neighbors; for (int v : G.neighbors(u)) { neighbors.insert(v); } int num_neighbors = neighbors.size(); if (num_neighbors < 2) { return 0.0; } int num_edges = 0; for (int v : neighbors) { for (int w : neighbors) { if (v < w && G.has_edge(v, w)) { num_edges++; } } } return static_cast(num_edges) / (num_neighbors * (num_neighbors - 1) / 2.0); } // Calculate the average clustering coefficient of a graph double average_clustering(const Graph& G) { int num_nodes = G.nodes().size(); double sum = 0.0; for (int u : G.nodes()) { sum += clustering_coefficient(G, u); } return sum / num_nodes; } // Calculate the shortest path between two nodes using breadth-first search (Dijkstra's algorithm) std::vector shortest_path(const Graph& G, int source, int target) { std::queue q; std::unordered_map predecessor; std::unordered_set visited; q.push(source); visited.insert(source); while (!q.empty()) { int u = q.front(); q.pop(); for (int v : 
G.neighbors(u)) { if (visited.count(v) == 0) { visited.insert(v); predecessor[v] = u; q.push(v); } } } // Construct the shortest path std::vector path; if (predecessor.count(target) > 0) { int u = target; while (u != source) { path.push_back(u); u = predecessor[u]; } path.push_back(source); } std::reverse(path.begin(), path.end()); return path; } // Calculate the average shortest path length of a graph double average_shortest_path_length(const Graph& G) { double sum = 0.0; int num_paths = 0; for (int u : G.nodes()) { for (int v : G.nodes()) { if (u < v && has_path(G, u, v)) { sum += shortest_path(G, u, v).size(); num_paths++; } } } return sum / num_paths; } // Calculate the average degree of a graph double average_degree(const Graph& G) { int num_nodes = G.nodes().size(); int sum = 0; for (int u : G.nodes()) { sum += G.neighbors(u).size(); } return static_cast(sum) / num_nodes; } // Calculate the omega index of a graph double omega(const Graph& G, int niter=2, int nrand=2) { double C = average_clustering(G); std::cout << "Clustering Coefficient of the original graph = " << C << std::endl; double L = average_shortest_path_length(G); std::cout << "L = " << L << std::endl; double Lr_sum = 0.0; std::cout << "Starting random reference" << std::endl; for (int i = 0; i < nrand; i++) { std::cout << "\tIteration " << i+1 << std::endl; Graph H = random_reference(G, niter); std::cout << "\tCreated random reference" << std::endl; Lr_sum += average_shortest_path_length(H); std::cout << "\tCalculated average shortest path length of the random reference" << std::endl; } double Lr = Lr_sum / nrand; double Cl_sum = 0.0; std::cout << "Starting lattice reference" << std::endl; for (int i = 0; i < nrand; i++) { std::cout << "\tIteration " << i+1 << std::endl; Graph H = latticize(G, niter); std::cout << "\tCreated lattice reference" << std::endl; Cl_sum += average_clustering(H); std::cout << "\tCalculated average clustering of the lattice reference" << std::endl; } double Cl = Cl_sum / 
nrand; return Lr / L - C / Cl; } // Calculate the sigma index of a graph double sigma(const Graph& G, int niter=2, int nrand=2) { double C = average_clustering(G); std::cout << "Clustering Coefficient of the original graph = " << C << std::endl; double L = average_shortest_path_length(G); std::cout << "Average shortest path of the original graph = " << L << std::endl; double Lr_sum = 0.0; double Cl_sum = 0.0; for (int i = 0; i < nrand; i++) { std::cout << "\tIteration " << i+1 << std::endl; Graph H = random_reference(G, niter); std::cout << "\tCreated random reference" << std::endl; Lr_sum += average_shortest_path_length(H); std::cout << "\tCalculated average shortest path length of the random reference" << std::endl; Cl_sum += average_clustering(H); std::cout << "\tCalculated average clustering of the random reference" << std::endl; } double Lr = Lr_sum / nrand; double Cl = Cl_sum / nrand; return (Lr / L) / (C / Cl); } int main() { std::cout << "\nStarting the computation for the Foursquare network" << std::endl; Graph foursquare = read_graph("data/foursquare/foursquare_friends_edges_filtered.tsv"); // std::cout << "Omega: " << omega(foursquare) << std::endl; std::cout << "Sigma: " << sigma(foursquare) << std::endl; std::cout << "\nStarting the computation for the Brightkite network" << std::endl; Graph brightkite = read_graph("data/brightkite/brightkite_friends_edges_filtered.tsv"); // std::cout << "Omega: " << omega(brightkite) << std::endl; std::cout << "Sigma: " << sigma(brightkite) << std::endl; std::cout << "\nStarting the computation for the Gowalla network" << std::endl; Graph gowalla = read_graph("data/gowalla/gowalla_friends_edges_filtered.tsv"); // std::cout << "Omega: " << omega(gowalla) << std::endl; std::cout << "Sigma: " << sigma(gowalla) << std::endl; return 0; }