You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
435 lines
17 KiB
Plaintext
435 lines
17 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%load_ext autoreload\n",
|
|
"%autoreload 2\n",
|
|
"\n",
|
|
"import os\n",
|
|
"import wget\n",
|
|
"import zipfile\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"import networkx as nx\n",
|
|
"import plotly.graph_objects as go\n",
|
|
"from utils import *\n",
|
|
"from collections import Counter\n",
|
|
"from tqdm import tqdm\n",
|
|
"import time\n",
|
|
"\n",
|
|
"# ignore warnings\n",
|
|
"import warnings\n",
|
|
"warnings.filterwarnings(\"ignore\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>Graph</th>\n",
|
|
" <th>Number of Nodes</th>\n",
|
|
" <th>Number of Edges</th>\n",
|
|
" <th>Average Degree</th>\n",
|
|
" <th>Average Clustering Coefficient</th>\n",
|
|
" <th>log N</th>\n",
|
|
" <th>Average Shortest Path Length</th>\n",
|
|
" <th>betweenness centrality</th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>0</th>\n",
|
|
" <td>Brightkite Checkins Graph</td>\n",
|
|
" <td>7191</td>\n",
|
|
" <td>3663807</td>\n",
|
|
" <td>1018.997914</td>\n",
|
|
" <td>0.702854</td>\n",
|
|
" <td>8.880586</td>\n",
|
|
" <td>2.411011</td>\n",
|
|
" <td>0.00022</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>1</th>\n",
|
|
" <td>Gowalla Checkins Graph</td>\n",
|
|
" <td>10702</td>\n",
|
|
" <td>303104</td>\n",
|
|
" <td>56.644366</td>\n",
|
|
" <td>0.505597</td>\n",
|
|
" <td>9.278186</td>\n",
|
|
" <td>5.222903</td>\n",
|
|
" <td>0.000301</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2</th>\n",
|
|
" <td>Foursquare EU Checkins Graph</td>\n",
|
|
" <td>20282</td>\n",
|
|
" <td>7430376</td>\n",
|
|
" <td>732.706439</td>\n",
|
|
" <td>0.597097</td>\n",
|
|
" <td>9.917489</td>\n",
|
|
" <td>2.2843</td>\n",
|
|
" <td>0.000089</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3</th>\n",
|
|
" <td>Foursquare IT Checkins Graph</td>\n",
|
|
" <td>3730</td>\n",
|
|
" <td>629749</td>\n",
|
|
" <td>337.667024</td>\n",
|
|
" <td>0.683565</td>\n",
|
|
" <td>8.224164</td>\n",
|
|
" <td>2.185477</td>\n",
|
|
" <td>0.000428</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>4</th>\n",
|
|
" <td>Brightkite Friendship Graph</td>\n",
|
|
" <td>5928</td>\n",
|
|
" <td>34673</td>\n",
|
|
" <td>11.698043</td>\n",
|
|
" <td>0.219749</td>\n",
|
|
" <td>8.687442</td>\n",
|
|
" <td>5.052162</td>\n",
|
|
" <td>0.000448</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>5</th>\n",
|
|
" <td>(Filtered) Gowalla Friendship Graph</td>\n",
|
|
" <td>8396</td>\n",
|
|
" <td>29122</td>\n",
|
|
" <td>6.937113</td>\n",
|
|
" <td>0.217544</td>\n",
|
|
" <td>9.035511</td>\n",
|
|
" <td>4.558532</td>\n",
|
|
" <td>0.000357</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>6</th>\n",
|
|
" <td>Foursquare IT Friendship Graph</td>\n",
|
|
" <td>2073</td>\n",
|
|
" <td>6217</td>\n",
|
|
" <td>5.99807</td>\n",
|
|
" <td>0.148489</td>\n",
|
|
" <td>7.636752</td>\n",
|
|
" <td>19.530752</td>\n",
|
|
" <td>0.000879</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7</th>\n",
|
|
" <td>Foursquare EU Friendship Graph</td>\n",
|
|
" <td>16491</td>\n",
|
|
" <td>59419</td>\n",
|
|
" <td>7.206234</td>\n",
|
|
" <td>0.167946</td>\n",
|
|
" <td>9.710570</td>\n",
|
|
" <td>23.713864</td>\n",
|
|
" <td>0.000272</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" Graph Number of Nodes Number of Edges \\\n",
|
|
"0 Brightkite Checkins Graph 7191 3663807 \n",
|
|
"1 Gowalla Checkins Graph 10702 303104 \n",
|
|
"2 Foursquare EU Checkins Graph 20282 7430376 \n",
|
|
"3 Foursquare IT Checkins Graph 3730 629749 \n",
|
|
"4 Brightkite Friendship Graph 5928 34673 \n",
|
|
"5 (Filtered) Gowalla Friendship Graph 8396 29122 \n",
|
|
"6 Foursquare IT Friendship Graph 2073 6217 \n",
|
|
"7 Foursquare EU Friendship Graph 16491 59419 \n",
|
|
"\n",
|
|
" Average Degree Average Clustering Coefficient log N \\\n",
|
|
"0 1018.997914 0.702854 8.880586 \n",
|
|
"1 56.644366 0.505597 9.278186 \n",
|
|
"2 732.706439 0.597097 9.917489 \n",
|
|
"3 337.667024 0.683565 8.224164 \n",
|
|
"4 11.698043 0.219749 8.687442 \n",
|
|
"5 6.937113 0.217544 9.035511 \n",
|
|
"6 5.99807 0.148489 7.636752 \n",
|
|
"7 7.206234 0.167946 9.710570 \n",
|
|
"\n",
|
|
" Average Shortest Path Length betweenness centrality \n",
|
|
"0 2.411011 0.00022 \n",
|
|
"1 5.222903 0.000301 \n",
|
|
"2 2.2843 0.000089 \n",
|
|
"3 2.185477 0.000428 \n",
|
|
"4 5.052162 0.000448 \n",
|
|
"5 4.558532 0.000357 \n",
|
|
"6 19.530752 0.000879 \n",
|
|
"7 23.713864 0.000272 "
|
|
]
|
|
},
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"def _read_graph(dataset, filename):\n",
"    \"\"\"Load one pickled networkx graph stored under data/<dataset>/<filename>.\"\"\"\n",
"    return nx.read_gpickle(os.path.join('data', dataset, filename))\n",
"\n",
"\n",
"# check-in graphs, one per dataset\n",
"G_brighkite_checkins = _read_graph('brightkite', 'brightkite_checkins_graph.gpickle')\n",
"G_gowalla_checkins = _read_graph('gowalla', 'gowalla_checkins_graph.gpickle')\n",
"G_foursquareEU_checkins = _read_graph('foursquare', 'foursquareEU_checkins_graph.gpickle')\n",
"G_foursquareIT_checkins = _read_graph('foursquare', 'foursquareIT_checkins_graph.gpickle')\n",
"\n",
"# friendship graphs, one per dataset\n",
"G_brighkite_friends = _read_graph('brightkite', 'brightkite_friendships_graph.gpickle')\n",
"G_gowalla_friends = _read_graph('gowalla', 'gowalla_friendships_graph.gpickle')\n",
"G_foursquareEU_friends = _read_graph('foursquare', 'foursquareEU_friendships_graph.gpickle')\n",
"G_foursquareIT_friends = _read_graph('foursquare', 'foursquareIT_friendships_graph.gpickle')\n",
"\n",
"# table of per-graph metrics stored in a pickle; displayed as the cell output\n",
"analysis_results = pd.read_pickle('analysis_results.pkl')\n",
"analysis_results"
|
|
]
|
|
},
|
|
{
|
|
"attachments": {},
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"The first thing we want to do is simple: create a random reference for each graph."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Graphs grouped by kind; both lists follow the row order of the analysis_results table.\n",
"checkins_graphs = [G_brighkite_checkins, G_gowalla_checkins, G_foursquareEU_checkins, G_foursquareIT_checkins]\n",
"friendships_graphs = [G_brighkite_friends, G_gowalla_friends, G_foursquareIT_friends, G_foursquareEU_friends]\n",
"\n",
"# The list used to be defined under the singular name only, while a later cell\n",
"# iterates over `friendships_graphs`; keep both names bound to the same list.\n",
"friendships_graph = friendships_graphs\n",
"\n",
"graphs_all = checkins_graphs + friendships_graphs"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Random graphs"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# analysis_results_erods = pd.DataFrame(columns=['Graph', 'Number of Nodes', 'Number of Edges', 'Average Degree', 'Average Clustering Coefficient', 'log N', 'Average Shortest Path Length', 'betweenness centrality'], index=None)\n",
|
|
"\n",
|
|
"# analysis_results_ws = pd.DataFrame(columns=['Graph', 'Number of Nodes', 'Number of Edges', 'Average Degree', 'Average Clustering Coefficient', 'log N', 'Average Shortest Path Length', 'betweenness centrality'], index=None)\n",
|
|
"\n",
|
|
"# for graph in graphs_all:\n",
|
|
"# print(\"\\nCreating random graph for graph: \", graph.name)\n",
|
|
"# G_erd = create_random_graphs(graph, model='erdos', save=False)\n",
|
|
"# G_ws = create_random_graphs(graph, model='watts_strogatz', save=False)\n",
|
|
" \n",
|
|
"# # add the basic information to the dataframe\n",
|
|
"# analysis_results_erods = analysis_results_erods.append({\n",
|
|
"# 'Graph': G_erd.name,\n",
|
|
"# 'Number of Nodes': G_erd.number_of_nodes(),\n",
|
|
"# 'Number of Edges': G_erd.number_of_edges(),\n",
|
|
"# 'log N': np.log(G_erd.number_of_nodes())\n",
|
|
"# }, ignore_index=True)\n",
|
|
"\n",
|
|
"# # add the basic information to the dataframe\n",
|
|
"# analysis_results_ws = analysis_results_ws.append({\n",
|
|
"# 'Graph': G_ws.name,\n",
|
|
"# 'Number of Nodes': G_ws.number_of_nodes(),\n",
|
|
"# 'Number of Edges': G_ws.number_of_edges(),\n",
|
|
"# 'log N': np.log(G_ws.number_of_nodes())\n",
|
|
"# }, ignore_index=True)\n",
|
|
"\n",
|
|
"# # compute the average degree and add it to the dataframes\n",
|
|
"# avg_deg_erd = np.mean([d for n, d in G_erd.degree()])\n",
|
|
"# avg_deg_ws = np.mean([d for n, d in G_ws.degree()])\n",
|
|
"# analysis_results_erods.loc[analysis_results_erods['Graph'] == G_erd.name, 'Average Degree'] = avg_deg_erd\n",
|
|
"# analysis_results_ws.loc[analysis_results_ws['Graph'] == G_ws.name, 'Average Degree'] = avg_deg_ws\n",
|
|
"\n",
|
|
"# # compute the average clustering coefficient and add it to the dataframes\n",
|
|
"# avg_clustering_erd = average_clustering_coefficient(G_erd, k = 0.9)\n",
|
|
"# avg_clustering_ws = average_clustering_coefficient(G_ws, k = 0.9)\n",
|
|
"# analysis_results_erods.loc[analysis_results_erods['Graph'] == G_erd.name, 'Average Clustering Coefficient'] = avg_clustering_erd\n",
|
|
"# analysis_results_ws.loc[analysis_results_ws['Graph'] == G_ws.name, 'Average Clustering Coefficient'] = avg_clustering_ws\n",
|
|
"\n",
|
|
"# # compute the average shortest path length and add it to the dataframes\n",
|
|
"# average_shortest_path_length_erd = average_shortest_path(G_erd, k = 0.9)\n",
|
|
"# average_shortest_path_length_ws = average_shortest_path(G_ws, k = 0.9)\n",
|
|
"# analysis_results_erods.loc[analysis_results_erods['Graph'] == G.name, 'Average Shortest Path Length'] = average_shortest_path_length_erd\n",
|
|
"# analysis_results_ws.loc[analysis_results_ws['Graph'] == G.name, 'Average Shortest Path Length'] = average_shortest_path_length_ws\n",
|
|
"\n",
|
|
"# # compute the betweenness centrality and add it to the dataframes\n",
|
|
"# betweenness_centrality_erd = np.mean(list(betweenness_centrality_parallel(G_erd, 4, k = 0.9).values()))\n",
|
|
"# betweenness_centrality_ws = np.mean(list(betweenness_centrality_parallel(G_ws, 4, k = 0.9).values()))\n",
|
|
"# analysis_results_erods.loc[analysis_results_erods['Graph'] == G.name, 'betweenness centrality'] = betweenness_centrality_erd\n",
|
|
"# analysis_results_ws.loc[analysis_results_ws['Graph'] == G.name, 'betweenness centrality'] = betweenness_centrality_ws\n",
|
|
"\n",
|
|
"# # save memory\n",
|
|
"# del G_erd, G_ws\n",
|
|
"\n",
|
|
"# analysis_results_erods.to_pickle('analysis_results_erods.pkl')\n",
|
|
"# analysis_results_ws.to_pickle('analysis_results_ws.pkl')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Small Worldness\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"We have already computed the average clustering coefficient and the average shortest path length for our networks, so we can save a lot of time by skipping these computations."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def omega(G, C_og, L_og, niter, nrand, seed=42):\n",
"    \"\"\"Return the small-world coefficient omega of G, reusing precomputed metrics.\n",
"\n",
"    The coefficient is Lr / L_og - C_og / Cl, where Lr is the mean average\n",
"    shortest path length over `nrand` random references of G and Cl is the\n",
"    highest average clustering among C_og and the lattice references of G.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    G : networkx graph the reference graphs are generated from\n",
"    C_og : precomputed average clustering coefficient of G\n",
"    L_og : precomputed average shortest path length of G\n",
"    niter : rewiring iterations for the lattice reference (the random\n",
"        reference uses twice as many)\n",
"    nrand : number of random/lattice reference pairs to generate\n",
"    seed : base seed; repetition i uses seed + i. Pass None for unseeded runs.\n",
"\n",
"    Returns\n",
"    -------\n",
"    float : the omega coefficient\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError : if nrand is smaller than 1\n",
"    \"\"\"\n",
"    if nrand < 1:\n",
"        raise ValueError('nrand must be at least 1')\n",
"\n",
"    niter_lattice_reference = niter\n",
"    niter_random_reference = niter * 2\n",
"\n",
"    random_path_lengths = []\n",
"\n",
"    # Start from the clustering of the original graph; a lattice reference\n",
"    # only replaces it when its clustering turns out to be higher.\n",
"    Cl = C_og\n",
"\n",
"    for i in range(nrand):\n",
"        # Use a different seed for every repetition: passing the same fixed\n",
"        # seed each time would rebuild identical reference graphs, so the\n",
"        # repetitions would add nothing.\n",
"        iter_seed = None if seed is None else seed + i\n",
"\n",
"        # Random reference -> average shortest path length\n",
"        Gr = nx.random_reference(G, niter=niter_random_reference, seed=iter_seed)\n",
"        random_path_lengths.append(nx.average_shortest_path_length(Gr))\n",
"\n",
"        # Lattice reference -> candidate for the highest clustering\n",
"        Gl = nx.lattice_reference(G, niter=niter_lattice_reference, seed=iter_seed)\n",
"        Cl = max(Cl, nx.average_clustering(Gl))\n",
"\n",
"    Lr = np.mean(random_path_lengths)\n",
"\n",
"    # The result must be returned: without it every caller receives None.\n",
"    return (Lr / L_og) - (C_og / Cl)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Brightkite Checkins Graph\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"analysis_results = pd.read_pickle('analysis_results.pkl')\n",
"\n",
"omega_results = pd.DataFrame(columns=['Graph', 'omega'])\n",
"\n",
"for G in checkins_graphs:\n",
"    print(G.name)\n",
"    # reuse the clustering and path length already stored for this graph\n",
"    is_current_graph = analysis_results['Graph'] == G.name\n",
"    C_og = analysis_results.loc[is_current_graph, 'Average Clustering Coefficient'].values[0]\n",
"    L_og = analysis_results.loc[is_current_graph, 'Average Shortest Path Length'].values[0]\n",
"\n",
"    # Do not store the result in a variable called `omega`: that rebinds the\n",
"    # function name and makes the call fail on the next iteration.\n",
"    omega_value = omega(G, C_og, L_og, niter=2, nrand=3)\n",
"\n",
"    # DataFrame.append was removed in pandas 2.0. Adding the row in place also\n",
"    # keeps the finished rows if the long-running loop is interrupted.\n",
"    omega_results.loc[len(omega_results)] = [G.name, omega_value]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# `friendships_graph` (singular) is the name the friendship list is defined under.\n",
"for G in friendships_graph:\n",
"    print(G.name)\n",
"    # reuse the clustering and path length already stored for this graph\n",
"    is_current_graph = analysis_results['Graph'] == G.name\n",
"    C_og = analysis_results.loc[is_current_graph, 'Average Clustering Coefficient'].values[0]\n",
"    L_og = analysis_results.loc[is_current_graph, 'Average Shortest Path Length'].values[0]\n",
"\n",
"    # Do not store the result in a variable called `omega`: that rebinds the\n",
"    # function name and makes the call fail on the next iteration.\n",
"    omega_value = omega(G, C_og, L_og, niter=2, nrand=3)\n",
"\n",
"    # DataFrame.append was removed in pandas 2.0. Adding the row in place also\n",
"    # keeps the finished rows if the long-running loop is interrupted.\n",
"    omega_results.loc[len(omega_results)] = [G.name, omega_value]"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3.10.8 64-bit",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.9"
|
|
},
|
|
"orig_nbformat": 4,
|
|
"vscode": {
|
|
"interpreter": {
|
|
"hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
|
|
}
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|