almost final version, need to review the theory

main
Luca Lombardo 2 years ago
parent 790d9e865e
commit 5c70ff3a19

1
.gitignore vendored

@ -147,3 +147,4 @@ data/
backup/ backup/
sources/ sources/
extra/ extra/
html_graphs/

Binary file not shown.

File diff suppressed because one or more lines are too long

@ -31,20 +31,11 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("graph", help="Name of the graph to be used. Options are 'checkins-foursquare', 'checkins-gowalla', 'checkins-brightkite', 'friends-foursquare', 'friends-gowalla', 'friends-brightkite'") parser.add_argument("graph", help="Name of the graph to be used. Options are 'checkins-foursquare', 'checkins-gowalla', 'checkins-brightkite', 'friends-foursquare', 'friends-gowalla', 'friends-brightkite'")
parser.add_argument("k", help="Percentage of nodes to be sampled. Needs to be a float between 0 and 1") parser.add_argument("k", help="Percentage of nodes to be sampled. Needs to be a float between 0 and 1")
parser.add_argument("niter", help="Number of rewiring per edge. Needs to be an integer. Default is 5") parser.add_argument("--niter", help="Number of rewiring per edge. Needs to be an integer. Default is 5", default=5)
parser.add_argument("nrand", help="Number of random graphs. Needs to be an integer. Default is 5") parser.add_argument("--nrand", help="Number of random graphs. Needs to be an integer. Default is 5", default=5)
parser.add_help = True parser.add_help = True
args = parser.parse_args() args = parser.parse_args()
# if no input is given for niter and nrand, set them to default values
if args.niter == None:
print("No input for niter. Setting it to default value: 5")
args.niter = 5
if args.nrand == None:
print("No input for nrand. Setting it to default value: 5")
args.nrand = 5
# the name of the graph is the first part of the input string # the name of the graph is the first part of the input string
name = args.graph.split('-')[1] name = args.graph.split('-')[1]
if 'checkins' in args.graph: if 'checkins' in args.graph:

@ -16,809 +16,171 @@
"import pandas as pd\n", "import pandas as pd\n",
"import networkx as nx\n", "import networkx as nx\n",
"import plotly.graph_objects as go\n", "import plotly.graph_objects as go\n",
"# from utils import *\n", "from utils import *\n",
"from collections import Counter\n", "from collections import Counter\n",
"from tqdm import tqdm\n", "from tqdm import tqdm\n",
"import time\n", "import time\n",
"import geopandas as gpd\n", "import geopandas as gpd\n",
"import gdown # for downloading files from google drive\n", "import gdown # for downloading files from google drive\n",
"import shutil\n", "import shutil\n",
"# ignore warnings\n",
"import warnings\n", "import warnings\n",
"import sys\n", "import sys\n",
"from pyvis.network import Network\n",
"warnings.filterwarnings(\"ignore\")" "warnings.filterwarnings(\"ignore\")"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [],
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Graph</th>\n",
" <th>Number of Nodes</th>\n",
" <th>Number of Edges</th>\n",
" <th>Average Degree</th>\n",
" <th>Average Clustering Coefficient</th>\n",
" <th>log N</th>\n",
" <th>Average Shortest Path Length</th>\n",
" <th>betweenness centrality</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Brightkite Checkins Graph</td>\n",
" <td>6493</td>\n",
" <td>292973</td>\n",
" <td>90.242723</td>\n",
" <td>0.713999</td>\n",
" <td>8.778480</td>\n",
" <td>3.013369</td>\n",
" <td>0.000534</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gowalla Checkins Graph</td>\n",
" <td>3073</td>\n",
" <td>62790</td>\n",
" <td>40.865604</td>\n",
" <td>0.548372</td>\n",
" <td>8.030410</td>\n",
" <td>3.508031</td>\n",
" <td>0.001277</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Foursquare Checkins Graph</td>\n",
" <td>2324</td>\n",
" <td>246702</td>\n",
" <td>212.30809</td>\n",
" <td>0.65273</td>\n",
" <td>7.751045</td>\n",
" <td>2.186112</td>\n",
" <td>0.000938</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Brightkite Friendship Graph</td>\n",
" <td>5420</td>\n",
" <td>14690</td>\n",
" <td>5.420664</td>\n",
" <td>0.218571</td>\n",
" <td>8.597851</td>\n",
" <td>5.231807</td>\n",
" <td>0.000664</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>(Filtered) Gowalla Friendship Graph</td>\n",
" <td>2294</td>\n",
" <td>5548</td>\n",
" <td>4.836966</td>\n",
" <td>0.234293</td>\n",
" <td>7.738052</td>\n",
" <td>5.396488</td>\n",
" <td>0.001331</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Foursquare Friendship Graph</td>\n",
" <td>1397</td>\n",
" <td>5323</td>\n",
" <td>7.620616</td>\n",
" <td>0.183485</td>\n",
" <td>7.242082</td>\n",
" <td>6.45841</td>\n",
" <td>0.001531</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Graph Number of Nodes Number of Edges \\\n",
"0 Brightkite Checkins Graph 6493 292973 \n",
"1 Gowalla Checkins Graph 3073 62790 \n",
"2 Foursquare Checkins Graph 2324 246702 \n",
"3 Brightkite Friendship Graph 5420 14690 \n",
"4 (Filtered) Gowalla Friendship Graph 2294 5548 \n",
"5 Foursquare Friendship Graph 1397 5323 \n",
"\n",
" Average Degree Average Clustering Coefficient log N \\\n",
"0 90.242723 0.713999 8.778480 \n",
"1 40.865604 0.548372 8.030410 \n",
"2 212.30809 0.65273 7.751045 \n",
"3 5.420664 0.218571 8.597851 \n",
"4 4.836966 0.234293 7.738052 \n",
"5 7.620616 0.183485 7.242082 \n",
"\n",
" Average Shortest Path Length betweenness centrality \n",
"0 3.013369 0.000534 \n",
"1 3.508031 0.001277 \n",
"2 2.186112 0.000938 \n",
"3 5.231807 0.000664 \n",
"4 5.396488 0.001331 \n",
"5 6.45841 0.001531 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"analysis_results = pd.read_pickle('analysis_results.pkl')\n",
"analysis_results"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Graph</th>\n",
" <th>Number of Nodes</th>\n",
" <th>Number of Edges</th>\n",
" <th>Average Degree</th>\n",
" <th>Average Clustering Coefficient</th>\n",
" <th>log N</th>\n",
" <th>Average Shortest Path Length</th>\n",
" <th>betweenness centrality</th>\n",
" <th>omega-coefficient</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Brightkite Checkins Graph</td>\n",
" <td>6493</td>\n",
" <td>292973</td>\n",
" <td>90.242723</td>\n",
" <td>0.713999</td>\n",
" <td>8.778480</td>\n",
" <td>3.013369</td>\n",
" <td>0.000534</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gowalla Checkins Graph</td>\n",
" <td>3073</td>\n",
" <td>62790</td>\n",
" <td>40.865604</td>\n",
" <td>0.548372</td>\n",
" <td>8.030410</td>\n",
" <td>3.508031</td>\n",
" <td>0.001277</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Foursquare Checkins Graph</td>\n",
" <td>2324</td>\n",
" <td>246702</td>\n",
" <td>212.30809</td>\n",
" <td>0.65273</td>\n",
" <td>7.751045</td>\n",
" <td>2.186112</td>\n",
" <td>0.000938</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Brightkite Friendship Graph</td>\n",
" <td>5420</td>\n",
" <td>14690</td>\n",
" <td>5.420664</td>\n",
" <td>0.218571</td>\n",
" <td>8.597851</td>\n",
" <td>5.231807</td>\n",
" <td>0.000664</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>(Filtered) Gowalla Friendship Graph</td>\n",
" <td>2294</td>\n",
" <td>5548</td>\n",
" <td>4.836966</td>\n",
" <td>0.234293</td>\n",
" <td>7.738052</td>\n",
" <td>5.396488</td>\n",
" <td>0.001331</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Foursquare Friendship Graph</td>\n",
" <td>1397</td>\n",
" <td>5323</td>\n",
" <td>7.620616</td>\n",
" <td>0.183485</td>\n",
" <td>7.242082</td>\n",
" <td>6.45841</td>\n",
" <td>0.001531</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Graph Number of Nodes Number of Edges \\\n",
"0 Brightkite Checkins Graph 6493 292973 \n",
"1 Gowalla Checkins Graph 3073 62790 \n",
"2 Foursquare Checkins Graph 2324 246702 \n",
"3 Brightkite Friendship Graph 5420 14690 \n",
"4 (Filtered) Gowalla Friendship Graph 2294 5548 \n",
"5 Foursquare Friendship Graph 1397 5323 \n",
"\n",
" Average Degree Average Clustering Coefficient log N \\\n",
"0 90.242723 0.713999 8.778480 \n",
"1 40.865604 0.548372 8.030410 \n",
"2 212.30809 0.65273 7.751045 \n",
"3 5.420664 0.218571 8.597851 \n",
"4 4.836966 0.234293 7.738052 \n",
"5 7.620616 0.183485 7.242082 \n",
"\n",
" Average Shortest Path Length betweenness centrality omega-coefficient \n",
"0 3.013369 0.000534 NaN \n",
"1 3.508031 0.001277 NaN \n",
"2 2.186112 0.000938 NaN \n",
"3 5.231807 0.000664 NaN \n",
"4 5.396488 0.001331 NaN \n",
"5 6.45841 0.001531 NaN "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"analysis_results['omega-coefficient'] = np.nan\n", "import multiprocessing\n",
"analysis_results" "import random\n",
"import networkx as nx\n",
"import numpy as np\n",
"import math\n",
"\n",
"def parallel_omega(G, nrand=10, seed=None):\n",
"\n",
" random.seed(seed)\n",
" if not nx.is_connected(G):\n",
" G = G.subgraph(max(nx.connected_components(G), key=len))\n",
"\n",
" if len(G) == 1:\n",
" return 0\n",
"\n",
" niter_lattice_reference = nrand\n",
" niter_random_reference = nrand * 2\n",
" \n",
" def worker(queue):\n",
" while True:\n",
" task = queue.get()\n",
" if task is None:\n",
" break\n",
" random_graph = nx.random_reference(G)\n",
" lattice_graph = nx.lattice_reference(G)\n",
" random_shortest_path = nx.average_shortest_path_length(random_graph)\n",
" lattice_clustering = nx.average_clustering(lattice_graph)\n",
" queue.put((random_shortest_path, lattice_clustering))\n",
" \n",
" n_processes = multiprocessing.cpu_count()\n",
" manager = multiprocessing.Manager()\n",
" queue = manager.Queue()\n",
" processes = [multiprocessing.Process(target=worker, args=(queue,)) for _ in range(n_processes)]\n",
" for process in processes:\n",
" process.start()\n",
" \n",
" for _ in range(nrand):\n",
" queue.put(1)\n",
" \n",
" for _ in range(n_processes):\n",
" queue.put(None)\n",
" \n",
" for process in processes:\n",
" process.join()\n",
" \n",
" shortest_paths = []\n",
" clustering_coeffs = []\n",
" while not queue.empty():\n",
" random_shortest_path, lattice_clustering = queue.get()\n",
" shortest_paths.append(random_shortest_path)\n",
" clustering_coeffs.append(lattice_clustering)\n",
" \n",
" L = nx.average_shortest_path_length(G)\n",
" C = nx.average_clustering(G)\n",
"\n",
" # kill the process\n",
" for process in processes:\n",
" process.terminate()\n",
" process.join()\n",
"\n",
" omega = (np.mean(shortest_paths) / L) - (C / np.mean(clustering_coeffs))\n",
"\n",
"\n",
" return omega"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Graph</th>\n",
" <th>Number of Nodes</th>\n",
" <th>Number of Edges</th>\n",
" <th>Average Degree</th>\n",
" <th>Average Clustering Coefficient</th>\n",
" <th>log N</th>\n",
" <th>Average Shortest Path Length</th>\n",
" <th>betweenness centrality</th>\n",
" <th>omega-coefficient</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Brightkite Checkins Graph</td>\n",
" <td>6493</td>\n",
" <td>292973</td>\n",
" <td>90.242723</td>\n",
" <td>0.713999</td>\n",
" <td>8.778480</td>\n",
" <td>3.013369</td>\n",
" <td>0.000534</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gowalla Checkins Graph</td>\n",
" <td>3073</td>\n",
" <td>62790</td>\n",
" <td>40.865604</td>\n",
" <td>0.548372</td>\n",
" <td>8.030410</td>\n",
" <td>3.508031</td>\n",
" <td>0.001277</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Foursquare Checkins Graph</td>\n",
" <td>2324</td>\n",
" <td>246702</td>\n",
" <td>212.30809</td>\n",
" <td>0.65273</td>\n",
" <td>7.751045</td>\n",
" <td>2.186112</td>\n",
" <td>0.000938</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Brightkite Friendship Graph</td>\n",
" <td>5420</td>\n",
" <td>14690</td>\n",
" <td>5.420664</td>\n",
" <td>0.218571</td>\n",
" <td>8.597851</td>\n",
" <td>5.231807</td>\n",
" <td>0.000664</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>(Filtered) Gowalla Friendship Graph</td>\n",
" <td>2294</td>\n",
" <td>5548</td>\n",
" <td>4.836966</td>\n",
" <td>0.234293</td>\n",
" <td>7.738052</td>\n",
" <td>5.396488</td>\n",
" <td>0.001331</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Foursquare Friendship Graph</td>\n",
" <td>1397</td>\n",
" <td>5323</td>\n",
" <td>7.620616</td>\n",
" <td>0.183485</td>\n",
" <td>7.242082</td>\n",
" <td>6.45841</td>\n",
" <td>0.001531</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [ "text/plain": [
" Graph Number of Nodes Number of Edges \\\n", "'Graph with 200 nodes and 584 edges'"
"0 Brightkite Checkins Graph 6493 292973 \n",
"1 Gowalla Checkins Graph 3073 62790 \n",
"2 Foursquare Checkins Graph 2324 246702 \n",
"3 Brightkite Friendship Graph 5420 14690 \n",
"4 (Filtered) Gowalla Friendship Graph 2294 5548 \n",
"5 Foursquare Friendship Graph 1397 5323 \n",
"\n",
" Average Degree Average Clustering Coefficient log N \\\n",
"0 90.242723 0.713999 8.778480 \n",
"1 40.865604 0.548372 8.030410 \n",
"2 212.30809 0.65273 7.751045 \n",
"3 5.420664 0.218571 8.597851 \n",
"4 4.836966 0.234293 7.738052 \n",
"5 7.620616 0.183485 7.242082 \n",
"\n",
" Average Shortest Path Length betweenness centrality omega-coefficient \n",
"0 3.013369 0.000534 NaN \n",
"1 3.508031 0.001277 NaN \n",
"2 2.186112 0.000938 NaN \n",
"3 5.231807 0.000664 NaN \n",
"4 5.396488 0.001331 NaN \n",
"5 6.45841 0.001531 NaN "
] ]
}, },
"execution_count": 16, "execution_count": 3,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"# rename (Filtered) Gowalla Friendship Graph in Gowalla Friendship Graph\n", "G = nx.erdos_renyi_graph(200, 0.03)\n",
"analysis_results.loc[analysis_results['Graph'] == 'Filtered Gowalla Friendship Graph', 'Graph'] = 'Gowalla Friendship Graph'\n", "G = G.subgraph(max(nx.connected_components(G), key=len))\n",
"analysis_results" "nx.info(G)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Graph</th>\n",
" <th>Number of Nodes</th>\n",
" <th>Number of Edges</th>\n",
" <th>Average Degree</th>\n",
" <th>Average Clustering Coefficient</th>\n",
" <th>log N</th>\n",
" <th>Average Shortest Path Length</th>\n",
" <th>betweenness centrality</th>\n",
" <th>omega-coefficient</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Brightkite Checkins Graph</td>\n",
" <td>6493</td>\n",
" <td>292973</td>\n",
" <td>90.242723</td>\n",
" <td>0.713999</td>\n",
" <td>8.778480</td>\n",
" <td>3.013369</td>\n",
" <td>0.000534</td>\n",
" <td>-0.180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gowalla Checkins Graph</td>\n",
" <td>3073</td>\n",
" <td>62790</td>\n",
" <td>40.865604</td>\n",
" <td>0.548372</td>\n",
" <td>8.030410</td>\n",
" <td>3.508031</td>\n",
" <td>0.001277</td>\n",
" <td>-0.240</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Foursquare Checkins Graph</td>\n",
" <td>2324</td>\n",
" <td>246702</td>\n",
" <td>212.30809</td>\n",
" <td>0.65273</td>\n",
" <td>7.751045</td>\n",
" <td>2.186112</td>\n",
" <td>0.000938</td>\n",
" <td>-0.056</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Brightkite Friendship Graph</td>\n",
" <td>5420</td>\n",
" <td>14690</td>\n",
" <td>5.420664</td>\n",
" <td>0.218571</td>\n",
" <td>8.597851</td>\n",
" <td>5.231807</td>\n",
" <td>0.000664</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>(Filtered) Gowalla Friendship Graph</td>\n",
" <td>2294</td>\n",
" <td>5548</td>\n",
" <td>4.836966</td>\n",
" <td>0.234293</td>\n",
" <td>7.738052</td>\n",
" <td>5.396488</td>\n",
" <td>0.001331</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Foursquare Friendship Graph</td>\n",
" <td>1397</td>\n",
" <td>5323</td>\n",
" <td>7.620616</td>\n",
" <td>0.183485</td>\n",
" <td>7.242082</td>\n",
" <td>6.45841</td>\n",
" <td>0.001531</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [ "text/plain": [
" Graph Number of Nodes Number of Edges \\\n", "0.6776975801779451"
"0 Brightkite Checkins Graph 6493 292973 \n",
"1 Gowalla Checkins Graph 3073 62790 \n",
"2 Foursquare Checkins Graph 2324 246702 \n",
"3 Brightkite Friendship Graph 5420 14690 \n",
"4 (Filtered) Gowalla Friendship Graph 2294 5548 \n",
"5 Foursquare Friendship Graph 1397 5323 \n",
"\n",
" Average Degree Average Clustering Coefficient log N \\\n",
"0 90.242723 0.713999 8.778480 \n",
"1 40.865604 0.548372 8.030410 \n",
"2 212.30809 0.65273 7.751045 \n",
"3 5.420664 0.218571 8.597851 \n",
"4 4.836966 0.234293 7.738052 \n",
"5 7.620616 0.183485 7.242082 \n",
"\n",
" Average Shortest Path Length betweenness centrality omega-coefficient \n",
"0 3.013369 0.000534 -0.180 \n",
"1 3.508031 0.001277 -0.240 \n",
"2 2.186112 0.000938 -0.056 \n",
"3 5.231807 0.000664 NaN \n",
"4 5.396488 0.001331 NaN \n",
"5 6.45841 0.001531 NaN "
] ]
}, },
"execution_count": 18, "execution_count": 5,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"# Foursquare Checkins Graph : -0.056\n", "omega = parallel_omega(G, nrand=10, seed=42)\n",
"# Gowalla Checkins Graph : -0.24\n", "omega"
"# Brightkite Checkins Graph : -0.18\n",
"\n",
"# add omega-coefficient to the respective graphs\n",
"analysis_results.loc[analysis_results['Graph'] == 'Foursquare Checkins Graph', 'omega-coefficient'] = -0.056\n",
"analysis_results.loc[analysis_results['Graph'] == 'Gowalla Checkins Graph', 'omega-coefficient'] = -0.24\n",
"analysis_results.loc[analysis_results['Graph'] == 'Brightkite Checkins Graph', 'omega-coefficient'] = -0.18\n",
"analysis_results"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 25, "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# rename (Filtered) Gowalla Friendship Graph in Gowalla Friendship Graph\n",
"analysis_results.loc[analysis_results['Graph'] == '(Filtered) Gowalla Friendship Graph', 'Graph'] = 'Gowalla Friendship Graph'"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# FourSquare Friendship Graph : -0.17\n",
"# Gowalla Friendship Graph : -0.25\n",
"# Brightkite Friendship Graph : -0.20\n",
"\n",
"# add omega-coefficient to the respective graphs\n",
"analysis_results.loc[analysis_results['Graph'] == 'Foursquare Friendship Graph', 'omega-coefficient'] = -0.17\n",
"analysis_results.loc[analysis_results['Graph'] == 'Gowalla Friendship Graph', 'omega-coefficient'] = -0.25\n",
"analysis_results.loc[analysis_results['Graph'] == 'Brightkite Friendship Graph', 'omega-coefficient'] = -0.20"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "ename": "KeyboardInterrupt",
"text/html": [ "evalue": "",
"<div>\n", "output_type": "error",
"<style scoped>\n", "traceback": [
" .dataframe tbody tr th:only-of-type {\n", "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
" vertical-align: middle;\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
" }\n", "Cell \u001b[0;32mIn[4], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m standard_omega \u001b[39m=\u001b[39m nx\u001b[39m.\u001b[39;49momega(G, nrand\u001b[39m=\u001b[39;49m\u001b[39m10\u001b[39;49m, seed\u001b[39m=\u001b[39;49m\u001b[39m42\u001b[39;49m)\n\u001b[1;32m 2\u001b[0m standard_omega\n",
"\n", "File \u001b[0;32m/usr/lib/python3.10/site-packages/networkx/utils/decorators.py:845\u001b[0m, in \u001b[0;36margmap.__call__.<locals>.func\u001b[0;34m(_argmap__wrapper, *args, **kwargs)\u001b[0m\n\u001b[1;32m 844\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mfunc\u001b[39m(\u001b[39m*\u001b[39margs, __wrapper\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[0;32m--> 845\u001b[0m \u001b[39mreturn\u001b[39;00m argmap\u001b[39m.\u001b[39;49m_lazy_compile(__wrapper)(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
" .dataframe tbody tr th {\n", "File \u001b[0;32m<class 'networkx.utils.decorators.argmap'> compilation 14:6\u001b[0m, in \u001b[0;36margmap_omega_9\u001b[0;34m(G, niter, nrand, seed)\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39minspect\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mitertools\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mre\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mcollections\u001b[39;00m \u001b[39mimport\u001b[39;00m defaultdict\n\u001b[1;32m 8\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mcontextlib\u001b[39;00m \u001b[39mimport\u001b[39;00m contextmanager\n",
" vertical-align: top;\n", "File \u001b[0;32m/usr/lib/python3.10/site-packages/networkx/algorithms/smallworld.py:367\u001b[0m, in \u001b[0;36momega\u001b[0;34m(G, niter, nrand, seed)\u001b[0m\n\u001b[1;32m 363\u001b[0m niter_random_reference \u001b[39m=\u001b[39m niter \u001b[39m*\u001b[39m \u001b[39m2\u001b[39m\n\u001b[1;32m 365\u001b[0m \u001b[39mfor\u001b[39;00m _ \u001b[39min\u001b[39;00m \u001b[39mrange\u001b[39m(nrand):\n\u001b[1;32m 366\u001b[0m \u001b[39m# Generate random graph\u001b[39;00m\n\u001b[0;32m--> 367\u001b[0m Gr \u001b[39m=\u001b[39m random_reference(G, niter\u001b[39m=\u001b[39;49mniter_random_reference, seed\u001b[39m=\u001b[39;49mseed)\n\u001b[1;32m 368\u001b[0m randMetrics[\u001b[39m\"\u001b[39m\u001b[39mL\u001b[39m\u001b[39m\"\u001b[39m]\u001b[39m.\u001b[39mappend(nx\u001b[39m.\u001b[39maverage_shortest_path_length(Gr))\n\u001b[1;32m 370\u001b[0m \u001b[39m# Generate lattice graph\u001b[39;00m\n",
" }\n", "File \u001b[0;32m/usr/lib/python3.10/site-packages/networkx/utils/decorators.py:845\u001b[0m, in \u001b[0;36margmap.__call__.<locals>.func\u001b[0;34m(_argmap__wrapper, *args, **kwargs)\u001b[0m\n\u001b[1;32m 844\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mfunc\u001b[39m(\u001b[39m*\u001b[39margs, __wrapper\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[0;32m--> 845\u001b[0m \u001b[39mreturn\u001b[39;00m argmap\u001b[39m.\u001b[39;49m_lazy_compile(__wrapper)(\u001b[39m*\u001b[39;49margs, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
"\n", "File \u001b[0;32m<class 'networkx.utils.decorators.argmap'> compilation 24:6\u001b[0m, in \u001b[0;36margmap_random_reference_19\u001b[0;34m(G, niter, connectivity, seed)\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39minspect\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mitertools\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mre\u001b[39;00m\n\u001b[1;32m 7\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mcollections\u001b[39;00m \u001b[39mimport\u001b[39;00m defaultdict\n\u001b[1;32m 8\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mcontextlib\u001b[39;00m \u001b[39mimport\u001b[39;00m contextmanager\n",
" .dataframe thead th {\n", "File \u001b[0;32m/usr/lib/python3.10/site-packages/networkx/algorithms/smallworld.py:100\u001b[0m, in \u001b[0;36mrandom_reference\u001b[0;34m(G, niter, connectivity, seed)\u001b[0m\n\u001b[1;32m 97\u001b[0m G\u001b[39m.\u001b[39mremove_edge(c, d)\n\u001b[1;32m 99\u001b[0m \u001b[39m# Check if the graph is still connected\u001b[39;00m\n\u001b[0;32m--> 100\u001b[0m \u001b[39mif\u001b[39;00m connectivity \u001b[39mand\u001b[39;00m local_conn(G, a, b) \u001b[39m==\u001b[39m \u001b[39m0\u001b[39m:\n\u001b[1;32m 101\u001b[0m \u001b[39m# Not connected, revert the swap\u001b[39;00m\n\u001b[1;32m 102\u001b[0m G\u001b[39m.\u001b[39mremove_edge(a, d)\n\u001b[1;32m 103\u001b[0m G\u001b[39m.\u001b[39mremove_edge(c, b)\n",
" text-align: right;\n", "File \u001b[0;32m/usr/lib/python3.10/site-packages/networkx/algorithms/connectivity/connectivity.py:649\u001b[0m, in \u001b[0;36mlocal_edge_connectivity\u001b[0;34m(G, s, t, flow_func, auxiliary, residual, cutoff)\u001b[0m\n\u001b[1;32m 646\u001b[0m \u001b[39melif\u001b[39;00m flow_func \u001b[39mis\u001b[39;00m boykov_kolmogorov:\n\u001b[1;32m 647\u001b[0m kwargs[\u001b[39m\"\u001b[39m\u001b[39mcutoff\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m cutoff\n\u001b[0;32m--> 649\u001b[0m \u001b[39mreturn\u001b[39;00m nx\u001b[39m.\u001b[39;49mmaximum_flow_value(H, s, t, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n",
" }\n", "File \u001b[0;32m/usr/lib/python3.10/site-packages/networkx/algorithms/flow/maxflow.py:307\u001b[0m, in \u001b[0;36mmaximum_flow_value\u001b[0;34m(flowG, _s, _t, capacity, flow_func, **kwargs)\u001b[0m\n\u001b[1;32m 304\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m callable(flow_func):\n\u001b[1;32m 305\u001b[0m \u001b[39mraise\u001b[39;00m nx\u001b[39m.\u001b[39mNetworkXError(\u001b[39m\"\u001b[39m\u001b[39mflow_func has to be callable.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m--> 307\u001b[0m R \u001b[39m=\u001b[39m flow_func(flowG, _s, _t, capacity\u001b[39m=\u001b[39;49mcapacity, value_only\u001b[39m=\u001b[39;49m\u001b[39mTrue\u001b[39;49;00m, \u001b[39m*\u001b[39;49m\u001b[39m*\u001b[39;49mkwargs)\n\u001b[1;32m 309\u001b[0m \u001b[39mreturn\u001b[39;00m R\u001b[39m.\u001b[39mgraph[\u001b[39m\"\u001b[39m\u001b[39mflow_value\u001b[39m\u001b[39m\"\u001b[39m]\n",
"</style>\n", "File \u001b[0;32m/usr/lib/python3.10/site-packages/networkx/algorithms/flow/edmondskarp.py:237\u001b[0m, in \u001b[0;36medmonds_karp\u001b[0;34m(G, s, t, capacity, residual, value_only, cutoff)\u001b[0m\n\u001b[1;32m 120\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39medmonds_karp\u001b[39m(\n\u001b[1;32m 121\u001b[0m G, s, t, capacity\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mcapacity\u001b[39m\u001b[39m\"\u001b[39m, residual\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m, value_only\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m, cutoff\u001b[39m=\u001b[39m\u001b[39mNone\u001b[39;00m\n\u001b[1;32m 122\u001b[0m ):\n\u001b[1;32m 123\u001b[0m \u001b[39m \u001b[39m\u001b[39m\"\"\"Find a maximum single-commodity flow using the Edmonds-Karp algorithm.\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \n\u001b[1;32m 125\u001b[0m \u001b[39m This function returns the residual network resulting after computing\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 235\u001b[0m \n\u001b[1;32m 236\u001b[0m \u001b[39m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 237\u001b[0m R \u001b[39m=\u001b[39m edmonds_karp_impl(G, s, t, capacity, residual, cutoff)\n\u001b[1;32m 238\u001b[0m R\u001b[39m.\u001b[39mgraph[\u001b[39m\"\u001b[39m\u001b[39malgorithm\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39medmonds_karp\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 239\u001b[0m \u001b[39mreturn\u001b[39;00m R\n",
"<table border=\"1\" class=\"dataframe\">\n", "File \u001b[0;32m/usr/lib/python3.10/site-packages/networkx/algorithms/flow/edmondskarp.py:104\u001b[0m, in \u001b[0;36medmonds_karp_impl\u001b[0;34m(G, s, t, capacity, residual, cutoff)\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[39mraise\u001b[39;00m nx\u001b[39m.\u001b[39mNetworkXError(\u001b[39m\"\u001b[39m\u001b[39msource and sink are the same node\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 103\u001b[0m \u001b[39mif\u001b[39;00m residual \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 104\u001b[0m R \u001b[39m=\u001b[39m build_residual_network(G, capacity)\n\u001b[1;32m 105\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 106\u001b[0m R \u001b[39m=\u001b[39m residual\n",
" <thead>\n", "File \u001b[0;32m/usr/lib/python3.10/site-packages/networkx/algorithms/flow/utils.py:139\u001b[0m, in \u001b[0;36mbuild_residual_network\u001b[0;34m(G, capacity)\u001b[0m\n\u001b[1;32m 135\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m R\u001b[39m.\u001b[39mhas_edge(u, v):\n\u001b[1;32m 136\u001b[0m \u001b[39m# Both (u, v) and (v, u) must be present in the residual\u001b[39;00m\n\u001b[1;32m 137\u001b[0m \u001b[39m# network.\u001b[39;00m\n\u001b[1;32m 138\u001b[0m R\u001b[39m.\u001b[39madd_edge(u, v, capacity\u001b[39m=\u001b[39mr)\n\u001b[0;32m--> 139\u001b[0m R\u001b[39m.\u001b[39;49madd_edge(v, u, capacity\u001b[39m=\u001b[39;49m\u001b[39m0\u001b[39;49m)\n\u001b[1;32m 140\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 141\u001b[0m \u001b[39m# The edge (u, v) was added when (v, u) was visited.\u001b[39;00m\n\u001b[1;32m 142\u001b[0m R[u][v][\u001b[39m\"\u001b[39m\u001b[39mcapacity\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m=\u001b[39m r\n",
" <tr style=\"text-align: right;\">\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
" <th></th>\n", ]
" <th>Graph</th>\n",
" <th>Number of Nodes</th>\n",
" <th>Number of Edges</th>\n",
" <th>Average Degree</th>\n",
" <th>Average Clustering Coefficient</th>\n",
" <th>log N</th>\n",
" <th>Average Shortest Path Length</th>\n",
" <th>betweenness centrality</th>\n",
" <th>omega-coefficient</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Brightkite Checkins Graph</td>\n",
" <td>6493</td>\n",
" <td>292973</td>\n",
" <td>90.242723</td>\n",
" <td>0.713999</td>\n",
" <td>8.778480</td>\n",
" <td>3.013369</td>\n",
" <td>0.000534</td>\n",
" <td>-0.180</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Gowalla Checkins Graph</td>\n",
" <td>3073</td>\n",
" <td>62790</td>\n",
" <td>40.865604</td>\n",
" <td>0.548372</td>\n",
" <td>8.030410</td>\n",
" <td>3.508031</td>\n",
" <td>0.001277</td>\n",
" <td>-0.240</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Foursquare Checkins Graph</td>\n",
" <td>2324</td>\n",
" <td>246702</td>\n",
" <td>212.30809</td>\n",
" <td>0.65273</td>\n",
" <td>7.751045</td>\n",
" <td>2.186112</td>\n",
" <td>0.000938</td>\n",
" <td>-0.056</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Brightkite Friendship Graph</td>\n",
" <td>5420</td>\n",
" <td>14690</td>\n",
" <td>5.420664</td>\n",
" <td>0.218571</td>\n",
" <td>8.597851</td>\n",
" <td>5.231807</td>\n",
" <td>0.000664</td>\n",
" <td>-0.200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Gowalla Friendship Graph</td>\n",
" <td>2294</td>\n",
" <td>5548</td>\n",
" <td>4.836966</td>\n",
" <td>0.234293</td>\n",
" <td>7.738052</td>\n",
" <td>5.396488</td>\n",
" <td>0.001331</td>\n",
" <td>-0.250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Foursquare Friendship Graph</td>\n",
" <td>1397</td>\n",
" <td>5323</td>\n",
" <td>7.620616</td>\n",
" <td>0.183485</td>\n",
" <td>7.242082</td>\n",
" <td>6.45841</td>\n",
" <td>0.001531</td>\n",
" <td>-0.170</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Graph Number of Nodes Number of Edges Average Degree \\\n",
"0 Brightkite Checkins Graph 6493 292973 90.242723 \n",
"1 Gowalla Checkins Graph 3073 62790 40.865604 \n",
"2 Foursquare Checkins Graph 2324 246702 212.30809 \n",
"3 Brightkite Friendship Graph 5420 14690 5.420664 \n",
"4 Gowalla Friendship Graph 2294 5548 4.836966 \n",
"5 Foursquare Friendship Graph 1397 5323 7.620616 \n",
"\n",
" Average Clustering Coefficient log N Average Shortest Path Length \\\n",
"0 0.713999 8.778480 3.013369 \n",
"1 0.548372 8.030410 3.508031 \n",
"2 0.65273 7.751045 2.186112 \n",
"3 0.218571 8.597851 5.231807 \n",
"4 0.234293 7.738052 5.396488 \n",
"5 0.183485 7.242082 6.45841 \n",
"\n",
" betweenness centrality omega-coefficient \n",
"0 0.000534 -0.180 \n",
"1 0.001277 -0.240 \n",
"2 0.000938 -0.056 \n",
"3 0.000664 -0.200 \n",
"4 0.001331 -0.250 \n",
"5 0.001531 -0.170 "
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
} }
], ],
"source": [ "source": [
"analysis_results\n" "standard_omega = nx.omega(G, nrand=10, seed=42)\n",
] "standard_omega"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# save the results into a pickle file\n",
"analysis_results.to_pickle('analysis_results.pkl')"
] ]
} }
], ],
"metadata": { "metadata": {
"kernelspec": { "kernelspec": {
"display_name": "Python 3.10.8 64-bit", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@ -832,7 +194,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.10.9 (main, Dec 19 2022, 17:35:49) [GCC 12.2.0]" "version": "3.10.9"
}, },
"orig_nbformat": 4, "orig_nbformat": 4,
"vscode": { "vscode": {

@ -23,6 +23,7 @@ import numpy as np
import gdown import gdown
from networkx.utils import py_random_state from networkx.utils import py_random_state
import shutil import shutil
from pyvis.network import Network
# ------------------------------------------------------------------------# # ------------------------------------------------------------------------#
@ -100,21 +101,15 @@ def download_datasets():
shutil.rmtree(os.path.join("data", "foursquare", "dataset_WWW2019")) shutil.rmtree(os.path.join("data", "foursquare", "dataset_WWW2019"))
shutil.rmtree(os.path.join("data", "foursquare", "__MACOSX")) shutil.rmtree(os.path.join("data", "foursquare", "__MACOSX"))
os.rename(os.path.join("data", "foursquare", "dataset_WWW_friendship_new.txt"), os.path.join("data", "foursquare", "foursquare_friends_edges.txt")) os.rename(os.path.join("data", "foursquare", "dataset_WWW_friendship_new.txt"), os.path.join("data", "foursquare", "foursquare_friends_edges.txt"))
os.rename(os.path.join("data", "foursquare", "dataset_WWW_Checkins_anonymized.txt"), os.path.join("data", "foursquare", "foursquare_checkins_full.txt"))
os.rename(os.path.join("data", "foursquare", "dataset_WWW_Checkins_anonymized.txt"), os.path.join("data", "foursquare", "foursquare_checkins.txt"))
## BRIGHTKITE CLEANING ## ## BRIGHTKITE CLEANING ##
os.rename(os.path.join("data", "brightkite", "loc-brightkite_totalCheckins.txt"), os.path.join("data", "brightkite", "brightkite_checkins_full.txt"))
os.rename(os.path.join("data", "brightkite", "loc-brightkite_totalCheckins.txt"), os.path.join("data", "brightkite", "brightkite_checkins.txt"))
os.rename(os.path.join("data", "brightkite", "loc-brightkite_edges.txt"), os.path.join("data", "brightkite", "brightkite_friends_edges.txt")) os.rename(os.path.join("data", "brightkite", "loc-brightkite_edges.txt"), os.path.join("data", "brightkite", "brightkite_friends_edges.txt"))
## GOWALLA CLEANING ## ## GOWALLA CLEANING ##
os.rename(os.path.join("data", "gowalla", "loc-gowalla_totalCheckins.txt"), os.path.join("data", "gowalla", "gowalla_checkins_full.txt"))
os.rename(os.path.join("data", "gowalla", "loc-gowalla_totalCheckins.txt"), os.path.join("data", "gowalla", "gowalla_checkins.txt"))
os.rename(os.path.join("data", "gowalla", "loc-gowalla_edges.txt"), os.path.join("data", "gowalla", "gowalla_friends_edges.txt")) os.rename(os.path.join("data", "gowalla", "loc-gowalla_edges.txt"), os.path.join("data", "gowalla", "gowalla_friends_edges.txt"))
# ------------------------------------------------------------------------# # ------------------------------------------------------------------------#
@ -392,7 +387,7 @@ def average_shortest_path(G: nx.Graph, k=None) -> float:
---------- ----------
`G` : networkx graph `G` : networkx graph
The graph to compute the average shortest path length of. The graph to compute the average shortest path length of.
`k` : int `k` : float
percentage of nodes to remove from the graph. If k is None, the average shortest path length of each connected component is computed using all the nodes of the connected component. percentage of nodes to remove from the graph. If k is None, the average shortest path length of each connected component is computed using all the nodes of the connected component.
Returns Returns
@ -548,3 +543,90 @@ def create_random_graphs(G: nx.Graph, model = None, save = True) -> nx.Graph:
print("\tThe file graph has been saved in the folder data/random/watts_strogatz with the syntax watts_strogatz_n_nodes_n_edges.gpickle") print("\tThe file graph has been saved in the folder data/random/watts_strogatz with the syntax watts_strogatz_n_nodes_n_edges.gpickle")
return G_random return G_random
def visualize_graphs(G: nx.Graph, k: float = None, connected = True):
    """
    Function to visualize a (sampled) graph in a HTML page using pyvis

    Parameters
    ----------
    `G` : nx.Graph
        The graph to visualize. It is NOT modified: the sampling is done on a read-only subgraph view.
    `k` : float
        The fraction of nodes (between 0 and 1) to remove from the graph before drawing it. Default is None, in which case it is chosen such that about 1500 nodes are left in the sampled graph (no node is removed if the graph has 1500 nodes or less). Using the default value is strongly suggested, otherwise the visualization will be very slow.
    `connected` : bool
        If True, only the largest connected component of the sampled graph is drawn

    Returns
    -------
    None
        The visualization is written to html_graphs/<graph name>.html, where the graph name is `G.name` lowercased and with spaces replaced by underscores ("graph" if the graph has no name)

    Raises
    ------
    ValueError
        If `k` is not None and it is not between 0 and 1

    Notes:
    ------
    This is of course an approximation, it's nice to have an idea of the graph, but it's not a good idea trying to understand the graph in details from this sampled visualization.
    """

    n_nodes = G.number_of_nodes()

    if k is None:
        k = 1 - 1500 / n_nodes if n_nodes > 1500 else 0
    elif not 0 <= k <= 1:
        raise ValueError("k must be a float between 0 and 1, got {}".format(k))

    # pick the nodes to drop by position, so that the node labels never go through a numpy array (which would coerce their type)
    nodes = list(G.nodes)
    n_to_remove = int(k * n_nodes)
    removed_positions = set()
    if n_to_remove > 0:
        removed_positions = set(np.random.choice(n_nodes, size=n_to_remove, replace=False).tolist())
    kept_nodes = [node for position, node in enumerate(nodes) if position not in removed_positions]

    # work on a subgraph view instead of removing nodes in place: the caller's graph must stay untouched
    sampled = G.subgraph(kept_nodes)

    if connected and sampled.number_of_nodes() > 0:
        # take only the largest connected component
        largest_connected_component = max(nx.connected_components(sampled), key=len)
        sampled = sampled.subgraph(largest_connected_component)

    # create a pyvis network
    net = Network(directed=False, bgcolor='#1e1f29', font_color='white')

    # for some reasons, if I put % values, the graph is not displayed correctly. So I use pixels, sorry non FHD users
    net.width = '1920px'
    net.height = '1080px'

    # add nodes and edges
    net.add_nodes(list(sampled.nodes))
    net.add_edges(list(sampled.edges))

    # set the physics layout of the network
    net.set_options("""
    var options = {
    "edges": {
    "color": {
    "inherit": true
    },
    "smooth": false
    },
    "physics": {
    "repulsion": {
    "centralGravity": 0.25,
    "nodeDistance": 500,
    "damping": 0.67
    },
    "maxVelocity": 48,
    "minVelocity": 0.39,
    "solver": "repulsion"
    }
    }
    """)

    # an unnamed graph would otherwise be saved as the hidden file "html_graphs/.html"
    name = G.name.replace(" ", "_").lower() or "graph"

    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir
    os.makedirs("html_graphs", exist_ok=True)

    # save the graph in a html file
    net.show("html_graphs/{}.html".format(name))
    print("The graph has been saved in the folder html_graphs with the name {}.html" .format(name))

Loading…
Cancel
Save