testing over the algorithm 1 with different taus
parent
6651a16f28
commit
990d59e3ab
@ -0,0 +1,462 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import networkx as nx\n",
|
||||
"import time\n",
|
||||
"import math\n",
|
||||
"import pandas as pd\n",
|
||||
"import scipy as sp\n",
|
||||
"import plotly.express as px\n",
|
||||
"import plotly.graph_objs as go\n",
|
||||
"from scipy.sparse import *\n",
|
||||
"from scipy.sparse.linalg import norm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's create two graphs from the list of edges downloaded from the Snap database. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Directed web graph of stanford.edu pages from the SNAP collection
# (nodes are page ids, edges are hyperlinks); nodetype=int parses labels as ints.
G1 = nx.read_edgelist('../data/web-Stanford.txt', create_using=nx.DiGraph(), nodetype=int)

# Second, larger graph kept for later experiments (disabled here to save memory).
# G2 = nx.read_edgelist('../data/web-BerkStan.txt', create_using=nx.DiGraph(), nodetype=int)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Creating the transition probability matrix"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# square matrix of size n x n, where n is the number of nodes in the graph. The matrix is filled with zeros and the (i,j) element is x if the node i is connected to the node j. Where x is 1/(number of nodes connected to i).\n",
|
||||
"\n",
|
||||
def create_matrix(G):
    """Build the n x n transition probability matrix of graph G.

    Entry (row_i, col_j) is 1/outdegree(i) when the graph has an edge
    i -> j, and 0 otherwise, so every non-dangling row sums to one
    (row-stochastic).  Dangling nodes (no out-links) leave their row
    all-zero; they are patched later via the dangling vector d.

    NOTE(review): later cells multiply this matrix on the right by a
    column probability vector (Pt.dot(v)), which assumes a
    column-stochastic matrix -- confirm whether the transpose is the
    intended convention.

    Parameters
    ----------
    G : networkx.DiGraph-like
        Directed graph; G[i] must yield the out-neighbors of node i.

    Returns
    -------
    scipy.sparse.lil_matrix of shape (n, n)
    """
    n = G.number_of_nodes()
    # Map node labels to 0-based matrix indices.  The original code used
    # label-1, which silently assumes labels are exactly 1..n; this map
    # reproduces that ordering for such graphs and also works when labels
    # are arbitrary or non-contiguous.
    index = {node: k for k, node in enumerate(sorted(G.nodes()))}
    P = sp.sparse.lil_matrix((n, n))
    for i in G.nodes():
        out_neighbors = G[i]
        if len(out_neighbors) == 0:
            continue  # dangling node: row stays all-zero
        weight = 1 / len(out_neighbors)
        for j in out_neighbors:
            P[index[i], index[j]] = weight
    return P
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To ensure that the random process has a unique stationary distribution and it will not stagnate, the transition matrix P is usually modified to be an irreducible stochastic matrix A (called the Google matrix) as follows\n",
|
||||
"\n",
|
||||
"$$ A = \\alpha \\tilde{P} + (1-\\alpha)v e^T$$\n",
|
||||
"\n",
|
||||
"Where $\\tilde{P}$ is defined as \n",
|
||||
"\n",
|
||||
"$$ \\tilde{P} = P + v d^T$$\n",
|
||||
"\n",
|
||||
"Where $d \\in \\mathbb{N}^{n \\times 1}$ is a binary vector tracing the indices of dangling web-pages with no hyperlinks, i.e., $d(i) = 1$ if the `ith` page has no hyperlink, $v \\in \\mathbb{R}^{n \\times 1}$ is a probability vector, $e = [1, 1, . . . , 1]^T$, and $0 < \\alpha < 1$ is the so-called damping factor that represents the probability in the model that the surfer transfers by clicking a hyperlink rather than by other means."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Number of nodes in the Stanford web graph; used to size every vector below.
n = G1.number_of_nodes()
# Sparse transition probability matrix built from the graph's adjacency.
P = create_matrix(G1)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"the vector `d` solves the dangling nodes problem"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# d is the dangling-node indicator: d[i] = 1 when row i of P is all-zero
# (page i has no out-links), 0 otherwise.  It feeds the rank-one
# correction that turns P into the stochastic matrix Pt below.
d = sp.sparse.lil_matrix((n, 1))
# One vectorized pass over the row sums instead of summing each sparse
# row individually inside the loop (much faster for large n; result is
# identical).
row_sums = P.sum(axis=1)
for i in range(n):
    if row_sums[i, 0] == 0:
        d[i] = 1
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The vector v is a probability vector, the sum of its elements must be one"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# v is the teleport/personalization probability vector of size n x 1 with
# uniform mass 1/n on every page (entries sum to one).
# https://en.wikipedia.org/wiki/Probability_vector
# Built in one shot from a dense array instead of assigning n sparse
# entries in a Python loop; the resulting lil_matrix is identical.
v = sp.sparse.lil_matrix(np.full((n, 1), 1 / n))
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we can compute the transition matrix\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Rank-one dangling correction: Pt = P + v * d^T redirects the random
# surfer from dangling pages according to v, per the formula above.
# NOTE(review): with the row-stochastic P built by create_matrix, the usual
# fix patches dangling ROWS (d * v^T); v.dot(d.T) patches columns, which
# matches a column-stochastic convention -- confirm which is intended.
Pt = P + v.dot(d.T)

# Pt is a sparse matrix too
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# e is a 1 x n sparse row vector of all ones.  (The original comment said
# "nx1", but the code built 1 x n; this matches the code's actual shape.)
# Built directly from a dense array instead of n single-entry assignments.
e = sp.sparse.lil_matrix(np.ones((1, n)))
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # v*eT is a nxn sparse matrix filled all with 1/n, let's call it B\n",
|
||||
"\n",
|
||||
"# B = sp.sparse.lil_matrix((n,n))\n",
|
||||
"# for i in range(n):\n",
|
||||
"# for j in range(n):\n",
|
||||
"# B[i,j] = 1/n\n",
|
||||
"\n",
|
||||
"# A = alpha*Pt + (1-alpha)*B"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Algorithm 1 Shifted-Power method for PageRank with multiple damping factors:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Accumulates one result row per tau value: the list of damping factors
# used, iterations to converge, the tolerance tau, and wall-clock time.
df = pd.DataFrame(columns=['alpha', 'iterations', 'tau', 'time'])
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
def Algorithm1(Pt, v, tau, max_mv, a: list):
    """Shifted power method for PageRank with multiple damping factors.

    Runs a single power iteration and reuses the matrix-vector products
    u_k = Pt^k (Pt v - v) for every damping factor in `a`, stopping each
    factor once its residual norm drops below `tau`.

    Parameters
    ----------
    Pt : scipy.sparse matrix, shape (n, n)
        Dangling-corrected transition matrix.
    v : scipy.sparse matrix, shape (n, 1)
        Teleport (personalization) probability vector.
    tau : float
        Convergence tolerance on the residual norm.
    max_mv : int
        Maximum number of matrix-vector products allowed.
    a : list of float
        Damping factors, each in (0, 1).

    Returns
    -------
    (mv, x, r, total_time)
        mv: matrix-vector products performed; x: last accumulated PageRank
        vector; r: last residual vector; total_time: elapsed seconds.

    NOTE(review): x and r are single vectors shared across all damping
    factors (each factor's update overwrites the previous one's); the
    paper's algorithm keeps one solution per alpha -- confirm intent.
    """
    start_time = time.time()

    # Size everything from Pt itself instead of relying on the notebook
    # global `n` (removes a hidden-state dependency).
    n_nodes = Pt.shape[0]

    u = Pt.dot(v) - v
    mv = 1  # matrix-vector products so far

    r = sp.sparse.lil_matrix((n_nodes, 1))
    x = sp.sparse.lil_matrix((n_nodes, 1))
    # Residual norms as a dense numpy vector: the original sparse
    # lil_matrix made max() and elementwise comparisons slow and fragile;
    # the stored values are identical.
    Res = np.zeros(len(a))

    # Initial shifted residual for every damping factor.
    for i in range(len(a)):
        r = a[i] * u
        Res[i] = norm(r)
        if Res[i] > tau:
            x = r + v

    # Iterate while any damping factor is still above tolerance.
    while (Res > tau).any() and mv < max_mv:
        u = Pt * u
        mv += 1

        for i in range(len(a)):
            # Only touch factors that have not converged yet.
            if Res[i] >= tau:
                r = (a[i] ** (mv + 1)) * u
                Res[i] = norm(r)
                if Res[i] > tau:
                    x = r + x

    if mv == max_mv:
        print("The algorithm didn't converge in ", max_mv, " iterations")
    else:
        print("The algorithm converged in ", mv, " iterations")

    total_time = time.time() - start_time
    print("The algorithm took ", total_time, " seconds")

    return mv, x, r, total_time
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"tau = 1e-05\n",
|
||||
"The algorithm converged in 19 iterations\n",
|
||||
"The algorithm took 12.630054473876953 seconds\n",
|
||||
"\n",
|
||||
"tau = 1e-06\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_1733107/908708813.py:13: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
|
||||
" df = df.append({'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}, ignore_index=True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The algorithm converged in 56 iterations\n",
|
||||
"The algorithm took 32.28381681442261 seconds\n",
|
||||
"\n",
|
||||
"tau = 1e-07\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_1733107/908708813.py:13: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
|
||||
" df = df.append({'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}, ignore_index=True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The algorithm converged in 113 iterations\n",
|
||||
"The algorithm took 75.9386818408966 seconds\n",
|
||||
"\n",
|
||||
"tau = 1e-08\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_1733107/908708813.py:13: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
|
||||
" df = df.append({'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}, ignore_index=True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The algorithm converged in 275 iterations\n",
|
||||
"The algorithm took 107.68565726280212 seconds\n",
|
||||
"\n",
|
||||
"tau = 1e-09\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_1733107/908708813.py:13: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
|
||||
" df = df.append({'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}, ignore_index=True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"The algorithm converged in 454 iterations\n",
|
||||
"The algorithm took 152.8943109512329 seconds\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"/tmp/ipykernel_1733107/908708813.py:13: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
|
||||
" df = df.append({'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}, ignore_index=True)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "OSError",
|
||||
"evalue": "Cannot save file into a non-existent directory: '../data/algo1'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m/tmp/ipykernel_1733107/908708813.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;31m# save the results in a csv file\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../data/algo1/different_tau.csv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mto_csv\u001b[0;34m(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, decimal, errors, storage_options)\u001b[0m\n\u001b[1;32m 3549\u001b[0m )\n\u001b[1;32m 3550\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3551\u001b[0;31m return DataFrameRenderer(formatter).to_csv(\n\u001b[0m\u001b[1;32m 3552\u001b[0m \u001b[0mpath_or_buf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3553\u001b[0m \u001b[0mline_terminator\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mline_terminator\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/formats/format.py\u001b[0m in \u001b[0;36mto_csv\u001b[0;34m(self, path_or_buf, encoding, sep, columns, index_label, mode, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, errors, storage_options)\u001b[0m\n\u001b[1;32m 1178\u001b[0m \u001b[0mformatter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfmt\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1179\u001b[0m )\n\u001b[0;32m-> 1180\u001b[0;31m \u001b[0mcsv_formatter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1181\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1182\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcreated_buffer\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/formats/csvs.py\u001b[0m in \u001b[0;36msave\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 239\u001b[0m \"\"\"\n\u001b[1;32m 240\u001b[0m \u001b[0;31m# apply compression and byte/text conversion\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 241\u001b[0;31m with get_handle(\n\u001b[0m\u001b[1;32m 242\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/common.py\u001b[0m in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 692\u001b[0m \u001b[0;31m# Only for write methods\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 693\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"r\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mis_path\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 694\u001b[0;31m \u001b[0mcheck_parent_directory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhandle\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 695\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 696\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcompression\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/common.py\u001b[0m in \u001b[0;36mcheck_parent_directory\u001b[0;34m(path)\u001b[0m\n\u001b[1;32m 566\u001b[0m \u001b[0mparent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mPath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparent\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 567\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mparent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_dir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 568\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mOSError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mrf\"Cannot save file into a non-existent directory: '{parent}'\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 569\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 570\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mOSError\u001b[0m: Cannot save file into a non-existent directory: '../data/algo1'"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
# Damping factors alpha = 0.85, 0.86, ..., 0.99
a = [i / 100 for i in range(85, 100)]

max_mv = 1000

# Run the algorithm for tau = 1e-5 down to 1e-9 and record one result row
# per tolerance level.
for i in range(5, 10):
    tau = 10 ** (-i)
    print("\ntau = ", tau)
    mv, x, r, total_time = Algorithm1(Pt, v, tau, max_mv, a)
    # pd.concat replaces DataFrame.append, which is deprecated (it raised
    # the FutureWarning visible in this cell's output) and was removed in
    # pandas 2.x; the resulting frame is identical.
    row = pd.DataFrame([{'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}])
    df = pd.concat([df, row], ignore_index=True)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Save the results to CSV, creating the output directory first -- a prior
# run of this notebook failed with
# "OSError: Cannot save file into a non-existent directory".
from pathlib import Path

out_dir = Path('../data/results/algo1')
out_dir.mkdir(parents=True, exist_ok=True)
df.to_csv(out_dir / 'different_tau.csv', index=False)
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Plotting the results of the algorithm for different values of tau, and fixed alpha"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
# Sort by tau (ascending) so x and y values stay paired.  The original
# reversed only the x list (df['tau'][::-1]) while leaving the y lists in
# frame order, which paired each tau with the iterations/time measured
# for a *different* tau.
plot_df = df.sort_values('tau')

taus = plot_df['tau'].tolist()
iterations = plot_df['iterations'].tolist()

fig1 = go.Figure(
    data=go.Scatter(x=taus, y=iterations, mode='lines+markers'),
    layout=go.Layout(title='Iterations needed for the convergence',
                     xaxis_title='tau', yaxis_title='iterations'))

# save the plot in a html file
fig1.write_html("../data/results/algo1/taus_over_iterations.html")

##### RESULTS OVER TIME #####

times = plot_df['time'].tolist()

fig2 = go.Figure(
    data=go.Scatter(x=taus, y=times, mode='lines+markers'),
    layout=go.Layout(title='Time needed for the convergence',
                     xaxis_title='tau', yaxis_title='time (seconds)'))

# save the plot in a html file
fig2.write_html("../data/results/algo1/taus_over_time.html")
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To view the graph just use the command\n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"firefox taus_over_iterations.html \n",
|
||||
"```\n",
|
||||
"or \n",
|
||||
"\n",
|
||||
"```bash\n",
|
||||
"firefox taus_over_time.html\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"_In the right folder_"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.10.6 64-bit",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -1,368 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import networkx as nx\n",
|
||||
"import time\n",
|
||||
"import math\n",
|
||||
"import scipy as sp\n",
|
||||
"from scipy.sparse import *\n",
|
||||
"from scipy.sparse.linalg import norm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's create two graphs from the list of edges downloaded from the Snap database. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"G1 = nx.read_edgelist('../data/web-Stanford.txt', create_using=nx.DiGraph(), nodetype=int)\n",
|
||||
"\n",
|
||||
"G2 = nx.read_edgelist('../data/web-BerkStan.txt', create_using=nx.DiGraph(), nodetype=int)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Creating the transition probability matrix"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# square matrix of size n x n, where n is the number of nodes in the graph. The matrix is filled with zeros and the (i,j) element is x if the node i is connected to the node j. Where x is 1/(number of nodes connected to i).\n",
|
||||
"\n",
|
||||
"def create_matrix(G):\n",
|
||||
" n = G.number_of_nodes()\n",
|
||||
" P = sp.sparse.lil_matrix((n,n))\n",
|
||||
" for i in G.nodes():\n",
|
||||
" for j in G[i]: #G[i] is the list of nodes connected to i, it's neighbors\n",
|
||||
" P[i-1,j-1] = 1/len(G[i])\n",
|
||||
" return P"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To ensure that the random process has a unique stationary distribution and it will not stagnate, the transition matrix P is usually modified to be an irreducible stochastic matrix A (called the Google matrix) as follows\n",
|
||||
"\n",
|
||||
"$$ A = \\alpha \\tilde{P} + (1-\\alpha)v e^T$$\n",
|
||||
"\n",
|
||||
"Where $\\tilde{P}$ is defined as \n",
|
||||
"\n",
|
||||
"$$ \\tilde{P} = P + v d^T$$\n",
|
||||
"\n",
|
||||
"Where $d \\in \\mathbb{N}^{n \\times 1}$ s a binary vector tracing the indices of dangling web-pages with no hyperlinks, i.e., $d(i ) = 1$ if the `ith` page has no hyperlink, $v \\in \\mathbb{R}^{n \\times 1}$ is a probability vector, $e = [1, 1, . . . , 1]^T$ , and $0 < \\alpha < 1$ is the so-called damping factor that represents the probability in the model that the surfer transfer by clicking a hyperlink rather than other ways"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"n = G2.number_of_nodes()\n",
|
||||
"P = create_matrix(G2) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"the vector `d` solves the dangling nodes problem"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# define d as a nx1 sparse matrix, where n is the number of nodes in the graph. The vector is filled with d(i) = 1 if the i row of the matrix P is filled with zeros, other wise is 0\n",
|
||||
"\n",
|
||||
"# d is the vector of dangling nodes\n",
|
||||
"d = sp.sparse.lil_matrix((n,1))\n",
|
||||
"for i in range(n):\n",
|
||||
" if P[i].sum() == 0:\n",
|
||||
" d[i] = 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The vector v is a probability vector, the sum of its elements bust be one"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# define v as the probability vector of size n x 1, where n is the number of nodes in the graph. The vector is filled with 1/n\n",
|
||||
"# https://en.wikipedia.org/wiki/Probability_vector\n",
|
||||
"\n",
|
||||
"v = sp.sparse.lil_matrix((n,1))\n",
|
||||
"for i in range(n):\n",
|
||||
" v[i] = 1/n "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we can compute the transition matrix\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Pt = P + v.dot(d.T)\n",
|
||||
"\n",
|
||||
"# Pt is a sparse matrix too"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# e is a nx1 sparse matrix filled with ones\n",
|
||||
"e = sp.sparse.lil_matrix((1,n))\n",
|
||||
"for i in range(n):\n",
|
||||
" e[0,i] = 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # v*eT is a nxn sparse matrix filled all with 1/n, let's call it B\n",
|
||||
"\n",
|
||||
"# B = sp.sparse.lil_matrix((n,n))\n",
|
||||
"# for i in range(n):\n",
|
||||
"# for j in range(n):\n",
|
||||
"# B[i,j] = 1/n\n",
|
||||
"\n",
|
||||
"# A = alpha*Pt + (1-alpha)*B"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Algorithm 1 Shifted-Power method for PageRank with multiple damping factors:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# list of alpha values, from 0.85 to 0.99 with step 0.01\n",
|
||||
"a = []\n",
|
||||
"for i in range(85,100):\n",
|
||||
" a.append(i/100)\n",
|
||||
"\n",
|
||||
"tau = 10**-6\n",
|
||||
"max_mv = 100\n",
|
||||
"\n",
|
||||
"# this should return mv (the number of iteration needed for the convergence), and two vector called x and r. Where x is the vector of the pagerank and r is the residual vector\n",
|
||||
"\n",
|
||||
"def Algorithm1(Pt, v, tau, max_mv, a: list):\n",
|
||||
" # take time of the performance\n",
|
||||
" start_time = time.time()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" u = Pt.dot(v) - v \n",
|
||||
" mv = 1 # number of iteration\n",
|
||||
" r = sp.sparse.lil_matrix((n,1)) \n",
|
||||
" Res = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" x = sp.sparse.lil_matrix((n,1)) \n",
|
||||
"\n",
|
||||
" for i in range(len(a)):\n",
|
||||
" r = a[i]*(u) \n",
|
||||
" normed_r = norm(r)\n",
|
||||
" Res[i] = normed_r \n",
|
||||
"\n",
|
||||
" if Res[i] > tau:\n",
|
||||
" x = r + v \n",
|
||||
"\n",
|
||||
" print(\"STARTING THE WHILE LOOP\\n\")\n",
|
||||
"\n",
|
||||
" # take the maximum value of the sparse matrix Res\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" while max(Res) > tau and mv < max_mv:\n",
|
||||
" u = Pt*u # should it be the same u of the beginning?\n",
|
||||
" mv += 1 \n",
|
||||
" print(\"mv = \", mv)\n",
|
||||
" print(\"max(Res) = \", max(Res))\n",
|
||||
"\n",
|
||||
" for i in range(len(a)):\n",
|
||||
" if Res[i] >= tau: \n",
|
||||
" r = (a[i]**(mv+1))*(u)\n",
|
||||
" Res[i] = norm(r)\n",
|
||||
"\n",
|
||||
" if Res[i] > tau:\n",
|
||||
" x = r + x\n",
|
||||
"\n",
|
||||
" print(\"\\nEND OF THE WHILE LOOP\\n\")\n",
|
||||
"\n",
|
||||
" if mv == max_mv:\n",
|
||||
" print(\"The algorithm didn't converge in \", max_mv, \" iterations\")\n",
|
||||
" else:\n",
|
||||
" print(\"The algorithm converged in \", mv, \" iterations\")\n",
|
||||
"\n",
|
||||
" print(\"\\nThe execution time is %s seconds\" % (time.time() - start_time))\n",
|
||||
" \n",
|
||||
" return mv, x, r "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"mv, x, r = Algorithm1(Pt, v, tau, max_mv, a)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Algorithm 2 Arnoldi process"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def Algorithm2():\n",
|
||||
" pass"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Algorithm 4 shifted-GMRES method for PageRank with multiple damping factors: "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def Algorithm4(Pt, v, m , a: list, tau , maxit, x: list):\n",
|
||||
" iter = 1\n",
|
||||
" \n",
|
||||
" e1 = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" e1[0] = 1\n",
|
||||
"\n",
|
||||
" # identity matrix sparse\n",
|
||||
" I = sp.sparse.lil_matrix((len(a),len(a)))\n",
|
||||
" for i in range(n):\n",
|
||||
" I[i,i] = 1\n",
|
||||
"\n",
|
||||
" # create the page rank vector x\n",
|
||||
" x = sp.sparse.lil_matrix((n,1))\n",
|
||||
" for i in range(n):\n",
|
||||
" x[i] = 1/n\n",
|
||||
"\n",
|
||||
" # create the vector r \n",
|
||||
" r = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" for i in range(len(a)):\n",
|
||||
" r[i] = ((1-a[i])/a[i]).dot(v) - ((1/a[i]).dot(I) - Pt).dot(x[i]).dot(e1)\n",
|
||||
" \n",
|
||||
" # create the vector Res\n",
|
||||
" Res = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" for i in range(len(a)):\n",
|
||||
" Res[i] = a[i] * np.linalg.norm(r[i])\n",
|
||||
"\n",
|
||||
" mv = 0\n",
|
||||
"\n",
|
||||
" while max(Res) > tau and mv < maxit:\n",
|
||||
" # find the k that satisfy the condition res[k] = max(res[i])\n",
|
||||
" k = 0\n",
|
||||
" for i in range(len(a)):\n",
|
||||
" if Res[i] == max(Res):\n",
|
||||
" k = i\n",
|
||||
" break\n",
|
||||
" \n",
|
||||
" # compute a new vector called delta where delta[i] = (res[i]*a[k])/(res[k]*a[i])\n",
|
||||
" delta = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" for i in range(len(a)) and i != k:\n",
|
||||
" delta[i] = (Res[i]*a[k])/(Res[k]*a[i])\n",
|
||||
"\n",
|
||||
" # run algorithm 2\n",
|
||||
" # TO DO\n",
|
||||
"\n",
|
||||
" # j depends on the algorithm 2\n",
|
||||
" mv = mv + j\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" # .............\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" "
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.10.6 64-bit",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Loading…
Reference in New Issue