created a testing notebook for each algo

pull/1/head
Luca Lombardo 2 years ago
parent 1c3c69b775
commit da82dcfa14

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@ -20,6 +20,13 @@
"from scipy.optimize import least_squares"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ALGORITHM 1 TESTING\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -35,14 +42,14 @@
"source": [
"G1 = nx.read_edgelist('../data/web-Stanford.txt', create_using=nx.DiGraph(), nodetype=int)\n",
"\n",
"# G2 = nx.read_edgelist('../data/web-BerkStan.txt', create_using=nx.DiGraph(), nodetype=int)"
"G2 = nx.read_edgelist('../data/web-BerkStan.txt', create_using=nx.DiGraph(), nodetype=int)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Creating the transition probability matrix"
"Creating the transition probability matrix"
]
},
{
@ -180,7 +187,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Algorithm 1 Shifted-Power method for PageRank with multiple damping factors:"
"Algorithm 1 Shifted-Power method for PageRank with multiple damping factors:"
]
},
{
@ -199,8 +206,6 @@
"metadata": {},
"outputs": [],
"source": [
"# this should return mv (the number of iteration needed for the convergence), and two vector called x and r. Where x is the vector of the pagerank and r is the residual vector\n",
"\n",
"def Algorithm1(Pt, v, tau, max_mv, a: list):\n",
" \n",
" start_time = time.time()\n",
@ -220,7 +225,7 @@
" x = r + v \n",
"\n",
" while max(Res) > tau and mv < max_mv:\n",
" u = Pt*u # should it be the same u of the beginning?\n",
" u = Pt*u \n",
" mv += 1 \n",
"\n",
" for i in range(len(a)):\n",
@ -242,6 +247,13 @@
" return mv, x, r, total_time "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Testing setup"
]
},
{
"cell_type": "code",
"execution_count": null,
@ -260,8 +272,7 @@
" tau = 10**(-i)\n",
" print(\"\\ntau = \", tau)\n",
" mv, x, r, total_time = Algorithm1(Pt, v, tau, max_mv, a)\n",
" df = df.append({'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}, ignore_index=True) \n",
"\n"
" df = df.append({'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}, ignore_index=True) "
]
},
{
@ -278,7 +289,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Plotting the results of the algorithm for different values of tau, and fixed alpha"
"Plotting the results of the algorithm for different values of tau, and fixed alpha"
]
},
{
@ -288,170 +299,26 @@
"outputs": [],
"source": [
"x = df['tau'][::-1].tolist()\n",
"y = df['iterations'].tolist()\n",
"y = df['products m-v'].tolist()\n",
"\n",
"fig1 = go.Figure(data=go.Scatter(x=x, y=y, mode='lines+markers'), \n",
" layout=go.Layout(title='Iterations needed for the convergence', xaxis_title='tau', yaxis_title='iterations'))\n",
"fig = go.Figure(data=go.Scatter(x=x, y=y, mode='lines+markers'),\n",
" layout=go.Layout(title='products needed for the convergence', xaxis_title='tau', yaxis_title='products matrix vector'))\n",
"\n",
"# save the plot in a html file\n",
"fig1.write_html(\"../data/results/algo1/taus_over_iterations.html\")\n",
"# save the figure as a html file\n",
"fig.write_html(\"../data/results/algo1/taus_over_prods.html\")\n",
"print(\"The plot has been saved in the folder data/results/algo1\")\n",
"\n",
"##### RESULTS OVER TIME #####\n",
"\n",
"x1 = df['tau'][::-1].tolist()\n",
"y1 = df['time'].tolist()\n",
"\n",
"fig2 = go.Figure(data=go.Scatter(x=x1, y=y1, mode='lines+markers'),\n",
"fig = go.Figure(data=go.Scatter(x=x1, y=y1, mode='lines+markers'),\n",
" layout=go.Layout(title='Time needed for the convergence', xaxis_title='tau', yaxis_title='time (seconds)'))\n",
"\n",
"# save the plot in a html file\n",
"fig2.write_html(\"../data/results/algo1/taus_over_time.html\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To view the graph just use the command\n",
"\n",
"```bash\n",
"firefox taus_over_iterations.html \n",
"```\n",
"or \n",
"\n",
"```bash\n",
"firefox taus_over_time.html\n",
"```\n",
"\n",
"_In the right folder_"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"def Arnoldi(A, v, m): # defined ad algorithm 2 in the paper\n",
" beta = norm(v)\n",
" print(\"A\")\n",
" v = v/beta\n",
" print(\"B\")\n",
" h = sp.sparse.lil_matrix((m,m))\n",
" print(\"C\")\n",
"\n",
" for j in range(m):\n",
" w = A.dot(v)\n",
" print(\"D\")\n",
" for i in range(j):\n",
" h[i,j] = v.T.dot(w)\n",
" print(\"E\")\n",
" w = w - h[i,j]*v[i]\n",
" print(\"F\")\n",
"\n",
" h[j+1,j] = norm(w)\n",
" print(\"G\")\n",
"\n",
" if h[j+1,j] == 0:\n",
" print(\"The algorithm didn't converge\")\n",
" m = j\n",
" v[m+1] = 0\n",
" break\n",
" else:\n",
" print(\"H\")\n",
" v[j+1] = w**h[j+1,j]\n",
" print(\"I\")\n",
"\n",
" return v, h, m, beta, j"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"A = sp.sparse.rand(100,100, density=0.5, format='lil')\n",
"v = sp.sparse.rand(100,1, density=1, format='lil')\n",
"m = 100"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"v, h, m, beta, j = Arnoldi(A, v, m)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def Algo4(Pt, v, m, a: list, tau, maxit: int, x):\n",
" \n",
" iter = 1\n",
" mv = 0\n",
" e1 = sp.sparse.lil_matrix((1,n))\n",
" e1[0,0] = 1\n",
" x = sp.sparse.lil_matrix((len(a),1))\n",
" I = sp.sparse.eye(n, n, format='lil')\n",
" res = sp.sparse.lil_matrix((len(a),1))\n",
" r = sp.sparse.lil_matrix((n,1))\n",
" y = sp.sparse.lil_matrix((n,1))\n",
"\n",
" for i in range(len(a)):\n",
" r = ((1-a[i])**a[i])*v - ((1**a[i])*I - Pt).dot(x)\n",
" res[i] = a[i]*norm(r)\n",
"\n",
" def Find_k(res, maxit):\n",
" k = 0\n",
" for i in range(len(a)):\n",
" if res[i] == max(res):\n",
" k = i\n",
" break\n",
" return k\n",
"\n",
" def Find_gamma(res, a, k):\n",
" gamma = sp.sparse.lil_matrix((len(a),1))\n",
" for i in range(len(a)):\n",
" if i != k:\n",
" gamma[i] = (res[i]*a[k])/(res[k]*a[i])\n",
" else:\n",
" gamma[i] = 0\n",
" return gamma\n",
"\n",
"\n",
" while max(res) > tau and iter < maxit:\n",
" k = Find_k(res, maxit)\n",
" gamma = Find_gamma(res, a, k)\n",
" v, h, m, beta, j = Arnoldi((1**a[k])*I - Pt, r, m)\n",
" Hbar = sp.sparse.lil_matrix((m+1,m))\n",
" Hbar[0:m,0:m] = h\n",
" Hbar[m+1,0:m] = e1\n",
"\n",
" mv += j\n",
"\n",
" # solve the least squares problem for Hbar*x = beta*e1\n",
" y = sp.sparse.linalg.least_squares(Hbar, beta*e1)\n",
" res[k] = a[k]*norm(beta*e1 - Hbar*y)\n",
" x[k] = x[k] + v*y[k]\n",
"\n",
" for i in range(len(a)):\n",
" if i != k:\n",
" if res[i] >= tau:\n",
" Hbar[i] = Hbar[k] + ((1-a[i])/a[i] - (1-a[k])/a[k])*I\n",
" z = beta*e1 - Hbar*y\n",
" y = sp.sparse.linalg.solve(Hbar, gamma*beta*e1)\n",
" x = x + v*y\n",
" res[i] = a[i]**a[k]*gamma[i]*res[k]\n",
" \n",
" iter += 1\n",
" \n",
" return x, res, mv\n"
"fig.write_html(\"../data/results/algo1/taus_over_time.html\")\n",
"print(\"The plot has been saved in the folder data/results/algo1\")\n"
]
}
],

@ -0,0 +1,124 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import networkx as nx\n",
"import time\n",
"import math\n",
"import pandas as pd\n",
"import scipy as sp\n",
"import plotly.express as px\n",
"import plotly.graph_objs as go\n",
"from scipy.sparse import *\n",
"from scipy import linalg\n",
"from scipy.sparse.linalg import norm\n",
"from scipy.optimize import least_squares"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Algorithm 2 testing"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def Arnoldi(A, v, m): # defined ad algorithm 2 in the paper\n",
" beta = norm(v)\n",
" print(\"A\")\n",
" v = v/beta\n",
" print(\"B\")\n",
" h = sp.sparse.lil_matrix((m,m))\n",
" print(\"C\")\n",
"\n",
" for j in range(m):\n",
" w = A.dot(v)\n",
" print(\"D\")\n",
" for i in range(j):\n",
" h[i,j] = v.T.dot(w)\n",
" print(\"E\")\n",
" w = w - h[i,j]*v[i]\n",
" print(\"F\")\n",
"\n",
" h[j+1,j] = norm(w)\n",
" print(\"G\")\n",
"\n",
" if h[j+1,j] == 0:\n",
" print(\"The algorithm didn't converge\")\n",
" m = j\n",
" v[m+1] = 0\n",
" break\n",
" else:\n",
" print(\"H\")\n",
" v[j+1] = w**h[j+1,j] # THIS IS WRONG, I DON'T KNOW HOW TO FIX IT. ERROR \" matrix is not square\"\n",
" print(\"I\")\n",
"\n",
" return v, h, m, beta, j"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Creating a small test case"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"A = sp.sparse.rand(100,100, density=0.5, format='lil')\n",
"v = sp.sparse.rand(100,1, density=0.5, format='lil')\n",
"m = 100"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"v, h, m, beta, j = Arnoldi(A, v, m)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.6 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -0,0 +1,174 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import networkx as nx\n",
"import time\n",
"import math\n",
"import pandas as pd\n",
"import scipy as sp\n",
"import plotly.express as px\n",
"import plotly.graph_objs as go\n",
"from scipy.sparse import *\n",
"from scipy import linalg\n",
"from scipy.sparse.linalg import norm\n",
"from scipy.optimize import least_squares"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This function is needed in the algorithm. Note that this is a NON-functioning version, for now it's just a place holder. When algo2_testing will be completed, this will be updated and I'll work on algo4_testing."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def Arnoldi(A, v, m): # defined ad algorithm 2 in the paper\n",
" beta = norm(v)\n",
" print(\"A\")\n",
" v = v/beta\n",
" print(\"B\")\n",
" h = sp.sparse.lil_matrix((m,m))\n",
" print(\"C\")\n",
"\n",
" for j in range(m):\n",
" w = A.dot(v)\n",
" print(\"D\")\n",
" for i in range(j):\n",
" h[i,j] = v.T.dot(w)\n",
" print(\"E\")\n",
" w = w - h[i,j]*v[i]\n",
" print(\"F\")\n",
"\n",
" h[j+1,j] = norm(w)\n",
" print(\"G\")\n",
"\n",
" if h[j+1,j] == 0:\n",
" print(\"The algorithm didn't converge\")\n",
" m = j\n",
" v[m+1] = 0\n",
" break\n",
" else:\n",
" print(\"H\")\n",
" v[j+1] = w**h[j+1,j] # THIS IS WRONG, I DON'T KNOW HOW TO FIX IT. ERROR \" matrix is not square\"\n",
" print(\"I\")\n",
"\n",
" return v, h, m, beta, j"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Algorithm 4 testing\n",
"\n",
"Still a complete mess. Conceptually and technically wrong. I'll work on it when algo2_testing will be completed."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def Algo4(Pt, v, m, a: list, tau, maxit: int, x):\n",
" \n",
" iter = 1\n",
" mv = 0\n",
" e1 = sp.sparse.lil_matrix((1,n))\n",
" e1[0,0] = 1\n",
" x = sp.sparse.lil_matrix((len(a),1))\n",
" I = sp.sparse.eye(n, n, format='lil')\n",
" res = sp.sparse.lil_matrix((len(a),1))\n",
" r = sp.sparse.lil_matrix((n,1))\n",
" y = sp.sparse.lil_matrix((n,1))\n",
"\n",
" for i in range(len(a)): # I don't think that this is what was intended in the pseudocode... \n",
" r = ((1-a[i])**a[i])*v - ((1**a[i])*I - Pt).dot(x)\n",
" res[i] = a[i]*norm(r)\n",
"\n",
" def Find_k(res, maxit):\n",
" k = 0\n",
" for i in range(len(a)):\n",
" if res[i] == max(res):\n",
" k = i\n",
" break\n",
" return k\n",
"\n",
" def Find_gamma(res, a, k):\n",
" gamma = sp.sparse.lil_matrix((len(a),1))\n",
" for i in range(len(a)):\n",
" if i != k:\n",
" gamma[i] = (res[i]*a[k])/(res[k]*a[i])\n",
" else:\n",
" gamma[i] = 0\n",
" return gamma\n",
"\n",
"\n",
" while max(res) > tau and iter < maxit:\n",
" k = Find_k(res, maxit)\n",
" gamma = Find_gamma(res, a, k)\n",
" v, h, m, beta, j = Arnoldi((1**a[k])*I - Pt, r, m)\n",
" Hbar = sp.sparse.lil_matrix((m+1,m))\n",
" Hbar[0:m,0:m] = h\n",
" Hbar[m+1,0:m] = e1\n",
"\n",
" mv += j\n",
"\n",
" y = sp.sparse.linalg.least_squares(Hbar, beta*e1)\n",
" res[k] = a[k]*norm(beta*e1 - Hbar*y)\n",
" x[k] = x[k] + v*y[k]\n",
"\n",
" for i in range(len(a)):\n",
" if i != k:\n",
" if res[i] >= tau:\n",
" Hbar[i] = Hbar[k] + ((1-a[i])/a[i] - (1-a[k])/a[k])*I\n",
" z = beta*e1 - Hbar*y\n",
" y = sp.sparse.linalg.solve(Hbar, gamma*beta*e1)\n",
" x = x + v*y\n",
" res[i] = a[i]**a[k]*gamma[i]*res[k]\n",
" \n",
" iter += 1\n",
" \n",
" return x, res, mv\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.6 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}

@ -22,7 +22,7 @@ class Utilities:
# Importing the dataset
def load_data():
# Loading the dataset
dataset = int(input("Choose the dataset:\n [1] web-Stanford\n [2] web-BerkStan: \nEnter an option: "))
dataset = int(input("Choose the dataset:\n [1] web-Stanford (use this one for now)\n [2] web-BerkStan: \nEnter an option: "))
if dataset == 1:
if exists('../data/web-Stanford.txt'):
@ -82,7 +82,6 @@ class Utilities:
print("Graph created based on the dataset\n")
return G, n
# # Creating the transition probability matrix
# The matrix is filled with zeros, and the (i,j) element is x if node i is connected to node j, where x is 1/(number of nodes connected to i).
def create_matrix(G):
print("Creating the transition probability matrix...")
@ -103,6 +102,7 @@ class Utilities:
print("List of dangling nodes created\n")
return d
# For now it is set to equally distribute the probability to all the nodes
def probability_vector(n):
print("Creating the probability vector...")
v = sp.sparse.lil_matrix((n,1))
@ -117,22 +117,24 @@ class Utilities:
print("Transition matrix created\n")
return Pt
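# Each value lies between 0 and 1. NOTE(review): under the usual PageRank convention a higher value means a LOWER probability of jumping to a random page (the jump has weight 1 - alpha) -- TODO confirm against the paper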
def alpha():
    """Build the list of alpha values 0.85, 0.86, ..., 0.99.

    Presumably these are the damping factors handed to the algorithms as
    ``a`` -- the call site is not visible here, confirm against the caller.

    Returns:
        list of float: ``i / 100`` for every integer ``i`` from 85 to 99
        inclusive, in increasing order (15 values).
    """
    # A comprehension replaces the manual append loop; values and order are unchanged.
    return [i / 100 for i in range(85, 100)]
# Class for plotting the results obtained. For now it can only plot the results of the first algorithm. To be updated once all the algorithms are implemented and the test cases are well defined
class Plotting:
def tau_over_iterations(df):
x = df['tau'][::-1].tolist()
y = df['iterations'].tolist()
y = df['products m-v'].tolist()
fig = go.Figure(data=go.Scatter(x=x, y=y, mode='lines+markers'),
layout=go.Layout(title='Iterations needed for the convergence', xaxis_title='tau', yaxis_title='iterations'))
layout=go.Layout(title='products needed for the convergence', xaxis_title='tau', yaxis_title='products matrix vector'))
# save the figure as a html file
fig.write_html("../data/results/algo1/taus_over_iterations.html")
fig.write_html("../data/results/algo1/taus_over_prods.html")
print("The plot has been saved in the folder data/results/algo1")
def tau_over_time(df):
@ -147,7 +149,7 @@ class Plotting:
print("The plot has been saved in the folder data/results/algo1")
class Algorithms:
# Power method adapted to the PageRank problem with different damping factors. Referred to as Algorithm 1 in the paper
def algo1(Pt, v, tau, max_mv, a: list):
start_time = time.time()
@ -190,7 +192,8 @@ class Algorithms:
return mv, x, r, total_time
def Arnoldi(A, v, m): # defined ad algorithm 2 in the paper
# Refers to Algorithm 2 in the paper; it is needed to implement Algorithm 4. It doesn't work yet: refer to the file testing.ipynb for more details. The function below is just a placeholder for now
def Arnoldi(A, v, m):
beta = norm(v)
v = v/beta
h = sp.sparse.lil_matrix((m,m))
@ -212,7 +215,7 @@ class Algorithms:
return v, h, m, beta, j
# pandas dataframe to store the results
df = pd.DataFrame(columns=['alpha', 'iterations', 'tau', 'time'])
df = pd.DataFrame(columns=['alpha', 'products m-v', 'tau', 'time'])
# Main
if __name__ == "__main__":
@ -234,7 +237,7 @@ if __name__ == "__main__":
mv, x, r, total_time = Algorithms.algo1(Pt, v, tau, max_mv, a)
# store the results in the dataframe
df = df.append({'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}, ignore_index=True)
df = df.append({'alpha': a, 'products m-v': mv, 'tau': tau, 'time': total_time}, ignore_index=True)
# save the results in a csv file
df.to_csv('../data/results/algo1/different_tau.csv', index=False)
@ -245,4 +248,4 @@ if __name__ == "__main__":
# print in the terminal the dataframe columns products m-v, tau and time
print("Computations done. Here are the results:")
print("\n", df[['iterations', 'tau', 'time']])
print("\n", df[['products m-v', 'tau', 'time']])

Loading…
Cancel
Save