refined the algo1, starting to test algo2
parent
990d59e3ab
commit
aa7364c261
@ -0,0 +1,221 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# Importing the libraries
|
||||||
|
import os
|
||||||
|
import wget
|
||||||
|
import gzip
|
||||||
|
import time
|
||||||
|
import warnings
|
||||||
|
import scipy as sp
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import networkx as nx
|
||||||
|
import plotly.graph_objs as go
|
||||||
|
from scipy.sparse import *
|
||||||
|
from scipy.sparse.linalg import norm
|
||||||
|
from os.path import exists
|
||||||
|
|
||||||
|
|
||||||
|
warnings.simplefilter(action='ignore', category=FutureWarning)
|
||||||
|
# Silence pandas FutureWarnings (e.g. the DataFrame.append deprecation noise)
|
||||||
|
|
||||||
|
class utilities:
    """Helpers for fetching the SNAP web-graph datasets and building the
    sparse matrices/vectors used by the PageRank algorithms below."""

    @staticmethod
    def _download_if_missing(name):
        """Ensure ../data/<name>.txt exists and return its path.

        When the file is absent, create ../data if needed, download the
        gzipped SNAP dump and decompress it next to the archive.
        """
        path = '../data/%s.txt' % name
        if not exists(path):
            print("\nThe file doesn't exist, download it from https://snap.stanford.edu/data/%s.html" % name)

            # if there is no folder data, create it
            if not exists('../data'):
                os.makedirs('../data')

            # Downloading the dataset
            url = 'https://snap.stanford.edu/data/%s.txt.gz' % name
            wget.download(url, path + '.gz')

            # Unzipping the dataset
            with gzip.open(path + '.gz', 'rb') as f_in:
                with open(path, 'wb') as f_out:
                    f_out.write(f_in.read())

            print("\nDataset downloaded\n")
        return path

    @staticmethod
    def load_data():
        """Ask the user which dataset to use and return the path to its
        edge-list file, downloading it first when necessary.

        Raises ValueError for an option other than 1 or 2 (the original
        silently returned the raw integer, which broke downstream).
        """
        choice = int(input("Choose the dataset:\n [1] web-Stanford\n [2] web-BerkStan: \nEnter an option: "))
        if choice == 1:
            return utilities._download_if_missing('web-Stanford')
        elif choice == 2:
            return utilities._download_if_missing('web-BerkStan')
        raise ValueError("Invalid option: %d (expected 1 or 2)" % choice)

    @staticmethod
    def create_graph(dataset):
        """Read `dataset` as an integer edge list and return (DiGraph, node count)."""
        print("\nCreating the graph...")
        G = nx.read_edgelist(dataset, create_using=nx.DiGraph(), nodetype=int)
        n = G.number_of_nodes()
        print("Graph created based on the dataset\n")
        return G, n

    @staticmethod
    def create_matrix(G):
        """Build the sparse transition-probability matrix of G.

        P[i-1, j-1] = 1/outdegree(i) for every edge i -> j.
        NOTE(review): this assumes node labels are 1..n with no gaps —
        confirm against the SNAP datasets, whose ids may be sparse.
        """
        print("Creating the transition probability matrix...")
        # `n` used to be read from a module-level global; derive it from G.
        n = G.number_of_nodes()
        P = sp.sparse.lil_matrix((n, n))
        for i in G.nodes():
            succ = G[i]  # neighbors (successors) of i
            w = 1 / len(succ)
            for j in succ:
                P[i - 1, j - 1] = w
        print("Transition probability matrix created\n")
        return P

    @staticmethod
    def dangling_nodes(P, n):
        """Return an (n,1) sparse indicator: d[i] = 1 iff row i of P is all zeros."""
        print("Creating the list of dangling nodes...")
        d = sp.sparse.lil_matrix((n, 1))
        for i in range(n):
            if P[i].sum() == 0:
                d[i] = 1
        print("List of dangling nodes created\n")
        return d

    @staticmethod
    def probability_vector(n):
        """Return the uniform (n,1) sparse probability vector, v[i] = 1/n."""
        print("Creating the probability vector...")
        v = sp.sparse.lil_matrix((n, 1))
        for i in range(n):
            v[i] = 1 / n
        print("Probability vector created\n")
        return v

    @staticmethod
    def transition_matrix(P, v, d):
        """Return Pt = P + v @ d.T, patching the dangling nodes.

        NOTE(review): the classic PageRank correction is d @ v.T (each
        dangling ROW replaced by v); this computes v @ d.T instead —
        confirm which orientation the algorithms downstream expect.
        """
        print("Creating the transition matrix...")
        Pt = P + v.dot(d.T)
        print("Transition matrix created\n")
        return Pt

    @staticmethod
    def alpha():
        """Return the damping factors to test: 0.85, 0.86, ..., 0.99."""
        return [i / 100 for i in range(85, 100)]
|
||||||
|
|
||||||
|
class Plotting:
    """Plot helpers for the algo1 tau sweep; each writes an interactive
    HTML figure under ../data/results/algo1."""

    @staticmethod
    def tau_over_iterations(dataframe):
        """Plot iterations-to-convergence against tau.

        Expects a dataframe with 'tau' and 'iterations' columns.
        Bug fix: the original rebound its argument to the global `df`
        (`dataframe = df`) and ignored what the caller passed in.
        """
        # tau values were appended from 1e-5 down to 1e-9; reverse for an ascending x-axis
        x = dataframe['tau'][::-1].tolist()
        y = dataframe['iterations'].tolist()

        fig = go.Figure(data=go.Scatter(x=x, y=y, mode='lines+markers'),
                        layout=go.Layout(title='Iterations needed for the convergence', xaxis_title='tau', yaxis_title='iterations'))

        # save the figure as a html file
        fig.write_html("../data/results/algo1/taus_over_iterations.html")
        print("The plot has been saved in the folder data/results/algo1")

    @staticmethod
    def tau_over_time(df):
        """Plot wall-clock time-to-convergence against tau.

        Expects a dataframe with 'tau' and 'time' columns.
        """
        x1 = df['tau'][::-1].tolist()
        y1 = df['time'].tolist()

        fig = go.Figure(data=go.Scatter(x=x1, y=y1, mode='lines+markers'),
                        layout=go.Layout(title='Time needed for the convergence', xaxis_title='tau', yaxis_title='time (seconds)'))

        # save the plot in a html file
        fig.write_html("../data/results/algo1/taus_over_time.html")
        print("The plot has been saved in the folder data/results/algo1")
|
||||||
|
|
||||||
|
class Algorithms:
    """Power-method variants computing PageRank for several damping factors at once."""

    @staticmethod
    def algo1(Pt, v, tau, max_mv, a: list):
        """Shifted power method: share the matrix-vector products u = Pt^k v
        across every damping factor in `a`.

        Parameters:
            Pt     -- (n, n) sparse transition matrix
            v      -- (n, 1) sparse probability vector
            tau    -- tolerance on the residual norm
            max_mv -- cap on the number of matrix-vector products
            a      -- list of damping factors, e.g. [0.85, ..., 0.99]

        Returns (mv, x, r, total_time): products used, last accumulated
        solution, last residual, and elapsed wall time in seconds.
        """
        start_time = time.time()

        print("STARTING ALGORITHM 1...")

        # Problem size used to come from a module-level global; take it from v.
        n = v.shape[0]

        u = Pt.dot(v) - v
        mv = 1  # number of matrix-vector products so far

        # Residual norms are plain scalars, so keep them in a list
        # (the original stored them in a sparse (len(a), 1) matrix).
        Res = [0.0] * len(a)
        r = sp.sparse.lil_matrix((n, 1))
        x = sp.sparse.lil_matrix((n, 1))

        # First-order term for every damping factor.
        for i in range(len(a)):
            r = a[i] * u
            Res[i] = norm(r)
            if Res[i] > tau:
                x = r + v

        # Keep multiplying by Pt until every alpha has converged or the budget runs out.
        while max(Res) > tau and mv < max_mv:
            u = Pt * u  # NOTE(review): reuses u across alphas, as in the original — confirm intended
            mv += 1

            for i in range(len(a)):
                if Res[i] >= tau:  # only update alphas that have not converged yet
                    r = (a[i] ** (mv + 1)) * u
                    Res[i] = norm(r)
                    if Res[i] > tau:
                        x = r + x

        if mv == max_mv:
            print("The algorithm didn't converge in ", max_mv, " iterations")
        else:
            print("The algorithm converged in ", mv, " iterations")

        total_time = round(time.time() - start_time, 2)

        print("The algorithm took ", total_time, " seconds to run\n")

        return mv, x, r, total_time
|
||||||
|
|
||||||
|
# pandas dataframe to store the results of each tau run
df = pd.DataFrame(columns=['alpha', 'iterations', 'tau', 'time'])

# Main
if __name__ == "__main__":
    dataset = utilities.load_data()

    # maximum number of iterations, asked to the user
    max_mv = int(input("Insert the maximum number of iterations: "))

    # Build the graph and all the PageRank ingredients.
    G, n = utilities.create_graph(dataset)
    P = utilities.create_matrix(G)
    d = utilities.dangling_nodes(P, n)
    v = utilities.probability_vector(n)
    Pt = utilities.transition_matrix(P, v, d)
    a = utilities.alpha()

    # run the algorithm for different values of tau from 10^-5 to 10^-9 with step 10^-1
    for i in range(5, 10):
        tau = 10 ** (-i)
        print("\ntau = ", tau)
        mv, x, r, total_time = Algorithms.algo1(Pt, v, tau, max_mv, a)

        # store the results in the dataframe
        # (DataFrame.append was removed in pandas 2.0 — use pd.concat instead)
        row = pd.DataFrame([{'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}])
        df = pd.concat([df, row], ignore_index=True)

    # make sure the output folder exists, then save the results in a csv file
    os.makedirs('../data/results/algo1', exist_ok=True)
    df.to_csv('../data/results/algo1/different_tau.csv', index=False)

    # plot the results
    Plotting.tau_over_iterations(df)
    Plotting.tau_over_time(df)

    # print in the terminal the columns of the dataframe iterations, tau and time
    print("\n", df[['iterations', 'tau', 'time']])
|
Loading…
Reference in New Issue