refined algo1, starting to test algo2

pull/1/head
Luca Lombardo 2 years ago
parent 990d59e3ab
commit aa7364c261

@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -27,7 +27,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -45,7 +45,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -77,7 +77,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -94,7 +94,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -116,7 +116,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -137,7 +137,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -148,7 +148,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -160,7 +160,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -183,7 +183,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -193,7 +193,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -242,116 +242,9 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"tau = 1e-05\n",
"The algorithm converged in 19 iterations\n",
"The algorithm took 12.630054473876953 seconds\n",
"\n",
"tau = 1e-06\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1733107/908708813.py:13: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
" df = df.append({'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}, ignore_index=True)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"The algorithm converged in 56 iterations\n",
"The algorithm took 32.28381681442261 seconds\n",
"\n",
"tau = 1e-07\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1733107/908708813.py:13: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
" df = df.append({'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}, ignore_index=True)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"The algorithm converged in 113 iterations\n",
"The algorithm took 75.9386818408966 seconds\n",
"\n",
"tau = 1e-08\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1733107/908708813.py:13: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
" df = df.append({'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}, ignore_index=True)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"The algorithm converged in 275 iterations\n",
"The algorithm took 107.68565726280212 seconds\n",
"\n",
"tau = 1e-09\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1733107/908708813.py:13: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
" df = df.append({'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}, ignore_index=True)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"The algorithm converged in 454 iterations\n",
"The algorithm took 152.8943109512329 seconds\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_1733107/908708813.py:13: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n",
" df = df.append({'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}, ignore_index=True)\n"
]
},
{
"ename": "OSError",
"evalue": "Cannot save file into a non-existent directory: '../data/algo1'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipykernel_1733107/908708813.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;31m# save the results in a csv file\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'../data/algo1/different_tau.csv'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mto_csv\u001b[0;34m(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, decimal, errors, storage_options)\u001b[0m\n\u001b[1;32m 3549\u001b[0m )\n\u001b[1;32m 3550\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3551\u001b[0;31m return DataFrameRenderer(formatter).to_csv(\n\u001b[0m\u001b[1;32m 3552\u001b[0m \u001b[0mpath_or_buf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3553\u001b[0m \u001b[0mline_terminator\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mline_terminator\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/formats/format.py\u001b[0m in \u001b[0;36mto_csv\u001b[0;34m(self, path_or_buf, encoding, sep, columns, index_label, mode, compression, quoting, quotechar, line_terminator, chunksize, date_format, doublequote, escapechar, errors, storage_options)\u001b[0m\n\u001b[1;32m 1178\u001b[0m \u001b[0mformatter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfmt\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1179\u001b[0m )\n\u001b[0;32m-> 1180\u001b[0;31m \u001b[0mcsv_formatter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msave\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1181\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1182\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcreated_buffer\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/formats/csvs.py\u001b[0m in \u001b[0;36msave\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 239\u001b[0m \"\"\"\n\u001b[1;32m 240\u001b[0m \u001b[0;31m# apply compression and byte/text conversion\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 241\u001b[0;31m with get_handle(\n\u001b[0m\u001b[1;32m 242\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/common.py\u001b[0m in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m 692\u001b[0m \u001b[0;31m# Only for write methods\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 693\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"r\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mis_path\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 694\u001b[0;31m \u001b[0mcheck_parent_directory\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhandle\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 695\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 696\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcompression\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.10/site-packages/pandas/io/common.py\u001b[0m in \u001b[0;36mcheck_parent_directory\u001b[0;34m(path)\u001b[0m\n\u001b[1;32m 566\u001b[0m \u001b[0mparent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mPath\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparent\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 567\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mparent\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_dir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 568\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mOSError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mrf\"Cannot save file into a non-existent directory: '{parent}'\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 569\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 570\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mOSError\u001b[0m: Cannot save file into a non-existent directory: '../data/algo1'"
]
}
],
"outputs": [],
"source": [
"# list of alpha values, from 0.85 to 0.99 with step 0.01\n",
"a = []\n",
@@ -371,7 +264,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -388,7 +281,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -430,6 +323,57 @@
"\n",
"_In the right folder_"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def Arnoldi(A, v, m): # defined ad algorithm 2 in the paper\n",
" beta = norm(v)\n",
" print(\"A\")\n",
" v = v/beta\n",
" print(\"B\")\n",
" h = sp.sparse.lil_matrix((m,m))\n",
" print(\"C\")\n",
"\n",
" for j in range(1,m):\n",
" w = A.dot(v)\n",
" for i in range(1,j):\n",
" h[i,j] = v.T.dot(w)\n",
" w = w - h[i,j]*v\n",
"\n",
" h[j+1,j] = norm(w)\n",
"\n",
" if h[j+1,j] == 0:\n",
" m = j\n",
" v[m+1] = 0\n",
" break\n",
" else:\n",
" v = w**h[j+1,j]\n",
"\n",
" return v, h, m, beta, j"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"A = sp.sparse.rand(100,100, density=0.5, format='lil')\n",
"v = sp.sparse.rand(100,1, density=1, format='lil')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"v, h, m, beta, j = Arnoldi(A, v, 100)"
]
}
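,
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check (our own sketch, not from the paper): if `Arnoldi` is implemented correctly, the relation $AV_m = V_{m+1}\\bar{H}_m$ should hold up to rounding, where the columns of `V` are the orthonormal basis vectors and `h` is the $(m+1) \\times m$ Hessenberg matrix returned above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# check the Arnoldi relation; V, h, m come from the cells above\n",
"residual = norm(A.dot(V[:, :m]) - V[:, :m+1].dot(h[:m+1, :m]))\n",
"print(\"||A*V_m - V_{m+1}*H_m|| =\", residual)"
]
}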
],
"metadata": {

@@ -0,0 +1,221 @@
#!/usr/bin/env python3
# Importing the libraries
import os
import wget
import gzip
import time
import warnings
import scipy as sp
import numpy as np
import pandas as pd
import networkx as nx
import plotly.graph_objs as go
from scipy.sparse import *
from scipy.sparse.linalg import norm
from os.path import exists
warnings.simplefilter(action='ignore', category=FutureWarning)
# suppress noisy pandas FutureWarnings (e.g. the DataFrame.append deprecation)
class utilities:
    # Importing the dataset
    def load_data():
        # Loading the dataset
        dataset = int(input("Choose the dataset:\n [1] web-Stanford\n [2] web-BerkStan: \nEnter an option: "))
        if dataset == 1:
            if exists('../data/web-Stanford.txt'):
                dataset = '../data/web-Stanford.txt'
            else:
                print("\nThe file doesn't exist, downloading it from https://snap.stanford.edu/data/web-Stanford.html")
                # if there is no data folder, create it
                if not exists('../data'):
                    os.makedirs('../data')
                # Downloading the dataset
                url = 'https://snap.stanford.edu/data/web-Stanford.txt.gz'
                wget.download(url, '../data/web-Stanford.txt.gz')
                # Unzipping the dataset
                with gzip.open('../data/web-Stanford.txt.gz', 'rb') as f_in:
                    with open('../data/web-Stanford.txt', 'wb') as f_out:
                        f_out.write(f_in.read())
                dataset = '../data/web-Stanford.txt'
                print("\nDataset downloaded\n")
        elif dataset == 2:
            if exists('../data/web-BerkStan.txt'):
                dataset = '../data/web-BerkStan.txt'
            else:
                print("\nThe file doesn't exist, downloading it from https://snap.stanford.edu/data/web-BerkStan.html")
                # if there is no data folder, create it
                if not exists('../data'):
                    os.makedirs('../data')
                # Downloading the dataset
                url = 'https://snap.stanford.edu/data/web-BerkStan.txt.gz'
                wget.download(url, '../data/web-BerkStan.txt.gz')
                # Unzipping the dataset
                with gzip.open('../data/web-BerkStan.txt.gz', 'rb') as f_in:
                    with open('../data/web-BerkStan.txt', 'wb') as f_out:
                        f_out.write(f_in.read())
                dataset = '../data/web-BerkStan.txt'
                print("\nDataset downloaded\n")
        return dataset
    # Creating the graph from the dataset
    def create_graph(dataset):
        print("\nCreating the graph...")
        G = nx.read_edgelist(dataset, create_using=nx.DiGraph(), nodetype=int)
        n = G.number_of_nodes()
        print("Graph created based on the dataset\n")
        return G, n

    # Creating the transition probability matrix: the (i,j) entry is
    # 1/(number of nodes that i links to) if there is an edge from i to j, and 0 otherwise.
    def create_matrix(G, n):
        print("Creating the transition probability matrix...")
        P = sp.sparse.lil_matrix((n,n))
        for i in G.nodes():
            for j in G[i]: # G[i] is the list of nodes connected to i, its neighbors
                P[i-1,j-1] = 1/len(G[i])
        print("Transition probability matrix created\n")
        return P
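
    # Worked example (hypothetical, not executed): for the directed graph with
    # edges 1->2, 1->3, 2->3, node 1 has two out-links and node 2 has one, so
    #   P = [[0, 1/2, 1/2],
    #        [0, 0,   1  ],
    #        [0, 0,   0  ]]
    # Node 3 has no out-links, so its row is all zeros.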
    # Creating the dangling-nodes vector: d[i] = 1 if row i of the matrix P is
    # all zeros (node i has no out-links), otherwise d[i] = 0.
    def dangling_nodes(P,n):
        print("Creating the list of dangling nodes...")
        d = sp.sparse.lil_matrix((n,1))
        for i in range(n):
            if P[i].sum() == 0:
                d[i] = 1
        print("List of dangling nodes created\n")
        return d
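
    # For the toy graph above (1->2, 1->3, 2->3), only node 3 has an all-zero
    # row in P, so d = [0, 0, 1]^T.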
    def probability_vector(n):
        print("Creating the probability vector...")
        v = sp.sparse.lil_matrix((n,1))
        for i in range(n):
            v[i] = 1/n
        print("Probability vector created\n")
        return v

    def transition_matrix(P, v, d):
        print("Creating the transition matrix...")
        Pt = P + v.dot(d.T)
        print("Transition matrix created\n")
        return Pt

    # damping factors from 0.85 to 0.99 with step 0.01
    def alpha():
        a = []
        for i in range(85,100):
            a.append(i/100)
        return a
class Plotting:
    def tau_over_iterations(df):
        x = df['tau'][::-1].tolist()
        y = df['iterations'].tolist()

        fig = go.Figure(data=go.Scatter(x=x, y=y, mode='lines+markers'),
                        layout=go.Layout(title='Iterations needed for the convergence', xaxis_title='tau', yaxis_title='iterations'))

        # save the figure as a html file, creating the folder if needed
        os.makedirs('../data/results/algo1', exist_ok=True)
        fig.write_html("../data/results/algo1/taus_over_iterations.html")
        print("The plot has been saved in the folder data/results/algo1")

    def tau_over_time(df):
        x1 = df['tau'][::-1].tolist()
        y1 = df['time'].tolist()

        fig = go.Figure(data=go.Scatter(x=x1, y=y1, mode='lines+markers'),
                        layout=go.Layout(title='Time needed for the convergence', xaxis_title='tau', yaxis_title='time (seconds)'))

        # save the plot in a html file, creating the folder if needed
        os.makedirs('../data/results/algo1', exist_ok=True)
        fig.write_html("../data/results/algo1/taus_over_time.html")
        print("The plot has been saved in the folder data/results/algo1")
class Algorithms:
    # Algorithm 1: for every damping factor in a, track the residual of the
    # power-series expansion of the PageRank vector, sharing the matrix-vector
    # products Pt*u across all the damping factors.
    def algo1(Pt, v, tau, max_mv, a: list):
        start_time = time.time()
        print("STARTING ALGORITHM 1...")
        n = Pt.shape[0]
        u = Pt.dot(v) - v
        mv = 1 # number of matrix-vector products done so far
        r = sp.sparse.lil_matrix((n,1))
        Res = sp.sparse.lil_matrix((len(a),1))
        x = sp.sparse.lil_matrix((n,1))

        for i in range(len(a)):
            r = a[i]*(u)
            normed_r = norm(r)
            Res[i] = normed_r

            if Res[i] > tau:
                x = r + v

        while Res.max() > tau and mv < max_mv:
            u = Pt*u # should this be the same u as at the beginning?
            mv += 1

            for i in range(len(a)):
                if Res[i] >= tau:
                    r = (a[i]**(mv+1))*(u)
                    Res[i] = norm(r)

                    if Res[i] > tau:
                        x = r + x

        if mv == max_mv:
            print("The algorithm didn't converge in ", max_mv, " iterations")
        else:
            print("The algorithm converged in ", mv, " iterations")

        total_time = time.time() - start_time
        total_time = round(total_time, 2)
        print("The algorithm took ", total_time, " seconds to run\n")

        return mv, x, r, total_time
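
    # Minimal usage sketch (hypothetical values; in this script Pt, v, max_mv
    # and a are built in the __main__ block below):
    #   mv, x, r, t = Algorithms.algo1(Pt, v, tau=1e-6, max_mv=1000, a=[0.85, 0.9, 0.95])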
# pandas dataframe to store the results
df = pd.DataFrame(columns=['alpha', 'iterations', 'tau', 'time'])
# Main
if __name__ == "__main__":
    dataset = utilities.load_data()
    # maximum number of iterations, asked to the user
    max_mv = int(input("Insert the maximum number of iterations: "))

    G, n = utilities.create_graph(dataset)
    P = utilities.create_matrix(G, n)
    d = utilities.dangling_nodes(P,n)
    v = utilities.probability_vector(n)
    Pt = utilities.transition_matrix(P, v, d)
    a = utilities.alpha()

    # run the algorithm for tau from 1e-5 down to 1e-9, decreasing by a factor of 10
    for i in range(5,10):
        tau = 10**(-i)
        print("\ntau = ", tau)
        mv, x, r, total_time = Algorithms.algo1(Pt, v, tau, max_mv, a)
        # store the results in the dataframe (DataFrame.append is deprecated)
        df = pd.concat([df, pd.DataFrame([{'alpha': a, 'iterations': mv, 'tau': tau, 'time': total_time}])], ignore_index=True)

    # save the results in a csv file, creating the folder if needed
    os.makedirs('../data/results/algo1', exist_ok=True)
    df.to_csv('../data/results/algo1/different_tau.csv', index=False)

    # plot the results
    Plotting.tau_over_iterations(df)
    Plotting.tau_over_time(df)

    # print the iterations, tau and time columns of the dataframe
    print("\n", df[['iterations', 'tau', 'time']])