You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
343 lines
12 KiB
Plaintext
343 lines
12 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import numpy as np\n",
|
|
"import networkx as nx\n",
|
|
"import time\n",
|
|
"import math\n",
|
|
"import pandas as pd\n",
|
|
"import scipy as sp\n",
|
|
"import plotly.express as px\n",
|
|
"import plotly.graph_objs as go\n",
|
|
"from scipy.sparse import *\n",
|
|
"from scipy import linalg\n",
|
|
"from scipy.sparse.linalg import norm\n",
|
|
"from scipy.optimize import least_squares"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Arnoldi \n",
|
|
"\n",
|
|
"This is a copy of the algorithm defined and tested in the notebook `algo2_testing`. It's an implementation of the Algorithm 2 from the paper. It's needed in this notebook since this function is called by the `algo4` function. It's implemented to return exactly what's needed in the `algo4` function.\n",
|
|
"\n",
|
|
"Everything will be reorganized in the main.py file once everything is working."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def Arnoldi(A,v0,m):\n",
|
|
" v = v0\n",
|
|
" beta = np.linalg.norm(v)\n",
|
|
" v = v/beta\n",
|
|
" H = sp.sparse.lil_matrix((m+1,m)) \n",
|
|
" V = sp.sparse.lil_matrix((A.shape[0],m+1))\n",
|
|
" V[:,0] = v # each column of V is a vector v\n",
|
|
"\n",
|
|
" for j in range(m):\n",
|
|
" w = A @ v \n",
|
|
" for i in range(j):\n",
|
|
" H[i,j] = v.T @ w # tmp is a 1x1 matrix, so it's O(1) in memory\n",
|
|
" w = w - H[i,j]*v \n",
|
|
" \n",
|
|
" H[j+1,j] = np.linalg.norm(w)\n",
|
|
"\n",
|
|
" if H[j+1,j] == 0:\n",
|
|
" # print(\"Arnoldi breakdown\")\n",
|
|
" m = j\n",
|
|
" v = 0\n",
|
|
" break\n",
|
|
" else:\n",
|
|
" if j < m-1:\n",
|
|
" v = w/H[j+1,j]\n",
|
|
" V[:,j+1] = v\n",
|
|
"\n",
|
|
" return V, H, v, beta, j "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Algorithm 4 testing\n",
|
|
"\n",
|
|
"This algorithm is based on the \"Algorithm 4\" of the paper, the pseudocode provided by the authors is the following \n",
|
|
"\n",
|
|
"![](https://i.imgur.com/H92fru7.png)\n",
|
|
"\n",
|
|
"Line 14 is particularly tricky to understand, not working for now. Need to figure out how to solve that linear system. My idea was to do something like that\n",
|
|
"\n",
|
|
"![](https://i.imgur.com/uBCDYUa.jpeg)\n",
|
|
"\n",
|
|
"And use the `sp.sparse.linalg.spsolve` function to solve the linear system as $Ax=0$ where $A$ is $[\\bar H_m^i ~ | ~ z]$ but it returns an array of zeros. So the idea it's wrong"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# def Algo4_OLD(Pt, v, m, a: list, tau, maxit: int, x):\n",
|
|
" \n",
|
|
"# # I'm using a non declared variable n here , it's declared in the next cell when I call this function. This will be fixed later in the main.py file\n",
|
|
"\n",
|
|
"# iter , mv = 1, 0\n",
|
|
"# res = np.zeros(len(a)) \n",
|
|
"\n",
|
|
"# # I'm defining 3 canonical vectors of different sizes. It's probably stupid, will be fixed once the algorithm actually works\n",
|
|
"\n",
|
|
"# H_e1 = np.zeros((m+1,1)) # canonical basis vector of size H.shape[0]\n",
|
|
"# H_e1[0] = 1\n",
|
|
"\n",
|
|
"# V_e1 = np.zeros((n,1)) # canonical basis vector of size V.shape[0]\n",
|
|
"# V_e1[0] = 1\n",
|
|
"\n",
|
|
"# s_e1 = np.zeros((len(a),1)) # canonical basis vector of size s.shape[0]\n",
|
|
"# s_e1[0] = 1\n",
|
|
"\n",
|
|
"# def find_k(res): # function to find the index of the largest element in res\n",
|
|
"# k = 0\n",
|
|
"# for i in range(len(a)):\n",
|
|
"# if res[i] == max(res):\n",
|
|
"# k = i\n",
|
|
"# break\n",
|
|
"# return k\n",
|
|
"\n",
|
|
"# def compute_gamma(res, a, k): # function to compute gamma\n",
|
|
"# gamma = np.zeros(len(a))\n",
|
|
"# for i in range(len(a)):\n",
|
|
"# if i != k:\n",
|
|
"# gamma[i] = (res[i]*a[k])/(res[k]*a[i])\n",
|
|
"# else:\n",
|
|
"# gamma[i] = 0\n",
|
|
"# return gamma\n",
|
|
"\n",
|
|
"# # compute the residual vector\n",
|
|
"\n",
|
|
"# # create a sp.sparse.eye matrix of size Pt\n",
|
|
"# I = sp.sparse.eye(Pt.shape[0], Pt.shape[1], format='lil')\n",
|
|
"# for i in range(len(a)):\n",
|
|
"# r = ((1-a[i])/a[i])*v - ((1/a[i])*I - Pt) @ x\n",
|
|
"# res[i] = a[i]*norm(r)\n",
|
|
"\n",
|
|
"\n",
|
|
"# while max(res) >= tau and iter <= maxit:\n",
|
|
"# k = find_k(res)\n",
|
|
"# gamma = compute_gamma(res, a, k)\n",
|
|
"# V, H, v, beta, j = Arnoldi((1/a[k])*I - Pt, r, m)\n",
|
|
"\n",
|
|
"# mv = mv + j\n",
|
|
"\n",
|
|
"# # compute y as the minimizer of || beta*e1 - Hy ||_2 using the least squares method\n",
|
|
"# y = sp.sparse.linalg.lsqr(H, beta*H_e1)[0]\n",
|
|
"\n",
|
|
"# # reshape y to be a column vector\n",
|
|
"# y = y.reshape(y.shape[0],1)\n",
|
|
"\n",
|
|
"# # update x \n",
|
|
"# x += V[:,0:y.shape[0]] @ y\n",
|
|
"\n",
|
|
"# # compute the residual vector\n",
|
|
"# res[k] = a[k]*np.linalg.norm(beta*V_e1 - V[:,0:y.shape[0]] @ y)\n",
|
|
" \n",
|
|
"# # for i in range(len(a)) but not k\n",
|
|
"# for i in range(len(a)):\n",
|
|
"# if i != k and res[i] >= tau:\n",
|
|
"# # Compute H as described in the paper\n",
|
|
"# H = H + ((1-a[i])/a[i] - (1-a[k])/a[k])*sp.sparse.eye(H.shape[0], H.shape[1], format='lil') \n",
|
|
"\n",
|
|
"# z = beta*H_e1 - H @ y # define z as in the paper (page 9)\n",
|
|
"# A_tmp = sp.sparse.hstack([H, z]) # stack H and z, as in the paper, to solve the linear system (?)\n",
|
|
"# A_tmp = A_tmp.tocsc() # Convert A to CSC format for sparse solver\n",
|
|
"\n",
|
|
"# # What should I put here? What does it mean in the paper the line 14 of the pseudocode?\n",
|
|
"# result = sp.sparse.linalg.spsolve(A_tmp, np.zeros(A_tmp.shape[0])) # if I solve this, I get a vector of zeros.\n",
|
|
"# print(result)\n",
|
|
" \n",
|
|
"# # I don't know if the code below is correct since I don't get how to solve the linear system above, so I'm unsure about what y and gamma should be. For now it's commented out.\n",
|
|
"\n",
|
|
"# # # update x\n",
|
|
"# # x += V[:,0:y.shape[0]] @ y\n",
|
|
"# # # update the residual vector\n",
|
|
"# # res[i] = (a[i]/a[k])*gamma[k]*res[k] \n",
|
|
"\n",
|
|
"# iter = iter + 1\n",
|
|
"\n",
|
|
"# return x, iter, mv"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def Algo4(Pt, v, m, a: list, tau, maxit: int, x):\n",
|
|
" \n",
|
|
" # I'm using a non declared variable n here , it's declared in the next cell when I call this function. This will be fixed later in the main.py file\n",
|
|
"\n",
|
|
" mv = 0\n",
|
|
"\n",
|
|
" # I'm defining 3 canonical vectors of different sizes. It's probably stupid, will be fixed once the algorithm actually works\n",
|
|
"\n",
|
|
" H_e1 = np.zeros((m+1,1)) # canonical basis vector of size H.shape[0]\n",
|
|
" H_e1[0] = 1\n",
|
|
"\n",
|
|
" V_e1 = np.zeros((n,1)) # canonical basis vector of size V.shape[0]\n",
|
|
" V_e1[0] = 1\n",
|
|
"\n",
|
|
" s_e1 = np.zeros((len(a),1)) # canonical basis vector of size s.shape[0]\n",
|
|
" s_e1[0] = 1\n",
|
|
"\n",
|
|
" def find_k(res): # function to find the index of the largest element in res\n",
|
|
" k = 0\n",
|
|
" for i in range(len(a)):\n",
|
|
" if res[i] == max(res):\n",
|
|
" k = i\n",
|
|
" break\n",
|
|
" return k\n",
|
|
"\n",
|
|
" def compute_gamma(res, a, k): # function to compute gamma\n",
|
|
" gamma = np.zeros(len(a))\n",
|
|
" for i in range(len(a)):\n",
|
|
" if i != k:\n",
|
|
" gamma[i] = (res[i]*a[k])/(res[k]*a[i])\n",
|
|
" else:\n",
|
|
" gamma[i] = 0\n",
|
|
" return gamma\n",
|
|
"\n",
|
|
" # compute the residual vector\n",
|
|
"\n",
|
|
" I = sp.sparse.eye(Pt.shape[0], Pt.shape[1], format='lil')\n",
|
|
" res = np.zeros(len(a))\n",
|
|
" for i in range(len(a)):\n",
|
|
" r = sp.sparse.linalg.spsolve(I - a[i]*Pt, v)\n",
|
|
" res[i] = a[i]*np.linalg.norm(r)\n",
|
|
"\n",
|
|
" for _ in range(maxit):\n",
|
|
" k = find_k(res)\n",
|
|
" gamma = compute_gamma(res, a, k)\n",
|
|
" \n",
|
|
" # Run Arnoldi\n",
|
|
" V, H, v, beta, j = Arnoldi((1/a[k])*I - Pt, r, m)\n",
|
|
"\n",
|
|
" mv = mv + j\n",
|
|
"\n",
|
|
" # compute y as the minimizer of || beta*e1 - Hy ||_2 using the least squares method\n",
|
|
" y = sp.sparse.linalg.lsqr(H, beta*H_e1)[0]\n",
|
|
" \n",
|
|
" # reshape y to be a column vector\n",
|
|
" y = y.reshape(y.shape[0],1)\n",
|
|
"\n",
|
|
" # update x \n",
|
|
" x_new = x + V[:,0:y.shape[0]] @ y\n",
|
|
"\n",
|
|
"\n",
|
|
" res[k] = a[k]*np.linalg.norm(beta*V_e1 - V[:,0:y.shape[0]] @ y)\n",
|
|
" \n",
|
|
" for i in range(len(a)):\n",
|
|
" if i != k and res[i] >= tau:\n",
|
|
" # Compute H as described in the paper\n",
|
|
" H = H + ((1-a[i])/a[i] - (1-a[k])/a[k])*sp.sparse.eye(H.shape[0], H.shape[1], format='lil') \n",
|
|
" z = beta*H_e1 - H @ y\n",
|
|
"\n",
|
|
" # TO DO: SOLVE THE LINEAR SYSTEM HERE\n",
|
|
"\n",
|
|
" # update x\n",
|
|
" x_new = x + V[:,0:y.shape[0]] @ y\n",
|
|
"\n",
|
|
" # update the residual vector\n",
|
|
" res[i] = (a[i]/a[k])*gamma[k]*res[k]\n",
|
|
" \n",
|
|
" err = np.absolute(x - x_new).sum()\n",
|
|
" x = x_new\n",
|
|
"\n",
|
|
" if err < tau:\n",
|
|
" raise StopIteration\n",
|
|
"\n",
|
|
" return x, iter, mv"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Basic test case with random numbers to test the algorithm."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"n = 100\n",
|
|
"m = 110\n",
|
|
"tau = 1e-9\n",
|
|
"a = [0.85, 0.9, 0.95, 0.99]\n",
|
|
"\n",
|
|
"x = sp.sparse.lil_matrix((n,1))\n",
|
|
"x[0,0] = 1\n",
|
|
"\n",
|
|
"# generate a random graph\n",
|
|
"G = nx.gnp_random_graph(n, 1)\n",
|
|
"v = np.repeat(1.0 / 100, 100) # p is the personalization vector\n",
|
|
"v = v.reshape(v.shape[0],1)\n",
|
|
"\n",
|
|
"A = nx.to_scipy_sparse_array(G, dtype=float)\n",
|
|
"S = A.sum(axis=1) # S[i] is the sum of the weights of edges going out of node i\n",
|
|
"S[S != 0] = 1.0 / S[S != 0] # S[i] is now the sum of the weights of edges going into node i\n",
|
|
"Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape)) # Q is the matrix of edge weights"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"Algo4(Q, v, m, a, tau, 100, x)"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3.10.6 64-bit",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.6"
|
|
},
|
|
"orig_nbformat": 4,
|
|
"vscode": {
|
|
"interpreter": {
|
|
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
|
|
}
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|