diff --git a/algo2_testing.ipynb b/algo2_testing.ipynb new file mode 100644 index 0000000..5816097 --- /dev/null +++ b/algo2_testing.ipynb @@ -0,0 +1,213 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import networkx as nx\n", + "import time\n", + "import math\n", + "import pandas as pd\n", + "import scipy as sp\n", + "import plotly.express as px\n", + "import plotly.graph_objs as go\n", + "from scipy.sparse import *\n", + "from scipy import linalg\n", + "from scipy.sparse.linalg import norm\n", + "from scipy.optimize import least_squares" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Algorithm 2 testing" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# def Arnoldi(A, v, m): \n", + "# beta = norm(v)\n", + "# v = v/beta # dimension of v is n x 1\n", + "# H = sp.sparse.lil_matrix((m,m)) # dimension of H is m x m \n", + "# V = sp.sparse.lil_matrix((A.shape[0],m))\n", + "# V[:,0] = v # each column of V is a vector v\n", + "\n", + "# for j in range(m):\n", + "# print(\"j = \", j)\n", + "# w = A @ v \n", + "# for i in range(j):\n", + "# tmp = v.T @ w \n", + "# H[i,j] = tmp[0,0]\n", + "# w = w - H[i,j]*v \n", + " \n", + "# H[j,j-1] = norm(w) \n", + "\n", + "# if H[j,j-1] == 0: \n", + "# print(\"Arnoldi breakdown\")\n", + "# m = j\n", + "# v = 0\n", + "# break\n", + "# else:\n", + "# if j < m-1:\n", + "# v = w/H[j,j-1]\n", + "# # for i in range(A.shape[0]):\n", + "# # V[i,j+1] = v[i,0]\n", + "# V[:,j+1] = v\n", + "\n", + "# print(j, \" iterations completed\")\n", + "# print(\"V = \", V.shape)\n", + "# print(\"H = \", H.shape)\n", + "# print(\"v = \", v.shape)\n", + "# print(\"beta = \", beta)\n", + "\n", + "# return V, H, v, beta, j" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Defined as Algorithm 2 in the paper. It's needed since it's called by Algorithm 4" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def Arnoldi(A,v0,m):\n", + " v = v0\n", + " beta = norm(v)\n", + " v = v/beta\n", + " H = sp.sparse.lil_matrix((m+1,m)) \n", + " V = sp.sparse.lil_matrix((A.shape[0],m+1))\n", + " V[:,0] = v # each column of V is a vector v\n", + "\n", + " for j in range(m):\n", + " # print(\"j = \", j)\n", + " w = A @ v \n", + " for i in range(j):\n", + " tmp = v.T @ w # tmp is a 1x1 matrix, so it's O(1) in memory\n", + " H[i,j] = tmp[0,0] \n", + " w = w - H[i,j]*v \n", + " \n", + " H[j+1,j] = norm(w)\n", + "\n", + " if H[j+1,j] == 0:\n", + " print(\"Arnoldi breakdown\")\n", + " m = j\n", + " v = 0\n", + " break\n", + " else:\n", + " if j < m-1:\n", + " v = w/H[j+1,j]\n", + " V[:,j+1] = v\n", + "\n", + " print(j, \" iterations completed\")\n", + " print(\"V = \", V.shape)\n", + " print(\"H = \", H.shape)\n", + " print(\"v = \", v.shape)\n", + " print(\"beta = \", beta)\n", + "\n", + " return V, H, v, beta, j \n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Creating a small test case" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "m = 100\n", + "n = 110\n", + "A = sp.sparse.rand(n,n, density=0.1, format='lil')\n", + "# generate a probability vector, with all the entries as 1/n\n", + "v = sp.sparse.lil_matrix((n,1))\n", + "for i in range(n):\n", + " v[i] = 1/n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "99 iterations completed\n", + "V = (110, 101)\n", + "H = (101, 100)\n", + "v = (110, 1)\n", + "beta = 0.09534625892455922\n" + ] + }, + { + "data": { + "text/plain": [ + "(<110x101 sparse matrix of type ''\n", + " \twith 11000 stored elements in List of Lists format>,\n", + " <101x100 sparse matrix of type ''\n", + " \twith 4879 stored elements in List of Lists format>,\n", + " <110x1 sparse matrix of type ''\n", + " \twith 110 stored elements in Compressed Sparse Row format>,\n", + " 0.09534625892455922,\n", + " 99)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "Arnoldi(A,v,m)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10.6 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/algo4_testing.ipynb b/algo4_testing.ipynb new file mode 100644 index 0000000..d748d56 --- /dev/null +++ b/algo4_testing.ipynb @@ -0,0 +1,342 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import networkx as nx\n", + "import time\n", + "import math\n", + "import pandas as pd\n", + "import scipy as sp\n", + "import plotly.express as px\n", + "import plotly.graph_objs as go\n", + "from scipy.sparse import *\n", + "from scipy import linalg\n", + "from scipy.sparse.linalg import norm\n", + "from scipy.optimize import least_squares" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Arnoldi \n", + "\n", + "This is a copy of the algorithm defined and tested in the notebook `algo2_testing`. It's an implementation of the Algorithm 2 from the paper. It's needed in this notebook since this function is called by the `algo4` function. It's implemented to return exactly what's needed in the `algo4` function.\n", + "\n", + "Everything will be reorganized in the main.py file once everything is working." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def Arnoldi(A,v0,m):\n", + " v = v0\n", + " beta = np.linalg.norm(v)\n", + " v = v/beta\n", + " H = sp.sparse.lil_matrix((m+1,m)) \n", + " V = sp.sparse.lil_matrix((A.shape[0],m+1))\n", + " V[:,0] = v # each column of V is a vector v\n", + "\n", + " for j in range(m):\n", + " w = A @ v \n", + " for i in range(j):\n", + " H[i,j] = v.T @ w # tmp is a 1x1 matrix, so it's O(1) in memory\n", + " w = w - H[i,j]*v \n", + " \n", + " H[j+1,j] = np.linalg.norm(w)\n", + "\n", + " if H[j+1,j] == 0:\n", + " # print(\"Arnoldi breakdown\")\n", + " m = j\n", + " v = 0\n", + " break\n", + " else:\n", + " if j < m-1:\n", + " v = w/H[j+1,j]\n", + " V[:,j+1] = v\n", + "\n", + " return V, H, v, beta, j " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Algorithm 4 testing\n", + "\n", + "This algorithm is based on the \"Algorithm 4\" of the paper, the pseudocode provided by the authors is the following \n", + "\n", + "![](https://i.imgur.com/H92fru7.png)\n", + "\n", + "Line 14 is particularly tricky to understand, not working for now. Need to figure out how to solve that linear system. My idea was to do something like that\n", + "\n", + "![](https://i.imgur.com/uBCDYUa.jpeg)\n", + "\n", + "And use the `sp.sparse.linalg.spsolve` function to solve the linear system as $Ax=0$ where $A$ is $[\\bar H_m^i ~ | ~ z]$ but it returns an array of zeros. So the idea it's wrong" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# def Algo4_OLD(Pt, v, m, a: list, tau, maxit: int, x):\n", + " \n", + "# # I'm using a non declared variable n here , it's declared in the next cell when I call this function. This will be fixed later in the main.py file\n", + "\n", + "# iter , mv = 1, 0\n", + "# res = np.zeros(len(a)) \n", + "\n", + "# # I'm defining 3 canonical vectors of different sizes. It's probably stupid, will be fixed once the algorithm actually works\n", + "\n", + "# H_e1 = np.zeros((m+1,1)) # canonical basis vector of size H.shape[0]\n", + "# H_e1[0] = 1\n", + "\n", + "# V_e1 = np.zeros((n,1)) # canonical basis vector of size V.shape[0]\n", + "# V_e1[0] = 1\n", + "\n", + "# s_e1 = np.zeros((len(a),1)) # canonical basis vector of size s.shape[0]\n", + "# s_e1[0] = 1\n", + "\n", + "# def find_k(res): # function to find the index of the largest element in res\n", + "# k = 0\n", + "# for i in range(len(a)):\n", + "# if res[i] == max(res):\n", + "# k = i\n", + "# break\n", + "# return k\n", + "\n", + "# def compute_gamma(res, a, k): # function to compute gamma\n", + "# gamma = np.zeros(len(a))\n", + "# for i in range(len(a)):\n", + "# if i != k:\n", + "# gamma[i] = (res[i]*a[k])/(res[k]*a[i])\n", + "# else:\n", + "# gamma[i] = 0\n", + "# return gamma\n", + "\n", + "# # compute the residual vector\n", + "\n", + "# # create a sp.sparse.eye matrix of size Pt\n", + "# I = sp.sparse.eye(Pt.shape[0], Pt.shape[1], format='lil')\n", + "# for i in range(len(a)):\n", + "# r = ((1-a[i])/a[i])*v - ((1/a[i])*I - Pt) @ x\n", + "# res[i] = a[i]*norm(r)\n", + "\n", + "\n", + "# while max(res) >= tau and iter <= maxit:\n", + "# k = find_k(res)\n", + "# gamma = compute_gamma(res, a, k)\n", + "# V, H, v, beta, j = Arnoldi((1/a[k])*I - Pt, r, m)\n", + "\n", + "# mv = mv + j\n", + "\n", + "# # compute y as the minimizer of || beta*e1 - Hy ||_2 using the least squares method\n", + "# y = sp.sparse.linalg.lsqr(H, beta*H_e1)[0]\n", + "\n", + "# # reshape y to be a column vector\n", + "# y = y.reshape(y.shape[0],1)\n", + "\n", + "# # update x \n", + "# x += V[:,0:y.shape[0]] @ y\n", + "\n", + "# # compute the residual vector\n", + "# res[k] = a[k]*np.linalg.norm(beta*V_e1 - V[:,0:y.shape[0]] @ y)\n", + " \n", + "# # for i in range(len(a)) but not k\n", + "# for i in range(len(a)):\n", + "# if i != k and res[i] >= tau:\n", + "# # Compute H as described in the paper\n", + "# H = H + ((1-a[i])/a[i] - (1-a[k])/a[k])*sp.sparse.eye(H.shape[0], H.shape[1], format='lil') \n", + "\n", + "# z = beta*H_e1 - H @ y # define z as in the paper (page 9)\n", + "# A_tmp = sp.sparse.hstack([H, z]) # stack H and z, as in the paper, to solve the linear system (?)\n", + "# A_tmp = A_tmp.tocsc() # Convert A to CSC format for sparse solver\n", + "\n", + "# # What should I put here? What does it mean in the paper the line 14 of the pseudocode?\n", + "# result = sp.sparse.linalg.spsolve(A_tmp, np.zeros(A_tmp.shape[0])) # if I solve this, I get a vector of zeros.\n", + "# print(result)\n", + " \n", + "# # I don't know if the code below is correct since I don't get how to solve the linear system above, so I'm unsure about what y and gamma should be. For now it's commented out.\n", + "\n", + "# # # update x\n", + "# # x += V[:,0:y.shape[0]] @ y\n", + "# # # update the residual vector\n", + "# # res[i] = (a[i]/a[k])*gamma[k]*res[k] \n", + "\n", + "# iter = iter + 1\n", + "\n", + "# return x, iter, mv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def Algo4(Pt, v, m, a: list, tau, maxit: int, x):\n", + " \n", + " # I'm using a non declared variable n here , it's declared in the next cell when I call this function. This will be fixed later in the main.py file\n", + "\n", + " mv = 0\n", + "\n", + " # I'm defining 3 canonical vectors of different sizes. It's probably stupid, will be fixed once the algorithm actually works\n", + "\n", + " H_e1 = np.zeros((m+1,1)) # canonical basis vector of size H.shape[0]\n", + " H_e1[0] = 1\n", + "\n", + " V_e1 = np.zeros((n,1)) # canonical basis vector of size V.shape[0]\n", + " V_e1[0] = 1\n", + "\n", + " s_e1 = np.zeros((len(a),1)) # canonical basis vector of size s.shape[0]\n", + " s_e1[0] = 1\n", + "\n", + " def find_k(res): # function to find the index of the largest element in res\n", + " k = 0\n", + " for i in range(len(a)):\n", + " if res[i] == max(res):\n", + " k = i\n", + " break\n", + " return k\n", + "\n", + " def compute_gamma(res, a, k): # function to compute gamma\n", + " gamma = np.zeros(len(a))\n", + " for i in range(len(a)):\n", + " if i != k:\n", + " gamma[i] = (res[i]*a[k])/(res[k]*a[i])\n", + " else:\n", + " gamma[i] = 0\n", + " return gamma\n", + "\n", + " # compute the residual vector\n", + "\n", + " I = sp.sparse.eye(Pt.shape[0], Pt.shape[1], format='lil')\n", + " res = np.zeros(len(a))\n", + " for i in range(len(a)):\n", + " r = sp.sparse.linalg.spsolve(I - a[i]*Pt, v)\n", + " res[i] = a[i]*np.linalg.norm(r)\n", + "\n", + " for _ in range(maxit):\n", + " k = find_k(res)\n", + " gamma = compute_gamma(res, a, k)\n", + " \n", + " # Run Arnoldi\n", + " V, H, v, beta, j = Arnoldi((1/a[k])*I - Pt, r, m)\n", + "\n", + " mv = mv + j\n", + "\n", + " # compute y as the minimizer of || beta*e1 - Hy ||_2 using the least squares method\n", + " y = sp.sparse.linalg.lsqr(H, beta*H_e1)[0]\n", + " \n", + " # reshape y to be a column vector\n", + " y = y.reshape(y.shape[0],1)\n", + "\n", + " # update x \n", + " x_new = x + V[:,0:y.shape[0]] @ y\n", + "\n", + "\n", + " res[k] = a[k]*np.linalg.norm(beta*V_e1 - V[:,0:y.shape[0]] @ y)\n", + " \n", + " for i in range(len(a)):\n", + " if i != k and res[i] >= tau:\n", + " # Compute H as described in the paper\n", + " H = H + ((1-a[i])/a[i] - (1-a[k])/a[k])*sp.sparse.eye(H.shape[0], H.shape[1], format='lil') \n", + " z = beta*H_e1 - H @ y\n", + "\n", + " # TO DO: SOLVE THE LINEAR SYSTEM HERE\n", + "\n", + " # update x\n", + " x_new = x + V[:,0:y.shape[0]] @ y\n", + "\n", + " # update the residual vector\n", + " res[i] = (a[i]/a[k])*gamma[k]*res[k]\n", + " \n", + " err = np.absolute(x - x_new).sum()\n", + " x = x_new\n", + "\n", + " if err < tau:\n", + " raise StopIteration\n", + "\n", + " return x, iter, mv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Basic test case with random numbers to test the algorithm." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "n = 100\n", + "m = 110\n", + "tau = 1e-9\n", + "a = [0.85, 0.9, 0.95, 0.99]\n", + "\n", + "x = sp.sparse.lil_matrix((n,1))\n", + "x[0,0] = 1\n", + "\n", + "# generate a random graph\n", + "G = nx.gnp_random_graph(n, 1)\n", + "v = np.repeat(1.0 / 100, 100) # p is the personalization vector\n", + "v = v.reshape(v.shape[0],1)\n", + "\n", + "A = nx.to_scipy_sparse_array(G, dtype=float)\n", + "S = A.sum(axis=1) # S[i] is the sum of the weights of edges going out of node i\n", + "S[S != 0] = 1.0 / S[S != 0] # S[i] is now the sum of the weights of edges going into node i\n", + "Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape)) # Q is the matrix of edge weights" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "Algo4(Q, v, m, a, tau, 100, x)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.10.6 64-bit", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}