You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

429 lines
20 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import networkx as nx\n",
"import time\n",
"import math\n",
"import pandas as pd\n",
"import scipy as sp\n",
"import plotly.express as px\n",
"import plotly.graph_objs as go\n",
"from scipy.sparse import *\n",
"from scipy import linalg\n",
"from scipy.sparse.linalg import norm\n",
"from scipy.optimize import least_squares"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Arnoldi \n",
"\n",
"This is a copy of the algorithm defined and tested in the notebook `algo2_testing`. It's an implementation of the Algorithm 2 from the paper. It's needed in this notebook since this function is called by the `algo4` function. It's implemented to return exactly what's needed in the `algo4` function.\n",
"\n",
"Everything will be reorganized in the main.py file once everything is working."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def Arnoldi(A,v0,m):\n",
" v = v0\n",
" beta = np.linalg.norm(v)\n",
" v = v/beta\n",
" H = sp.sparse.lil_matrix((m+1,m)) \n",
" V = sp.sparse.lil_matrix((A.shape[0],m+1))\n",
" V[:,0] = v # each column of V is a vector v\n",
"\n",
" for j in range(m):\n",
" w = A @ v \n",
" for i in range(j):\n",
" H[i,j] = v.T @ w # tmp is a 1x1 matrix, so it's O(1) in memory\n",
" w = w - H[i,j]*v \n",
" \n",
" H[j+1,j] = np.linalg.norm(w)\n",
"\n",
" if H[j+1,j] == 0:\n",
" # print(\"Arnoldi breakdown\")\n",
" m = j\n",
" v = 0\n",
" break\n",
" else:\n",
" if j < m-1:\n",
" v = w/H[j+1,j]\n",
" V[:,j+1] = v\n",
"\n",
" return V, H, v, beta, j "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Algorithm 4 testing\n",
"\n",
"This algorithm is based on the \"Algorithm 4\" of the paper, the pseudocode provided by the authors is the following \n",
"\n",
"![](https://i.imgur.com/H92fru7.png)\n",
"\n",
"Line 14 is particularly tricky to understand, not working for now. Need to figure out how to solve that linear system. My idea was to do something like that\n",
"\n",
"![](https://i.imgur.com/uBCDYUa.jpeg)\n",
"\n",
"And use the `sp.sparse.linalg.spsolve` function to solve the linear system as $Ax=0$ where $A$ is $[\\bar H_m^i ~ | ~ z]$ but it returns an array of zeros. So the idea it's wrong"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# def Algo4_OLD(Pt, v, m, a: list, tau, maxit: int, x):\n",
" \n",
"# # I'm using a non declared variable n here , it's declared in the next cell when I call this function. This will be fixed later in the main.py file\n",
"\n",
"# iter , mv = 1, 0\n",
"# res = np.zeros(len(a)) \n",
"\n",
"# # I'm defining 3 canonical vectors of different sizes. It's probably stupid, will be fixed once the algorithm actually works\n",
"\n",
"# H_e1 = np.zeros((m+1,1)) # canonical basis vector of size H.shape[0]\n",
"# H_e1[0] = 1\n",
"\n",
"# V_e1 = np.zeros((n,1)) # canonical basis vector of size V.shape[0]\n",
"# V_e1[0] = 1\n",
"\n",
"# s_e1 = np.zeros((len(a),1)) # canonical basis vector of size s.shape[0]\n",
"# s_e1[0] = 1\n",
"\n",
"# def find_k(res): # function to find the index of the largest element in res\n",
"# k = 0\n",
"# for i in range(len(a)):\n",
"# if res[i] == max(res):\n",
"# k = i\n",
"# break\n",
"# return k\n",
"\n",
"# def compute_gamma(res, a, k): # function to compute gamma\n",
"# gamma = np.zeros(len(a))\n",
"# for i in range(len(a)):\n",
"# if i != k:\n",
"# gamma[i] = (res[i]*a[k])/(res[k]*a[i])\n",
"# else:\n",
"# gamma[i] = 0\n",
"# return gamma\n",
"\n",
"# # compute the residual vector\n",
"\n",
"# # create a sp.sparse.eye matrix of size Pt\n",
"# I = sp.sparse.eye(Pt.shape[0], Pt.shape[1], format='lil')\n",
"# for i in range(len(a)):\n",
"# r = ((1-a[i])/a[i])*v - ((1/a[i])*I - Pt) @ x\n",
"# res[i] = a[i]*norm(r)\n",
"\n",
"\n",
"# while max(res) >= tau and iter <= maxit:\n",
"# k = find_k(res)\n",
"# gamma = compute_gamma(res, a, k)\n",
"# V, H, v, beta, j = Arnoldi((1/a[k])*I - Pt, r, m)\n",
"\n",
"# mv = mv + j\n",
"\n",
"# # compute y as the minimizer of || beta*e1 - Hy ||_2 using the least squares method\n",
"# y = sp.sparse.linalg.lsqr(H, beta*H_e1)[0]\n",
"\n",
"# # reshape y to be a column vector\n",
"# y = y.reshape(y.shape[0],1)\n",
"\n",
"# # update x \n",
"# x += V[:,0:y.shape[0]] @ y\n",
"\n",
"# # compute the residual vector\n",
"# res[k] = a[k]*np.linalg.norm(beta*V_e1 - V[:,0:y.shape[0]] @ y)\n",
" \n",
"# # for i in range(len(a)) but not k\n",
"# for i in range(len(a)):\n",
"# if i != k and res[i] >= tau:\n",
"# # Compute H as described in the paper\n",
"# H = H + ((1-a[i])/a[i] - (1-a[k])/a[k])*sp.sparse.eye(H.shape[0], H.shape[1], format='lil') \n",
"\n",
"# z = beta*H_e1 - H @ y # define z as in the paper (page 9)\n",
"# A_tmp = sp.sparse.hstack([H, z]) # stack H and z, as in the paper, to solve the linear system (?)\n",
"# A_tmp = A_tmp.tocsc() # Convert A to CSC format for sparse solver\n",
"\n",
"# # What should I put here? What does it mean in the paper the line 14 of the pseudocode?\n",
"# result = sp.sparse.linalg.spsolve(A_tmp, np.zeros(A_tmp.shape[0])) # if I solve this, I get a vector of zeros.\n",
"# print(result)\n",
" \n",
"# # I don't know if the code below is correct since I don't get how to solve the linear system above, so I'm unsure about what y and gamma should be. For now it's commented out.\n",
"\n",
"# # # update x\n",
"# # x += V[:,0:y.shape[0]] @ y\n",
"# # # update the residual vector\n",
"# # res[i] = (a[i]/a[k])*gamma[k]*res[k] \n",
"\n",
"# iter = iter + 1\n",
"\n",
"# return x, iter, mv"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [],
"source": [
"def find_k(res): # function to find the index of the largest element in res\n",
" k = 0\n",
" for i in range(len(a)):\n",
" if res[i] == max(res):\n",
" k = i\n",
" break\n",
" return k\n",
"\n",
"def compute_gamma(res, a, k): # function to compute gamma\n",
" gamma = np.zeros(len(a))\n",
" for i in range(len(a)):\n",
" if i != k:\n",
" gamma[i] = (res[i]*a[k])/(res[k]*a[i])\n",
" else:\n",
" gamma[i] = 0\n",
" return gamma"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [],
"source": [
"def Algo4(Pt, v, m, a: list, tau, maxit: int, x):\n",
"\n",
" x = sp.sparse.lil_matrix((Pt.shape[0], len(a)))\n",
" mv = 0\n",
"\n",
" # compute the residual vector\n",
" I = sp.sparse.eye(Pt.shape[0], Pt.shape[1], format='lil')\n",
" res = np.zeros(len(a))\n",
" for i in range(len(a)):\n",
" r = sp.sparse.linalg.spsolve(I - a[i]*Pt, v)\n",
" res[i] = a[i]*np.linalg.norm(r)\n",
"\n",
" iter = 0\n",
" for _ in range(maxit):\n",
" print(\"\\niter: \", iter)\n",
" k = find_k(res)\n",
" gamma = compute_gamma(res, a, k)\n",
" \n",
" # Run Arnoldi\n",
" V, H, v, beta, j = Arnoldi((1/a[k])*I - Pt, r, m)\n",
"\n",
" H = H[:-1,:]\n",
" V = V[:,:-1]\n",
" mv = mv + j\n",
"\n",
" # compute y as the minimizer of || beta*e1 - Hy ||_2 using the least squares method\n",
" H_e1 = np.zeros(H.shape[0])\n",
" H_e1[0] = 1\n",
" y = sp.sparse.linalg.lsqr(H, beta*H_e1)[0]\n",
" \n",
" # Update res[k]\n",
" y = y.reshape(y.shape[0],1)\n",
" V_e1 = np.zeros(V.shape[0])\n",
" V_e1[0] = 1\n",
"\n",
" res[k] = a[k]*np.linalg.norm(beta*V_e1 - V @ y) \n",
" print(\"res[\", k, \"] = \", res[k]) \n",
" \n",
" for i in range(len(a)):\n",
" print(\"res = \", res)\n",
" if i != k and res[i] >= tau:\n",
" print(\"res[\", i, \"] is greater than tau = \", tau, \" at iteration \", iter, \" entering the for loop\")\n",
"\n",
" # Compute H as described in the paper\n",
" H_new = H + ((1-a[i])/a[i] - (1-a[k])/a[k])*sp.sparse.eye(H.shape[0], H.shape[1], format='lil')\n",
"\n",
" # Compute z as described in the paper\n",
" tmp = H_e1*beta \n",
" z1, z2 = tmp.reshape(tmp.shape[0],1), H @ y\n",
" z = z1 - z2\n",
"\n",
" # Solve the linear system \n",
" A = sp.sparse.hstack([H_new, z])\n",
" b = gamma[i]*(beta*H_e1)\n",
" b = b.reshape(b.shape[0],1)\n",
" \n",
" # use the least squares method to solve the linear system\n",
" y_to_split = sp.sparse.linalg.lsqr(A, b)[0]\n",
" \n",
" # the last element of y_to_split is the last element of gamma[i], the other elements are the elements of y\n",
" y = y_to_split[:-1].reshape(y_to_split[:-1].shape[0],1)\n",
" gamma[i] = y_to_split[-1]\n",
"\n",
" x_new = x\n",
" x_new[:,[k]] = x[:,[k]] + V @ y\n",
"\n",
" # update x\n",
" x_new[:,i] = x[:,i] + V @ y\n",
"\n",
" # update the residual vector\n",
" res[i] = (a[i]/a[k])*gamma[k]*res[k]\n",
" print(\"res[\", i, \"] = \", res[i])\n",
" \n",
" else:\n",
" if res[i] < tau:\n",
" print(\"res[\", i, \"] = \", res[i], \" is smaller than tau = \", tau, \" at iteration \", iter)\n",
"\n",
" err = np.absolute(res).max()\n",
" print(\"err = \", err)\n",
" if err < tau:\n",
" return x_new, iter, mv\n",
" \n",
" iter = iter + 1\n",
" x = x_new\n",
"\n",
" raise Exception('Maximum number of iterations reached')\n",
" \n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Basic test case with random numbers to test the algorithm."
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [],
"source": [
"n = 1000\n",
"m = 1100\n",
"tau = 1e-5\n",
"a = [0.85, 0.9, 0.95, 0.99]\n",
"\n",
"x = sp.sparse.lil_matrix((n,1))\n",
"x[0,0] = 1\n",
"\n",
"# generate a random graph\n",
"G = nx.gnp_random_graph(n, 0.01)\n",
"v = np.repeat(1.0 / 1000, 1000) # p is the personalization vector\n",
"v = v.reshape(v.shape[0],1)\n",
"\n",
"A = nx.to_scipy_sparse_array(G, dtype=float)\n",
"S = A.sum(axis=1) # S[i] is the sum of the weights of edges going out of node i\n",
"S[S != 0] = 1.0 / S[S != 0] # S[i] is now the sum of the weights of edges going into node i\n",
"Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape)) # Q is the matrix of edge weights"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"iter: 0\n",
"res[ 3 ] = 1.9500347533542841\n",
"res = [0.02974037 0.03169717 0.03368163 1.95003475]\n",
"res[ 0 ] is greater than tau = 1e-05 at iteration 0 entering the for loop\n",
"res[ 0 ] = 0.0\n",
"res = [0. 0.03169717 0.03368163 1.95003475]\n",
"res[ 1 ] is greater than tau = 1e-05 at iteration 0 entering the for loop\n",
"res[ 1 ] = 0.0\n",
"res = [0. 0. 0.03368163 1.95003475]\n",
"res[ 2 ] is greater than tau = 1e-05 at iteration 0 entering the for loop\n",
"res[ 2 ] = 0.0\n",
"res = [0. 0. 0. 1.95003475]\n",
"err = 1.9500347533542841\n",
"\n",
"iter: 1\n",
"res[ 3 ] = 1.9500347533542841\n",
"res = [0. 0. 0. 1.95003475]\n",
"res[ 0 ] = 0.0 is smaller than tau = 1e-05 at iteration 1\n",
"res = [0. 0. 0. 1.95003475]\n",
"res[ 1 ] = 0.0 is smaller than tau = 1e-05 at iteration 1\n",
"res = [0. 0. 0. 1.95003475]\n",
"res[ 2 ] = 0.0 is smaller than tau = 1e-05 at iteration 1\n",
"res = [0. 0. 0. 1.95003475]\n",
"err = 1.9500347533542841\n",
"\n",
"iter: 2\n",
"res[ 3 ] = 1.9500347533542841\n",
"res = [0. 0. 0. 1.95003475]\n",
"res[ 0 ] = 0.0 is smaller than tau = 1e-05 at iteration 2\n",
"res = [0. 0. 0. 1.95003475]\n",
"res[ 1 ] = 0.0 is smaller than tau = 1e-05 at iteration 2\n",
"res = [0. 0. 0. 1.95003475]\n",
"res[ 2 ] = 0.0 is smaller than tau = 1e-05 at iteration 2\n",
"res = [0. 0. 0. 1.95003475]\n",
"err = 1.9500347533542841\n",
"\n",
"iter: 3\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m/tmp/ipykernel_85082/3677688099.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0miter\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mAlgo4\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mQ\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtau\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/tmp/ipykernel_85082/4220477307.py\u001b[0m in \u001b[0;36mAlgo4\u001b[0;34m(Pt, v, m, a, tau, maxit, x)\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;31m# Run Arnoldi\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 20\u001b[0;31m \u001b[0mV\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mH\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbeta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mArnoldi\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mI\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mPt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 21\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0mH\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mH\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/tmp/ipykernel_85082/1990871025.py\u001b[0m in \u001b[0;36mArnoldi\u001b[0;34m(A, v0, m)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mw\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mA\u001b[0m \u001b[0;34m@\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0mH\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m \u001b[0;34m@\u001b[0m \u001b[0mw\u001b[0m \u001b[0;31m# tmp is a 1x1 matrix, so it's O(1) in memory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0mw\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mw\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mH\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/.local/lib/python3.10/site-packages/scipy/sparse/_lil.py\u001b[0m in \u001b[0;36m__setitem__\u001b[0;34m(self, key, x)\u001b[0m\n\u001b[1;32m 320\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_arrayXarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrow\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcol\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 322\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0m__setitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 323\u001b[0m \u001b[0;31m# Fast path for simple (int, int) indexing.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 324\u001b[0m if (isinstance(key, tuple) and len(key) == 2 and\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"x, iter, mv = Algo4(Q, v, m, a, tau, 100, x)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.6 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}