{ "cells": [
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
"import numpy as np\n",
"import networkx as nx\n",
"import time\n",
"import math\n",
"import pandas as pd\n",
"import scipy as sp\n",
"import plotly.express as px\n",
"import plotly.graph_objs as go\n",
"from scipy.sparse import *\n",
"from scipy import linalg\n",
"from scipy.sparse.linalg import norm\n",
"from scipy.optimize import least_squares" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [
"## Arnoldi\n",
"\n",
"This is a copy of the algorithm defined and tested in the notebook `algo2_testing`: an implementation of Algorithm 2 from the paper. It is included here because it is called by the `Algo4` function below, and it returns exactly what `Algo4` needs.\n",
"\n",
"Everything will be reorganized in `main.py` once everything is working." ] },
{ "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [
"def Arnoldi(A, v0, m):\n",
"    # Arnoldi iteration: builds an orthonormal basis V of the Krylov subspace\n",
"    # K_m(A, v0) and the associated (m+1) x m Hessenberg matrix H\n",
"    v = v0\n",
"    beta = np.linalg.norm(v)\n",
"    v = v/beta\n",
"    H = sp.sparse.lil_matrix((m+1,m))\n",
"    V = sp.sparse.lil_matrix((A.shape[0],m+1))\n",
"    V[:,0] = v # each column of V is a basis vector of the Krylov subspace\n",
"\n",
"    for j in range(m):\n",
"        w = A @ v\n",
"        # orthogonalize w against all previous basis vectors (modified Gram-Schmidt)\n",
"        for i in range(j+1):\n",
"            v_i = V[:,[i]].toarray()\n",
"            H[i,j] = (v_i.T @ w)[0,0] # a 1x1 product, so it's O(1) in memory\n",
"            w = w - H[i,j]*v_i\n",
"\n",
"        H[j+1,j] = np.linalg.norm(w)\n",
"\n",
"        if H[j+1,j] == 0:\n",
"            # happy breakdown: the Krylov subspace is A-invariant\n",
"            m = j\n",
"            v = 0\n",
"            break\n",
"        else:\n",
"            if j < m-1:\n",
"                v = w/H[j+1,j]\n",
"                V[:,j+1] = v\n",
"\n",
"    return V, H, beta, j" ] },
{ "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [
"# Algorithm 4 testing\n",
"\n",
"This algorithm is based on \"Algorithm 4\" of the paper; the pseudocode provided by the authors is the following:\n",
"\n",
"![](https://i.imgur.com/H92fru7.png)" ] },
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [
"def compute_gamma(res, a, k): # compute the gamma coefficients used by the multi-shift update (gamma[k] = 0)\n",
"    gamma = np.ones(len(a))\n",
"    for i in range(len(a)):\n",
"        if i != k:\n",
"            gamma[i] = (res[i]*a[k])/(res[k]*a[i])\n",
"        else:\n",
"            gamma[i] = 0\n",
"    return gamma" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Basic test case on a random graph to test the algorithm." ] },
{ "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [
"n = 1000\n",
"m = 1100\n",
"tau = 1e-6\n",
"a = [0.85, 0.88, 0.9, 0.95]\n",
"\n",
"x = sp.sparse.lil_matrix((n,1))\n",
"x[0,0] = 1\n",
"\n",
"# generate a random graph\n",
"G = nx.gnp_random_graph(n, 0.1)\n",
"v = np.repeat(1.0 / n, n) # v is the (uniform) personalization vector\n",
"v = v.reshape(v.shape[0],1)\n",
"\n",
"A = nx.to_scipy_sparse_array(G, dtype=float)\n",
"S = A.sum(axis=1) # S[i] is the sum of the weights of the edges going out of node i\n",
"S[S != 0] = 1.0 / S[S != 0] # S[i] is now the reciprocal of the out-degree of node i\n",
"Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape)) @ A # Q = D^{-1} A is the row-stochastic transition matrix of the graph" ] },
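{ "cell_type": "markdown", "metadata": {}, "source": [
"Two quick sanity checks on this setup before running the algorithm (illustrative additions, not part of the paper). First, on a small random matrix the basis returned by `Arnoldi` should be orthonormal and the Hessenberg matrix should match the projection `V.T @ A @ V`. Second, for a single damping factor the PageRank vector solves the linear system `(I - alpha*Q.T) x = (1 - alpha)*v` (assuming every node has at least one outgoing edge), so a direct sparse solve gives a baseline to compare the output of `Algo4` against. The names `n_test`, `m_test`, `A_test`, `v0_test` and `alpha_ref` are arbitrary values introduced only for these checks." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# sanity check 1: Arnoldi on a small random matrix (illustrative only)\n",
"n_test, m_test = 60, 10\n",
"A_test = sp.sparse.random(n_test, n_test, density=0.2, format='csr', random_state=0)\n",
"v0_test = np.random.default_rng(0).random((n_test, 1))\n",
"\n",
"V_t, H_t, beta_t, _ = Arnoldi(A_test, v0_test, m_test)\n",
"V_m = V_t[:, :m_test].toarray()\n",
"H_m = H_t[:m_test, :m_test].toarray()\n",
"print(\"V orthonormal: \", np.allclose(V_m.T @ V_m, np.eye(m_test)))\n",
"print(\"H == V^T A V:  \", np.allclose(V_m.T @ (A_test @ V_m), H_m))\n",
"\n",
"# sanity check 2: single-alpha PageRank baseline via a direct sparse solve\n",
"alpha_ref = a[0]\n",
"M_ref = sp.sparse.eye(n, format='csc') - alpha_ref * Q.T\n",
"x_ref = sp.sparse.linalg.spsolve(M_ref, (1 - alpha_ref) * v.ravel())\n",
"print(\"sum of reference PageRank vector:\", x_ref.sum()) # ~1 when Q is row-stochastic" ] },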
{ "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [], "source": [
"def Algo4(Pt, v, m, a: list, tau, maxit: int, x):\n",
"\n",
"    mv, iter = 0, 1 # mv is the number of matrix-vector products, iter is the number of iterations\n",
"\n",
"    # initialize x as a sparse matrix of zeros: each column is the PageRank vector for a different alpha\n",
"    x = sp.sparse.lil_matrix((Pt.shape[0], len(a)))\n",
"\n",
"    # initialize the residual block r of size (n, len(a)): column i is obtained with a direct sparse solve of (I - a[i]*Pt) r_i = v\n",
"    I = sp.sparse.eye(Pt.shape[0], Pt.shape[1], format='lil')\n",
"    r = sp.sparse.lil_matrix((Pt.shape[0], len(a)))\n",
"    res = np.zeros(len(a))\n",
"\n",
"    for i in range(len(a)):\n",
"        r[:,[i]] = sp.sparse.linalg.spsolve(I - a[i]*Pt, v)\n",
"        col = r[:,[i]].toarray()\n",
"        res[i] = np.linalg.norm(col)\n",
"\n",
"    for _ in range(maxit):\n",
"        # check if we have converged\n",
"        err = np.amax(np.absolute(res))\n",
"        if err < tau:\n",
"            print(\"Computation ended successfully in \", iter, \" iterations and \", mv, \" matrix-vector products.\")\n",
"            return x, iter, mv\n",
"\n",
"        print(\"\\niter = \", iter)\n",
"        print(\"res: \", res)\n",
"        print(\"err = \", err)\n",
"\n",
"        # k is the index of the system with the largest residual: it is used as the seed system\n",
"        k = int(np.argmax(res))\n",
"        print(\"k = \", k)\n",
"        gamma = compute_gamma(res, a, k)\n",
"\n",
"        # run Arnoldi on the seed system\n",
"        r_k = r[:,[k]].toarray()\n",
"        A_arnoldi = (1/a[k])*I - Pt\n",
"        V, H, beta, j = Arnoldi(A_arnoldi, r_k, m)\n",
"        H = H[:-1,:]\n",
"        V = V[:,:-1]\n",
"        mv = mv + j\n",
"\n",
"        H_e1 = np.zeros(H.shape[0])\n",
"        H_e1[0] = 1\n",
"\n",
"        # compute y as the minimizer of || beta*e1 - H*y ||_2 using the least squares method\n",
"        y = sp.sparse.lil_matrix((H.shape[1],len(a)))\n",
"        y[:,[k]] = sp.sparse.linalg.lsqr(H, beta*H_e1)[0]\n",
"        y_k = y[:,[k]].toarray()\n",
"\n",
"        # update x (work on a copy so x is only replaced at the end of the iteration)\n",
"        x_new = x.copy()\n",
"        x_new[:,[k]] = x[:,[k]] + V @ y_k\n",
"\n",
"        # update res[k]; V_e1 is e1 as a column vector so that the subtraction below keeps shape (n,1)\n",
"        V_e1 = np.zeros((V.shape[0],1))\n",
"        V_e1[0] = 1\n",
"\n",
"        norm_k = np.linalg.norm(beta*V_e1 - V @ y_k)\n",
"        res[k] = a[k]*norm_k\n",
"\n",
"        # multi shift\n",
"        for i in range(len(a)):\n",
"            if res[i] >= tau:\n",
"                # print(\"res[\", i, \"] is larger than tau = \", tau)\n",
"\n",
"                # # Compute H as described in the paper\n",
"                # H_k = H[:,[k]].toarray()\n",
"                # H_i = H_k + ((1-a[i])/a[i] - (1-a[k])/a[k])\n",
"                # H[:,[i]] = H_i\n",
"                H = H + ((1-a[i])/a[i] - (1-a[k])/a[k])*sp.sparse.eye(H.shape[0], H.shape[1], format='lil')\n",
"\n",
"                # compute z as described in the paper\n",
"                z1 = H_e1*beta\n",
"                z1 = z1.reshape(z1.shape[0],1)\n",
"                z2 = H @ y[:,[k]]\n",
"                z2 = z2.reshape(z2.shape[0],1)\n",
"                z = z1 - z2\n",
"\n",
"                # solve the augmented linear system with the least squares method\n",
"                A = sp.sparse.hstack([H, z])\n",
"                b = (beta*H_e1)\n",
"                b = b.reshape(b.shape[0],1)\n",
"                to_split = sp.sparse.linalg.lsqr(A, b)[0]\n",
"\n",
"                # the last element of to_split is gamma[i], the remaining elements form y[:,[i]]\n",
"                y[:,[i]] = to_split[:-1]\n",
"                gamma[i] = to_split[-1]\n",
"\n",
"                # update x\n",
"                x_new[:,i] = x[:,i] + V @ y[:,[i]]\n",
"\n",
"                # update the residual vector\n",
"                res[i] = (a[i]/a[k])*pow(gamma[i], i)*res[k]\n",
"\n",
"            else:\n",
"                print(\"res[\", i, \"] is smaller than tau = \", tau, \" at iteration \", iter)\n",
"\n",
"        iter = iter + 1\n",
"        x = x_new\n",
"\n",
"    raise Exception('Maximum number of iterations reached')" ] },
{ "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "iter = 1\n", "res: [0.03189738 0.03190716 0.03191369 0.03193001]\n", "err = 0.031930006625941795\n", "k = 0\n", "\n", "iter = 2\n", "res: [1.11728737e+00 8.26005227e-04 5.55288870e-10 4.81520495e-13]\n", "err = 1.1172873666904701\n",
"k = 3\n", "res[ 2 ] is smaller than tau = 1e-06 at iteration 2\n", "\n", "iter = 3\n", "res: [1.17714008e+00 1.29941354e-03 5.55288870e-10 1.93969263e-18]\n", "err = 1.1771400826095457\n", "k = 3\n", "res[ 2 ] is smaller than tau = 1e-06 at iteration 3\n", "\n", "iter = 4\n", "res: [1.17714008e+00 1.29941354e-03 5.55288870e-10 1.93969263e-18]\n", "err = 1.1771400826095457\n", "k = 3\n", "res[ 2 ] is smaller than tau = 1e-06 at iteration 4\n", "\n", "iter = 5\n", "res: [1.17714008e+00 1.29941354e-03 5.55288870e-10 1.93969263e-18]\n", "err = 1.1771400826095457\n", "k = 3\n", "res[ 2 ] is smaller than tau = 1e-06 at iteration 5\n", "\n", "iter = 6\n", "res: [1.17714008e+00 1.29941354e-03 5.55288870e-10 1.93969263e-18]\n", "err = 1.1771400826095457\n", "k = 3\n" ] }, { "ename": "KeyboardInterrupt", "evalue": "", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", "\u001b[0;32m/tmp/ipykernel_13660/3677688099.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0miter\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mAlgo4\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mQ\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtau\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/tmp/ipykernel_13660/2503933778.py\u001b[0m in \u001b[0;36mAlgo4\u001b[0;34m(Pt, v, m, a, tau, maxit, x)\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0mr_k\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[0mA_arnoldi\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mI\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mPt\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 38\u001b[0;31m \u001b[0mV\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mH\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbeta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mArnoldi\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mI\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mPt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mr_k\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 39\u001b[0m \u001b[0mH\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mH\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 40\u001b[0m \u001b[0mV\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mV\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/tmp/ipykernel_13660/113321894.py\u001b[0m in \u001b[0;36mArnoldi\u001b[0;34m(A, v0, m)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mw\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mA\u001b[0m \u001b[0;34m@\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0mH\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m \u001b[0;34m@\u001b[0m \u001b[0mw\u001b[0m \u001b[0;31m# tmp is a 1x1 matrix, so it's O(1) in memory\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0mw\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mw\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mH\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m~/.local/lib/python3.10/site-packages/scipy/sparse/_lil.py\u001b[0m in \u001b[0;36m__setitem__\u001b[0;34m(self, key, x)\u001b[0m\n\u001b[1;32m 326\u001b[0m isinstance(key[1], INT_TYPES)):\n\u001b[1;32m 327\u001b[0m \u001b[0mx\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 328\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 329\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Trying to assign a sequence to an item\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 330\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_intXint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "x, iter, mv = Algo4(Q, v, m, a, tau, 100, x)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.10.6 64-bit", "language": "python", "name": "python3" 
}, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.6" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1" } } }, "nbformat": 4, "nbformat_minor": 2 }