Can't solve the linear system on line 14 of the pseudocode

2 years ago · 2bacbb59a1
parent 4b8c39ad84
commit 2bacbb59a1
1 changed files with 78 additions and 87 deletions
--- a/src/algo4_testing.ipynb
+++ b/src/algo4_testing.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@ -24,12 +24,16 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "This function is needed in the algorithm. Note that this is a NON-functioning version, for now it's just a place holder. When algo2_testing will be completed, this will be updated and I'll work on algo4_testing."
+    "## Arnoldi \n",
+    "\n",
+    "This is a copy of the algorithm defined and tested in the notebook `algo2_testing`. It's an implementation of the Algorithm 2 from the paper. It's needed in this notebook since this function is called by the `algo4` function. It's implemented to return exactly what's needed in the `algo4` function.\n",
+    "\n",
+    "Everything will be reorganized in the main.py file once everything is working."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@ -42,7 +46,6 @@
    "    V[:,0] = v # each column of V is a vector v\n",
    "\n",
    "    for j in range(m):\n",
-    "        # print(\"j = \", j)\n",
    "        w = A @ v  \n",
    "        for i in range(j):\n",
    "            tmp = v.T @ w # tmp is a 1x1 matrix, so it's O(1) in memory\n",
@ -61,12 +64,6 @@
    "                v = w/H[j+1,j]\n",
    "                V[:,j+1] = v\n",
    "\n",
-    "    # print(j, \" iterations completed\")\n",
-    "    # print(\"V = \", V.shape)\n",
-    "    # print(\"H = \", H.shape)\n",
-    "    # print(\"v = \", v.shape)\n",
-    "    # print(\"beta = \", beta)\n",
-    "\n",
    "    return V, H, v, beta, j  "
   ]
  },
@ -74,36 +71,47 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## Algorithm 4 testing\n",
+    "# Algorithm 4 testing\n",
+    "\n",
+    "This algorithm is based on the \"Algorithm 4\" of the paper, the pseudocode provided by the authors is the following \n",
+    "\n",
+    "![](https://i.imgur.com/H92fru7.png)\n",
    "\n",
-    "Still a complete mess. Conceptually and technically wrong. I'll work on it when algo2_testing will be completed."
+    "Line 14 is particularly tricky to understand, not working for now. Need to figure out how to solve that linear system. My idea was to do something like that\n",
+    "\n",
+    "![](https://i.imgur.com/uBCDYUa.jpeg)\n",
+    "\n",
+    "And use the `sp.sparse.linalg.spsolve` function to solve the linear system as $Ax=0$ where $A$ is $[\\bar H_m^i ~ |  ~ z]$ but it returns an array of zeros. So the idea it's wrong"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def Algo4(Pt, v, m, a: list, tau, maxit: int, x):\n",
    "    \n",
+    "    # I'm using a non declared variable n here , it's declared in the next cell when I call this function. This will be fixed later in the main.py file\n",
+    "\n",
    "    iter = 1\n",
    "    mv = 0\n",
    "    I = sp.sparse.eye(n, n, format='lil')\n",
    "    r = sp.sparse.lil_matrix((n,1))\n",
    "    res = np.zeros(len(a)) \n",
    "\n",
-    "    H_e1 = np.zeros((m+1,1))\n",
+    "    # I'm defining 3 canonical vectors of different sizes. It's probably stupid, will be fixed once the algorithm actually works\n",
+    "\n",
+    "    H_e1 = np.zeros((m+1,1)) # canonical basis vector of size H.shape[0]\n",
    "    H_e1[0] = 1\n",
    "\n",
-    "    V_e1 = np.zeros((n,1))\n",
+    "    V_e1 = np.zeros((n,1)) # canonical basis vector of size V.shape[0]\n",
    "    V_e1[0] = 1\n",
    "\n",
-    "    s_e1 = np.zeros((len(a),1))\n",
+    "    s_e1 = np.zeros((len(a),1)) # canonical basis vector of size s.shape[0]\n",
    "    s_e1[0] = 1\n",
    "\n",
-    "\n",
-    "    def find_k(res):\n",
+    "    def find_k(res): # function to find the index of the largest element in res\n",
    "        k = 0\n",
    "        for i in range(len(a)):\n",
    "            if res[i] == max(res):\n",
@ -111,8 +119,8 @@
    "                break\n",
    "        return k\n",
    "\n",
-    "    def compute_gamma(res, a, k):\n",
-    "        gamma = sp.sparse.lil_matrix((len(a),1))\n",
+    "    def compute_gamma(res, a, k): # function to compute gamma\n",
+    "        gamma = np.zeros(len(a))\n",
    "        for i in range(len(a)):\n",
    "            if i != k:\n",
    "                gamma[i] = (res[i]*a[k])/(res[k]*a[i])\n",
@ -128,108 +136,91 @@
    "    while max(res) >= tau and iter <= maxit:\n",
    "        k = find_k(res)\n",
    "        gamma = compute_gamma(res, a, k)\n",
-    "        # run Arnoldi, with a[k] as the shift\n",
    "        V, H, v, beta, j = Arnoldi((1/a[k])*I - Pt, r, m)\n",
    "\n",
    "        mv = mv + j\n",
    "\n",
    "        # compute y as the minimizer of || beta*e1 - Hy ||_2 using the least squares method\n",
    "        y = sp.sparse.linalg.lsqr(H, beta*H_e1)[0]\n",
-    "        tmp = V[:,0:y.shape[0]]\n",
    "\n",
    "        # reshape y to be a column vector\n",
    "        y = y.reshape(y.shape[0],1)\n",
-    "        x += tmp @ y\n",
+    "        x += V[:,0:y.shape[0]] @ y\n",
    "\n",
    "        # compute the residual vector\n",
-    "        res[k] = a[k]*np.linalg.norm(beta*V_e1 - tmp @ y)\n",
+    "        res[k] = a[k]*np.linalg.norm(beta*V_e1 - V[:,0:y.shape[0]] @ y)\n",
    "        \n",
    "        # for i in range(len(a)) but not k\n",
    "        for i in range(len(a)):\n",
    "            if i != k and res[i] >= tau:\n",
-    "                H = H + ((1-a[i])/a[i] - (1-a[k])/a[k])*sp.sparse.eye(H.shape[0], H.shape[1], format='lil')\n",
-    "\n",
-    "                # solve y and gamma[i] from [H z] = [y gamma[i]]^T = gamma[i]*e1*beta\n",
-    "                z = beta*H_e1 - H.dot(y)\n",
-    "                A_tmp = sp.sparse.hstack([H, z])    \n",
-    "                b_tmp = gamma * beta \n",
-    "\n",
-    "                # make b_tmp a column vector\n",
-    "                b_tmp = b_tmp.reshape(b_tmp.shape[0],1)\n",
-    "                # add zeros to the end of b_tmp until it has the same number of rows as A_tmp\n",
-    "                b_tmp = sp.sparse.vstack([b_tmp, sp.sparse.lil_matrix((A_tmp.shape[0]-b_tmp.shape[0],1))])\n",
+    "                # Compute H as described in the paper\n",
+    "                H = H + ((1-a[i])/a[i] - (1-a[k])/a[k])*sp.sparse.eye(H.shape[0], H.shape[1], format='lil') \n",
    "\n",
+    "                z = beta*H_e1 - H @ y # define z as in the paper (page 9)\n",
+    "                A_tmp = sp.sparse.hstack([H, z]) # stack H and z, as in the paper, to solve the linear system (?)\n",
+    "                A_tmp = A_tmp.tocsc() # Convert A to CSC format for sparse solver\n",
    "\n",
-    "                # solve the system, without using the least squares method\n",
-    "                # CONVERT TO CSC FORMAT TO USE SPARSE SOLVERS\n",
-    "                A_tmp = A_tmp.tocsc()\n",
-    "\n",
-    "                result = sp.sparse.linalg.spsolve(A_tmp, b_tmp)[0]\n",
-    "                print(\"result:\", result.shape)\n",
-    "                                \n",
+    "                # What should I put here? What does it mean in the paper the 14 of the pseudocode?\n",
+    "                result = sp.sparse.linalg.spsolve(A_tmp, np.zeros(A_tmp.shape[0])) # if I solve this, I get a vector of zeros.\n",
+    "                print(result)\n",
    "                \n",
-    "                # split the result into y and gamma\n",
-    "                y = result[0:y.shape[0]]\n",
-    "                gamma = result[y.shape[0]:]\n",
+    "                # I don't know if the code below is correct since I don't get how to solve the linear system above, so I'm unsure about what y and gamma should be. For now it's commented out.\n",
    "\n",
-    "                # update x\n",
-    "                x = x + tmp @ y\n",
-    "                # update residual\n",
-    "                res = (a[i]/a[k])*res*gamma[k]\n",
+    "                # # update x\n",
+    "                # x += V[:,0:y.shape[0]] @ y\n",
+    "                # # update the residual vector\n",
+    "                # res[i] = (a[i]/a[k])*gamma[k]*res[k] \n",
    "\n",
-    "        iter = iter + 1"
+    "        iter = iter + 1\n",
+    "\n",
+    "    return x, iter, mv"
   ]
  },
  {
-   "cell_type": "code",
-   "execution_count": 16,
+   "cell_type": "markdown",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/tmp/ipykernel_264197/102804730.py:12: FutureWarning: adjacency_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0.\n",
-      "  A = nx.adjacency_matrix(G)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "result: ()\n"
+   "source": [
+    "Basic test case with random numbers to test the algorithm."
   ]
  },
  {
-     "ename": "IndexError",
-     "evalue": "invalid index to scalar variable.",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mIndexError\u001b[0m                                Traceback (most recent call last)",
-      "\u001b[0;32m/tmp/ipykernel_264197/102804730.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     16\u001b[0m \u001b[0mPt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mA\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m \u001b[0;31m# run the algorithm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0mAlgo4\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mPt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtau\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmaxit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;32m/tmp/ipykernel_264197/2668036735.py\u001b[0m in \u001b[0;36mAlgo4\u001b[0;34m(Pt, v, m, a, tau, maxit, x)\u001b[0m\n\u001b[1;32m     83\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     84\u001b[0m                 \u001b[0;31m# split the result into y and gamma\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 85\u001b[0;31m                 \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     86\u001b[0m                 \u001b[0mgamma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mIndexError\u001b[0m: invalid index to scalar variable."
-     ]
-    }
-   ],
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
   "source": [
-    "# test the algorithm\n",
+    "\n",
    "n = 100\n",
    "m = 110\n",
    "maxit = 100\n",
    "tau = 1e-6\n",
    "a = [0.85, 0.9, 0.95, 0.99]\n",
+    "\n",
    "x = sp.sparse.lil_matrix((n,1))\n",
    "x[0,0] = 1\n",
+    "\n",
    "# generate a random graph\n",
    "G = nx.gnp_random_graph(n, 0.1, seed=1, directed=True)\n",
-    "# generate a random matrix\n",
-    "A = nx.adjacency_matrix(G)\n",
-    "# generate a random vector\n",
-    "v = sp.sparse.rand(n, 1, density=0.1, format='lil')\n",
-    "# compute the power iteration matrix\n",
-    "Pt = A.T\n",
+    "\n",
+    "P = sp.sparse.lil_matrix((n,n))\n",
+    "for i in G.nodes():\n",
+    "    for j in G[i]: #G[i] is the list of nodes connected to i, it's neighbors\n",
+    "        P[i-1,j-1] = 1/len(G[i])\n",
+    "\n",
+    "# generate a probability vector, with all the entries as 1/n\n",
+    "v = sp.sparse.lil_matrix((n,1))\n",
+    "for i in range(n):\n",
+    "    v[i] = 1/n\n",
+    "\n",
+    "# dangling nodes vector\n",
+    "d = sp.sparse.lil_matrix((n,1))\n",
+    "for i in range(n):\n",
+    "    if P[i].sum() == 0:\n",
+    "        d[i] = 1\n",
+    "\n",
+    "# compute the transition matrix\n",
+    "Pt = P + v @ (d.T)\n",
+    "\n",
    "# run the algorithm\n",
    "Algo4(Pt, v, m, a, tau, maxit, x)"
   ]