Can't solve the linear system on line 14 of the pseudocode

2 years ago · 2bacbb59a1
parent 4b8c39ad84
commit 2bacbb59a1
1 changed files with 78 additions and 87 deletions
--- a/src/algo4_testing.ipynb
+++ b/src/algo4_testing.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@ -24,12 +24,16 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "This function is needed in the algorithm. Note that this is a NON-functioning version, for now it's just a place holder. When algo2_testing will be completed, this will be updated and I'll work on algo4_testing."
+    "## Arnoldi \n",
    "\n",
    "This is a copy of the algorithm defined and tested in the notebook `algo2_testing`. It's an implementation of the Algorithm 2 from the paper. It's needed in this notebook since this function is called by the `algo4` function. It's implemented to return exactly what's needed in the `algo4` function.\n",
    "\n",
    "Everything will be reorganized in the main.py file once everything is working."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
@ -42,7 +46,6 @@
    "    V[:,0] = v # each column of V is a vector v\n",
    "\n",
    "    for j in range(m):\n",
    "        # print(\"j = \", j)\n",
    "        w = A @ v  \n",
    "        for i in range(j):\n",
    "            tmp = v.T @ w # tmp is a 1x1 matrix, so it's O(1) in memory\n",
@ -61,12 +64,6 @@
    "                v = w/H[j+1,j]\n",
    "                V[:,j+1] = v\n",
    "\n",
    "    # print(j, \" iterations completed\")\n",
    "    # print(\"V = \", V.shape)\n",
    "    # print(\"H = \", H.shape)\n",
    "    # print(\"v = \", v.shape)\n",
    "    # print(\"beta = \", beta)\n",
    "\n",
    "    return V, H, v, beta, j  "
   ]
  },
@ -74,36 +71,47 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "## Algorithm 4 testing\n",
+    "# Algorithm 4 testing\n",
    "\n",
    "This algorithm is based on the \"Algorithm 4\" of the paper, the pseudocode provided by the authors is the following \n",
    "\n",
    "![](https://i.imgur.com/H92fru7.png)\n",
    "\n",
-    "Still a complete mess. Conceptually and technically wrong. I'll work on it when algo2_testing will be completed."
+    "Line 14 is particularly tricky to understand, not working for now. Need to figure out how to solve that linear system. My idea was to do something like that\n",
    "\n",
    "![](https://i.imgur.com/uBCDYUa.jpeg)\n",
    "\n",
    "And use the `sp.sparse.linalg.spsolve` function to solve the linear system as $Ax=0$ where $A$ is $[\\bar H_m^i ~ |  ~ z]$ but it returns an array of zeros. So the idea it's wrong"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def Algo4(Pt, v, m, a: list, tau, maxit: int, x):\n",
    "    \n",
    "    # I'm using a non declared variable n here , it's declared in the next cell when I call this function. This will be fixed later in the main.py file\n",
    "\n",
    "    iter = 1\n",
    "    mv = 0\n",
    "    I = sp.sparse.eye(n, n, format='lil')\n",
    "    r = sp.sparse.lil_matrix((n,1))\n",
    "    res = np.zeros(len(a)) \n",
    "\n",
-    "    H_e1 = np.zeros((m+1,1))\n",
+    "    # I'm defining 3 canonical vectors of different sizes. It's probably stupid, will be fixed once the algorithm actually works\n",
    "\n",
    "    H_e1 = np.zeros((m+1,1)) # canonical basis vector of size H.shape[0]\n",
    "    H_e1[0] = 1\n",
    "\n",
-    "    V_e1 = np.zeros((n,1))\n",
+    "    V_e1 = np.zeros((n,1)) # canonical basis vector of size V.shape[0]\n",
    "    V_e1[0] = 1\n",
    "\n",
-    "    s_e1 = np.zeros((len(a),1))\n",
+    "    s_e1 = np.zeros((len(a),1)) # canonical basis vector of size s.shape[0]\n",
    "    s_e1[0] = 1\n",
    "\n",
-    "\n",
+    "    def find_k(res): # function to find the index of the largest element in res\n",
    "    def find_k(res):\n",
    "        k = 0\n",
    "        for i in range(len(a)):\n",
    "            if res[i] == max(res):\n",
@ -111,8 +119,8 @@
    "                break\n",
    "        return k\n",
    "\n",
-    "    def compute_gamma(res, a, k):\n",
+    "    def compute_gamma(res, a, k): # function to compute gamma\n",
-    "        gamma = sp.sparse.lil_matrix((len(a),1))\n",
+    "        gamma = np.zeros(len(a))\n",
    "        for i in range(len(a)):\n",
    "            if i != k:\n",
    "                gamma[i] = (res[i]*a[k])/(res[k]*a[i])\n",
@ -128,108 +136,91 @@
    "    while max(res) >= tau and iter <= maxit:\n",
    "        k = find_k(res)\n",
    "        gamma = compute_gamma(res, a, k)\n",
    "        # run Arnoldi, with a[k] as the shift\n",
    "        V, H, v, beta, j = Arnoldi((1/a[k])*I - Pt, r, m)\n",
    "\n",
    "        mv = mv + j\n",
    "\n",
    "        # compute y as the minimizer of || beta*e1 - Hy ||_2 using the least squares method\n",
    "        y = sp.sparse.linalg.lsqr(H, beta*H_e1)[0]\n",
    "        tmp = V[:,0:y.shape[0]]\n",
    "\n",
    "        # reshape y to be a column vector\n",
    "        y = y.reshape(y.shape[0],1)\n",
-    "        x += tmp @ y\n",
+    "        x += V[:,0:y.shape[0]] @ y\n",
    "\n",
    "        # compute the residual vector\n",
-    "        res[k] = a[k]*np.linalg.norm(beta*V_e1 - tmp @ y)\n",
+    "        res[k] = a[k]*np.linalg.norm(beta*V_e1 - V[:,0:y.shape[0]] @ y)\n",
    "        \n",
    "        # for i in range(len(a)) but not k\n",
    "        for i in range(len(a)):\n",
    "            if i != k and res[i] >= tau:\n",
-    "                H = H + ((1-a[i])/a[i] - (1-a[k])/a[k])*sp.sparse.eye(H.shape[0], H.shape[1], format='lil')\n",
+    "                # Compute H as described in the paper\n",
-    "\n",
+    "                H = H + ((1-a[i])/a[i] - (1-a[k])/a[k])*sp.sparse.eye(H.shape[0], H.shape[1], format='lil') \n",
    "                # solve y and gamma[i] from [H z] = [y gamma[i]]^T = gamma[i]*e1*beta\n",
    "                z = beta*H_e1 - H.dot(y)\n",
    "                A_tmp = sp.sparse.hstack([H, z])    \n",
    "                b_tmp = gamma * beta \n",
    "\n",
    "                # make b_tmp a column vector\n",
    "                b_tmp = b_tmp.reshape(b_tmp.shape[0],1)\n",
    "                # add zeros to the end of b_tmp until it has the same number of rows as A_tmp\n",
    "                b_tmp = sp.sparse.vstack([b_tmp, sp.sparse.lil_matrix((A_tmp.shape[0]-b_tmp.shape[0],1))])\n",
    "\n",
    "                z = beta*H_e1 - H @ y # define z as in the paper (page 9)\n",
    "                A_tmp = sp.sparse.hstack([H, z]) # stack H and z, as in the paper, to solve the linear system (?)\n",
    "                A_tmp = A_tmp.tocsc() # Convert A to CSC format for sparse solver\n",
    "\n",
-    "                # solve the system, without using the least squares method\n",
+    "                # What should I put here? What does it mean in the paper the 14 of the pseudocode?\n",
-    "                # CONVERT TO CSC FORMAT TO USE SPARSE SOLVERS\n",
+    "                result = sp.sparse.linalg.spsolve(A_tmp, np.zeros(A_tmp.shape[0])) # if I solve this, I get a vector of zeros.\n",
-    "                A_tmp = A_tmp.tocsc()\n",
+    "                print(result)\n",
    "\n",
    "                result = sp.sparse.linalg.spsolve(A_tmp, b_tmp)[0]\n",
    "                print(\"result:\", result.shape)\n",
    "                                \n",
    "                \n",
-    "                # split the result into y and gamma\n",
+    "                # I don't know if the code below is correct since I don't get how to solve the linear system above, so I'm unsure about what y and gamma should be. For now it's commented out.\n",
    "                y = result[0:y.shape[0]]\n",
    "                gamma = result[y.shape[0]:]\n",
    "\n",
-    "                # update x\n",
+    "                # # update x\n",
-    "                x = x + tmp @ y\n",
+    "                # x += V[:,0:y.shape[0]] @ y\n",
-    "                # update residual\n",
+    "                # # update the residual vector\n",
-    "                res = (a[i]/a[k])*res*gamma[k]\n",
+    "                # res[i] = (a[i]/a[k])*gamma[k]*res[k] \n",
    "\n",
-    "        iter = iter + 1"
+    "        iter = iter + 1\n",
    "\n",
    "    return x, iter, mv"
   ]
  },
  {
-   "cell_type": "code",
+   "cell_type": "markdown",
   "execution_count": 16,
   "metadata": {},
-   "outputs": [
+   "source": [
-    {
+    "Basic test case with random numbers to test the algorithm."
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/tmp/ipykernel_264197/102804730.py:12: FutureWarning: adjacency_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0.\n",
      "  A = nx.adjacency_matrix(G)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "result: ()\n"
   ]
  },
  {
-     "ename": "IndexError",
+   "cell_type": "code",
-     "evalue": "invalid index to scalar variable.",
+   "execution_count": null,
-     "output_type": "error",
+   "metadata": {},
-     "traceback": [
+   "outputs": [],
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mIndexError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[0;32m/tmp/ipykernel_264197/102804730.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     16\u001b[0m \u001b[0mPt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mA\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m \u001b[0;31m# run the algorithm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0mAlgo4\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mPt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtau\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmaxit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m/tmp/ipykernel_264197/2668036735.py\u001b[0m in \u001b[0;36mAlgo4\u001b[0;34m(Pt, v, m, a, tau, maxit, x)\u001b[0m\n\u001b[1;32m     83\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     84\u001b[0m                 \u001b[0;31m# split the result into y and gamma\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 85\u001b[0;31m                 \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     86\u001b[0m                 \u001b[0mgamma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mIndexError\u001b[0m: invalid index to scalar variable."
     ]
    }
   ],
   "source": [
-    "# test the algorithm\n",
+    "\n",
    "n = 100\n",
    "m = 110\n",
    "maxit = 100\n",
    "tau = 1e-6\n",
    "a = [0.85, 0.9, 0.95, 0.99]\n",
    "\n",
    "x = sp.sparse.lil_matrix((n,1))\n",
    "x[0,0] = 1\n",
    "\n",
    "# generate a random graph\n",
    "G = nx.gnp_random_graph(n, 0.1, seed=1, directed=True)\n",
-    "# generate a random matrix\n",
+    "\n",
-    "A = nx.adjacency_matrix(G)\n",
+    "P = sp.sparse.lil_matrix((n,n))\n",
-    "# generate a random vector\n",
+    "for i in G.nodes():\n",
-    "v = sp.sparse.rand(n, 1, density=0.1, format='lil')\n",
+    "    for j in G[i]: #G[i] is the list of nodes connected to i, it's neighbors\n",
-    "# compute the power iteration matrix\n",
+    "        P[i-1,j-1] = 1/len(G[i])\n",
-    "Pt = A.T\n",
+    "\n",
    "# generate a probability vector, with all the entries as 1/n\n",
    "v = sp.sparse.lil_matrix((n,1))\n",
    "for i in range(n):\n",
    "    v[i] = 1/n\n",
    "\n",
    "# dangling nodes vector\n",
    "d = sp.sparse.lil_matrix((n,1))\n",
    "for i in range(n):\n",
    "    if P[i].sum() == 0:\n",
    "        d[i] = 1\n",
    "\n",
    "# compute the transition matrix\n",
    "Pt = P + v @ (d.T)\n",
    "\n",
    "# run the algorithm\n",
    "Algo4(Pt, v, m, a, tau, maxit, x)"
   ]