now arnoldi works. last problem on algo 4. main to update

2 years ago · 4b8c39ad84
parent 633655bc27
commit 4b8c39ad84
6 changed files with 251 additions and 232 deletions
--- a/data/results/algo1/different_tau.csv
+++ b/data/results/algo1/different_tau.csv
@ -1,6 +0,0 @@
-alpha,products m-v,tau,time
-"[0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99]",19,1e-05,14.4
-"[0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99]",56,1e-06,33.8
-"[0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99]",113,1e-07,84.19
-"[0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99]",275,1e-08,136.66
-"[0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99]",454,1e-09,193.94
--- a/data/results/algo1/taus_over_prods.html
+++ b/data/results/algo1/taus_over_prods.html
--- a/data/results/algo1/taus_over_time.html
+++ b/data/results/algo1/taus_over_time.html
--- a/src/algo2_testing.ipynb
+++ b/src/algo2_testing.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
@ -29,37 +29,95 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 86,
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# def Arnoldi(A, v, m): \n",
+    "#     beta = norm(v)\n",
+    "#     v = v/beta # dimension of v is n x 1\n",
+    "#     H = sp.sparse.lil_matrix((m,m)) # dimension of H is m x m \n",
+    "#     V = sp.sparse.lil_matrix((A.shape[0],m))\n",
+    "#     V[:,0] = v # each column of V is a vector v\n",
+    "\n",
+    "#     for j in range(m):\n",
+    "#         print(\"j = \", j)\n",
+    "#         w = A @ v  \n",
+    "#         for i in range(j):\n",
+    "#             tmp = v.T @ w  \n",
+    "#             H[i,j] = tmp[0,0]\n",
+    "#             w = w - H[i,j]*v \n",
+    "            \n",
+    "#         H[j,j-1] = norm(w) \n",
+    "\n",
+    "#         if H[j,j-1] == 0: \n",
+    "#             print(\"Arnoldi breakdown\")\n",
+    "#             m = j\n",
+    "#             v = 0\n",
+    "#             break\n",
+    "#         else:\n",
+    "#             if j < m-1:\n",
+    "#                 v = w/H[j,j-1]\n",
+    "#                 # for i in range(A.shape[0]):\n",
+    "#                 #     V[i,j+1] = v[i,0]\n",
+    "#                 V[:,j+1] = v\n",
+    "\n",
+    "#     print(j, \" iterations completed\")\n",
+    "#     print(\"V = \", V.shape)\n",
+    "#     print(\"H = \", H.shape)\n",
+    "#     print(\"v = \", v.shape)\n",
+    "#     print(\"beta = \", beta)\n",
+    "\n",
+    "#     return V, H, v, beta, j"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
-    "def Arnoldi(A, v, m): \n",
+    "#  review the restarted GMRES method (hereafter referred to as GMRES in short), which is a nonsymmetric Krylov subspace solver based on the Arnoldi decomposition procedure\n",
+    "\n",
+    "# return the matrix V, the matrix Hp, the vector v, the scalar beta and the number of iterations j. Where V is a matrix of dimension n x m, Hp is an upper Hessenberg matrix of dimension (m+1) x m. Return also the m+1 vector of the basis V. Return h as the m+1,m element of Hp and the vector v as the m+1 element of the basis V.\n",
+    "\n",
+    "\n",
+    "def ArnoldiGMRES(A,v0,m):\n",
+    "    v = v0\n",
    "    beta = norm(v)\n",
-    "    v = v/beta \n",
-    "    h = sp.sparse.lil_matrix((m,m)) \n",
+    "    v = v/beta\n",
+    "    H = sp.sparse.lil_matrix((m+1,m)) \n",
+    "    V = sp.sparse.lil_matrix((A.shape[0],m+1))\n",
+    "    V[:,0] = v # each column of V is a vector v\n",
    "\n",
    "    for j in range(m):\n",
+    "        # print(\"j = \", j)\n",
    "        w = A @ v  \n",
    "        for i in range(j):\n",
-    "            tmp = v.T @ w  \n",
-    "            h[i,j] = tmp[0,0]\n",
-    "            w = w - h[i,j]*v \n",
-    "        h[j,j-1] = norm(w) \n",
+    "            tmp = v.T @ w # tmp is a 1x1 matrix, so it's O(1) in memory\n",
+    "            H[i,j] = tmp[0,0] \n",
+    "            w = w - H[i,j]*v \n",
+    "            \n",
+    "        H[j+1,j] = norm(w)\n",
    "\n",
-    "        if h[j,j-1] == 0: \n",
+    "        if H[j+1,j] == 0:\n",
    "            print(\"Arnoldi breakdown\")\n",
    "            m = j\n",
    "            v = 0\n",
    "            break\n",
    "        else:\n",
-    "            v = w/h[j,j-1] \n",
-    "        \n",
-    "    print(\"Arnoldi finished\\n\")\n",
-    "    print(\"h is \", h.shape)\n",
-    "    print(\"v is \", v.shape)\n",
-    "    print(\"m is \", m)\n",
-    "    print(\"beta is \", beta)\n",
-    "    return v, h, m, beta, j # this is not the output required in the paper, I should return the matrix V and the matrix H\n"
+    "            if j < m-1:\n",
+    "                v = w/H[j+1,j]\n",
+    "                V[:,j+1] = v\n",
+    "\n",
+    "    print(j, \" iterations completed\")\n",
+    "    print(\"V = \", V.shape)\n",
+    "    print(\"H = \", H.shape)\n",
+    "    print(\"v = \", v.shape)\n",
+    "    print(\"beta = \", beta)\n",
+    "\n",
+    "    return V, H, v, beta, j  \n",
+    " "
   ]
  },
  {
@ -71,35 +129,52 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 82,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "m = 100\n",
-    "A = sp.sparse.rand(m,m, density=0.5, format='lil')\n",
-    "v = sp.sparse.rand(m,1, density=0.5, format='lil')"
+    "n = 110\n",
+    "A = sp.sparse.rand(n,n, density=0.1, format='lil')\n",
+    "v = sp.sparse.rand(n,1, density=0.1, format='lil')"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 87,
+   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Arnoldi finished\n",
-      "\n",
-      "h is  (100, 100)\n",
-      "v is  (100, 1)\n",
-      "m is  100\n",
-      "beta is  1.0\n"
+      "99  iterations completed\n",
+      "V =  (110, 101)\n",
+      "H =  (101, 100)\n",
+      "v =  (110, 1)\n",
+      "beta =  2.1103247239370497\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "(<110x101 sparse matrix of type '<class 'numpy.float64'>'\n",
+       " \twith 10863 stored elements in List of Lists format>,\n",
+       " <101x100 sparse matrix of type '<class 'numpy.float64'>'\n",
+       " \twith 4923 stored elements in List of Lists format>,\n",
+       " <110x1 sparse matrix of type '<class 'numpy.float64'>'\n",
+       " \twith 110 stored elements in Compressed Sparse Row format>,\n",
+       " 2.1103247239370497,\n",
+       " 99)"
      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
    }
   ],
   "source": [
-    "v, h, m, beta, j = Arnoldi(A, v, m)"
+    "ArnoldiGMRES(A,v,m)"
   ]
  }
 ],
--- a/src/algo4_testing.ipynb
+++ b/src/algo4_testing.ipynb
@ -29,41 +29,45 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
-    "def Arnoldi(A, v, m): #  defined ad algorithm 2 in the paper\n",
+    "def Arnoldi(A,v0,m):\n",
+    "    v = v0\n",
    "    beta = norm(v)\n",
-    "    print(\"A\")\n",
    "    v = v/beta\n",
-    "    print(\"B\")\n",
-    "    h = sp.sparse.lil_matrix((m,m))\n",
-    "    print(\"C\")\n",
+    "    H = sp.sparse.lil_matrix((m+1,m)) \n",
+    "    V = sp.sparse.lil_matrix((A.shape[0],m+1))\n",
+    "    V[:,0] = v # each column of V is a vector v\n",
    "\n",
    "    for j in range(m):\n",
-    "        w = A.dot(v)\n",
-    "        print(\"D\")\n",
+    "        # print(\"j = \", j)\n",
+    "        w = A @ v  \n",
    "        for i in range(j):\n",
-    "            h[i,j] = v.T.dot(w)\n",
-    "            print(\"E\")\n",
-    "            w = w - h[i,j]*v[i]\n",
-    "            print(\"F\")\n",
-    "\n",
-    "        h[j+1,j] = norm(w)\n",
-    "        print(\"G\")\n",
+    "            tmp = v.T @ w # tmp is a 1x1 matrix, so it's O(1) in memory\n",
+    "            H[i,j] = tmp[0,0] \n",
+    "            w = w - H[i,j]*v \n",
+    "            \n",
+    "        H[j+1,j] = norm(w)\n",
    "\n",
-    "        if h[j+1,j] == 0:\n",
-    "            print(\"The algorithm didn't converge\")\n",
+    "        if H[j+1,j] == 0:\n",
+    "            # print(\"Arnoldi breakdown\")\n",
    "            m = j\n",
-    "            v[m+1] = 0\n",
+    "            v = 0\n",
    "            break\n",
    "        else:\n",
-    "            print(\"H\")\n",
-    "            v[j+1] = w**h[j+1,j] # THIS IS WRONG, I DON'T KNOW HOW TO FIX IT. ERROR \" matrix is not square\"\n",
-    "            print(\"I\")\n",
+    "            if j < m-1:\n",
+    "                v = w/H[j+1,j]\n",
+    "                V[:,j+1] = v\n",
    "\n",
-    "    return v, h, m, beta, j"
+    "    # print(j, \" iterations completed\")\n",
+    "    # print(\"V = \", V.shape)\n",
+    "    # print(\"H = \", H.shape)\n",
+    "    # print(\"v = \", v.shape)\n",
+    "    # print(\"beta = \", beta)\n",
+    "\n",
+    "    return V, H, v, beta, j  "
   ]
  },
  {
@ -77,7 +81,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
@ -85,19 +89,21 @@
    "    \n",
    "    iter = 1\n",
    "    mv = 0\n",
-    "    e1 = sp.sparse.lil_matrix((1,n))\n",
-    "    e1[0,0] = 1\n",
-    "    x = sp.sparse.lil_matrix((len(a),1))\n",
    "    I = sp.sparse.eye(n, n, format='lil')\n",
-    "    res = sp.sparse.lil_matrix((len(a),1))\n",
    "    r = sp.sparse.lil_matrix((n,1))\n",
-    "    y = sp.sparse.lil_matrix((n,1))\n",
+    "    res = np.zeros(len(a)) \n",
    "\n",
-    "    for i in range(len(a)): # I don't think that this is what was intended in the pseudocode... \n",
-    "        r = ((1-a[i])**a[i])*v - ((1**a[i])*I - Pt).dot(x)\n",
-    "        res[i] = a[i]*norm(r)\n",
+    "    H_e1 = np.zeros((m+1,1))\n",
+    "    H_e1[0] = 1\n",
+    "\n",
+    "    V_e1 = np.zeros((n,1))\n",
+    "    V_e1[0] = 1\n",
+    "\n",
+    "    s_e1 = np.zeros((len(a),1))\n",
+    "    s_e1[0] = 1\n",
    "\n",
-    "    def Find_k(res, maxit):\n",
+    "\n",
+    "    def find_k(res):\n",
    "        k = 0\n",
    "        for i in range(len(a)):\n",
    "            if res[i] == max(res):\n",
@ -105,7 +111,7 @@
    "                break\n",
    "        return k\n",
    "\n",
-    "    def Find_gamma(res, a, k):\n",
+    "    def compute_gamma(res, a, k):\n",
    "        gamma = sp.sparse.lil_matrix((len(a),1))\n",
    "        for i in range(len(a)):\n",
    "            if i != k:\n",
@ -114,33 +120,118 @@
    "                gamma[i] = 0\n",
    "        return gamma\n",
    "\n",
+    "    # compute the residual vector\n",
+    "    for i in range(len(a)):\n",
+    "        r = ((1-a[i])/a[i])*v - ((1/a[i])*I - Pt) @ x\n",
+    "        res[i] = a[i]*norm(r)\n",
+    "\n",
+    "    while max(res) >= tau and iter <= maxit:\n",
+    "        k = find_k(res)\n",
+    "        gamma = compute_gamma(res, a, k)\n",
+    "        # run Arnoldi, with a[k] as the shift\n",
+    "        V, H, v, beta, j = Arnoldi((1/a[k])*I - Pt, r, m)\n",
    "\n",
-    "    while max(res) > tau and iter < maxit:\n",
-    "        k = Find_k(res, maxit)\n",
-    "        gamma = Find_gamma(res, a, k)\n",
-    "        v, h, m, beta, j = Arnoldi((1**a[k])*I - Pt, r, m)\n",
-    "        Hbar = sp.sparse.lil_matrix((m+1,m))\n",
-    "        Hbar[0:m,0:m] = h\n",
-    "        Hbar[m+1,0:m] = e1\n",
+    "        mv = mv + j\n",
    "\n",
-    "        mv += j\n",
+    "        # compute y as the minimizer of || beta*e1 - Hy ||_2 using the least squares method\n",
+    "        y = sp.sparse.linalg.lsqr(H, beta*H_e1)[0]\n",
+    "        tmp = V[:,0:y.shape[0]]\n",
    "\n",
-    "        y = sp.sparse.linalg.least_squares(Hbar, beta*e1)\n",
-    "        res[k] = a[k]*norm(beta*e1 - Hbar*y)\n",
-    "        x[k] = x[k] + v*y[k]\n",
+    "        # reshape y to be a column vector\n",
+    "        y = y.reshape(y.shape[0],1)\n",
+    "        x += tmp @ y\n",
    "\n",
+    "        # compute the residual vector\n",
+    "        res[k] = a[k]*np.linalg.norm(beta*V_e1 - tmp @ y)\n",
+    "        \n",
+    "        # for i in range(len(a)) but not k\n",
    "        for i in range(len(a)):\n",
-    "            if i != k:\n",
-    "                if res[i] >= tau:\n",
-    "                    Hbar[i] = Hbar[k] + ((1-a[i])/a[i] - (1-a[k])/a[k])*I\n",
-    "                    z  = beta*e1 - Hbar*y\n",
-    "                    y = sp.sparse.linalg.solve(Hbar, gamma*beta*e1)\n",
-    "                    x = x + v*y\n",
-    "                    res[i] = a[i]**a[k]*gamma[i]*res[k]\n",
+    "            if i != k and res[i] >= tau:\n",
+    "                H = H + ((1-a[i])/a[i] - (1-a[k])/a[k])*sp.sparse.eye(H.shape[0], H.shape[1], format='lil')\n",
+    "\n",
+    "                # solve y and gamma[i] from [H z] = [y gamma[i]]^T = gamma[i]*e1*beta\n",
+    "                z = beta*H_e1 - H.dot(y)\n",
+    "                A_tmp = sp.sparse.hstack([H, z])    \n",
+    "                b_tmp = gamma * beta \n",
+    "\n",
+    "                # make b_tmp a column vector\n",
+    "                b_tmp = b_tmp.reshape(b_tmp.shape[0],1)\n",
+    "                # add zeros to the end of b_tmp until it has the same number of rows as A_tmp\n",
+    "                b_tmp = sp.sparse.vstack([b_tmp, sp.sparse.lil_matrix((A_tmp.shape[0]-b_tmp.shape[0],1))])\n",
+    "\n",
+    "\n",
+    "                # solve the system, without using the least squares method\n",
+    "                # CONVERT TO CSC FORMAT TO USE SPARSE SOLVERS\n",
+    "                A_tmp = A_tmp.tocsc()\n",
+    "\n",
+    "                result = sp.sparse.linalg.spsolve(A_tmp, b_tmp)[0]\n",
+    "                print(\"result:\", result.shape)\n",
    "                                \n",
-    "        iter += 1\n",
    "                \n",
-    "    return x, res, mv\n"
+    "                # split the result into y and gamma\n",
+    "                y = result[0:y.shape[0]]\n",
+    "                gamma = result[y.shape[0]:]\n",
+    "\n",
+    "                # update x\n",
+    "                x = x + tmp @ y\n",
+    "                # update residual\n",
+    "                res = (a[i]/a[k])*res*gamma[k]\n",
+    "\n",
+    "        iter = iter + 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_264197/102804730.py:12: FutureWarning: adjacency_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0.\n",
+      "  A = nx.adjacency_matrix(G)\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "result: ()\n"
+     ]
+    },
+    {
+     "ename": "IndexError",
+     "evalue": "invalid index to scalar variable.",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mIndexError\u001b[0m                                Traceback (most recent call last)",
+      "\u001b[0;32m/tmp/ipykernel_264197/102804730.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     16\u001b[0m \u001b[0mPt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mA\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m \u001b[0;31m# run the algorithm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0mAlgo4\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mPt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtau\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmaxit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;32m/tmp/ipykernel_264197/2668036735.py\u001b[0m in \u001b[0;36mAlgo4\u001b[0;34m(Pt, v, m, a, tau, maxit, x)\u001b[0m\n\u001b[1;32m     83\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     84\u001b[0m                 \u001b[0;31m# split the result into y and gamma\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 85\u001b[0;31m                 \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     86\u001b[0m                 \u001b[0mgamma\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0my\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     87\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mIndexError\u001b[0m: invalid index to scalar variable."
+     ]
+    }
+   ],
+   "source": [
+    "# test the algorithm\n",
+    "n = 100\n",
+    "m = 110\n",
+    "maxit = 100\n",
+    "tau = 1e-6\n",
+    "a = [0.85, 0.9, 0.95, 0.99]\n",
+    "x = sp.sparse.lil_matrix((n,1))\n",
+    "x[0,0] = 1\n",
+    "# generate a random graph\n",
+    "G = nx.gnp_random_graph(n, 0.1, seed=1, directed=True)\n",
+    "# generate a random matrix\n",
+    "A = nx.adjacency_matrix(G)\n",
+    "# generate a random vector\n",
+    "v = sp.sparse.rand(n, 1, density=0.1, format='lil')\n",
+    "# compute the power iteration matrix\n",
+    "Pt = A.T\n",
+    "# run the algorithm\n",
+    "Algo4(Pt, v, m, a, tau, maxit, x)"
   ]
  }
 ],
--- a/src/main.py
+++ b/src/main.py
@ -193,6 +193,7 @@ class Algorithms:
        return mv, x, r, total_time

    # Refers to Algorithm 2 in the paper, it's needed to implement the algorithm 4. It doesn't work yet. Refer to the file testing.ipynb for more details. This function down here is just a place holder for now
+
    def Arnoldi(A, v, m):
        beta = norm(v)
        v = v/beta