small fixes
parent
6600f2ae1c
commit
74522d86d9
Binary file not shown.
@ -0,0 +1,357 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import networkx as nx\n",
|
||||
"import scipy as sp\n",
|
||||
"import scipy.sparse"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's create two graphs from the list of edges downloaded from the Snap database. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"G1 = nx.read_edgelist('../data/web-Stanford.txt', create_using=nx.DiGraph(), nodetype=int)\n",
|
||||
"\n",
|
||||
"G2 = nx.read_edgelist('../data/web-BerkStan.txt', create_using=nx.DiGraph(), nodetype=int)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Creating the transition probability matrix"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# square matrix of size n x n, where n is the number of nodes in the graph. The matrix is filled with zeros and the (i,j) element is x if the node i is connected to the node j. Where x is 1/(number of nodes connected to i).\n",
|
||||
"\n",
|
||||
"def create_matrix(G):\n",
|
||||
" n = G.number_of_nodes()\n",
|
||||
" P = sp.sparse.lil_matrix((n,n))\n",
|
||||
" for i in G.nodes():\n",
|
||||
" for j in G[i]: #G[i] is the list of nodes connected to i, it's neighbors\n",
|
||||
" P[i-1,j-1] = 1/len(G[i])\n",
|
||||
" return P"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"To ensure that the random process has a unique stationary distribution and it will not stagnate, the transition matrix P is usually modified to be an irreducible stochastic matrix A (called the Google matrix) as follows\n",
|
||||
"\n",
|
||||
"$$ A = \\alpha \\tilde{P} + (1-\\alpha)v e^T$$\n",
|
||||
"\n",
|
||||
"Where $\\tilde{P}$ is defined as \n",
|
||||
"\n",
|
||||
"$$ \\tilde{P} = P + v d^T$$\n",
|
||||
"\n",
|
||||
"Where $d \\in \\mathbb{N}^{n \\times 1}$ s a binary vector tracing the indices of dangling web-pages with no hyperlinks, i.e., $d(i ) = 1$ if the `ith` page has no hyperlink, $v \\in \\mathbb{R}^{n \\times 1}$ is a probability vector, $e = [1, 1, . . . , 1]^T$ , and $0 < \\alpha < 1$ is the so-called damping factor that represents the probability in the model that the surfer transfer by clicking a hyperlink rather than other ways"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"n = G1.number_of_nodes()\n",
|
||||
"P = create_matrix(G1) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"the vector `d` solves the dangling nodes problem"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# define d as a nx1 sparse matrix, where n is the number of nodes in the graph. The vector is filled with d(i) = 1 if the i row of the matrix P is filled with zeros, other wise is 0\n",
|
||||
"d = sp.sparse.lil_matrix((n,1))\n",
|
||||
"for i in range(n):\n",
|
||||
" if P[i].sum() == 0:\n",
|
||||
" d[i] = 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The vector v is a probability vector, the sum of its elements bust be one"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# define v as the probability vector of size n x 1, where n is the number of nodes in the graph. The vector is filled with 1/n\n",
|
||||
"# https://en.wikipedia.org/wiki/Probability_vector\n",
|
||||
"\n",
|
||||
"v = sp.sparse.lil_matrix((n,1))\n",
|
||||
"for i in range(n):\n",
|
||||
" v[i] = 1/n "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Now we can compute the transition matrix\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Pt = P + v.dot(d.T)\n",
|
||||
"\n",
|
||||
"# Pt is a sparse matrix too"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# e is a nx1 sparse matrix filled with ones\n",
|
||||
"e = sp.sparse.lil_matrix((1,n))\n",
|
||||
"for i in range(n):\n",
|
||||
" e[0,i] = 1"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# # v*eT is a nxn sparse matrix filled all with 1/n, let's call it B\n",
|
||||
"\n",
|
||||
"# B = sp.sparse.lil_matrix((n,n))\n",
|
||||
"# for i in range(n):\n",
|
||||
"# for j in range(n):\n",
|
||||
"# B[i,j] = 1/n\n",
|
||||
"\n",
|
||||
"# A = alpha*Pt + (1-alpha)*B"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Algorithm 1 Shifted-Power method for PageRank with multiple damping factors:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"a = [0.85, 0.9, 0.95, 0.99]\n",
|
||||
"tau = 10**-8\n",
|
||||
"max_mv = 1000\n",
|
||||
"\n",
|
||||
"# this should return mv (the number of iteration needed for the convergence), and two vector of lenght len(a) called x and r. Where x is the vector of the pagerank and r is the residual vector\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def Algorithm1(Pt, v, tau, max_mv, a: list):\n",
|
||||
" u = Pt.dot(v) - v\n",
|
||||
" mv = 1\n",
|
||||
" for i in range(len(a)):\n",
|
||||
" r = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" r[i] = a[i]*u\n",
|
||||
" Res = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" Res[i] = np.linalg.norm(r[i])\n",
|
||||
"\n",
|
||||
" if Res[i] > tau:\n",
|
||||
" x = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" x[i] = r[i] + v\n",
|
||||
" \n",
|
||||
" while max(Res) > tau and mv < max_mv:\n",
|
||||
" u = Pt.dot(u)\n",
|
||||
" mv += 1\n",
|
||||
"\n",
|
||||
" for i in range(len(a)):\n",
|
||||
" if Res[i] >= tau:\n",
|
||||
" r = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" r[i] = a[i]*u\n",
|
||||
" Res = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" Res[i] = np.linalg.norm(r[i])\n",
|
||||
"\n",
|
||||
" if Res[i] > tau:\n",
|
||||
" x = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" x[i] = r[i] + x[i]\n",
|
||||
" return mv, x, r "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ValueError",
|
||||
"evalue": "shape mismatch in assignment",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
|
||||
"\u001b[0;32m/tmp/ipykernel_128897/741213869.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# launch the algorithm\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mmv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mAlgorithm1\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mPt\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtau\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_mv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
||||
"\u001b[0;32m/tmp/ipykernel_128897/1942976890.py\u001b[0m in \u001b[0;36mAlgorithm1\u001b[0;34m(Pt, v, tau, max_mv, a)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msparse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlil_matrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mu\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0mRes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msparse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlil_matrix\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mRes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlinalg\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnorm\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/usr/lib/python3/dist-packages/scipy/sparse/_lil.py\u001b[0m in \u001b[0;36m__setitem__\u001b[0;34m(self, key, x)\u001b[0m\n\u001b[1;32m 330\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_set_intXint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 331\u001b[0m \u001b[0;31m# Everything else takes the normal path.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 332\u001b[0;31m \u001b[0mIndexMixin\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__setitem__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 333\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 334\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_mul_scalar\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;32m/usr/lib/python3/dist-packages/scipy/sparse/_index.py\u001b[0m in \u001b[0;36m__setitem__\u001b[0;34m(self, key, x)\u001b[0m\n\u001b[1;32m 130\u001b[0m if not ((broadcast_row or x.shape[0] == i.shape[0]) and\n\u001b[1;32m 131\u001b[0m (broadcast_col or x.shape[1] == i.shape[1])):\n\u001b[0;32m--> 132\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'shape mismatch in assignment'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 133\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 134\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
||||
"\u001b[0;31mValueError\u001b[0m: shape mismatch in assignment"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# launch the algorithm\n",
|
||||
"mv, x, r = Algorithm1(Pt, v, tau, max_mv, a)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Algorithm 2 Arnoldi process"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def Algorithm2():\n",
|
||||
" pass"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Algorithm 4 shifted-GMRES method for PageRank with multiple damping factors: "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def Algorithm4(Pt, v, m , a: list, tau , maxit, x: list):\n",
|
||||
" iter = 1\n",
|
||||
" \n",
|
||||
" e1 = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" e1[0] = 1\n",
|
||||
"\n",
|
||||
" # identity matrix sparse\n",
|
||||
" I = sp.sparse.lil_matrix((len(a),len(a)))\n",
|
||||
" for i in range(n):\n",
|
||||
" I[i,i] = 1\n",
|
||||
"\n",
|
||||
" # create the page rank vector x\n",
|
||||
" x = sp.sparse.lil_matrix((n,1))\n",
|
||||
" for i in range(n):\n",
|
||||
" x[i] = 1/n\n",
|
||||
"\n",
|
||||
" # create the vector r \n",
|
||||
" r = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" for i in range(len(a)):\n",
|
||||
" r[i] = ((1-a[i])/a[i]).dot(v) - ((1/a[i]).dot(I) - Pt).dot(x[i]).dot(e1)\n",
|
||||
" \n",
|
||||
" # create the vector Res\n",
|
||||
" Res = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" for i in range(len(a)):\n",
|
||||
" Res[i] = a[i] * np.linalg.norm(r[i])\n",
|
||||
"\n",
|
||||
" mv = 0\n",
|
||||
"\n",
|
||||
" while max(Res) > tau and mv < maxit:\n",
|
||||
" # find the k that satisfy the condition res[k] = max(res[i])\n",
|
||||
" k = 0\n",
|
||||
" for i in range(len(a)):\n",
|
||||
" if Res[i] == max(Res):\n",
|
||||
" k = i\n",
|
||||
" break\n",
|
||||
" \n",
|
||||
" # compute a new vector called delta where delta[i] = (res[i]*a[k])/(res[k]*a[i])\n",
|
||||
" delta = sp.sparse.lil_matrix((len(a),1))\n",
|
||||
" for i in range(len(a)) and i != k:\n",
|
||||
" delta[i] = (Res[i]*a[k])/(Res[k]*a[i])\n",
|
||||
"\n",
|
||||
" # run algorithm 2\n",
|
||||
" # TO DO\n",
|
||||
"\n",
|
||||
" # j depends on the algorithm 2\n",
|
||||
" mv = mv + j\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" # .............\n",
|
||||
"\n",
|
||||
" \n",
|
||||
" "
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3.10.6 64-bit",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
},
|
||||
"orig_nbformat": 4,
|
||||
"vscode": {
|
||||
"interpreter": {
|
||||
"hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
|
||||
}
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Loading…
Reference in New Issue