fixed the documentation

2 years ago · 560bf58612
parent 6024b9eb75
commit 560bf58612
1 changed files with 58 additions and 37 deletions
--- a/src/algo.py
+++ b/src/algo.py
@ -10,8 +10,6 @@ import numpy as np
 import pandas as pd
 import networkx as nx
 from os.path import exists
 from scipy.sparse import *
 import plotly.graph_objs as go
 from typing import Literal
 def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph:
@ -19,15 +17,28 @@ def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph
    Parameters
    ----------
-    dataset : Literal["Stanford", "BerkStan"]
+    dataset : Literal["Stanford", "BerkStan", "NotreDame"]
        The dataset to load.
    Returns
    -------
    nx.Graph
-        The graph of the dataset.
+        The graph of the dataset loaded.
        data/web-Stanford.txt
    Raises
    ------
    ValueError
        If the dataset is not valid.
    Notes
    -----
    The datasets are downloaded from the following links:
    http://snap.stanford.edu/data/web-NotreDame.html
    http://snap.stanford.edu/data/web-Stanford.html
    http://snap.stanford.edu/data/web-BerkStan.html
    If the dataset is already downloaded, it is not downloaded again.
    """
    # check if there is a data folder
@ -61,8 +72,7 @@ def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph
 def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None) -> np.matrix:
-
+    """Returns the Google matrix of the graph. NetworkX implementation.
    """Returns the Google matrix of the graph.
    Parameters
    ----------
@ -103,6 +113,8 @@ def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="we
    Notes
    -----
    DO NOT USE THIS FUNCTION FOR LARGE GRAPHS.  It's memory intensive.
    The matrix returned represents the transition matrix that describes the
    Markov chain used in PageRank. For PageRank to converge to a unique
    solution (i.e., a unique stationary distribution in a Markov chain), the
@ -147,8 +159,7 @@ def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="we
 def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None) -> np.matrix:
-
+  """ Revised NetworkX implementation for sparse matrices. Returns the Ptilde matrix of the graph instead of the Google matrix.
  """Returns the Google matrix of the graph.
    Parameters
    ----------
@ -189,12 +200,12 @@ def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, wei
    Notes
    -----
-  The matrix returned represents the transition matrix that describes the
+    This matrix is, strictly speaking, not the Google matrix but the Ptilde matrix described in the paper [1].
-  Markov chain used in PageRank. For PageRank to converge to a unique
+
-  solution (i.e., a unique stationary distribution in a Markov chain), the
+
-  transition matrix must be irreducible. In other words, it must be that
+    References
-  there exists a path between every pair of nodes in the graph, or else there
+    ----------
-  is the potential of "rank sinks."
+    [1] Zhao-Li Shen, Meng Su, Bruno Carpentieri, and Chun Wen. Shifted power-gmres method accelerated by extrapolation for solving pagerank with multiple damping factors. Applied Mathematics and Computation, 420:126799, 2022
  """
  if nodelist is None:
@ -243,7 +254,7 @@ def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, wei
  return A, p
 def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", dangling=None):
-    """Returns the PageRank of the nodes in the graph.
+    """Returns the PageRank of the nodes in the graph. NetworkX implementation.
    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
@ -306,7 +317,7 @@ def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", danglin
 def pagerank(G, alpha=0.85, personalization=None, max_iter=10000, tol=1.0e-9, nstart=None, weight="weight", dangling=None,):
    """
-    Returns the PageRank of the nodes in the graph.
+    Returns the PageRank of the nodes in the graph. Slightly modified NetworkX implementation.
        PageRank computes a ranking of the nodes in the graph G based on
        the structure of the incoming links. It was originally designed as
@ -415,15 +426,16 @@ def pagerank(G, alpha=0.85, personalization=None, max_iter=10000, tol=1.0e-9, ns
        if err < N * tol: # if the error is small enough, stop iterating
            return dict(zip(nodelist, map(float, x))), iter, tol # return the current vector of PageRank values'
-    # other wise, return a Null dictionary, the number of iterations, and the tolerance
+    # this is a failure to converge
-    # this is a failure to convergeS
+
    raise nx.PowerIterationFailedConvergence(max_iter)
    return {}, iter, tol
 def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=1.0e-9):
    """
-    Compute the PageRank of each node in the graph G.
+    Compute the PageRank of each node in the graph G. Algorithm 1 in the paper [1].
    Parameters
    ----------
@ -441,8 +453,8 @@ def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=
    Returns
    -------
-    pagerank : dictionary
+    pagerank : sparse matrix
-        Dictionary of nodes with PageRank as value
+        Each column of the sparse matrix is a pagerank vector for a different alpha value.
    mv : integer
        The number of matrix-vector multiplications used in the power method
@ -451,6 +463,15 @@ def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=
    -----
    The eigenvector calculation uses power iteration with a SciPy sparse matrix representation. The shifted power method is described as Algorithm 1 in the paper located in the sources folders.
    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
    References
    ----------
    [1] Zhao-Li Shen, Meng Su, Bruno Carpentieri, and Chun Wen. Shifted power-gmres method accelerated by extrapolation for solving pagerank with multiple damping factors. Applied Mathematics and Computation, 420:126799, 2022
    """
    N = len(G)