fixed the documentation

2 years ago · 560bf58612
parent 6024b9eb75
commit 560bf58612
1 changed files with 58 additions and 37 deletions
--- a/src/algo.py
+++ b/src/algo.py
@ -10,8 +10,6 @@ import numpy as np
 import pandas as pd
 import networkx as nx
 from os.path import exists
-from scipy.sparse import *
-import plotly.graph_objs as go
 from typing import Literal

 def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph:
@ -19,15 +17,28 @@ def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph

    Parameters
    ----------
-    dataset : Literal["Stanford", "BerkStan"]
+    dataset : Literal["Stanford", "BerkStan", "NotreDame"]
        The dataset to load.

    Returns
    -------
    nx.Graph
-        The graph of the dataset.
-        data/web-Stanford.txt
+        The graph of the dataset loaded.

+    Raises
+    ------
+    ValueError
+        If the dataset is not valid.
+
+    Notes
+    -----
+    The datasets are downloaded from the following links:
+
+    http://snap.stanford.edu/data/web-NotreDame.html
+    http://snap.stanford.edu/data/web-Stanford.html
+    http://snap.stanford.edu/data/web-BerkStan.html
+
+    If the dataset is already downloaded, it is not downloaded again.
    """

    # check if there is a data folder
@ -61,8 +72,7 @@ def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph

 def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None) -> np.matrix:

-
-    """Returns the Google matrix of the graph.
+    """Returns the Google matrix of the graph. NetworkX implementation.

    Parameters
    ----------
@ -103,6 +113,8 @@ def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="we

    Notes
    -----
+    DO NOT USE THIS FUNCTION FOR LARGE GRAPHS.  It's memory intensive.
+
    The matrix returned represents the transition matrix that describes the
    Markov chain used in PageRank. For PageRank to converge to a unique
    solution (i.e., a unique stationary distribution in a Markov chain), the
@ -147,8 +159,7 @@ def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="we

 def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None) -> np.matrix:

-
-  """Returns the Google matrix of the graph.
+  """ Revised NetworkX implementation for sparse matrices. Returns the Ptilde matrix of the graph instead of the Google matrix.

    Parameters
    ----------
@ -189,12 +200,12 @@ def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, wei

    Notes
    -----
-  The matrix returned represents the transition matrix that describes the
-  Markov chain used in PageRank. For PageRank to converge to a unique
-  solution (i.e., a unique stationary distribution in a Markov chain), the
-  transition matrix must be irreducible. In other words, it must be that
-  there exists a path between every pair of nodes in the graph, or else there
-  is the potential of "rank sinks."
+    This matrix is, strictly speaking, not the Google matrix but the Ptilde matrix described in the paper [1].
+
+
+    References
+    ----------
+    [1] Zhao-Li Shen, Meng Su, Bruno Carpentieri, and Chun Wen. Shifted power-gmres method accelerated by extrapolation for solving pagerank with multiple damping factors. Applied Mathematics and Computation, 420:126799, 2022

  """
  if nodelist is None:
@ -243,7 +254,7 @@ def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, wei
  return A, p

 def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", dangling=None):
-    """Returns the PageRank of the nodes in the graph.
+    """Returns the PageRank of the nodes in the graph. NetworkX implementation.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
@ -306,7 +317,7 @@ def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", danglin
 def pagerank(G, alpha=0.85, personalization=None, max_iter=10000, tol=1.0e-9, nstart=None, weight="weight", dangling=None,):

    """
-    Returns the PageRank of the nodes in the graph.
+    Returns the PageRank of the nodes in the graph. Slightly modified NetworkX implementation.

        PageRank computes a ranking of the nodes in the graph G based on
        the structure of the incoming links. It was originally designed as
@ -415,15 +426,16 @@ def pagerank(G, alpha=0.85, personalization=None, max_iter=10000, tol=1.0e-9, ns
        if err < N * tol: # if the error is small enough, stop iterating
            return dict(zip(nodelist, map(float, x))), iter, tol # return the current vector of PageRank values'

-    # other wise, return a Null dictionary, the number of iterations, and the tolerance
-    # this is a failure to convergeS
+    # this is a failure to converge
+
+    raise nx.PowerIterationFailedConvergence(max_iter)
+
    
-    return {}, iter, tol

 def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=1.0e-9):

    """
-    Compute the PageRank of each node in the graph G.
+    Compute the PageRank of each node in the graph G. Algorithm 1 in the paper [1].

    Parameters
    ----------
@ -441,8 +453,8 @@ def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=

    Returns
    -------
-    pagerank : dictionary
-        Dictionary of nodes with PageRank as value
+    pagerank : sparse matrix
+        Each column of the sparse matrix is a pagerank vector for a different alpha value.

    mv : integer
        The number of matrix-vector multiplications used in the power method
@ -451,6 +463,15 @@ def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=
    -----
    The eigenvector calculation uses power iteration with a SciPy sparse matrix representation. The shifted power method is described as Algorithm 1 in the paper located in the sources folders.

+    Raises
+    ------
+    PowerIterationFailedConvergence
+        If the algorithm fails to converge to the specified tolerance
+
+    References
+    ----------
+    [1] Zhao-Li Shen, Meng Su, Bruno Carpentieri, and Chun Wen. Shifted power-gmres method accelerated by extrapolation for solving pagerank with multiple damping factors. Applied Mathematics and Computation, 420:126799, 2022
+
    """

    N = len(G)