fixed the documentation

main
Luca Lombardo 2 years ago
parent 6024b9eb75
commit 560bf58612

@ -10,8 +10,6 @@ import numpy as np
import pandas as pd import pandas as pd
import networkx as nx import networkx as nx
from os.path import exists from os.path import exists
from scipy.sparse import *
import plotly.graph_objs as go
from typing import Literal from typing import Literal
def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph: def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph:
@ -19,15 +17,28 @@ def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph
Parameters Parameters
---------- ----------
dataset : Literal["Stanford", "BerkStan"] dataset : Literal["Stanford", "BerkStan", "NotreDame"]
The dataset to load. The dataset to load.
Returns Returns
------- -------
nx.Graph nx.Graph
The graph of the dataset. The graph of the dataset loaded.
data/web-Stanford.txt
Raises
------
ValueError
If the dataset is not valid.
Notes
-----
The datasets are downloaded from the following links:
http://snap.stanford.edu/data/web-NotreDame.html
http://snap.stanford.edu/data/web-Stanford.html
http://snap.stanford.edu/data/web-BerkStan.html
If the dataset is already downloaded, it is not downloaded again.
""" """
# check if there is a data folder # check if there is a data folder
@ -61,8 +72,7 @@ def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph
def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None) -> np.matrix: def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None) -> np.matrix:
"""Returns the Google matrix of the graph. NetworkX implementation.
"""Returns the Google matrix of the graph.
Parameters Parameters
---------- ----------
@ -103,6 +113,8 @@ def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="we
Notes Notes
----- -----
DO NOT USE THIS FUNCTION FOR LARGE GRAPHS. It's memory intensive.
The matrix returned represents the transition matrix that describes the The matrix returned represents the transition matrix that describes the
Markov chain used in PageRank. For PageRank to converge to a unique Markov chain used in PageRank. For PageRank to converge to a unique
solution (i.e., a unique stationary distribution in a Markov chain), the solution (i.e., a unique stationary distribution in a Markov chain), the
@ -147,8 +159,7 @@ def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="we
def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None) -> np.matrix: def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None) -> np.matrix:
""" Revised NetworkX implementation for sparse matrices. Returns the Ptilde matrix of the graph instead of the Google matrix.
"""Returns the Google matrix of the graph.
Parameters Parameters
---------- ----------
@ -189,12 +200,12 @@ def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, wei
Notes Notes
----- -----
The matrix returned represents the transition matrix that describes the This matrix is, strictly speaking, not the Google matrix but the Ptilde matrix described in the paper [1].
Markov chain used in PageRank. For PageRank to converge to a unique
solution (i.e., a unique stationary distribution in a Markov chain), the
transition matrix must be irreducible. In other words, it must be that References
there exists a path between every pair of nodes in the graph, or else there ----------
is the potential of "rank sinks." [1] Zhao-Li Shen, Meng Su, Bruno Carpentieri, and Chun Wen. Shifted power-gmres method accelerated by extrapolation for solving pagerank with multiple damping factors. Applied Mathematics and Computation, 420:126799, 2022
""" """
if nodelist is None: if nodelist is None:
@ -243,7 +254,7 @@ def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, wei
return A, p return A, p
def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", dangling=None): def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", dangling=None):
"""Returns the PageRank of the nodes in the graph. """Returns the PageRank of the nodes in the graph. NetworkX implementation.
PageRank computes a ranking of the nodes in the graph G based on PageRank computes a ranking of the nodes in the graph G based on
the structure of the incoming links. It was originally designed as the structure of the incoming links. It was originally designed as
@ -306,7 +317,7 @@ def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", danglin
def pagerank(G, alpha=0.85, personalization=None, max_iter=10000, tol=1.0e-9, nstart=None, weight="weight", dangling=None,): def pagerank(G, alpha=0.85, personalization=None, max_iter=10000, tol=1.0e-9, nstart=None, weight="weight", dangling=None,):
""" """
Returns the PageRank of the nodes in the graph. Returns the PageRank of the nodes in the graph. Slightly modified NetworkX implementation.
PageRank computes a ranking of the nodes in the graph G based on PageRank computes a ranking of the nodes in the graph G based on
the structure of the incoming links. It was originally designed as the structure of the incoming links. It was originally designed as
@ -415,15 +426,16 @@ def pagerank(G, alpha=0.85, personalization=None, max_iter=10000, tol=1.0e-9, ns
if err < N * tol: # if the error is small enough, stop iterating if err < N * tol: # if the error is small enough, stop iterating
return dict(zip(nodelist, map(float, x))), iter, tol # return the current vector of PageRank values' return dict(zip(nodelist, map(float, x))), iter, tol # return the current vector of PageRank values'
# otherwise, return a Null dictionary, the number of iterations, and the tolerance # this is a failure to converge
# this is a failure to converge
raise nx.PowerIterationFailedConvergence(max_iter)
return {}, iter, tol
def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=1.0e-9): def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=1.0e-9):
""" """
Compute the PageRank of each node in the graph G. Compute the PageRank of each node in the graph G. Algorithm 1 in the paper [1].
Parameters Parameters
---------- ----------
@ -441,8 +453,8 @@ def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=
Returns Returns
------- -------
pagerank : dictionary pagerank : sparse matrix
Dictionary of nodes with PageRank as value Each column of the sparse matrix is a pagerank vector for a different alpha value.
mv : integer mv : integer
The number of matrix-vector multiplications used in the power method The number of matrix-vector multiplications used in the power method
@ -451,6 +463,15 @@ def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=
----- -----
The eigenvector calculation uses power iteration with a SciPy sparse matrix representation. The shifted power method is described as Algorithm 1 in the paper located in the sources folders. The eigenvector calculation uses power iteration with a SciPy sparse matrix representation. The shifted power method is described as Algorithm 1 in the paper located in the sources folders.
Raises
------
PowerIterationFailedConvergence
If the algorithm fails to converge to the specified tolerance
References
----------
[1] Zhao-Li Shen, Meng Su, Bruno Carpentieri, and Chun Wen. Shifted power-gmres method accelerated by extrapolation for solving pagerank with multiple damping factors. Applied Mathematics and Computation, 420:126799, 2022
""" """
N = len(G) N = len(G)

Loading…
Cancel
Save