fixed the documentation

main
Luca Lombardo 2 years ago
parent 6024b9eb75
commit 560bf58612

@ -10,8 +10,6 @@ import numpy as np
import pandas as pd
import networkx as nx
from os.path import exists
from scipy.sparse import *
import plotly.graph_objs as go
from typing import Literal
def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph:
@ -19,15 +17,28 @@ def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph
Parameters
----------
dataset : Literal["Stanford", "BerkStan"]
dataset : Literal["Stanford", "BerkStan", "NotreDame"]
The dataset to load.
Returns
-------
nx.Graph
The graph of the dataset.
data/web-Stanford.txt
The graph of the dataset loaded.
Raises
------
ValueError
If the dataset is not valid.
Notes
-----
The datasets are downloaded from the following link:
http://snap.stanford.edu/data/web-NotreDame.html
http://snap.stanford.edu/data/web-Stanford.html
http://snap.stanford.edu/data/web-BerkStan.html
If the dataset is already downloaded, it is not downloaded again.
"""
# check if there is a data folder
@ -61,8 +72,7 @@ def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph
def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None) -> np.matrix:
"""Returns the Google matrix of the graph.
"""Returns the Google matrix of the graph. NetworkX implementation.
Parameters
----------
@ -103,6 +113,8 @@ def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="we
Notes
-----
DO NOT USE THIS FUNCTION FOR LARGE GRAPHS. It's memory intensive.
The matrix returned represents the transition matrix that describes the
Markov chain used in PageRank. For PageRank to converge to a unique
solution (i.e., a unique stationary distribution in a Markov chain), the
@ -147,8 +159,7 @@ def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="we
def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None) -> np.matrix:
"""Returns the Google matrix of the graph.
""" Revised NetworkX implementation for sparse matrices. Returns the Ptilde matrix of the graph instead of the Google matrix.
Parameters
----------
@ -189,12 +200,12 @@ def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, wei
Notes
-----
The matrix returned represents the transition matrix that describes the
Markov chain used in PageRank. For PageRank to converge to a unique
solution (i.e., a unique stationary distribution in a Markov chain), the
transition matrix must be irreducible. In other words, it must be that
there exists a path between every pair of nodes in the graph, or else there
is the potential of "rank sinks."
This matrix i strictly speaking not the Google matrix, but the Ptilde matrix, described in the paper [1]
References
----------
[1] Zhao-Li Shen, Meng Su, Bruno Carpentieri, and Chun Wen. Shifted power-gmres method accelerated by extrapolation for solving pagerank with multiple damping factors. Applied Mathematics and Computation, 420:126799, 2022
"""
if nodelist is None:
@ -243,7 +254,7 @@ def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, wei
return A, p
def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", dangling=None):
"""Returns the PageRank of the nodes in the graph.
"""Returns the PageRank of the nodes in the graph. NetworkX implementation.
PageRank computes a ranking of the nodes in the graph G based on
the structure of the incoming links. It was originally designed as
@ -306,7 +317,7 @@ def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", danglin
def pagerank(G, alpha=0.85, personalization=None, max_iter=10000, tol=1.0e-9, nstart=None, weight="weight", dangling=None,):
"""
Returns the PageRank of the nodes in the graph.
Returns the PageRank of the nodes in the graph. Slighly modified NetworkX implementation.
PageRank computes a ranking of the nodes in the graph G based on
the structure of the incoming links. It was originally designed as
@ -415,15 +426,16 @@ def pagerank(G, alpha=0.85, personalization=None, max_iter=10000, tol=1.0e-9, ns
if err < N * tol: # if the error is small enough, stop iterating
return dict(zip(nodelist, map(float, x))), iter, tol # return the current vector of PageRank values'
# other wise, return a Null dictionary, the number of iterations, and the tolerance
# this is a failure to convergeS
# this is a failure to converges
raise nx.PowerIterationFailedConvergence(max_iter)
return {}, iter, tol
def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=1.0e-9):
"""
Compute the PageRank of each node in the graph G.
Compute the PageRank of each node in the graph G. Algorithm 1 in the paper [1].
Parameters
----------
@ -441,8 +453,8 @@ def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=
Returns
-------
pagerank : dictionary
Dictionary of nodes with PageRank as value
pagerank : sparse matrix
Each column of the sparse matrix is a pagerank vector for a different alpha value.
mv : integer
The number of matrix-vector multiplications used in the power method
@ -451,6 +463,15 @@ def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=
-----
The eigenvector calculation uses power iteration with a SciPy sparse matrix representation. The shifted power method is described as Algorithm 1 in the paper located in the sources folders.
Raises
------
PowerIterationFailedConvergence
If the algorithm fails to converge to the specified tolerance
References
----------
[1] Zhao-Li Shen, Meng Su, Bruno Carpentieri, and Chun Wen. Shifted power-gmres method accelerated by extrapolation for solving pagerank with multiple damping factors. Applied Mathematics and Computation, 420:126799, 2022
"""
N = len(G)

Loading…
Cancel
Save