now algo1 works perfectly

main
Luca Lombardo 2 years ago
parent 563a09b23c
commit 55d9631ee4

@ -52,6 +52,8 @@ def load_data(dataset: Literal["Stanford", "BerkStan"]) -> nx.Graph:
# create the graph
print(f"\nCreating the graph of the dataset {dataset}...\n")
G_dataset = nx.read_edgelist(f"data/Web-{dataset}.txt", create_using=nx.DiGraph(), nodetype=int)
print(f"\tNumber of nodes: {G_dataset.number_of_nodes()}")
print(f"\tNumber of edges: {G_dataset.number_of_edges()}")
return G_dataset
@ -202,7 +204,7 @@ def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", danglin
norm = largest.sum()
return dict(zip(G, map(float, largest / norm)))
def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-9, nstart=None, weight="weight", dangling=None,):
def pagerank(G, alpha=0.85, personalization=None, max_iter=200, tol=1.0e-9, nstart=None, weight="weight", dangling=None,):
"""
Returns the PageRank of the nodes in the graph.
@ -310,15 +312,52 @@ def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-9, nsta
# check convergence, l1 norm
err = np.absolute(x - xlast).sum() # err is the error between the current and previous vectors of PageRank values
if err < N * tol: # if the error is small enough, stop iterating
return dict(zip(nodelist, map(float, x))), iter, alpha, tol # return the current vector of PageRank values
raise nx.PowerIterationFailedConvergence(max_iter) # if the error is not small enough, raise an error
return dict(zip(nodelist, map(float, x))), iter, tol # return the current vector of PageRank values'
# otherwise, return an empty dictionary, the number of iterations, and the tolerance
# this signals a failure to converge
return {}, iter, tol
def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=200, tol=1.0e-9):
"""
Compute the PageRank of each node in the graph G.
Parameters
----------
G : graph
A NetworkX graph. Undirected graphs will be converted to a directed graph.
def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=100, tol=1.0e-9):
alphas : list, optional
A list of alpha values to use in the shifted power method. The default is [0.85, 0.9, 0.95, 0.99].
max_iter : integer, optional
Maximum number of iterations in power method eigenvalue solver.
tol : float, optional
Error tolerance used to check convergence in power method solver.
Returns
-------
pagerank : dictionary
Dictionary of nodes with PageRank as value
mv : integer
The number of matrix-vector multiplications used in the power method
Notes
-----
The eigenvector calculation uses power iteration with a SciPy sparse matrix representation. The shifted power method is described as Algorithm 1 in the paper located in the sources folder.
"""
N = len(G)
if N == 0:
return {}
# initialize a random sparse matrix of dimension N x len(alphas). The cols of this matrix are the page rank vectors for each alpha.
x = sp.sparse.random(N, len(alphas), density=0.01, format="lil", dtype=float)
nodelist = list(G)
A = nx.to_scipy_sparse_array(G, nodelist=nodelist, dtype=float)
@ -327,34 +366,30 @@ def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=100, tol=1.
Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape)) # Q is the matrix of edge weights going into each node
A = Q
x = np.repeat(1.0 / N, N) # x is the vector of PageRank values
v = np.repeat(1.0 / N, N) # p is the personalization vector
mu = A @ v - v # mu is the vector of PageRank values for the random walk with restart
mu = A @ v - v
for i in range(len(alphas)):
# create a vector r of len(alphas) where r[i] = alpha[i] * mu
r = alphas[i] * mu
Res = np.linalg.norm(r, 2)
r = alphas[i] * mu # residual vector
Res = np.linalg.norm(r, 2) # residual norm
if Res >= tol:
x = r + v # update x
x[:, [i]] = r + v # update the i-th column of x
iter = 1
mv = 0 # number of matrix-vector multiplications
for _ in range(max_iter):
xlast = x
iter += 1
mu = A @ x - x
mv += 1
mu = A @ mu
for i in range(len(alphas)):
r = alphas[i]**(iter+1) * mu
Res = np.linalg.norm(r, 2)
if Res >= tol:
x = r + x
r = pow(alphas[i], mv+1) * mu
Res = np.linalg.norm(r,2)
err = np.absolute(x - xlast).sum() # err is the error between the current and previous vectors of PageRank values
if Res >= tol:
x[:, [i]] = r + v
err = np.absolute(r).max()
if err < tol:
return dict(zip(nodelist, map(float, x))), iter, alphas, tol
return x, mv, alphas, tol
raise nx.PowerIterationFailedConvergence(max_iter) # if the error is not small enough, raise an error

@ -19,21 +19,40 @@ if __name__ == "__main__":
dataset_number = int(input("Choose the dataset to work with. The options are:\n\t [1] Web-Stanford\n\t [2] Web-BerkStan\nType your number of choice: "))
G = choice(dataset_number)
alphas = [0.85, 0.86, 0.87, 0.88, 0.89, 0.90, 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99]
### STANDARD PAGERANK ALGORITHM ###
iter_dict = dict.fromkeys(alphas, 0)
list_of_pageranks = [] # list of pageranks dict for each alpha
start1 = time.time()
prank, iterations, alpha, tol = pagerank(G)
for alpha in alphas:
x, iter, tol = pagerank(G, alpha, tol=1e-9)
iter_dict[alpha] = iter
list_of_pageranks.append(x)
end1 = time.time()
print("STANDARD PAGERANK ALGORITHM\n")
total_iter = sum(iter_dict.values())
print("\nSTANDARD PAGERANK ALGORITHM\n")
print("\tCPU time (s):", round(end1 - start1,1))
print("\tIterations:", iterations)
print("\tAlpha:", alpha)
print("\tMatrix-vector multiplications:", total_iter)
print("\tAlpha:", alphas)
print("\tTolerance:", tol)
print()
# if any entry in the list of pageranks is an empty dict, print the corresponding alpha and report that the algorithm did not converge for it
for i in range(len(list_of_pageranks)):
if not list_of_pageranks[i]:
print("The algorithm did not converge for alpha =", alphas[i])
### SHIFTED PAGERANK ALGORITHM ###
start2 = time.time()
shifted_pagerank, iterations, alphas, tol = shifted_pow_pagerank(G)
x, mv, alphas, tol = shifted_pow_pagerank(G, alphas, tol=1e-9)
end2 = time.time()
print("\nSHIFTED PAGERANK ALGORITHM\n")
print("\tCPU time (s):", round(end2 - start2,1))
print("\tIterations:", iterations)
print("\tMatrix-vector multiplications:", mv)
print("\tAlphas:", alphas)
print("\tTolerance:", tol)

Loading…
Cancel
Save