Grammar review of the report

main
Luca Lombardo 2 years ago
parent 0e1a607dfd
commit e3ae0ce14a

@ -1,6 +1,3 @@
#!/usr/bin/env python3
# Importing the libraries
import os
import wget
import gzip
@ -147,6 +144,8 @@ def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="we
# Convert the dangling dictionary into an array in nodelist order
dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
dangling_weights /= dangling_weights.sum()
# Assign dangling weights to any dangling nodes (nodes with no out links)
dangling_nodes = np.where(A.sum(axis=1) == 0)[0]
# Assign dangling_weights to any dangling nodes (nodes with no out links)
@ -235,12 +234,7 @@ def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, wei
dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
dangling_weights /= dangling_weights.sum()
# Assign dangling_weights to any dangling nodes (nodes with no out links).
# for i in range(N):
# if A[[i],:].sum(axis=1) == 0:
# A[[i],:] = dangling_weights
# replace rows with all zeros with dangling_weights
A[[A.sum(axis=1)==0],:] = dangling_weights
# Normalize rows
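# A minimal dense-NumPy illustration of the dangling-row fix above
# (hypothetical 3-node example; the function itself operates on the
# full sparse matrix):
import numpy as np
B = np.array([[0., 1., 1.],
              [1., 0., 0.],
              [0., 0., 0.]])                 # node 2 has no out-links
weights = np.full(3, 1.0 / 3)                # uniform teleport distribution
B[B.sum(axis=1) == 0, :] = weights           # overwrite every all-zero row
B = B / B.sum(axis=1, keepdims=True)         # row-normalize: B is now stochastic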
@ -426,7 +420,6 @@ def pagerank(G, alpha=0.85, personalization=None, max_iter=10000, tol=1.0e-9, ns
return dict(zip(nodelist, map(float, x))), iter, tol # return the current vector of PageRank values
# this is a failure to converge
raise nx.PowerIterationFailedConvergence(max_iter)
def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=1.0e-9):

@ -1,6 +1,9 @@
\section{Introduction}
% The PageRank model was proposed by Google in a series of papers to evaluate accurately the most important web-pages from the World Wide Web matching a set of keywords entered by a user. For search engine rankings, the importance of web-pages is computed from the stationary probability vector of the random process of a web surfer who keeps visiting a large set of web-pages connected by hyperlinks. The link structure of the World Wide Web is represented by a directed graph, the so-called web link graph, and its corresponding adjacency matrix $G \in \N^{n \times n}$ where $n$ denotes the number of pages and $G_{ij}$ is nonzero (being 1) only if the \emph{jth} page has a hyperlink pointing to the \emph{ith} page. The transition probability matrix $P \in \R^{n \times n}$ of the random process has entries as described in \ref{eq:transition}.
% write the paragraph above, with different words
The PageRank algorithm is a method developed by Google to determine the relevance of web pages to specific keywords. It ranks search results according to the importance of each page, measured as the probability that a web surfer will visit it. The links between web pages are represented as a directed graph, in which each page is a vertex and each hyperlink an edge. The importance of a page is then determined by the number of links pointing to it and by the importance of the pages that link to it: a page is more likely to be important if other important pages link to it. Mathematically, the model is described by a transition probability matrix $P \in \R^{n \times n}$, computed from the adjacency matrix $G \in \N^{n \times n}$ as in equation \ref{eq:transition}, where $n$ is the number of pages and $G_{ij}$ is nonzero (equal to 1) only if the \emph{j-th} page has a hyperlink pointing to the \emph{i-th} page.
\begin{equation}\label{eq:transition}
P(i,j) =
@ -10,7 +13,7 @@ The PageRank model was proposed by Google in a series of papers to evaluate accu
\end{cases}
\end{equation}
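As a concrete illustration, the entries of $P$ in \ref{eq:transition} can be computed from $G$ with a few lines of NumPy (a dense sketch with a hypothetical helper name; dangling columns, i.e. pages with no out-links, are left as zeros here):

import numpy as np

def transition_matrix(G):
    # G[i, j] = 1 iff page j links to page i, so P[i, j] = 1 / outdeg(j)
    out_deg = G.sum(axis=0).astype(float)   # out-degree of each page j
    P = np.zeros(G.shape)
    cols = out_deg > 0                      # skip dangling pages
    P[:, cols] = G[:, cols] / out_deg[cols]
    return P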
\noindent The random process must admit a unique stationary distribution. To ensure this property is satisfied, the transition matrix $P$ is usually modified to be an irreducible stochastic matrix $A$ (called the Google matrix) as follows:
% \noindent To ensure that the random process has a unique stationary distribution and it will not stagnate, the transition matrix P is usually modified to be an irreducible stochastic matrix $A$ (called the Google matrix) as follows
@ -22,28 +25,33 @@ The PageRank model was proposed by Google in a series of papers to evaluate accu
\begin{equation}\label{eq:pr}
Ax = x, \quad \lVert x \rVert = 1, \quad x > 0
\end{equation}
or, equivalently, as the problem of finding the solution of the linear system
\begin{equation}\label{eq:pr2}
(I - \alpha \tilde P)x = (1 - \alpha)v
\end{equation}
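For moderate $n$, equation \ref{eq:pr2} can be solved directly; a minimal SciPy sketch (hypothetical helper, assuming $\tilde P$ is given as a SciPy sparse matrix):

import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import spsolve

def pagerank_direct(P_tilde, alpha=0.85):
    # Solve (I - alpha * P_tilde) x = (1 - alpha) v with a direct solver
    n = P_tilde.shape[0]
    v = np.full(n, 1.0 / n)                   # uniform personalization vector
    x = spsolve((sp.eye(n) - alpha * P_tilde).tocsc(), (1 - alpha) * v)
    return x / x.sum()                        # rescale so that sum(x) = 1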
% \noindent The authors of the paper \cite{SHEN2022126799} emphasize how in the past decade or so, considerable research attention has been devoted to the efficient solution of problems \ref{eq:pr} \ref{eq:pr2}, especially when $n$ is very large. For moderate values of the damping factor, e.g. for $\alpha = 0.85$ as initially suggested by Google for search engine rankings, solution strategies based on the simple Power method have proved to be very effective. However, when $\alpha$ approaches 1, as is required in some applications, the convergence rates of classical stationary iterative methods including the Power method tend to deteriorate sharply, and more robust algorithms need to be used. \vspace*{0.4cm}
\noindent In recent years, there has been a lot of interest in finding efficient ways to solve problems \ref{eq:pr} and \ref{eq:pr2}, especially when the number of variables (denoted by $n$) is very large. For moderate values of the damping factor (e.g., $\alpha = 0.85$, as suggested by Google for search engine rankings), the Power method has proven to be a reliable solution. However, when the damping factor gets closer to 1, which is necessary in some cases, traditional iterative methods like the Power method may not work as well and more robust algorithms may be required. This point is emphasized in the paper \cite{SHEN2022126799}. \vspace*{0.4cm}
\noindent In the reference paper for this project, the authors focus on PageRank computations with the same network structure but multiple damping factors. For example, in the Random Alpha PageRank model used in the design of anti-spam mechanisms \cite{Constantine2009Random}, the rankings corresponding to many different damping factors close to 1 need to be computed simultaneously. They explain that the problem can be expressed mathematically as solving a sequence of linear systems
\begin{equation}\label{eq:pr3}
(I - \alpha_i \tilde P)x_i = (1 - \alpha_i)v \quad \alpha_i \in (0, 1) \quad \forall i \in \{1, 2, \dots, s\}
\end{equation}
% As we know, standard PageRank algorithms applied to \ref{eq:pr3} would solve the $s$ linear systems independently. Although these solutions can be performed in parallel, the process would still demand large computational resources for high dimension problems.
% This consideration motived the authors to search novel methods with reduced algorithmic and memory complexity, to afford the solution of larger problems on moderate computing resources. They suggest to write the PageRank problem with multiple damping factors given at once \ref{eq:pr3} as a sequence of shifted linear systems of the form:
Traditionally, PageRank algorithms applied to problem \ref{eq:pr3} solve the $s$ linear systems independently. While this process can be parallelized to some extent, it remains computationally intensive for high-dimensional problems. In an effort to find methods with lower algorithmic and memory complexity, the authors searched for alternative approaches that would allow larger problems to be solved with moderate computing resources. They proposed rewriting the PageRank problem with multiple damping factors in \ref{eq:pr3} as the sequence of shifted linear systems below, obtained by dividing the $i$-th system by its damping factor $\alpha_i$; all systems then share the same matrix $\tilde P$ up to a diagonal shift, which reduces the computational demands of the problem.
\begin{equation}
\Big(\frac{1}{\alpha_i}I - \tilde P \Big)x^{(i)} = \frac{1 - \alpha_i}{\alpha_i}v \quad \forall i \in \{1, 2, \dots, s\} \quad 0 < \alpha_i < 1
\end{equation}
% We know from literature that the Shifted Krylov methods may still suffer from slow convergence when the damping factor approaches 1, requiring larger search spaces to converge with satisfactory speed. In \cite{SHEN2022126799} is suggest that, to overcome this problem, we can combine stationary iterative methods and shifted Krylov subspace methods. They derive an implementation of the Power method that solves the PageRank problem with multiple dumpling factors at almost the same computational time of the standard Power method for solving one single system. They also demonstrate that this shifted Power method generates collinear residual vectors. Based on this result, they use the shifted Power iterations to provide smooth initial solutions for running shifted Krylov subspace methods such as \texttt{GMRES}. Besides, they discuss how to apply seed system choosing strategy and extrapolation techniques to further speed up the iterative process.
It has been previously noted in the literature that the Shifted Krylov methods may have slow convergence when the damping factor gets close to 1, requiring a larger search space to achieve satisfactory speed. In order to address this issue, the authors of \cite{SHEN2022126799} suggest combining stationary iterative methods with shifted Krylov subspace methods. They present an implementation of the Power method that can solve the PageRank problem with multiple damping factors in approximately the same amount of time as the standard Power method for solving a single system. They also show that this shifted Power method generates collinear residual vectors. Based on this result, they use the shifted Power iterations to provide smooth initial solutions for running shifted Krylov subspace methods such as \texttt{GMRES}. In addition, they discuss how techniques such as seed system choosing and extrapolation can be used to further accelerate the iterative process.
% As an attempt of a possible remedy in this situation, we present a framework that combines. shifted stationary iterative methods and shifted Krylov subspace methods. In detail, we derive the implementation of the Power method that solves the PageRank problem with multiple damping factors at almost the same computational cost of the standard Power method for solving one single system. Furthermore, we demonstrate that this shifted Power method generates collinear residual vectors. Based on this result, we use the shifted Power iterations to provide smooth initial solutions for running shifted Krylov subspace methods such as GMRES. Besides, we discuss how to apply seed system choosing strategy and extrapolation techniques to further speed up the iterative process.
\subsection{Overview of the classical PageRank problem}
% The Power method is considered one of the algorithms of choice for solving either the eigenvalue \ref{eq:pr} or the linear system \ref{eq:pr2} formulation of the PageRank problem, as it was originally used by Google. Power iterations write as
The Power method is a popular algorithm for solving either the eigenvalue problem in equation \ref{eq:pr} or the linear system in equation \ref{eq:pr2}, the formulations originally used by Google to compute PageRank. It works by iteratively applying the matrix $A$ to an initial estimate of the solution, using the following formula:
\begin{equation}\label{eq:power}
x_{k+1} = Ax_k = \alpha \tilde P x_k + (1 - \alpha)v
\end{equation}
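A minimal NumPy sketch of these iterations (hypothetical helper with a uniform personalization vector; the repository code adds weighting and dangling-node handling):

import numpy as np

def power_iteration(P_tilde, alpha=0.85, tol=1e-9, max_iter=10000):
    # x_{k+1} = alpha * P_tilde @ x_k + (1 - alpha) * v
    n = P_tilde.shape[0]
    v = np.full(n, 1.0 / n)
    x = v.copy()
    for _ in range(max_iter):
        x_next = alpha * (P_tilde @ x) + (1 - alpha) * v
        if np.abs(x_next - x).sum() < tol:    # l1 change between iterates
            return x_next
        x = x_next
    raise RuntimeError("power iteration did not converge")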

Binary file not shown.

@ -49,7 +49,7 @@
\maketitle
\begin{abstract}
\noindent In the years following its publication in 1998, the PageRank model has been deeply studied and extended to different forms and applications. The aim of this project is the implementation of a modified version of the Power method to solve the PageRank problem with multiple damping factors, as proposed in \cite{SHEN2022126799}. Finally, an algorithm to solve the PageRank problem with multiple damping factors using the shifted Power-GMRES method is proposed. Since this last algorithm has not been fully implemented yet, its numerical results are not presented.
\end{abstract}
\tableofcontents

@ -1,7 +1,8 @@
\section{Numerical experiments}\label{sec:exp}
% In this experiment, we test the performance of the shifted Power method against the conventional Power method for solving PageRank problems with multiple damping factors, namely $\{ \alpha_1 = 0.85, ~\alpha_2 = 0.86, ~...~ ,~ \alpha_{15} = 0.99 \}$ on the \texttt{web-stanford} and \texttt{web-BerkStan} datasets. The \texttt{web-stanford} dataset is a directed graph with $|V| = 281,903$ nodes and $|E| = 1,810,314$ edges, and the \texttt{web-BerkStan} dataset is a directed graph with $|V| = 1,013,320$ nodes and $|E| = 5,308,054$ edges. The datasets are available at \url{http://snap.stanford.edu/data/web-Stanford.html} and \url{http://snap.stanford.edu/data/web-BerkStan.html} respectively. The datasets are stored in the \texttt{.txt} edge-list format. The characteristics of the datasets are summarized in Table \ref{tab:datasets}.
This experiment aims to compare the performance of the shifted Power method to the traditional Power method in solving PageRank problems involving multiple damping factors, specifically $\{ \alpha_1 = 0.85, \alpha_2 = 0.86, \dots, \alpha_{15} = 0.99 \}$, on the \texttt{web-stanford} and \texttt{web-BerkStan} datasets. The \texttt{web-stanford} dataset consists of a directed graph with $|V| = 281,903$ nodes and $|E| = 1,810,314$ edges, while the \texttt{web-BerkStan} dataset is a directed graph with $|V| = 1,013,320$ nodes and $|E| = 5,308,054$ edges. These datasets can be found at \url{http://snap.stanford.edu/data/web-Stanford.html} and \url{http://snap.stanford.edu/data/web-BerkStan.html} respectively, and are stored in the \texttt{.txt} edge-list format. A summary of the characteristics of the datasets is provided in Table \ref{tab:datasets}.
% create a table with cols: Name, Number of Nodes, Number of edges, Density, Average Number of zeros (per row)
\begin{table}[h]
@ -16,7 +17,7 @@ In this experiment, we test the performance of the shifted Power method against
\label{tab:datasets}
\end{table}
\noindent In this study, the personalization vector $v$ was set to $v = [1, 1, \dots, 1]^T/n$. All experiments were conducted using Python 3.10 on a 64-bit Arch Linux machine equipped with an AMD Ryzen™ 5 2600 processor and 16 GB of RAM.
\subsection{Technical details}
@ -25,11 +26,11 @@ In this experiment, we test the performance of the shifted Power method against
\url{https://github.com/lukefleed/ShfitedPowGMRES}
\end{problem}
\noindent In the GitHub repository for this project, the \texttt{algo.py} file contains the implementations of all the functions used in the experiments, including the following:
\paragraph{load\_data} This function loads datasets from the \texttt{.txt} edge-list format and returns a networkx graph object. It takes a string as input, with the options being \texttt{web-stanford} and \texttt{web-BerkStan}.
\paragraph{pagerank} Returns the PageRank of the nodes in the graph. It takes as input the following parameters:
\begin{itemize}
\item \texttt{G:} a networkx graph object.
\item \texttt{alpha:} Damping parameter for PageRank, default=$0.85$.
@ -42,21 +43,19 @@ In this experiment, we test the performance of the shifted Power method against
\end{itemize}
This function is closely based on the \texttt{pagerank\_scipy} function of the networkx library.
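A hypothetical usage sketch, assuming the interfaces described above:

from algo import load_data, pagerank

G = load_data("web-stanford")                # networkx graph from the edge list
ranks, iters, res = pagerank(G, alpha=0.85)  # dict of PageRank values, plus
                                             # iteration count and residual
top10 = sorted(ranks, key=ranks.get, reverse=True)[:10]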
\paragraph{shifted\_pow\_pagerank} This is the implementation of algorithm \ref{alg:algo1}, with the modification of using the $l_1$ norm instead of the $l_2$ norm, which is not yet implemented for sparse matrices in SciPy. \vspace{0.5cm}
\noindent There is also another function called \texttt{pagerank\_numpy}, which uses NumPy's interface to the \texttt{LAPACK} eigenvalue solvers to compute the eigenvector. This method is the fastest and most accurate for small graphs. However, the eigenvector calculation is not stable for large graphs, so the \texttt{pagerank\_numpy} function is not used in the experiments.
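The dense route used by \texttt{pagerank\_numpy} can be sketched as follows (assuming a row-stochastic Google matrix $A$, whose dominant left eigenvector is the PageRank vector; hypothetical helper name):

import numpy as np

def pagerank_dense(A):
    eigenvalues, eigenvectors = np.linalg.eig(A.T)   # left eigenvectors of A
    k = np.argmax(eigenvalues.real)                  # dominant eigenvalue (= 1)
    x = np.abs(eigenvectors[:, k].real)
    return x / x.sum()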
\subsection{Convergence results for the Shifted Power method}
In the PageRank formulation involving multiple damping factors, the iterative solution of each $i$-th linear system is initialized with the initial guess $x_0^{(i)} = v$ and is terminated when the solution $x_k^{(i)}$ meets the following criterion:
\begin{equation*}
\frac{\lVert (1 - \alpha_i)v - (I - \alpha_i \tilde P) x_k^{(i)} \rVert_2}{\lVert x_k^{(i)} \rVert_2} < 10^{-6}
\end{equation*}
or the number of matrix-vector products exceeds $200$. \vspace*{0.5cm}
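The stopping test can be coded directly from the formula above; a sketch (with dense vectors, where the $l_2$ norm is available):

import numpy as np

def relative_residual(x, P_tilde, alpha, v):
    # || (1 - alpha) v - (I - alpha * P_tilde) x ||_2 / || x ||_2
    r = (1 - alpha) * v - (x - alpha * (P_tilde @ x))
    return np.linalg.norm(r) / np.linalg.norm(x)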
\noindent In this experiment, the performance of the shifted Power method is compared to that of the traditional Power method in solving PageRank problems with multiple damping factors.
% create a table to store the results on each dataset for the two methods. We are interested in the mv and cpu time
\begin{table}[h]
@ -76,6 +75,8 @@ or the number of matrix-vector products exceeds $200$. \vspace*{0.5cm}
\label{tab:results}
\end{table}
% \noindent The results presented on table \ref{tab:results} are a bit in contrast compared to what the paper \cite{SHEN2022126799} reports. In their experiment the CPU time of the shifted power method is lower then the one of the standard power method. However, in our experiments the CPU time of the shifted power method is far higher then the one of the standard power method. Furthermore, theoretically, the number of matrix-vector products should be lower for the shifted power method, in particular it should be equal to the one of the standard PageRank algorithm with the biggest damping factor. However, in our experiments the number of matrix-vector products is higher for the shifted power method for the dataset \texttt{web-BerkStan} and lower for the dataset \texttt{web-Stanford}. \vspace*{0.5cm}
\noindent The results presented in Table \ref{tab:results} differ somewhat from those reported in the study by Shen et al. \cite{SHEN2022126799}, where the CPU time of the shifted Power method was found to be lower than that of the standard Power method. In contrast, our experiments showed that the CPU time of the shifted Power method was significantly higher than that of the standard Power method. Additionally, it is theoretically expected that the number of matrix-vector products should be lower for the shifted Power method, specifically equal to that of the standard PageRank algorithm with the highest damping factor. However, our experiments found that the number of matrix-vector products was higher for the shifted Power method on the \texttt{web-BerkStan} dataset and lower on the \texttt{web-Stanford} dataset. \vspace*{0.5cm}
% \noindent The reasons to those differences in results may be a lot. I think that the most plausible reason is the difference in programming language and implementation, combined with a possibility of misunderstanding of the pseudo-code presented in \cite{SHEN2022126799}. My standard PageRank function is a slightly modified version of the network library function \texttt{pagerank\_scipy}, so I suppose that is better optimized in comparison to the shifted power method implementation that I wrote. Also, the network \texttt{Web-BerkStan} is very different from the \texttt{web-stanford} one. The adjacency matrix relative to the first one, has a lot of rows full of zeros in comparison to the second one ($4744$ vs $172$). This might effect negatively the shifted power method for this specific cases of networks with a lot of dangling nodes. \vspace*{0.5cm}
\noindent There could be various reasons for the discrepancies in the results. One potential explanation is the difference in programming language and implementation, as well as the possibility of a misunderstanding of the pseudo-code provided in \cite{SHEN2022126799}. It is also possible that the standard PageRank function, which is a slightly modified version of the networkx library function \texttt{pagerank\_scipy}, is better optimized than the implementation of the shifted Power method written for this study. Additionally, the \texttt{web-BerkStan} network is quite different from the \texttt{web-stanford} network: its adjacency matrix contains far more all-zero rows ($4744$ vs $172$). This could have a negative impact on the performance of the shifted Power method on networks with a significant number of dangling nodes.

@ -1,11 +1,11 @@
\clearpage
\section{Shifted power-GMRES method}
This section discusses the approach used by the authors of \cite{SHEN2022126799} to combine the shifted power method with the fast shifted \texttt{GMRES} method into an efficient and reliable hybrid algorithm for solving complex PageRank problems with multiple damping factors. The details of this approach and its implementation are described in the cited paper.
\subsection{Restarted GMRES method}
The Restarted GMRES method (hereafter referred to simply as GMRES) is a Krylov subspace solver for non-symmetric linear systems, based on the Arnoldi decomposition procedure, which the authors sketch in Algorithm \ref{alg:arnoldi}.
\begin{algorithm}[H]
\caption{Arnoldi}
@ -33,11 +33,32 @@ The Restarted GMRES method (hereafter referred to as GMRES in short) is a non-sy
\end{algorithmic}
\end{algorithm}
% \noindent Where $A \in \R^{n\times n}$ and $v_0 \in \R ^{n \times 1}$ is the initial vector. After $m$ iterations, the Arnoldi procedure produces the orthogonal basis $V_m = [v_1, \dots, v_m]$ and the upper Hessenberg matrix $H_m \in \R^{m\times m}$, and the residual vector $v_{m+1} \in \R^{n \times 1}$ and the residual norm $h_{m+1,m} \in \R$. Starting from $v_0 = b - Ax_0$ with an initial guess $x_0$, after running $m$ steps of the algorithm \ref{alg:arnoldi}, the \texttt{GMRES} method produces the approximate solution $\tilde x$ of the linear system $Ax = b$ that minimizes the residual norm $\lVert b - Ax \rVert$ in the Krylov subspace of dimension $m$. \vspace*{0.4cm}
\noindent The Arnoldi procedure, which is used as the basis for the GMRES method, involves iteratively constructing an orthogonal basis $V_m = [v_1, \dots, v_m]$ and an upper Hessenberg matrix $H_m \in \R^{m\times m}$ from an initial vector $v_0 \in \R ^{n \times 1}$ and a matrix $A \in \R^{n\times n}$. After $m$ iterations, it also produces a residual vector $v_{m+1} \in \R^{n \times 1}$ and a residual norm $h_{m+1,m} \in \R$. The GMRES method then uses these to approximate the solution $\tilde x$ of the linear system $Ax = b$ that minimizes the residual norm $\lVert b - Ax \rVert$ in the Krylov subspace of dimension $m$. To do this, it starts with an initial guess $x_0$ and the residual vector $v_0 = b - Ax_0$, and runs $m$ steps of the Arnoldi procedure outlined in algorithm \ref{alg:arnoldi}. \vspace*{0.4cm}
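A compact NumPy sketch of the Arnoldi procedure (modified Gram-Schmidt variant; a hypothetical helper, not the repository code):

import numpy as np

def arnoldi(A, v0, m):
    # Returns V (orthonormal basis of the Krylov subspace K_m(A, v0))
    # and the (m+1) x m upper Hessenberg matrix H
    n = v0.size
    V = np.zeros((n, m + 1))
    H = np.zeros((m + 1, m))
    V[:, 0] = v0 / np.linalg.norm(v0)
    for j in range(m):
        w = A @ V[:, j]
        for i in range(j + 1):                # orthogonalize against V
            H[i, j] = V[:, i] @ w
            w = w - H[i, j] * V[:, i]
        H[j + 1, j] = np.linalg.norm(w)
        if H[j + 1, j] == 0:                  # breakdown: invariant subspace
            break
        V[:, j + 1] = w / H[j + 1, j]
    return V, H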
% \noindent We know that the accuracy of the approximate solution $\tilde x$ of \texttt{GMRES} depends heavily on the dimension $m$ of the search space. The authors in \cite{SHEN2022126799} propose to use the \texttt{GMRES} method as a preconditioner for the shifted power method presented in the previous section. The core idea of the method is to run standard GMRES on a seed system and to approximate the other solutions as by products. The theoretical basis is the shift-invariance property of the Krylov subspace that enables us to use only one Krylov subspace for all the shifted systems, provided that the residual vectors are collinear to one other. The algorithm proposed by the authors is presented in Algorithm \ref{alg:shifted_GMRES}.
% \noindent The accuracy of the approximate solution $\tilde x$ produced by the GMRES method depends significantly on the dimension $m$ of the search space used. In order to improve the efficiency of this method for solving the PageRank problem with multiple damping factors, the authors of \cite{SHEN2022126799} propose using a shifted-GMRES algorithm as an extension of the standard GMRES one, that can use the same Krylov subspace to solve the whole sequence of shifted linear systems simultaneously. The idea behind this approach is to run standard GMRES on a seed system and approximate the solutions for the other systems as byproducts. This is possible due to the shift-invariance property of the Krylov subspace, which allows the use of a single Krylov subspace for all the shifted systems as long as the residual vectors are collinear with one another. The authors present a specific algorithm for implementing this approach in Algorithm \ref{alg:shifted_GMRES}.
% \noindent The shifted GMRES method proposed by Frommer and Glassner is an extension of the standard GMRES algorithm that can use the same Krylov subspace to solve the whole sequence of shifted linear systems
% \begin{equation*}
% (A + \sigma_i I)x_i = b \qquad (\sigma_i \in \R, b_i \in \R^{n\times 1}, i = \dots s)
% \end{equation*}
% simultaneously. The core idea of the method is to run standard GMRES on a seed system and to approximate the other solutions as by products. The theoretical basis is the shift-invariance property of the Krylov subspace. That enables us to use only one Krylov subspace to solve the whole sequence at once provided that the initial residual vectors of the shifted linear systems are collinear one another. However, after $m$ steps of Algorithm \ref{alg:arnoldi} the residuals vectors $r_m^i ~ (i = 2, 3, \dots, s)$ of the additional systems and the residual vector $r_m^1$ of the seed system will in general lose the collinearity, and consequently the shift-invariance property of the search space will not be maintained
% \begin{equation*}
% \mathcal{K}_m(A + \sigma_i I, r_m^i) \neq \mathcal{K}_m(A + \sigma_1 I, r_m^1) \qquad i = 2, 3, \dots, s
% \end{equation*}
% This means: upon restarting the GMRES algorithm, the mutual collinearity of all the residual vectors needs to be enforced explicitly. More precisely, let us assume that the initial residual vectors are collinear, i.e. $r_0^i = \gamma_0^i r_0^1 ~~ (\gamma_0^i \in \R, ~ i = 2, 3, \dots, s)$ where $r_0^i$ is the initial residual vector corresponding
% to the $i$-th system
\noindent The modified GMRES algorithm represents an extension of the classical GMRES method, allowing the solution, within a single Krylov subspace, of a whole sequence of shifted linear systems of the form
\begin{equation*}
(A + \sigma_i I)x_i = b \qquad (\sigma_i \in \R,\; b \in \R^{n\times 1},\; i = 1, \dots, s)
\end{equation*}
The central concept behind the shifted GMRES method is the utilization of the standard GMRES algorithm on a seed system, with the solutions to the other systems being approximated as byproducts. This approach is made possible through the shift-invariance property of the Krylov subspace, which allows for the utilization of a single Krylov subspace to solve the entire sequence of systems provided that the initial residual vectors of the shifted linear systems are collinear with one another. However, after a certain number of steps of the Arnoldi algorithm, the residual vectors of the additional systems and the residual vector of the seed system will typically lose collinearity, resulting in the loss of the shift-invariance property of the search space.
\begin{equation*}
\mathcal{K}_m(A + \sigma_i I, r_m^i) \neq \mathcal{K}_m(A + \sigma_1 I, r_m^1) \qquad i = 2, 3, \dots, s
\end{equation*}
This implies that the mutual collinearity of all the residual vectors must be enforced explicitly upon restarting the GMRES algorithm. Specifically, if we assume that the initial residual vectors are collinear, meaning $r_0^i = \gamma_0^i r_0^1$ for $i = 2, 3, \dots, s$ where $r_0^i$ is the initial residual vector corresponding to the $i$-th system and $\gamma_0^i \in \R$, then the shift-invariance property of the Krylov subspace can be maintained.
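For completeness, the shift-invariance property invoked here follows from a direct expansion:
\begin{equation*}
\mathcal{K}_m(A + \sigma I, v) = \mathrm{span}\{v, (A + \sigma I)v, \dots, (A + \sigma I)^{m-1}v\} = \mathrm{span}\{v, Av, \dots, A^{m-1}v\} = \mathcal{K}_m(A, v)
\end{equation*}
since each power $(A + \sigma I)^k v$ expands into a linear combination of $v, Av, \dots, A^k v$.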
\begin{algorithm}[H]
\caption{Shifted GMRES}
@ -72,6 +93,6 @@ The Restarted GMRES method (hereafter referred to as GMRES in short) is a non-sy
\noindent Where $z = \beta e_1 - H_m^1 y_m^1$. In line 15, by solving this small system, we obtain the vector $y_m^i$ and the scalar $\gamma_m^i$ that ensure the collinearity of the shifted results.
\paragraph{Problems:} There have been significant issues with the implementation of the shifted GMRES algorithm. A key component of the algorithm is the use of the seed choosing strategy outlined in \cite{SHEN2022126799}. However, during testing, it was observed that after the second iteration, the value of $k$ remained constant and the $res$ vector did not change, resulting in a stall situation where the program runs without updating values until the maximum number of iterations is reached. The cause of this problem is currently under investigation. Although a notebook containing the code for the algorithm has been included in the GitHub repository for completeness, it is not functional at this time. It is believed that the issue may be related to a misunderstanding of the algorithm as presented in the pseudo-code, but this has not yet been determined. As a result, there will be no test results for this algorithm in the following section.

@ -1,9 +1,11 @@
\section{The shifted power method for PageRank computations}
% In this section we'll see the extensions of stationary iterative methods for the solution of PageRank problems with multiple damping factors, as presented in \cite{SHEN2022126799}. We are interested in knowing if, for each method, there exists an implementation such that the computational cost of solving the PageRank problem with multiple damping factor is comparable to that of solving the ordinary PageRank problem with single damping factor.
This section presents the adaptations of stationary iterative methods for solving PageRank problems with multiple damping factors, as described in \cite{SHEN2022126799}. The goal is to determine whether, for each method, there exists an implementation whose computational cost for the problem with multiple damping factors is comparable to that of solving the ordinary PageRank problem with a single damping factor.
\subsection{The implementation of the shifted power method}
% Inspired by the reason why shifted Krylov subspaces can save computational cost, the authors of \cite{SHEN2022126799} investigate whether there are duplications in the calculations of multiple linear systems in this problem class by the stationary iterative methods, so that the duplications in the computation can be deleted and used for all systems. It's some sort of dynamic programming approach. Firstly, they analyze the Power method applied to the sequence of linear systems in \ref{eq:pr2}. It computes at the \emph{k-th} iteration approximate solutions $x_k^{(i)} (1 \leq i \leq s)$ of the form
The authors of \cite{SHEN2022126799} were motivated by the observation that shifted Krylov subspace methods save computational cost by avoiding duplicated work across multiple linear systems. They therefore sought similar opportunities for optimization when stationary iterative methods are applied to the PageRank problem with multiple damping factors, in the spirit of dynamic programming: computations common to all systems are performed once and reused. They first analyzed the Power method applied to the sequence of linear systems in equation \ref{eq:pr2}, which computes at the $k$-th iteration approximate solutions $x_k^{(i)} ~ (1 \leq i \leq s)$ of the form:
\begin{equation}
x_k^{(i)} = \alpha_i^k \tilde P^k x_0^{(i)} + (1 - \alpha_i) \sum_{j=0}^{k-1} \alpha_i^j \tilde P^j v
\end{equation}
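This unrolled form shows where the saving comes from: the vectors $\tilde P^j v$ do not depend on $\alpha_i$, so a single matrix-vector product per iteration can serve all $s$ systems. A naive sketch of this reuse (fixed iteration count, no stopping test, hypothetical helper; with $x_0^{(i)} = v$):

import numpy as np

def shared_power_sketch(P_tilde, alphas, v, k=100):
    # u holds P_tilde^j v: one shared mat-vec per step for all alphas
    acc = np.zeros((len(alphas), v.size))    # running sums per damping factor
    u = v.copy()
    for j in range(k):
        for i, a in enumerate(alphas):
            acc[i] += a**j * u               # accumulate alpha_i^j * P^j v
        u = P_tilde @ u
    # x_k^{(i)} = alpha_i^k P^k v + (1 - alpha_i) * sum_{j<k} alpha_i^j P^j v
    return [a**k * u + (1 - a) * acc[i] for i, a in enumerate(alphas)]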
@ -64,11 +66,7 @@ Since in general each of the $s$ linear systems may require a different number o
\end{algorithmic}
\end{algorithm}
% \noindent Where $mv$ is an integer that counts the number of matrix-vector products performed by the algorithm. The algorithm stops when either all the residual norms are smaller than the tolerance $\tau$ or the maximum number of matrix-vector products is reached. An implementation of this algorithm written in Python is available in the github repository of this project.
\noindent The algorithm stops when either all the residual norms (a measure of how close the current estimate is to the true solution) are smaller than a specified tolerance $\tau$, or when the maximum number of matrix-vector products has been reached. The integer $mv$ counts the number of matrix-vector products performed by the algorithm. An implementation of this algorithm, written in Python, is available in the project's GitHub repository.
\clearpage