diff --git a/.gitignore b/.gitignore index 46be8fe..c92ca66 100644 --- a/.gitignore +++ b/.gitignore @@ -276,5 +276,4 @@ TSWLatexianTemp* *.lpz data/ - __pycache__/ diff --git a/README.md b/README.md index c170f85..a8215f9 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ This repository contains the code of my attempt to replicate the results obtaine pip install -r requirements.txt ``` -At the moment, the standard and shifted power method to compute the PageRank with multiple damping factors are fully implemented (as described in `[1]`). To run the program, go into the folder `src/` and execute the `./main.py` file. It takes as input two arguments: +At the moment, the standard and shifted power method to compute the PageRank with multiple damping factors are fully implemented (as described in `[1]`). To run the program we need to execute the `main.py` file. It takes as input two arguments: - `--dataset`: the options are `BerkStan` and `Stanford`. This commands selects the web-graph to run the algorithms on. - `--algo`: the options are `power`, `shifted`, `both`. If you choose the last option, it will first run the standard power method and then the shifted one. @@ -16,7 +16,6 @@ At the moment, the standard and shifted power method to compute the PageRank wit Here an example of what's described above. ```bash -cd src sudo chmod +x main.py ``` diff --git a/src/algo.py b/algo.py similarity index 97% rename from src/algo.py rename to algo.py index cf05e51..1add09e 100644 --- a/src/algo.py +++ b/algo.py @@ -42,29 +42,28 @@ def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph """ # check if there is a data folder - if not exists(os.path.join(os.getcwd(), "data")): - os.mkdir(os.path.join(os.getcwd(), "data")) - + if not exists(os.path.join("data")): + os.mkdir(os.path.join("data")) if dataset not in ["Stanford", "NotreDame", "BerkStan"]: raise ValueError("Invalid dataset. 
Please choose a valid dataset.") # Download the dataset - if not exists(f"../data/Web-{dataset}.txt.gz"): + if not exists(f"data/Web-{dataset}.txt.gz"): print(f"\nDownloading the dataset {dataset}...") wget.download(f"http://snap.stanford.edu/data/web-{dataset}.txt.gz", out=f"data/Web-{dataset}.txt.gz") else: print(f"\nThe dataset {dataset} is already downloaded.") # unzip the dataset - if not exists(f"../data/Web-{dataset}.txt"): + if not exists(f"data/Web-{dataset}.txt"): print(f"\nUnzipping the dataset {dataset}...") - with gzip.open(f"../data/Web-{dataset}.txt.gz", "rb") as f_in: - with open(f"../data/Web-{dataset}.txt", "wb") as f_out: + with gzip.open(f"data/Web-{dataset}.txt.gz", "rb") as f_in: + with open(f"data/Web-{dataset}.txt", "wb") as f_out: f_out.write(f_in.read()) # create the graph print(f"\nCreating the graph of the dataset {dataset}...\n") - G_dataset = nx.read_edgelist(f"../data/Web-{dataset}.txt", create_using=nx.DiGraph(), nodetype=int) + G_dataset = nx.read_edgelist(f"data/Web-{dataset}.txt", create_using=nx.DiGraph(), nodetype=int) print(f"\tNumber of nodes: {G_dataset.number_of_nodes()}") print(f"\tNumber of edges: {G_dataset.number_of_edges()}") @@ -430,8 +429,6 @@ def pagerank(G, alpha=0.85, personalization=None, max_iter=10000, tol=1.0e-9, ns raise nx.PowerIterationFailedConvergence(max_iter) - - def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=1.0e-9): """ diff --git a/src/main.py b/main.py old mode 100644 new mode 100755 similarity index 97% rename from src/main.py rename to main.py index 95e539f..f03d704 --- a/src/main.py +++ b/main.py @@ -41,7 +41,7 @@ def run_standard_pagerank(G, alphas): def run_shifted_powe(G, alphas): - print("\nStarting the shifted pagerank algorithm...\n") + print("\nStarting the SHIFTED PAGERANK ALGORITHM...\n") start2 = time.time() x, mv, alphas, tol = shifted_pow_pagerank(G, alphas, tol=1e-6) diff --git a/tex/main.pdf b/tex/main.pdf index a06565d..7f6e9ac 100644 Binary files 
a/tex/main.pdf and b/tex/main.pdf differ diff --git a/tex/shift_GMRES.tex b/tex/shift_GMRES.tex index 800805d..c43f2a1 100644 --- a/tex/shift_GMRES.tex +++ b/tex/shift_GMRES.tex @@ -35,6 +35,8 @@ The Restarted GMRES method (hereafter referred to as GMRES in short) is a non-sy \noindent Where $A \in \R^{n\times n}$ and $v_0 \in \R ^{n \times 1}$ is the initial vector. After $m$ iterations, the Arnoldi procedure produces the orthogonal basis $V_m = [v_1, \dots, v_m]$ and the upper Hessenberg matrix $H_m \in \R^{m\times m}$, and the residual vector $v_{m+1} \in \R^{n \times 1}$ and the residual norm $h_{m+1,m} \in \R$. Starting from $v_0 = b - Ax_0$ with an initial guess $x_0$, after running $m$ steps of the algorithm \ref{alg:arnoldi}, the \texttt{GMRES} method produces the approximate solution $\tilde x$ of the linear system $Ax = b$ that minimizes the residual norm $\lVert b - Ax \rVert$ in the Krylov subspace of dimension $m$. \vspace*{0.4cm} +\paragraph{Implementation:} An implementation of this method is provided in the GitHub repository of this project. + \noindent We know that the accuracy of the approximate solution $\tilde x$ of \texttt{GMRES} depends heavily on the dimension $m$ of the search space. The authors in \cite{SHEN2022126799} propose to use the \texttt{GMRES} method as a preconditioner for the shifted power method presented in the previous section. The core idea of the method is to run standard GMRES on a seed system and to approximate the other solutions as by products. The theoretical basis is the shift-invariance property of the Krylov subspace that enables us to use only one Krylov subspace for all the shifted systems, provided that the residual vectors are collinear to one other. The algorithm proposed by the authors is presented in Algorithm \ref{alg:shifted_GMRES}. \begin{algorithm}[H]