Small fixes to avoid path errors

2 years ago · 378fa96686
parent f31bab3543
commit 378fa96686
6 changed files with 11 additions and 14 deletions
--- a/.gitignore
+++ b/.gitignore
@ -276,5 +276,4 @@ TSWLatexianTemp*
 *.lpz
 data/
 __pycache__/
--- a/README.md
+++ b/README.md
@ -8,7 +8,7 @@ This repository contains the code of my attempt to replicate the results obtaine
 pip install -r requirements.txt
 ```
-At the moment, the standard and shifted power method to compute the PageRank with multiple damping factors are fully implemented (as described in `[1]`). To run the program, go into the folder `src/` and execute the `./main.py` file. It takes as input two arguments:
+At the moment, the standard and shifted power method to compute the PageRank with multiple damping factors are fully implemented (as described in `[1]`). To run the program we need to execute the `main.py` file. It takes as input two arguments:
 - `--dataset`: the options are `BerkStan` and `Stanford`. This command selects the web-graph to run the algorithms on. 
 - `--algo`: the options are `power`, `shifted`, `both`. If you choose the last option, it will first run the standard power method and then the shifted one. 
@ -16,7 +16,6 @@ At the moment, the standard and shifted power method to compute the PageRank wit
 Here an example of what's described above.
 ```bash
 cd src
 sudo chmod +x main.py
 ```
--- a/src/algo.py
+++ b/src/algo.py
@ -42,29 +42,28 @@ def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph
    """
    # check if there is a data folder
-    if not exists(os.path.join(os.getcwd(), "data")):
+    if not exists(os.path.join("data")):
-        os.mkdir(os.path.join(os.getcwd(), "data"))
+        os.mkdir(os.path.join("data"))
    if dataset not in ["Stanford", "NotreDame", "BerkStan"]:
        raise ValueError("Invalid dataset. Please choose a valid dataset.")
    # Download the dataset
-    if not exists(f"../data/Web-{dataset}.txt.gz"):
+    if not exists(f"data/Web-{dataset}.txt.gz"):
        print(f"\nDownloading the dataset {dataset}...")
        wget.download(f"http://snap.stanford.edu/data/web-{dataset}.txt.gz", out=f"data/Web-{dataset}.txt.gz")
    else:
        print(f"\nThe dataset {dataset} is already downloaded.")
    # unzip the dataset
-    if not exists(f"../data/Web-{dataset}.txt"):
+    if not exists(f"data/Web-{dataset}.txt"):
        print(f"\nUnzipping the dataset {dataset}...")
-        with gzip.open(f"../data/Web-{dataset}.txt.gz", "rb") as f_in:
+        with gzip.open(f"data/Web-{dataset}.txt.gz", "rb") as f_in:
-            with open(f"../data/Web-{dataset}.txt", "wb") as f_out:
+            with open(f"data/Web-{dataset}.txt", "wb") as f_out:
                f_out.write(f_in.read())
    # create the graph
    print(f"\nCreating the graph of the dataset {dataset}...\n")
-    G_dataset = nx.read_edgelist(f"../data/Web-{dataset}.txt", create_using=nx.DiGraph(), nodetype=int)
+    G_dataset = nx.read_edgelist(f"data/Web-{dataset}.txt", create_using=nx.DiGraph(), nodetype=int)
    print(f"\tNumber of nodes: {G_dataset.number_of_nodes()}")
    print(f"\tNumber of edges: {G_dataset.number_of_edges()}")
@ -430,8 +429,6 @@ def pagerank(G, alpha=0.85, personalization=None, max_iter=10000, tol=1.0e-9, ns
    raise nx.PowerIterationFailedConvergence(max_iter)
 def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=1.0e-9):
    """
--- a/src/main.py
+++ b/src/main.py
@ -41,7 +41,7 @@ def run_standard_pagerank(G, alphas):
 def run_shifted_powe(G, alphas):
-    print("\nStarting the shifted pagerank algorithm...\n")
+    print("\nStarting the SHIFTED PAGERANK ALGORITHM...\n")
    start2 = time.time()
    x, mv, alphas, tol = shifted_pow_pagerank(G, alphas, tol=1e-6)
--- a/tex/main.pdf
+++ b/tex/main.pdf
--- a/tex/shift_GMRES.tex
+++ b/tex/shift_GMRES.tex
@ -35,6 +35,8 @@ The Restarted GMRES method (hereafter referred to as GMRES in short) is a non-sy
 \noindent Where $A \in \R^{n\times n}$ and $v_0 \in \R ^{n \times 1}$ is the initial vector. After $m$ iterations, the Arnoldi procedure produces the orthogonal basis $V_m = [v_1, \dots, v_m]$, the upper Hessenberg matrix $H_m \in \R^{m\times m}$, the residual vector $v_{m+1} \in \R^{n \times 1}$, and the residual norm $h_{m+1,m} \in \R$. Starting from $v_0 = b - Ax_0$ with an initial guess $x_0$, after running $m$ steps of Algorithm \ref{alg:arnoldi}, the \texttt{GMRES} method produces the approximate solution $\tilde x$ of the linear system $Ax = b$ that minimizes the residual norm $\lVert b - Ax \rVert$ in the Krylov subspace of dimension $m$. \vspace*{0.4cm}
 \paragraph{Implementation:} On the github res
 \noindent We know that the accuracy of the approximate solution $\tilde x$ of \texttt{GMRES} depends heavily on the dimension $m$ of the search space. The authors in \cite{SHEN2022126799} propose to use the \texttt{GMRES} method as a preconditioner for the shifted power method presented in the previous section. The core idea of the method is to run standard GMRES on a seed system and to approximate the other solutions as by-products. The theoretical basis is the shift-invariance property of the Krylov subspace that enables us to use only one Krylov subspace for all the shifted systems, provided that the residual vectors are collinear to one another. The algorithm proposed by the authors is presented in Algorithm \ref{alg:shifted_GMRES}.
 \begin{algorithm}[H]