Small fixes to avoid path errors

main
Luca Lombardo 2 years ago
parent f31bab3543
commit 378fa96686

1
.gitignore vendored

@ -276,5 +276,4 @@ TSWLatexianTemp*
*.lpz *.lpz
data/ data/
__pycache__/ __pycache__/

@ -8,7 +8,7 @@ This repository contains the code of my attempt to replicate the results obtaine
pip install -r requirements.txt pip install -r requirements.txt
``` ```
At the moment, the standard and shifted power method to compute the PageRank with multiple damping factors are fully implemented (as described in `[1]`). To run the program, go into the folder `src/` and execute the `./main.py` file. It takes as input two arguments: At the moment, the standard and shifted power method to compute the PageRank with multiple damping factors are fully implemented (as described in `[1]`). To run the program we need to execute the `main.py` file. It takes as input two arguments:
- `--dataset`: the options are `BerkStan` and `Stanford`. This command selects the web-graph to run the algorithms on. - `--dataset`: the options are `BerkStan` and `Stanford`. This command selects the web-graph to run the algorithms on.
- `--algo`: the options are `power`, `shifted`, `both`. If you choose the last option, it will first run the standard power method and then the shifted one. - `--algo`: the options are `power`, `shifted`, `both`. If you choose the last option, it will first run the standard power method and then the shifted one.
@ -16,7 +16,6 @@ At the moment, the standard and shifted power method to compute the PageRank wit
Here is an example of what's described above. Here is an example of what's described above.
```bash ```bash
cd src
sudo chmod +x main.py sudo chmod +x main.py
``` ```

@ -42,29 +42,28 @@ def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph
""" """
# check if there is a data folder # check if there is a data folder
if not exists(os.path.join(os.getcwd(), "data")): if not exists(os.path.join("data")):
os.mkdir(os.path.join(os.getcwd(), "data")) os.mkdir(os.path.join("data"))
if dataset not in ["Stanford", "NotreDame", "BerkStan"]: if dataset not in ["Stanford", "NotreDame", "BerkStan"]:
raise ValueError("Invalid dataset. Please choose a valid dataset.") raise ValueError("Invalid dataset. Please choose a valid dataset.")
# Download the dataset # Download the dataset
if not exists(f"../data/Web-{dataset}.txt.gz"): if not exists(f"data/Web-{dataset}.txt.gz"):
print(f"\nDownloading the dataset {dataset}...") print(f"\nDownloading the dataset {dataset}...")
wget.download(f"http://snap.stanford.edu/data/web-{dataset}.txt.gz", out=f"data/Web-{dataset}.txt.gz") wget.download(f"http://snap.stanford.edu/data/web-{dataset}.txt.gz", out=f"data/Web-{dataset}.txt.gz")
else: else:
print(f"\nThe dataset {dataset} is already downloaded.") print(f"\nThe dataset {dataset} is already downloaded.")
# unzip the dataset # unzip the dataset
if not exists(f"../data/Web-{dataset}.txt"): if not exists(f"data/Web-{dataset}.txt"):
print(f"\nUnzipping the dataset {dataset}...") print(f"\nUnzipping the dataset {dataset}...")
with gzip.open(f"../data/Web-{dataset}.txt.gz", "rb") as f_in: with gzip.open(f"data/Web-{dataset}.txt.gz", "rb") as f_in:
with open(f"../data/Web-{dataset}.txt", "wb") as f_out: with open(f"data/Web-{dataset}.txt", "wb") as f_out:
f_out.write(f_in.read()) f_out.write(f_in.read())
# create the graph # create the graph
print(f"\nCreating the graph of the dataset {dataset}...\n") print(f"\nCreating the graph of the dataset {dataset}...\n")
G_dataset = nx.read_edgelist(f"../data/Web-{dataset}.txt", create_using=nx.DiGraph(), nodetype=int) G_dataset = nx.read_edgelist(f"data/Web-{dataset}.txt", create_using=nx.DiGraph(), nodetype=int)
print(f"\tNumber of nodes: {G_dataset.number_of_nodes()}") print(f"\tNumber of nodes: {G_dataset.number_of_nodes()}")
print(f"\tNumber of edges: {G_dataset.number_of_edges()}") print(f"\tNumber of edges: {G_dataset.number_of_edges()}")
@ -430,8 +429,6 @@ def pagerank(G, alpha=0.85, personalization=None, max_iter=10000, tol=1.0e-9, ns
raise nx.PowerIterationFailedConvergence(max_iter) raise nx.PowerIterationFailedConvergence(max_iter)
def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=1.0e-9): def shifted_pow_pagerank(G, alphas=[0.85, 0.9, 0.95, 0.99], max_iter=10000, tol=1.0e-9):
""" """

@ -41,7 +41,7 @@ def run_standard_pagerank(G, alphas):
def run_shifted_powe(G, alphas): def run_shifted_powe(G, alphas):
print("\nStarting the shifted pagerank algorithm...\n") print("\nStarting the SHIFTED PAGERANK ALGORITHM...\n")
start2 = time.time() start2 = time.time()
x, mv, alphas, tol = shifted_pow_pagerank(G, alphas, tol=1e-6) x, mv, alphas, tol = shifted_pow_pagerank(G, alphas, tol=1e-6)

Binary file not shown.

@ -35,6 +35,8 @@ The Restarted GMRES method (hereafter referred to as GMRES in short) is a non-sy
\noindent Where $A \in \R^{n\times n}$ and $v_0 \in \R ^{n \times 1}$ is the initial vector. After $m$ iterations, the Arnoldi procedure produces the orthogonal basis $V_m = [v_1, \dots, v_m]$ and the upper Hessenberg matrix $H_m \in \R^{m\times m}$, and the residual vector $v_{m+1} \in \R^{n \times 1}$ and the residual norm $h_{m+1,m} \in \R$. Starting from $v_0 = b - Ax_0$ with an initial guess $x_0$, after running $m$ steps of the algorithm \ref{alg:arnoldi}, the \texttt{GMRES} method produces the approximate solution $\tilde x$ of the linear system $Ax = b$ that minimizes the residual norm $\lVert b - Ax \rVert$ in the Krylov subspace of dimension $m$. \vspace*{0.4cm} \noindent Where $A \in \R^{n\times n}$ and $v_0 \in \R ^{n \times 1}$ is the initial vector. After $m$ iterations, the Arnoldi procedure produces the orthogonal basis $V_m = [v_1, \dots, v_m]$ and the upper Hessenberg matrix $H_m \in \R^{m\times m}$, and the residual vector $v_{m+1} \in \R^{n \times 1}$ and the residual norm $h_{m+1,m} \in \R$. Starting from $v_0 = b - Ax_0$ with an initial guess $x_0$, after running $m$ steps of the algorithm \ref{alg:arnoldi}, the \texttt{GMRES} method produces the approximate solution $\tilde x$ of the linear system $Ax = b$ that minimizes the residual norm $\lVert b - Ax \rVert$ in the Krylov subspace of dimension $m$. \vspace*{0.4cm}
\paragraph{Implementation:} On the github res
\noindent We know that the accuracy of the approximate solution $\tilde x$ of \texttt{GMRES} depends heavily on the dimension $m$ of the search space. The authors in \cite{SHEN2022126799} propose to use the \texttt{GMRES} method as a preconditioner for the shifted power method presented in the previous section. The core idea of the method is to run standard GMRES on a seed system and to approximate the other solutions as by-products. The theoretical basis is the shift-invariance property of the Krylov subspace that enables us to use only one Krylov subspace for all the shifted systems, provided that the residual vectors are collinear to one another. The algorithm proposed by the authors is presented in Algorithm \ref{alg:shifted_GMRES}. \noindent We know that the accuracy of the approximate solution $\tilde x$ of \texttt{GMRES} depends heavily on the dimension $m$ of the search space. The authors in \cite{SHEN2022126799} propose to use the \texttt{GMRES} method as a preconditioner for the shifted power method presented in the previous section. The core idea of the method is to run standard GMRES on a seed system and to approximate the other solutions as by-products. The theoretical basis is the shift-invariance property of the Krylov subspace that enables us to use only one Krylov subspace for all the shifted systems, provided that the residual vectors are collinear to one another. The algorithm proposed by the authors is presented in Algorithm \ref{alg:shifted_GMRES}.
\begin{algorithm}[H] \begin{algorithm}[H]

Loading…
Cancel
Save