import os
import wget
import gzip
import time
import scipy as sp
import numpy as np
import pandas as pd
import networkx as nx
from os.path import exists
from typing import Literal

def load_data(dataset: Literal["Stanford", "NotreDame", "BerkStan"]) -> nx.Graph:
    """Download (if needed), unpack (if needed) and load a SNAP web graph.

    Parameters
    ----------
    dataset : Literal["Stanford", "BerkStan", "NotreDame"]
        The dataset to load.

    Returns
    -------
    nx.Graph
        The graph read from the edge list. It is built with
        ``create_using=nx.DiGraph()``, i.e. it is a directed graph whose
        node labels are integers.

    Raises
    ------
    ValueError
        If the dataset is not valid. The check happens before anything is
        created on disk.

    Notes
    -----
    The datasets are downloaded from the following links:

    http://snap.stanford.edu/data/web-NotreDame.html
    http://snap.stanford.edu/data/web-Stanford.html
    http://snap.stanford.edu/data/web-BerkStan.html

    Files are stored under the relative folder ``data``. An archive or an
    extracted text file that already exists is reused as-is.
    """
    # Local import: only this function needs it.
    import shutil

    # Validate first so that an invalid name has no side effects on disk.
    if dataset not in ("Stanford", "NotreDame", "BerkStan"):
        raise ValueError("Invalid dataset. Please choose a valid dataset.")

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("data", exist_ok=True)

    archive_path = f"data/Web-{dataset}.txt.gz"
    text_path = f"data/Web-{dataset}.txt"

    # Download the dataset
    if not exists(archive_path):
        print(f"\nDownloading the dataset {dataset}...")
        wget.download(f"http://snap.stanford.edu/data/web-{dataset}.txt.gz", out=archive_path)
    else:
        print(f"\nThe dataset {dataset} is already downloaded.")

    # Unzip the dataset
    if not exists(text_path):
        print(f"\nUnzipping the dataset {dataset}...")
        # Stream the decompression in chunks instead of holding the whole file
        # in memory, and write to a temporary name so that an interrupted
        # extraction never leaves a truncated file that later runs would reuse.
        partial_path = text_path + ".part"
        with gzip.open(archive_path, "rb") as f_in, open(partial_path, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)
        os.replace(partial_path, text_path)

    # Create the graph
    print(f"\nCreating the graph of the dataset {dataset}...\n")
    G_dataset = nx.read_edgelist(text_path, create_using=nx.DiGraph(), nodetype=int)
    print(f"\tNumber of nodes: {G_dataset.number_of_nodes()}")
    print(f"\tNumber of edges: {G_dataset.number_of_edges()}")

    return G_dataset


def google_matrix(G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None) -> np.matrix:
    """Build the dense Google matrix of ``G`` (NetworkX-style implementation).

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    alpha : float
        The damping factor.

    personalization : dict, optional
        Maps nodes to personalization values. Nodes missing from the dict
        get the value 0; the resulting vector is normalised to sum to 1.
        At least one value must be non-zero. When omitted, the uniform
        distribution ``1 / N`` is used.

    nodelist : list, optional
        Ordering of the rows and columns. Defaults to ``list(G)``.

    weight : key, optional
        Edge data key passed to ``nx.to_numpy_array`` as the weight.

    dangling : dict, optional
        Maps nodes to the out-edge weights given to "dangling" rows, i.e.
        rows of the adjacency matrix that sum to zero. Missing nodes get 0
        and the vector is normalised to sum to 1. When omitted, the
        personalization vector is used instead.

    Returns
    -------
    np.matrix
        ``alpha * A + (1 - alpha) * p`` where ``A`` is the row-normalised
        adjacency matrix (after filling dangling rows) and ``p`` the
        personalization vector. For an empty graph the (empty) adjacency
        matrix is returned.

    Raises
    ------
    ZeroDivisionError
        If a personalization dict is given whose values sum to zero.

    Notes
    -----
    DO NOT USE THIS FUNCTION FOR LARGE GRAPHS. The full N x N matrix is
    materialised as a dense array.
    """
    nodes = list(G) if nodelist is None else nodelist

    adjacency = nx.to_numpy_array(G, nodelist=nodes, weight=weight)
    n_nodes = len(G)
    if n_nodes == 0:
        # TODO: Remove np.asmatrix wrapper in version 3.0
        return np.asmatrix(adjacency)

    # Teleportation (personalization) distribution.
    if personalization is not None:
        teleport = np.array([personalization.get(node, 0) for node in nodes], dtype=float)
        if teleport.sum() == 0:
            raise ZeroDivisionError
        teleport /= teleport.sum()
    else:
        teleport = np.repeat(1.0 / n_nodes, n_nodes)

    # Row used to replace every dangling (all-zero) row.
    if dangling is not None:
        # Dangling dict -> array in nodelist order, normalised to sum to 1.
        sink_row = np.array([dangling.get(node, 0) for node in nodes], dtype=float)
        sink_row /= sink_row.sum()
    else:
        sink_row = teleport

    # Rows whose entries sum to zero have no out-links.
    sink_mask = adjacency.sum(axis=1) == 0
    adjacency[sink_mask] = sink_row

    # Make every row sum to 1.
    adjacency /= adjacency.sum(axis=1, keepdims=True)

    damped = alpha * adjacency + (1 - alpha) * teleport
    return np.asmatrix(damped)


def google_matrix_sparse(G, alpha=0.85, personalization=None, nodelist=None, weight="weight", dangling=None) -> tuple:
    """Return the sparse, row-normalised transition matrix of ``G`` and the
    personalization vector.

    Revised NetworkX implementation for sparse matrices. The matrix returned
    is not the Google matrix itself but the "Ptilde" matrix of [1]: the
    row-normalised adjacency matrix in which every dangling row has been
    replaced by the dangling weights. No damping is applied here.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    alpha : float
        Accepted for signature compatibility with ``google_matrix``; it is
        not used by this function.

    personalization : dict, optional
        Maps nodes to personalization values. Nodes missing from the dict
        get the value 0 and the vector is normalised to sum to 1. At least
        one value must be non-zero. By default a uniform distribution is
        used.

    nodelist : list, optional
        Ordering of the rows and columns. Defaults to ``list(G)``.

    weight : key, optional
        Edge data key passed to ``nx.to_scipy_sparse_array`` as the weight.

    dangling : dict, optional
        Maps nodes to the out-edge weights given to dangling nodes (rows
        whose weights sum to zero). Missing nodes get 0 and the vector is
        normalised to sum to 1. By default every dangling row is filled
        uniformly (this default does not depend on ``personalization``).

    Returns
    -------
    A : SciPy sparse matrix
        Row-normalised transition matrix, the product of a diagonal scaling
        matrix and the adjacency matrix with dangling rows filled in.
    p : SciPy sparse array or NumPy array
        The personalization vector: a ``1 x N`` ``lil_array`` in the default
        (uniform) case, a dense 1-D array when ``personalization`` is given.

    Raises
    ------
    ZeroDivisionError
        If a personalization dict is given whose values sum to zero.

    Notes
    -----
    Every dangling row is stored explicitly, so the result holds roughly
    ``(number of dangling nodes) x N`` extra non-zeros. This can be very
    memory hungry for graphs with many dangling nodes.

    References
    ----------
    [1] Zhao-Li Shen, Meng Su, Bruno Carpentieri, and Chun Wen. Shifted
    power-gmres method accelerated by extrapolation for solving pagerank with
    multiple damping factors. Applied Mathematics and Computation,
    420:126799, 2022
    """
    if nodelist is None:
        nodelist = list(G)

    # Float storage: an integer matrix would truncate fractional edge weights
    # and the normalised dangling weights to zero.
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, format="csr", dtype=float)

    N = len(G)
    if N == 0:
        # Kept from the original implementation. Note that this path returns
        # a single object rather than the usual (A, p) pair.
        return np.asmatrix(A)

    # Personalization vector
    if personalization is None:
        # Build the row vector explicitly as 2-D so the sparse constructor
        # never has to interpret a 1-D input.
        p = sp.sparse.lil_array(np.full((1, N), 1.0 / N))
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
        if p.sum() == 0:
            raise ZeroDivisionError
        p /= p.sum()

    # Dangling weights (one value per column of A)
    if dangling is None:
        # Uniform fill; the row normalisation below turns it into 1 / N.
        dangling_weights = np.ones(A.shape[1], dtype=float)
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
        dangling_weights /= dangling_weights.sum()

    # Dangling nodes are the rows whose weights sum to zero.
    out_strength = np.asarray(A.sum(axis=1), dtype=float).ravel()
    dangling_rows = np.flatnonzero(out_strength == 0)
    if dangling_rows.size:
        # Those rows are empty, so adding the outer product
        # (indicator column) @ (weights row) replaces exactly them. This
        # avoids fancy-index assignment with a boolean mask, which is easy
        # to get wrong on sparse containers.
        indicator = sp.sparse.csr_array(
            (
                np.ones(dangling_rows.size),
                (dangling_rows, np.zeros(dangling_rows.size, dtype=int)),
            ),
            shape=(A.shape[0], 1),
        )
        weights_row = sp.sparse.csr_array(dangling_weights.reshape(1, -1))
        A = A + indicator @ weights_row

    # Normalize rows: scale row i by 1 / (row sum), leaving all-zero rows
    # untouched (their scale factor stays 0) without dividing by zero.
    row_sums = np.asarray(A.sum(axis=1), dtype=float).ravel()
    r_inv = np.zeros_like(row_sums)
    np.divide(1.0, row_sums, out=r_inv, where=row_sums != 0)
    R = sp.sparse.diags(r_inv)  # diagonal scaling matrix
    A = R.dot(A)

    return A, p


def pagerank_numpy(G, alpha=0.85, personalization=None, weight="weight", dangling=None):
    """Compute PageRank from the dense Google matrix with an eigen-solver.

    NetworkX-style implementation: the Google matrix of ``G`` is built with
    ``google_matrix`` and the eigenvector selected by ``np.argmax`` over the
    eigenvalues of its transpose is rescaled so that its entries sum to 1.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    alpha : float, optional
        Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
        Personalization values per node; forwarded to ``google_matrix``.

    weight : key, optional
        Edge data key to use as weight; forwarded to ``google_matrix``.

    dangling : dict, optional
        Out-edge weights for dangling nodes; forwarded to ``google_matrix``.

    Returns
    -------
    pagerank : dictionary
        Dictionary of nodes with PageRank (a Python float) as value. An
        empty dictionary is returned for an empty graph.

    Notes
    -----
    The eigen-decomposition is done with ``np.linalg.eig`` on a dense
    matrix, so this is only suitable for small graphs.
    """
    if len(G) == 0:
        return {}

    transition = google_matrix(
        G,
        alpha,
        personalization=personalization,
        weight=weight,
        dangling=dangling,
    )

    # Left eigenvectors of the Google matrix == right eigenvectors of M.T
    eigvals, eigvecs = np.linalg.eig(transition.T)
    dominant = np.argmax(eigvals)

    # Take the real part of the selected eigenvector and rescale to sum 1.
    principal = np.array(eigvecs[:, dominant]).flatten().real
    scores = principal / principal.sum()

    return {node: float(score) for node, score in zip(G, scores)}


def pagerank(G, alpha=0.85, personalization=None, max_iter=1000, tol=1.0e-8, nstart=None, weight="weight", dangling=None,):
    """Compute PageRank by power iteration on a sparse matrix.

    Slightly modified NetworkX implementation: besides the PageRank values it
    also returns an iteration counter and the tolerance used.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    alpha : float, optional
        Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
        Maps nodes to personalization values. Nodes missing from the dict
        get 0 and the vector is normalised to sum to 1. At least one value
        must be non-zero. When omitted every node gets ``1 / N``.

    max_iter : integer, optional
        Maximum number of power iterations.

    tol : float, optional
        Error tolerance; iteration stops once the l1 change between two
        successive iterates is below ``N * tol``.

    nstart : dictionary, optional
        Starting value of the iteration for each node (missing nodes get 0).
        The vector is normalised to sum to 1. Uniform when omitted.

    weight : key, optional
        Edge data key passed to ``nx.to_scipy_sparse_array`` as the weight.

    dangling : dict, optional
        Maps nodes to the weights used to redistribute the rank held by
        dangling nodes (nodes whose out-weights sum to zero). Missing nodes
        get 0 and the vector is normalised to sum to 1. When omitted, the
        personalization vector is used.

    Returns
    -------
    pagerank : dictionary
        Dictionary of nodes with PageRank (a Python float) as value.
    n_iter : int
        Iteration counter. It starts at 1 and is incremented once per power
        iteration, so it equals the number of iterations performed plus one.
    tol : float
        The ``tol`` argument, returned unchanged.

    For an empty graph only an empty dictionary ``{}`` is returned.

    Raises
    ------
    ZeroDivisionError
        If a personalization dict is given whose values sum to zero.
    PowerIterationFailedConvergence
        If the iteration does not reach the tolerance within ``max_iter``
        iterations.
    """
    N = len(G)
    if N == 0:
        return {}

    nodelist = list(G)
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, dtype=float)
    S = A.sum(axis=1)  # S[i]: total weight of the edges leaving node i
    S[S != 0] = 1.0 / S[S != 0]  # S[i] is now the reciprocal (0 stays 0 for dangling nodes)
    Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape))  # diagonal row-scaling matrix
    A = Q @ A  # every non-dangling row of A now sums to 1

    # Initial vector
    if nstart is None:
        x = np.repeat(1.0 / N, N)
    else:
        x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float)
        x /= x.sum()

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
        if p.sum() == 0:
            # An all-zero personalization vector cannot be normalised.
            raise ZeroDivisionError
        p /= p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(S == 0)[0]  # indices of the dangling nodes

    # Loop-invariant teleportation term.
    teleport = (1 - alpha) * p

    # Power iteration: make up to max_iter iterations.
    n_iter = 1  # named n_iter to avoid shadowing the builtin ``iter``
    for _ in range(max_iter):
        n_iter += 1
        xlast = x
        # ndarray.sum() keeps the reduction in NumPy instead of a Python-level
        # loop over the (possibly very many) dangling entries.
        dangling_mass = x[is_dangling].sum()
        x = alpha * (x @ A + dangling_mass * dangling_weights) + teleport
        # Check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x))), n_iter, tol

    # Failure to converge within max_iter iterations
    raise nx.PowerIterationFailedConvergence(max_iter)


def shifted_pow_pagerank(G, alphas=None, max_iter=1000, tol=1.0e-8):
    """Compute PageRank for several damping factors with the shifted power
    method (Algorithm 1 in [1]).

    All damping factors share the same sequence of matrix-vector products:
    with ``P`` the column-stochastic transition matrix, ``v`` the
    personalization vector and ``mu_0 = P v - v``, the power iterates started
    at ``x_0 = v`` satisfy ``x_k - x_{k-1} = alpha**k * P**(k-1) mu_0``.
    Each column is therefore built by accumulating these increments until
    the 2-norm of the increment drops below ``tol``.

    Parameters
    ----------
    G : graph
        A NetworkX graph.

    alphas : list, optional
        Damping factors to solve for. Defaults to ``[0.85, 0.9, 0.95, 0.99]``
        (a fresh list on every call).

    max_iter : integer, optional
        Maximum number of passes of the main loop.

    tol : float, optional
        Tolerance on the 2-norm of the increment of each column.

    Returns
    -------
    pagerank : sparse matrix
        ``N x len(alphas)`` ``lil_matrix``; column ``i`` is the PageRank
        vector for ``alphas[i]``.

    mv : integer
        The number of matrix-vector multiplications used.

    alphas : list
        The damping factors used.

    tol : float
        The ``tol`` argument, returned unchanged.

    Raises
    ------
    NetworkXException
        If the graph is empty.
    PowerIterationFailedConvergence
        If some column has not converged after ``max_iter`` passes.

    References
    ----------
    [1] Zhao-Li Shen, Meng Su, Bruno Carpentieri, and Chun Wen. Shifted
    power-gmres method accelerated by extrapolation for solving pagerank with
    multiple damping factors. Applied Mathematics and Computation,
    420:126799, 2022
    """
    # A mutable default list would be shared between calls and handed back
    # to the caller, so build it on demand instead.
    if alphas is None:
        alphas = [0.85, 0.9, 0.95, 0.99]

    N = len(G)
    if N == 0:
        raise nx.NetworkXException("Empty graph.")

    A, v = google_matrix_sparse(G)

    # google_matrix_sparse normalises ROWS (A[i, j] is the probability of
    # moving from i to j). The column form x = alpha * P x + (1 - alpha) * v
    # used below needs the transpose.
    P = A.T.tocsr()

    # Work with a dense 1-D personalization vector, whatever container the
    # helper returned.
    if sp.sparse.issparse(v):
        v = v.toarray()
    v = np.asarray(v, dtype=float).ravel()

    # Every column starts at x_0 = v. The data is dense, so it is kept in a
    # NumPy array and converted to the sparse return type only at the end.
    x = np.repeat(v[:, np.newaxis], len(alphas), axis=1)

    mu = P @ v - v
    mv = 1  # number of matrix-vector multiplications so far

    res = np.ones(len(alphas))  # norm of the latest increment, per alpha
    for i, a in enumerate(alphas):
        r_i = a * mu  # x_1 - x_0
        res[i] = np.linalg.norm(r_i, ord=2)
        if res[i] >= tol:
            x[:, i] += r_i

    for _ in range(max_iter):
        err = np.max(res)
        if err < tol:
            return sp.sparse.lil_matrix(x), mv, alphas, tol

        mu = P @ mu
        mv += 1
        print("\nCurrent number of matrix-vector products: ", mv, "\r", end="")
        for i, a in enumerate(alphas):
            # Columns that already converged are left untouched.
            if res[i] >= tol:
                # mu now holds P**(mv-1) mu_0, so the matching increment is
                # alpha**mv * mu (the first step above used alpha**1).
                r_i = np.power(a, mv) * mu
                res[i] = np.linalg.norm(r_i, ord=2)
                if res[i] >= tol:
                    # Accumulate: x_k = x_{k-1} + (x_k - x_{k-1}).
                    x[:, i] += r_i

    # Not converged within max_iter passes
    raise nx.PowerIterationFailedConvergence(max_iter)