diff --git a/tex/src/conclusion.tex b/tex/src/conclusion.tex new file mode 100644 index 0000000..afda65b --- /dev/null +++ b/tex/src/conclusion.tex @@ -0,0 +1,9 @@ +\section{Conclusion} + +In this paper we discussed the results of an exact algorithm for the computation of the $k$ most central nodes in a graph, according to closeness centrality. We saw that with the introduction of a lower bound, the real-world performance is far better than that of a brute-force algorithm that computes all the \texttt{BFS}. \s + +\nd Since there was no server with dozens of threads and hundreds of gigabytes of RAM to test the algorithm with, everything has been adapted knowing that everything needed to run on a laptop. \s + +\nd We have seen two different case studies, both based on the IMDb network. For each of them we had to find a way to filter the data without losing accuracy on the results. We saw that with harder filtering we gain a lot of performance, but the results show an increasing discrepancy from reality. By analyzing the tests we made, we were able to find, for both graphs, a balance that gives accuracy and performance at the same time. + +\s \nd This work is heavily based on \cite{DBLP:journals/corr/BergaminiBCMM17}. Even though that article uses a more complex and complete approach, the results on the IMDb case study are almost identical. The authors worked with snapshots, analyzing single time periods, so there are some inevitable discrepancies. Despite that, most of the top-$k$ actors are the same and the closeness centrality values are very similar. We can use this comparison to attest to the correctness and efficiency of the algorithm presented in this paper. diff --git a/tex/src/improvement.tex b/tex/src/improvement.tex new file mode 100644 index 0000000..a7b3a58 --- /dev/null +++ b/tex/src/improvement.tex @@ -0,0 +1,25 @@ +\section{Further Work: Harmonic centrality} + +The algorithm shown in this paper is very versatile. 
We have tested it with two different graphs and obtained excellent results. But there could be more. + +\s \nd It can be adapted very easily to compute other centralities, such as the harmonic one. Given a graph $G = (V,E)$ and a node $v \in V$, it is defined as + +\begin{equation} + h(v) = \sum_{w \neq v} \frac{1}{d(v,w)} +\end{equation} + +\nd The main difference here is that we don't have a farness. Then we won't need a lower bound either. Since the bigger the number, the higher the centrality, we have to adapt the algorithm. Instead of a lower bound, we need an upper bound $U_B$ such that + +\begin{equation} + h(v) \leq U_B (v) \leq h(w) +\end{equation} + +\nd A possible upper bound can be taken considering the worst case that could happen at each state + +\begin{equation} + U_B (v) = \sigma_{d-1} + \frac{n_d}{d} + \frac{n - r - n_d}{d+1} +\end{equation} + +\nd When we are at the level $d$ of our exploration, we already know the partial sum $\sigma_{d-1}$. The worst case in this level happens when the node $v$ is connected to all the other nodes. To consider this possibility we add the terms $\frac{n_d}{d} + \frac{n - r - n_d}{d+1}$. + +\s \nd This method has been tested and works with excellent results. What needs to be adjusted is a formal normalization for the harmonic centrality and for the upper bound. In the GitHub repository, the script already gives the possibility to compute the top-$k$ harmonic centrality of both graphs. diff --git a/tex/src/main.bbl b/tex/src/main.bbl new file mode 100644 index 0000000..88a6d61 --- /dev/null +++ b/tex/src/main.bbl @@ -0,0 +1,8 @@ +\begin{thebibliography}{1} + +\bibitem{DBLP:journals/corr/BergaminiBCMM17} +E.~Bergamini, M.~Borassi, P.~Crescenzi, A.~Marino, and H.~Meyerhenke. +\newblock Computing top-k closeness centrality faster in unweighted graphs. +\newblock {\em CoRR}, abs/1704.01077, 2017. 
+ +\end{thebibliography} diff --git a/tex/src/main.blg b/tex/src/main.blg new file mode 100644 index 0000000..d473332 --- /dev/null +++ b/tex/src/main.blg @@ -0,0 +1,54 @@ +This is BibTeX, Version 0.99d (TeX Live 2020/Debian) +Capacity: max_strings=200000, hash_size=200000, hash_prime=170003 +The top-level auxiliary file: main.aux +A level-1 auxiliary file: introduction.aux +A level-1 auxiliary file: algorithm.aux +A level-1 auxiliary file: data.aux +A level-1 auxiliary file: code.aux +A level-1 auxiliary file: analysis.aux +A level-1 auxiliary file: visualization.aux +A level-1 auxiliary file: improvement.aux +A level-1 auxiliary file: conclusion.aux +The style file: abbrv.bst +Database file #1: ref.bib +You've used 1 entry, + 2118 wiz_defined-function locations, + 512 strings with 4062 characters, +and the built_in function-call counts, 357 in all, are: += -- 30 +> -- 28 +< -- 0 ++ -- 11 +- -- 10 +* -- 28 +:= -- 73 +add.period$ -- 3 +call.type$ -- 1 +change.case$ -- 8 +chr.to.int$ -- 0 +cite$ -- 1 +duplicate$ -- 10 +empty$ -- 18 +format.name$ -- 10 +if$ -- 65 +int.to.chr$ -- 0 +int.to.str$ -- 1 +missing$ -- 1 +newline$ -- 8 +num.names$ -- 2 +pop$ -- 6 +preamble$ -- 1 +purify$ -- 7 +quote$ -- 0 +skip$ -- 8 +stack$ -- 0 +substring$ -- 5 +swap$ -- 1 +text.length$ -- 0 +text.prefix$ -- 0 +top$ -- 0 +type$ -- 4 +warning$ -- 0 +while$ -- 2 +width$ -- 2 +write$ -- 13 diff --git a/tex/src/main.pdf b/tex/src/main.pdf index 507efee..ebd8676 100644 Binary files a/tex/src/main.pdf and b/tex/src/main.pdf differ diff --git a/tex/src/main.tex b/tex/src/main.tex index 1b94daa..c32a014 100644 --- a/tex/src/main.tex +++ b/tex/src/main.tex @@ -80,5 +80,10 @@ \include{code.tex} \include{analysis.tex} \include{visualization.tex} +\include{improvement.tex} +\include{conclusion.tex} + +\bibliographystyle{abbrv} +\bibliography{ref} \end{document} diff --git a/tex/src/ref.bib b/tex/src/ref.bib new file mode 100644 index 0000000..4596499 --- /dev/null +++ b/tex/src/ref.bib @@ -0,0 +1,17 
@@ +@article{DBLP:journals/corr/BergaminiBCMM17, + author = {Elisabetta Bergamini and + Michele Borassi and + Pierluigi Crescenzi and + Andrea Marino and + Henning Meyerhenke}, + title = {Computing top-k Closeness Centrality Faster in Unweighted Graphs}, + journal = {CoRR}, + volume = {abs/1704.01077}, + year = {2017}, + url = {http://arxiv.org/abs/1704.01077}, + eprinttype = {arXiv}, + eprint = {1704.01077}, + timestamp = {Mon, 13 Aug 2018 16:48:41 +0200}, + biburl = {https://dblp.org/rec/journals/corr/BergaminiBCMM17.bib}, + bibsource = {dblp computer science bibliography, https://dblp.org} +}