some progress

main
Antonio De Lucreziis 3 months ago
parent 7cff1ce50a
commit 690c5fcd00

@ -0,0 +1,334 @@
⠖ estimating line count
skipped 2418 lines of type: P
Number of entries: 8347110
Computing graph stats...
Computing nodes degrees...
Computing histogram...
Stats:
- Nodes: 3460855
- Edges: 3459968
Graph degrees histogram (degree/count):
- 1: 2123
- 2: 2017235
- 3: 280142
- 4: 1047601
- 5: 69572
- 6: 20940
- 7: 8895
- 8: 4709
- 9: 2768
- 10: 1740
- 11: 1140
- 12: 873
- 13: 644
- 14: 436
- 15: 339
- 16: 267
- 17: 210
- 18: 183
- 19: 127
- 20: 100
- 21: 97
- 22: 77
- 23: 55
- 24: 48
- 25: 48
- 26: 28
- 27: 33
- 28: 32
- 29: 39
- 30: 29
- 31: 19
- 32: 18
- 33: 17
- 34: 8
- 35: 15
- 36: 7
- 37: 12
- 38: 14
- 39: 10
- 40: 8
- 41: 6
- 42: 8
- 43: 8
- 44: 6
- 45: 10
- 46: 5
- 47: 5
- 48: 6
- 49: 5
- 50: 9
- 51: 2
- 52: 6
- 53: 3
- 54: 4
- 55: 2
- 56: 1
- 57: 8
- 58: 1
- 59: 3
- 60: 3
- 61: 1
- 62: 1
- 63: 5
- 64: 1
- 65: 3
- 66: 1
- 67: 6
- 68: 2
- 69: 1
- 71: 3
- 72: 3
- 73: 2
- 74: 2
- 75: 1
- 78: 1
- 79: 1
- 80: 2
- 81: 1
- 83: 1
- 84: 2
- 85: 2
- 87: 2
- 90: 1
- 91: 1
- 93: 1
- 96: 2
- 97: 3
- 99: 1
- 100: 1
- 101: 2
- 105: 2
- 108: 1
- 110: 1
- 112: 1
- 115: 2
- 116: 1
- 118: 1
- 119: 2
- 126: 1
- 127: 1
- 130: 1
- 131: 1
- 132: 1
- 135: 1
- 136: 1
- 142: 1
- 143: 1
- 150: 1
- 151: 2
- 164: 1
- 165: 1
- 168: 2
- 176: 1
- 183: 1
- 186: 1
- 197: 1
- 206: 1
- 208: 1
- 211: 1
- 229: 1
- 235: 1
- 269: 1
- 310: 1
- 439: 1
- 454: 1
Computing edge types...
Computing edge types histogram...
Edge types histogram (type/count):
- TreeEdge: 3459280
- BackEdge: 33402
- ForwardEdge: 352745
- CrossEdge: 1041433
Removing back edges...
Computing edge types...
Computing edge types histogram...
Edge types histogram (type/count):
- TreeEdge: 3459280
- ForwardEdge: 352745
- CrossEdge: 1041433
Computing connected components...
Computing sizes histogram...
Connected components histogram (size/count):
- 2: 531
- 3: 1
- 4: 2
- 8: 1
- 92: 1
- 135: 1
- 293: 1
- 305: 1
- 2286: 1
- 5040: 1
- 5639: 1
- 9714: 1
- 12885: 1
- 30694: 1
- 40082: 1
- 50661: 1
- 58959: 1
- 101785: 1
- 149495: 1
- 160235: 1
- 518148: 1
- 951466: 1
- 1361860: 1
Picking largest connected component...
Computing graph stats...
Computing nodes degrees...
Computing histogram...
Stats:
- Nodes: 1361860
- Edges: 1361860
Graph degrees histogram (degree/count):
- 2: 2547
- 4: 790748
- 6: 124055
- 8: 397684
- 10: 29479
- 12: 8481
- 14: 3512
- 16: 1819
- 18: 1032
- 20: 666
- 22: 448
- 24: 346
- 26: 207
- 28: 167
- 30: 139
- 32: 92
- 34: 81
- 36: 58
- 38: 44
- 40: 35
- 42: 29
- 44: 28
- 46: 24
- 48: 24
- 50: 11
- 52: 4
- 54: 13
- 56: 9
- 58: 13
- 60: 5
- 62: 4
- 64: 2
- 66: 3
- 68: 5
- 70: 2
- 72: 4
- 74: 1
- 76: 1
- 78: 1
- 80: 1
- 82: 1
- 84: 1
- 86: 5
- 88: 4
- 90: 2
- 94: 1
- 100: 1
- 104: 1
- 108: 2
- 114: 1
- 128: 1
- 134: 1
- 144: 1
- 146: 1
- 156: 1
- 166: 1
- 192: 1
- 198: 1
- 200: 1
- 218: 1
- 230: 1
- 238: 1
- 258: 1
- 298: 1
- 326: 1
- 364: 1
- 470: 1
Compacting chains...
Compacted 121 nodes
Computing graph stats...
Computing nodes degrees...
Computing histogram...
Stats:
- Nodes: 1361739
- Edges: 1361739
Graph degrees histogram (degree/count):
- 2: 2547
- 4: 790627
- 6: 124055
- 8: 397684
- 10: 29479
- 12: 8481
- 14: 3512
- 16: 1819
- 18: 1032
- 20: 666
- 22: 448
- 24: 346
- 26: 207
- 28: 167
- 30: 139
- 32: 92
- 34: 81
- 36: 58
- 38: 44
- 40: 35
- 42: 29
- 44: 28
- 46: 24
- 48: 24
- 50: 11
- 52: 4
- 54: 13
- 56: 9
- 58: 13
- 60: 5
- 62: 4
- 64: 2
- 66: 3
- 68: 5
- 70: 2
- 72: 4
- 74: 1
- 76: 1
- 78: 1
- 80: 1
- 82: 1
- 84: 1
- 86: 5
- 88: 4
- 90: 2
- 94: 1
- 100: 1
- 104: 1
- 108: 2
- 114: 1
- 128: 1
- 134: 1
- 144: 1
- 146: 1
- 156: 1
- 166: 1
- 192: 1
- 198: 1
- 200: 1
- 218: 1
- 230: 1
- 238: 1
- 258: 1
- 298: 1
- 326: 1
- 364: 1
- 470: 1
Computing edge types...
Computing edge types histogram...
Edge types histogram (type/count):
- TreeEdge: 1361738
- BackEdge: 1914594
- ForwardEdge: 552856
Cleaning up...
cargo run --release -- show -i dataset/chrX.hprc-v1.0-pggb.local.gfa 142.91s user 16.48s system 96% cpu 2:45.14 total

@ -0,0 +1,257 @@
skipped 2418 lines of type: P
Number of entries: 8347110
Computing graph stats...
Computing nodes degrees...
Computing histogram...
Stats:
- Nodes: 3460855
- Edges: 3459968
Graph degrees histogram (degree/count):
- 1: 2123
- 2: 2017235
- 3: 280142
- 4: 1047601
- 5: 69572
- 6: 20940
- 7: 8895
- 8: 4709
- 9: 2768
- 10: 1740
- 11: 1140
- 12: 873
- 13: 644
- 14: 436
- 15: 339
- 16: 267
- 17: 210
- 18: 183
- 19: 127
- 20: 100
- 21: 97
- 22: 77
- 23: 55
- 24: 48
- 25: 48
- 26: 28
- 27: 33
- 28: 32
- 29: 39
- 30: 29
- 31: 19
- 32: 18
- 33: 17
- 34: 8
- 35: 15
- 36: 7
- 37: 12
- 38: 14
- 39: 10
- 40: 8
- 41: 6
- 42: 8
- 43: 8
- 44: 6
- 45: 10
- 46: 5
- 47: 5
- 48: 6
- 49: 5
- 50: 9
- 51: 2
- 52: 6
- 53: 3
- 54: 4
- 55: 2
- 56: 1
- 57: 8
- 58: 1
- 59: 3
- 60: 3
- 61: 1
- 62: 1
- 63: 5
- 64: 1
- 65: 3
- 66: 1
- 67: 6
- 68: 2
- 69: 1
- 71: 3
- 72: 3
- 73: 2
- 74: 2
- 75: 1
- 78: 1
- 79: 1
- 80: 2
- 81: 1
- 83: 1
- 84: 2
- 85: 2
- 87: 2
- 90: 1
- 91: 1
- 93: 1
- 96: 2
- 97: 3
- 99: 1
- 100: 1
- 101: 2
- 105: 2
- 108: 1
- 110: 1
- 112: 1
- 115: 2
- 116: 1
- 118: 1
- 119: 2
- 126: 1
- 127: 1
- 130: 1
- 131: 1
- 132: 1
- 135: 1
- 136: 1
- 142: 1
- 143: 1
- 150: 1
- 151: 2
- 164: 1
- 165: 1
- 168: 2
- 176: 1
- 183: 1
- 186: 1
- 197: 1
- 206: 1
- 208: 1
- 211: 1
- 229: 1
- 235: 1
- 269: 1
- 310: 1
- 439: 1
- 454: 1
Computing edge types...
Computing edge types histogram...
Edge types histogram (type/count):
- TreeEdge: 3459280
- BackEdge: 33402
- ForwardEdge: 352745
- CrossEdge: 1041433
Removing back edges...
Computing edge types...
Computing edge types histogram...
Edge types histogram (type/count):
- TreeEdge: 3459280
- ForwardEdge: 352745
- CrossEdge: 1041433
Computing connected components...
Computing sizes histogram...
Connected components histogram (size/count):
- 2: 531
- 3: 1
- 4: 2
- 8: 1
- 92: 1
- 135: 1
- 293: 1
- 305: 1
- 2286: 1
- 5040: 1
- 5639: 1
- 9714: 1
- 12885: 1
- 30694: 1
- 40082: 1
- 50661: 1
- 58959: 1
- 101785: 1
- 149495: 1
- 160235: 1
- 518148: 1
- 951466: 1
- 1361860: 1
Picking largest connected component...
Computing graph stats...
Computing nodes degrees...
Computing histogram...
Stats:
- Nodes: 1361860
- Edges: 1356266
Graph degrees histogram (degree/count):
- 1: 2547
- 2: 790748
- 3: 124055
- 4: 397684
- 5: 29479
- 6: 8481
- 7: 3512
- 8: 1819
- 9: 1032
- 10: 666
- 11: 448
- 12: 346
- 13: 207
- 14: 167
- 15: 139
- 16: 92
- 17: 81
- 18: 58
- 19: 44
- 20: 35
- 21: 29
- 22: 28
- 23: 24
- 24: 24
- 25: 11
- 26: 4
- 27: 13
- 28: 9
- 29: 13
- 30: 5
- 31: 4
- 32: 2
- 33: 3
- 34: 5
- 35: 2
- 36: 4
- 37: 1
- 38: 1
- 39: 1
- 40: 1
- 41: 1
- 42: 1
- 43: 5
- 44: 4
- 45: 2
- 47: 1
- 50: 1
- 52: 1
- 54: 2
- 57: 1
- 64: 1
- 67: 1
- 72: 1
- 73: 1
- 78: 1
- 83: 1
- 96: 1
- 99: 1
- 100: 1
- 109: 1
- 115: 1
- 119: 1
- 129: 1
- 149: 1
- 163: 1
- 182: 1
- 235: 1
Computing edge types...
Computing edge types histogram...
Edge types histogram (type/count):
- TreeEdge: 1361622
- ForwardEdge: 110203
- CrossEdge: 442890
Cleaning up...
cargo run --release -- show -i dataset/chrX.hprc-v1.0-pggb.local.gfa 136.17s user 28.10s system 89% cpu 3:03.85 total

4354
Cargo.lock generated

File diff suppressed because it is too large Load Diff

@ -6,6 +6,7 @@ edition = "2021"
[dependencies] [dependencies]
argh = "0.1.12" argh = "0.1.12"
indicatif = "0.17.8" indicatif = "0.17.8"
rand = "0.8.5"
[workspace] # [workspace]
members = ["examples/*"] # members = ["examples/*"]

@ -1,4 +1,4 @@
// #![allow(dead_code)] #![allow(dead_code)]
use std::{ use std::{
io::{self, BufRead, BufReader, Read}, io::{self, BufRead, BufReader, Read},

@ -1,19 +1,55 @@
#![allow(dead_code)]
use std::{ use std::{
cell::RefCell, collections::{BTreeMap, BTreeSet, VecDeque},
collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque},
fmt::Debug, fmt::Debug,
hash::Hash,
rc::Rc,
}; };
use indicatif::ProgressIterator; use indicatif::{ProgressBar, ProgressIterator};
use super::{AdjacencyGraph, Graph, UndirectedGraph};
/// [`Graph`] view over a directed [`AdjacencyGraph`], served straight from its fields.
impl<V> Graph<V> for AdjacencyGraph<V>
where
    V: Ord + Clone,
{
    /// Borrows the set of nodes.
    fn nodes(&self) -> &BTreeSet<V> {
        &self.nodes
    }

    /// Borrows the adjacency map (source node -> set of target nodes).
    fn adjacencies(&self) -> &BTreeMap<V, BTreeSet<V>> {
        &self.adjacencies
    }

    /// Builds a freshly allocated `source -> target` map out of the adjacency sets.
    ///
    /// The result is keyed by the source node, so a source with several targets ends up with
    /// a single entry: the last target inserted, i.e. the greatest one in the set's order.
    fn edges(&self) -> BTreeMap<V, V> {
        let mut pairs = BTreeMap::new();
        for (source, targets) in &self.adjacencies {
            for target in targets {
                // A later insert for the same source replaces the earlier one.
                pairs.insert(source.clone(), target.clone());
            }
        }
        pairs
    }
}
impl<V> Graph<V> for UndirectedGraph<V>
where
V: Ord + Clone,
{
fn nodes(&self) -> &BTreeSet<V> {
self.directed.nodes()
}
fn adjacencies(&self) -> &BTreeMap<V, BTreeSet<V>> {
self.directed.adjacencies()
}
use super::{AdjacencyGraph, UndirectedGraph}; fn edges(&self) -> BTreeMap<V, V> {
self.directed.edges()
}
}
#[allow(dead_code)] #[allow(dead_code)]
impl<V> AdjacencyGraph<V> impl<V> AdjacencyGraph<V>
where where
V: Ord + Clone + Debug, V: Ord + Eq + Clone + Debug,
{ {
pub fn new() -> Self { pub fn new() -> Self {
AdjacencyGraph { AdjacencyGraph {
@ -46,6 +82,12 @@ where
.insert(to); .insert(to);
} }
/// Removes the directed edge `from -> to`.
///
/// Does nothing when `from` has no adjacency entry. Only the adjacency set is touched.
pub fn remove_edge(&mut self, from: &V, to: &V) {
    match self.adjacencies.get_mut(from) {
        Some(targets) => {
            targets.remove(to);
        }
        None => {}
    }
}
pub fn get_adjacencies(&self, node: &V) -> Option<&BTreeSet<V>> { pub fn get_adjacencies(&self, node: &V) -> Option<&BTreeSet<V>> {
self.adjacencies.get(node) self.adjacencies.get(node)
} }
@ -75,16 +117,37 @@ where
opposite opposite
} }
pub fn undirected(&self) -> UndirectedGraph<&V> { pub fn undirected(&self) -> UndirectedGraph<V> {
let mut undirected = AdjacencyGraph::new(); let mut undirected = AdjacencyGraph::new();
// O(|E|) // O(|E|)
for (from, to) in self.edges() { for (from, to) in self.edges() {
undirected.add_edge(from, to); undirected.add_edge(from.clone(), to.clone());
undirected.add_edge(to, from); undirected.add_edge(to.clone(), from.clone());
} }
UndirectedGraph { graph: undirected } UndirectedGraph {
directed: undirected,
}
}
pub fn restricted(&self, nodes: &Vec<V>) -> AdjacencyGraph<V> {
let index = nodes.iter().collect::<BTreeSet<_>>();
let mut restricted = AdjacencyGraph::new();
for node in nodes {
restricted.add_node(node.clone());
if let Some(adjacencies) = self.get_adjacencies(&node) {
for adj in adjacencies {
if index.contains(adj) {
restricted.add_edge(node.clone(), adj.clone());
}
}
}
}
restricted
} }
pub fn has_edge(&self, from: &V, to: &V) -> bool { pub fn has_edge(&self, from: &V, to: &V) -> bool {
@ -118,49 +181,6 @@ where
}) })
} }
/// This computes if this undirected graph is cyclic or not by searching for an oriented cycle in the graph
pub fn is_cyclic(&self) -> bool {
let mut remaining_nodes = self.nodes.iter().collect::<BTreeSet<_>>();
// let progress_bar = ProgressBar::new(self.nodes.len() as u64);
// let mut visited_count = 0;
while !remaining_nodes.is_empty() {
let start: &V = remaining_nodes.iter().next().unwrap();
// visited_count += 1;
remaining_nodes.remove(start);
// progress_bar.inc(1);
let mut dfs_visited = BTreeSet::new();
let mut stack = VecDeque::new();
stack.push_back(start);
// start a new dfs from the current node
while let Some(node) = stack.pop_back() {
if dfs_visited.contains(node) {
// println!("Found cycle after {} nodes", visited_count);
// progress_bar.finish();
return true;
}
// visited_count += 1;
remaining_nodes.remove(node);
// progress_bar.inc(1);
dfs_visited.insert(node.clone());
if let Some(adjacencies) = self.get_adjacencies(node) {
stack.extend(adjacencies);
}
}
}
// println!("Found cycle after {} nodes", visited_count);
// progress_bar.finish();
false
}
pub fn shortest_path_matrix(&self) -> BTreeMap<&V, BTreeMap<&V, usize>> { pub fn shortest_path_matrix(&self) -> BTreeMap<&V, BTreeMap<&V, usize>> {
let mut result = BTreeMap::new(); let mut result = BTreeMap::new();
@ -247,116 +267,20 @@ where
result result
} }
// pub fn compute_ccs_2(&self) -> Vec<Vec<V>> { fn gc(&mut self) {
// let mut cc: BTreeMap<V, Rc<RefCell<BTreeSet<V>>>> = BTreeMap::new(); let mut to_remove = Vec::new();
// for node in self.nodes.iter() {
// if cc.contains_key(&node) {
// continue;
// }
// // println!("All CC: {:?}", cc);
// let new_cc = Rc::new(RefCell::new(HashSet::new()));
// let mut stack: Vec<&V> = vec![node];
// while let Some(node) = stack.pop() {
// // println!("New CC: {:?}", new_cc.borrow());
// if cc.contains_key(&node) {
// // merge the two connected components and go to the next node
// let old_cc: &Rc<RefCell<HashSet<V>>> = cc.get(&node).unwrap();
// // println!(
// // "Merging {:?} with {:?} due to link to {:?}",
// // new_cc.borrow(),
// // old_cc.borrow(),
// // node
// // );
// new_cc
// .borrow_mut()
// .extend(old_cc.borrow().iter().map(|x| x.to_owned()));
// break;
// }
// if new_cc.borrow().contains(&node) {
// continue;
// }
// new_cc.borrow_mut().insert(node.clone());
// if let Some(adjacencies) = self.get_adjacencies(&node) {
// for adj in adjacencies {
// stack.push(adj);
// }
// }
// }
// for n in new_cc.borrow().iter() { for node in self.nodes.iter() {
// cc.insert(n.to_owned(), new_cc.clone()); if let Some(adjacencies) = self.get_adjacencies(node) {
// } if adjacencies.is_empty() {
// } to_remove.push(node.clone());
}
// // extract the unique connected components by pointers
// let mut result = Vec::new();
// let mut seen = HashSet::new();
// for node in self.nodes.iter() {
// if seen.contains(node) {
// continue;
// }
// let cc = cc.get(node).unwrap();
// seen.extend(cc.borrow().iter().map(|x| x.to_owned()));
// result.push(cc.borrow().iter().map(|x| x.to_owned()).collect());
// }
// result
// }
/// This function prints the number of nodes, edges and a histogram of the degrees of the nodes
/// in the graph (computing the degrees might take a long time)
pub fn print_stats(&self) {
let mut vertices_degrees = BTreeMap::new();
for (from, tos) in self
.adjacencies
.iter()
.progress()
.with_style(
indicatif::ProgressStyle::default_bar()
.template("{prefix} {spinner} [{elapsed_precise}] [{wide_bar}] {pos}/{len}")
.unwrap(),
)
.with_prefix("computing nodes degrees")
{
*vertices_degrees.entry(from).or_insert(0) += tos.len();
for to in tos {
*vertices_degrees.entry(to).or_insert(0) += 1;
} }
} }
let histogram: BTreeMap<usize, usize> = vertices_degrees for node in to_remove {
.iter() self.nodes.remove(&node);
.map(|(_, degree)| *degree) self.adjacencies.remove(&node);
.fold(BTreeMap::new(), |mut acc, degree| {
*acc.entry(degree).or_insert(0) += 1;
acc
});
println!("Stats:");
println!("Nodes: {}", self.nodes.len());
println!("Edges: {}", self.edges().count());
println!("Histogram:");
for (degree, count) in histogram.iter() {
println!("{}: {}", degree, count);
} }
} }
} }
@ -365,11 +289,23 @@ impl<V> UndirectedGraph<V>
where where
V: Ord + Eq + Clone + Debug, V: Ord + Eq + Clone + Debug,
{ {
/// Adds the undirected edge `from -- to` by storing both directed arcs in the underlying
/// directed graph.
pub fn add_edge(&mut self, from: V, to: V) {
    // Only the first insertion needs copies; the second one consumes the owned values.
    self.directed.add_edge(from.clone(), to.clone());
    self.directed.add_edge(to, from);
}
/// Removes the undirected edge `from -- to`, i.e. both directed arcs `from -> to` and
/// `to -> from` of the underlying directed graph, in that order.
pub fn remove_edge(&mut self, from: &V, to: &V) {
    for (source, target) in [(from, to), (to, from)] {
        self.directed.remove_edge(source, target);
    }
}
pub fn connected_components(&self) -> Vec<Vec<V>> { pub fn connected_components(&self) -> Vec<Vec<V>> {
let mut visited = BTreeSet::new(); let mut visited = BTreeSet::new();
let mut result = Vec::new(); let mut result = Vec::new();
for node in self.graph.nodes.iter() { let pb = ProgressBar::new(self.directed.nodes.len() as u64);
for node in self.directed.nodes.iter() {
if visited.contains(node) { if visited.contains(node) {
continue; continue;
} }
@ -382,9 +318,10 @@ where
continue; continue;
} }
pb.inc(1);
cc.insert(node.clone()); cc.insert(node.clone());
if let Some(adjacencies) = self.graph.get_adjacencies(&node) { if let Some(adjacencies) = self.directed.get_adjacencies(&node) {
for adj in adjacencies { for adj in adjacencies {
stack.push(adj); stack.push(adj);
} }
@ -395,6 +332,81 @@ where
result.push(cc.iter().map(|x| x.to_owned()).collect()); result.push(cc.iter().map(|x| x.to_owned()).collect());
} }
pb.finish();
result result
} }
/// Collapses chains (`o--o--o` paths) of this undirected graph into single edges.
///
/// For every not-yet-visited node a walk is started that keeps moving to the next node for as
/// long as the current node has exactly one neighbour that is not already on the walked path.
/// When the walked path has at least three nodes, every edge along it is removed and one edge
/// between its two endpoints is added instead. The number of bypassed interior nodes is
/// printed, and `gc()` is then called on the underlying directed graph.
///
/// NOTE(review): a walk only extends from a start node that has exactly one neighbour; a
/// start node with two or more neighbours stops immediately, so chains that are only ever
/// entered from such a node are left untouched. Confirm this is intended.
pub fn compact_chains(&mut self) {
let mut visited = BTreeSet::new();
// Iterate over a snapshot of the node set, since the graph is mutated inside the loop.
let nodes = self.directed.nodes.clone();
let pb = ProgressBar::new(nodes.len() as u64);
let mut compacted_count = 0;
for node in nodes {
if visited.contains(&node) {
continue;
}
let mut stack = vec![node];
while let Some(node) = stack.pop() {
if visited.contains(&node) {
continue;
}
// The progress bar only advances for popped start nodes, not for nodes reached by the
// walk below.
pb.inc(1);
visited.insert(node.clone());
// Extend the path while the current node has exactly one neighbour not yet on the path.
let mut curr = node;
let mut path = vec![curr.clone()];
while let Some(adjacencies) = self.directed.get_adjacencies(&curr) {
// NOTE(review): `path.contains` is a linear scan of the path for every neighbour.
let probes = adjacencies
.iter()
.filter(|&x| !path.contains(x))
.collect::<Vec<_>>();
if probes.len() != 1 {
break;
}
curr = probes[0].clone();
visited.insert(curr.clone());
path.push(curr.clone());
}
// Fewer than three nodes means there is no interior node to bypass. Note that in this
// case the neighbours of `curr` are not pushed on the stack either.
if path.len() < 3 {
continue;
}
// Drop every edge along the path, then connect its two endpoints directly.
path.windows(2).for_each(|x| {
self.remove_edge(&x[0], &x[1]);
});
self.add_edge(path[0].clone(), path[path.len() - 1].clone());
// Every node of the path except the two endpoints has been bypassed.
compacted_count += path.len() - 2;
// Continue the search from the neighbours of the node where the walk stopped.
if let Some(adjacencies) = self.directed.get_adjacencies(&curr) {
for adj in adjacencies {
stack.push(adj.clone());
}
}
}
}
println!("Compacted {} nodes", compacted_count);
// Clean up the underlying directed graph after the edge removals above.
self.directed.gc();
pb.finish();
}
} }

@ -1,6 +1,7 @@
#![allow(dead_code)]
use std::{ use std::{
cmp::Ordering, collections::{BTreeMap, BTreeSet},
collections::{BTreeMap, BTreeSet, HashMap, HashSet},
fmt::Debug, fmt::Debug,
hash::Hash, hash::Hash,
}; };
@ -9,7 +10,7 @@ use indicatif::ProgressBar;
use super::AdjacencyGraph; use super::AdjacencyGraph;
#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] #[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub enum EdgeType { pub enum EdgeType {
TreeEdge, TreeEdge,
BackEdge, BackEdge,
@ -103,116 +104,100 @@ where
.classify_edges_rec(self); .classify_edges_rec(self);
} }
// pub fn compute_edge_types(&self) -> BTreeMap<(V, V), EdgeType> { pub fn compute_edge_types(&self) -> BTreeMap<(V, V), EdgeType> {
// println!("{:?}", self); let mut edge_types = BTreeMap::new();
let mut visited = BTreeSet::new();
// let mut edge_types: BTreeMap<(V, V), EdgeType> = BTreeMap::new(); let mut start_times = BTreeMap::new();
// let mut visited: BTreeSet<V> = BTreeSet::new(); let mut finished_nodes = BTreeSet::new();
let mut time = 0;
// let mut start_times: BTreeMap<V, i32> = BTreeMap::new();
// let mut finished_nodes: BTreeSet<V> = BTreeSet::new(); let progress_bar = ProgressBar::new(self.nodes().len() as u64);
// #[derive(Debug)] enum Continuation<V> {
// enum RecurseState<V> { Start { node: V, parent: Option<V> },
// Visit { node: V, parent: Option<V> }, Neighbors { node: V, continue_from: usize },
// End { node: V }, End { node: V },
// } }
// let mut time = 0; for start in self.nodes().iter() {
if visited.contains(start) {
// // let progress_bar = ProgressBar::new(self.nodes().len() as u64); continue;
}
// for start in self.nodes().iter() {
// if visited.contains(start) { let mut continuations = vec![Continuation::Start {
// continue; node: start.clone(),
// } parent: None,
}];
// let mut stack: Vec<RecurseState<V>> = Vec::new();
while let Some(continuation) = continuations.pop() {
// // The first node does not have a parent match continuation {
// stack.push(RecurseState::End { Continuation::Start { node, parent } => {
// node: start.clone(), continuations.push(Continuation::End { node: node.clone() });
// });
// stack.push(RecurseState::Visit { progress_bar.inc(1);
// node: start.clone(), visited.insert(node.clone());
// parent: None, time += 1;
// }); start_times.insert(node.clone(), time);
// println!("Starting DFS from {:?}", start); if let Some(parent) = parent {
edge_types.insert((parent.clone(), node.clone()), EdgeType::TreeEdge);
// while let Some(state) = stack.pop() { }
// println!("Current: {:?}", state);
// println!("Finished Nodes: {:?}", finished_nodes); continuations.push(Continuation::Neighbors {
node: node.clone(),
// match state { continue_from: 0,
// RecurseState::Visit { node, parent } => { });
// if visited.contains(&node) { }
// // progress_bar.inc(1); Continuation::Neighbors {
// } node,
continue_from: index,
// if let Some(parent) = parent.clone() { } => {
// if !visited.contains(&node) { if let Some(adjacencies) = self.get_adjacencies(&node) {
// println!("{:?} => TreeEdge", (parent.clone(), node.clone())); for (i, adj) in adjacencies.iter().enumerate() {
// edge_types if i < index {
// .insert((parent.clone(), node.clone()), EdgeType::TreeEdge); continue;
// } else { }
// if !finished_nodes.contains(&parent) {
// println!("{:?} => BackEdge", (parent.clone(), node.clone())); if !visited.contains(adj) {
// edge_types continuations.push(Continuation::Neighbors {
// .insert((node.clone(), parent.clone()), EdgeType::BackEdge); node: node.clone(),
// } else if start_times.get(&node) < start_times.get(&parent) { continue_from: i + 1,
// println!("{:?} => ForwardEdge", (parent.clone(), node.clone())); });
// edge_types.insert( continuations.push(Continuation::Start {
// (node.clone(), parent.clone()), node: adj.clone(),
// EdgeType::ForwardEdge, parent: Some(node.clone()),
// ); });
// } else { break;
// println!("{:?} => CrossEdge", (parent.clone(), node.clone())); } else {
// edge_types.insert( if !finished_nodes.contains(adj) {
// (node.clone(), parent.clone()), edge_types.insert(
// EdgeType::CrossEdge, (node.clone(), adj.clone()),
// ); EdgeType::BackEdge,
// } );
// } } else if start_times.get(&node) < start_times.get(adj) {
// } edge_types.insert(
(node.clone(), adj.clone()),
// time += 1; EdgeType::ForwardEdge,
// start_times.insert(node.clone(), time); );
} else {
// visited.insert(node.clone()); edge_types.insert(
(node.clone(), adj.clone()),
// // it is extremely important that this before the adjacencies to correctly EdgeType::CrossEdge,
// // iterate over the graph );
// // stack.push(RecurseState::AfterNeighbors { node }); }
}
// if let Some(adjacencies) = self.get_adjacencies(&node) { }
// println!("adjacencies: {:?}", adjacencies); }
// for adj in adjacencies.iter().rev() { }
// if !visited.contains(&adj) { Continuation::End { node } => {
// stack.push(RecurseState::End { node: adj.clone() }); time += 1;
// stack.push(RecurseState::Visit { finished_nodes.insert(node.clone());
// node: adj.clone(), }
// parent: Some(node.clone()), }
// }); }
// } }
// }
// } progress_bar.finish();
// } return edge_types;
// RecurseState::End { node } => { }
// time += 1;
// finished_nodes.insert(node.clone());
// }
// }
// println!();
// // println!("after:");
// // println!("~> {:?}", stack);
// }
// }
// // progress_bar.finish();
// return edge_types;
// }
} }

@ -1,9 +1,17 @@
use std::{ use std::{
collections::{BTreeMap, BTreeSet, HashSet}, collections::{BTreeMap, BTreeSet},
fmt::Debug, fmt::Debug,
hash::Hash,
}; };
/// Read-only access to the nodes and adjacencies of a graph with nodes of type `V`.
pub trait Graph<V>
where
V: Clone,
{
/// Returns a reference to the set of nodes.
fn nodes(&self) -> &BTreeSet<V>;
/// Returns a reference to the adjacency map (node -> set of adjacent nodes).
fn adjacencies(&self) -> &BTreeMap<V, BTreeSet<V>>;
/// Returns the edges as an owned `source -> target` map.
///
/// NOTE(review): a `BTreeMap<V, V>` can hold only one target per source node, so a node
/// with several outgoing edges cannot be fully represented by this return type. Confirm
/// whether a collection of `(V, V)` pairs was intended.
fn edges(&self) -> BTreeMap<V, V>;
}
#[derive(Debug)] #[derive(Debug)]
pub struct AdjacencyGraph<V> pub struct AdjacencyGraph<V>
where where
@ -13,11 +21,12 @@ where
adjacencies: BTreeMap<V, BTreeSet<V>>, adjacencies: BTreeMap<V, BTreeSet<V>>,
} }
#[derive(Debug)]
pub struct UndirectedGraph<V> pub struct UndirectedGraph<V>
where where
V: Clone, V: Clone,
{ {
graph: AdjacencyGraph<V>, pub directed: AdjacencyGraph<V>,
} }
pub mod algorithms; pub mod algorithms;
@ -59,7 +68,7 @@ mod tests {
fn test_compute_edge_types_cycle() { fn test_compute_edge_types_cycle() {
let g = AdjacencyGraph::from_edges(&[(0, 1), (1, 2), (2, 3), (3, 0)]); let g = AdjacencyGraph::from_edges(&[(0, 1), (1, 2), (2, 3), (3, 0)]);
let edge_types = g.compute_edge_types_rec(); let edge_types = g.compute_edge_types();
print_edge_types(&edge_types); print_edge_types(&edge_types);
assert_eq!(edge_types.len(), 4); assert_eq!(edge_types.len(), 4);
@ -73,7 +82,7 @@ mod tests {
fn test_compute_edge_types_forward() { fn test_compute_edge_types_forward() {
let g = AdjacencyGraph::from_edges(&[(0, 1), (1, 2), (0, 2)]); let g = AdjacencyGraph::from_edges(&[(0, 1), (1, 2), (0, 2)]);
let edge_types = g.compute_edge_types_rec(); let edge_types = g.compute_edge_types();
print_edge_types(&edge_types); print_edge_types(&edge_types);
assert_eq!(edge_types.len(), 3); assert_eq!(edge_types.len(), 3);
@ -86,7 +95,7 @@ mod tests {
fn test_compute_edge_types_cross() { fn test_compute_edge_types_cross() {
let g = AdjacencyGraph::from_edges(&[(0, 1), (1, 2), (0, 3), (3, 4), (2, 4)]); let g = AdjacencyGraph::from_edges(&[(0, 1), (1, 2), (0, 3), (3, 4), (2, 4)]);
let edge_types = g.compute_edge_types_rec(); let edge_types = g.compute_edge_types();
print_edge_types(&edge_types); print_edge_types(&edge_types);
assert_eq!(edge_types.len(), 5); assert_eq!(edge_types.len(), 5);
@ -110,7 +119,7 @@ mod tests {
("w", "z"), ("w", "z"),
]); ]);
let edge_types = g.compute_edge_types_rec(); let edge_types = g.compute_edge_types();
print_edge_types(&edge_types); print_edge_types(&edge_types);
assert_eq!(edge_types.len(), 7); assert_eq!(edge_types.len(), 7);
@ -122,4 +131,14 @@ mod tests {
assert_eq!(edge_types[&("w", "y")], edge_types::EdgeType::CrossEdge); assert_eq!(edge_types[&("w", "y")], edge_types::EdgeType::CrossEdge);
assert_eq!(edge_types[&("w", "z")], edge_types::EdgeType::TreeEdge); assert_eq!(edge_types[&("w", "z")], edge_types::EdgeType::TreeEdge);
} }
#[test]
fn test_compact_chains() {
    // 0 - 1 - 2 - 3 - 4 is a single chain: compaction is expected to bypass the interior
    // nodes and connect the two endpoints directly.
    let mut g = AdjacencyGraph::from_edges(&[(0, 1), (1, 2), (2, 3), (3, 4)]).undirected();

    println!("Compacting chains...");
    println!("{:?}", g);
    g.compact_chains();
    println!("{:?}", g);

    // The endpoints are now linked in both directions...
    assert!(g.directed.has_edge(&0, &4));
    assert!(g.directed.has_edge(&4, &0));
    // ...and the first edge of the original chain is gone.
    assert!(!g.directed.has_edge(&0, &1));
}
} }

@ -1,15 +1,20 @@
#![allow(dead_code)]
mod gfa; mod gfa;
mod graph; mod graph;
use std::{ use std::{
collections::{BTreeMap, HashMap}, collections::{BTreeMap, HashMap},
fmt::Debug,
io::{BufRead, BufReader}, io::{BufRead, BufReader},
process,
}; };
use argh::FromArgs; use argh::FromArgs;
use gfa::{Entry, Orientation}; use gfa::{Entry, Orientation};
use graph::AdjacencyGraph; use graph::{AdjacencyGraph, Graph};
use indicatif::ProgressIterator; use indicatif::ProgressIterator;
use rand::seq::SliceRandom;
#[derive(FromArgs, PartialEq, Debug)] #[derive(FromArgs, PartialEq, Debug)]
/// Strumento CLI per il progetto di Algoritmi e Strutture Dati 2024 /// Strumento CLI per il progetto di Algoritmi e Strutture Dati 2024
@ -71,64 +76,179 @@ fn main() -> std::io::Result<()> {
} }
} }
// Print the graph compute_graph_stats(&graph);
// for ((from, orient), adjacencies) in graph.adjacencies().iter() {
// println!(
// "{}{} -> {}",
// from,
// orient,
// adjacencies
// .iter()
// .map(|(to, orient)| format!("{}{}", to, orient))
// .collect::<Vec<String>>()
// .join(", ")
// );
// }
// let cc = graph.compute_ccs(); let edge_types = compute_edge_types(&graph);
// println!("CCs: {:?}", cc); println!("Removing back edges...");
// println!("Number of connected components: {}", cc.len());
// graph.print_stats(); for ((from, to), edge_type) in edge_types.iter() {
match edge_type {
graph::edge_types::EdgeType::BackEdge => {
graph.remove_edge(from, to);
}
_ => {}
}
}
// println!("Graph has cycles: {}", graph.is_cyclic()); compute_edge_types(&graph);
let edge_types = graph.compute_edge_types_rec(); let ccs = compute_ccs(&graph);
let edge_type_histogram: BTreeMap<_, _> = edge_types println!("Picking largest connected component...");
// pick the largest connected component
let largest_cc = ccs
.iter() .iter()
.map(|(_, edge_type)| edge_type) .max_by_key(|cc| cc.len())
.fold(BTreeMap::new(), |mut acc, edge_type| { .expect("at least one connected components");
*acc.entry(edge_type).or_insert(0) += 1;
let largest_cc_graph = graph.restricted(largest_cc);
compute_graph_stats(&largest_cc_graph);
compute_edge_types(&largest_cc_graph);
// let mut largest_cc_graph = graph.restricted(largest_cc).undirected();
// compute_graph_stats(&largest_cc_graph);
// // compute_edge_types(&largest_cc_graph);
// println!("Compacting chains...");
// largest_cc_graph.compact_chains();
// compute_graph_stats(&largest_cc_graph);
// compute_edge_types(&largest_cc_graph.directed);
println!("Cleaning up...");
process::exit(0);
}
}
}
fn compute_ccs<V>(graph: &AdjacencyGraph<V>) -> Vec<Vec<V>>
where
V: Ord + Eq + Clone + Debug,
{
println!("Computing connected components...");
let ccs = graph.undirected().connected_components();
println!("Computing sizes histogram...");
let hist: BTreeMap<_, _> = ccs
.iter()
.map(|cc| cc.len()) // map to size of each cc
.fold(BTreeMap::new(), |mut acc, len| {
*acc.entry(len).or_insert(0) += 1;
acc acc
}); });
println!("Edge types histogram: {:?}", edge_type_histogram); println!("Connected components histogram (size/count):");
for (size, count) in hist.iter() {
println!("- {}: {}", size, count);
}
// println!("Convert to undirected graph..."); ccs
// let undir_graph = graph.undirected(); }
// println!("Computing connected components..."); fn compute_edge_types<V>(graph: &AdjacencyGraph<V>) -> BTreeMap<(V, V), graph::edge_types::EdgeType>
// let cc = undir_graph.connected_components(); where
V: Ord + Eq + Clone + Debug,
{
println!("Computing edge types...");
let edge_types = graph.compute_edge_types();
println!("Computing edge types histogram...");
let histogram = edge_types.iter().map(|(_, edge_type)| edge_type).fold(
BTreeMap::new(),
|mut acc, edge_type| {
*acc.entry(edge_type.clone()).or_insert(0) += 1;
acc
},
);
// println!("Computing histogram..."); println!("Edge types histogram (type/count):");
// let cc_histogram: BTreeMap<_, _> = cc for (edge_type, count) in histogram.iter() {
// .iter() println!("- {:?}: {}", edge_type, count);
// .map(|cc| cc.len()) // map to size of each cc }
// .fold(BTreeMap::new(), |mut acc, len| {
// *acc.entry(len).or_insert(0) += 1;
// acc
// });
// println!("Connected Components Size Histogram:"); edge_types
// for (size, count) in cc_histogram.iter() { }
// println!("{}: {}", size, count);
// }
println!("Cleaning up..."); fn compute_shuffled_graph<V>(graph: &AdjacencyGraph<V>) -> AdjacencyGraph<V>
where
V: Ord + Eq + Clone + Debug,
{
println!("Shuffling graph...");
let mut g2 = AdjacencyGraph::new();
let mut shuffled_nodes: Vec<_> = graph.nodes().iter().collect::<Vec<_>>();
shuffled_nodes.shuffle(&mut rand::thread_rng());
for node in shuffled_nodes.iter() {
g2.add_node((*node).clone());
} }
let mut shuffled_map = BTreeMap::new();
for (i, node) in graph.nodes().iter().enumerate() {
shuffled_map.insert(node.clone(), shuffled_nodes[i].clone());
} }
Ok(()) for edge in graph.edges() {
g2.add_edge(
shuffled_map.get(&edge.0).unwrap().clone(),
shuffled_map.get(&edge.1).unwrap().clone(),
);
}
compute_edge_types(&g2);
g2
}
/// This function prints the number of nodes, edges and a histogram of the degrees of the nodes
/// in the graph (computing the degrees might take a long time)
fn compute_graph_stats<V>(graph: &impl Graph<V>)
where
V: Ord + Eq + Clone + Debug,
{
println!("Computing graph stats...");
let mut vertices_degrees = BTreeMap::new();
let mut vertices_in_degrees = BTreeMap::new();
let mut vertices_out_degrees = BTreeMap::new();
println!("Computing nodes degrees...");
let progress_bar = indicatif::ProgressBar::new(graph.edges().len() as u64);
for (from, tos) in graph.adjacencies() {
*vertices_degrees.entry(from).or_insert(0) += tos.len();
*vertices_out_degrees.entry(from).or_insert(0) += tos.len();
for to in tos {
progress_bar.inc(1);
*vertices_degrees.entry(to).or_insert(0) += 1;
*vertices_in_degrees.entry(to).or_insert(0) += 1;
}
}
progress_bar.finish();
println!("Computing histogram...");
let histogram: BTreeMap<usize, usize> = vertices_degrees
.iter()
.map(|(_, degree)| *degree)
.fold(BTreeMap::new(), |mut acc, degree| {
*acc.entry(degree).or_insert(0) += 1;
acc
});
println!("Stats:");
println!("- Nodes: {}", graph.nodes().len());
println!("- Edges: {}", graph.edges().len());
println!("Graph degrees histogram (degree/count):");
for (degree, count) in histogram.iter() {
println!("- {}: {}", degree, count);
}
} }

Loading…
Cancel
Save