From b0056571e24258857ede5a3d92e7e08905ffdaf6 Mon Sep 17 00:00:00 2001 From: Antonio De Lucreziis Date: Mon, 28 Oct 2024 18:20:23 +0100 Subject: [PATCH] disperazione 1 --- examples/edge_classify_2.py | 13 +- src/gfa/mod.rs | 2 +- src/graph/algorithms.rs | 159 ++++++++++++------------ src/graph/edge_types.rs | 234 +++++++++++++++++++++++++----------- src/graph/mod.rs | 124 ++++++++++++++----- src/main.rs | 2 +- 6 files changed, 357 insertions(+), 177 deletions(-) diff --git a/examples/edge_classify_2.py b/examples/edge_classify_2.py index 1a4cfe6..5edff0c 100644 --- a/examples/edge_classify_2.py +++ b/examples/edge_classify_2.py @@ -46,6 +46,7 @@ class Graph: def add_edge(self, u, v): if u not in self.adjacency_list: self.adjacency_list[u] = [] + self.adjacency_list[u].append(v) def vertices(self): @@ -56,10 +57,18 @@ class Graph: # Example usage: g = Graph() +# g.add_edge(0, 1) +# g.add_edge(1, 2) +# g.add_edge(2, 3) +# g.add_edge(3, 0) +# g.add_edge(3, 4) +# g.add_edge(4, 5) +# g.add_edge(5, 0) +# g.add_edge(4, 2) + g.add_edge(0, 1) g.add_edge(1, 2) -g.add_edge(2, 3) -g.add_edge(3, 0) # Creating the cycle 0 -> 1 -> 2 -> 3 -> 0 +g.add_edge(0, 2) # Running DFS results = dfs(g) diff --git a/src/gfa/mod.rs b/src/gfa/mod.rs index 8a554bc..ff02d65 100644 --- a/src/gfa/mod.rs +++ b/src/gfa/mod.rs @@ -1,6 +1,6 @@ use std::fmt::Display; -#[derive(Debug, Hash, PartialEq, Eq, Clone)] +#[derive(Debug, Hash, PartialEq, PartialOrd, Ord, Eq, Copy, Clone)] pub enum Orientation { Forward, Reverse, diff --git a/src/graph/algorithms.rs b/src/graph/algorithms.rs index 2abc7ce..a3b50e4 100644 --- a/src/graph/algorithms.rs +++ b/src/graph/algorithms.rs @@ -1,6 +1,6 @@ use std::{ cell::RefCell, - collections::{BTreeMap, HashMap, HashSet, VecDeque}, + collections::{BTreeMap, BTreeSet, HashMap, HashSet, VecDeque}, fmt::Debug, hash::Hash, rc::Rc, @@ -13,41 +13,48 @@ use super::{AdjacencyGraph, UndirectedGraph}; #[allow(dead_code)] impl AdjacencyGraph where - V: Hash + Eq + Clone + Debug, + V: Ord + Clone + Debug, { pub fn new() -> Self { AdjacencyGraph { - nodes: HashSet::new(), - adjacencies: HashMap::new(), + nodes: BTreeSet::new(), + adjacencies: BTreeMap::new(), } } + pub fn from_edges(edges: &[(V, V)]) -> Self { + let mut graph = AdjacencyGraph::new(); + + for (from, to) in edges { + graph.add_edge(from.clone(), to.clone()); + } + + graph + } + pub fn add_node(&mut self, node: V) { - // O(1) self.nodes.insert(node); } pub fn add_edge(&mut self, from: V, to: V) { - // O(1) self.add_node(from.clone()); self.add_node(to.clone()); - // O(1) self.adjacencies .entry(from) - .or_insert_with(HashSet::new) + .or_insert_with(BTreeSet::new) .insert(to); } - pub fn get_adjacencies(&self, node: &V) -> Option<&HashSet> { + pub fn get_adjacencies(&self, node: &V) -> Option<&BTreeSet> { self.adjacencies.get(node) } - pub fn adjacencies(&self) -> &HashMap> { + pub fn adjacencies(&self) -> &BTreeMap> { &self.adjacencies } - pub fn nodes(&self) -> &HashSet { + pub fn nodes(&self) -> &BTreeSet { &self.nodes } @@ -91,7 +98,7 @@ where } pub fn dfs<'a>(&'a self, node: &'a V) -> impl Iterator + 'a { - let mut visited = HashSet::new(); + let mut visited = BTreeSet::new(); let mut stack = VecDeque::from([node]); std::iter::from_fn(move || { @@ -113,7 +120,7 @@ where /// This computes if this undirected graph is cyclic or not by searching for an oriented cycle in the graph pub fn is_cyclic(&self) -> bool { - let mut remaining_nodes = self.nodes.iter().collect::>(); + let mut remaining_nodes = self.nodes.iter().collect::>(); // let progress_bar = ProgressBar::new(self.nodes.len() as u64); // let mut visited_count = 0; @@ -125,7 +132,7 @@ where remaining_nodes.remove(start); // progress_bar.inc(1); - let mut dfs_visited = HashSet::new(); + let mut dfs_visited = BTreeSet::new(); let mut stack = VecDeque::new(); stack.push_back(start); @@ -154,12 +161,12 @@ where false } - pub fn shortest_path_matrix(&self) -> HashMap<&V, HashMap<&V, usize>> { - let mut result = HashMap::new(); + pub fn shortest_path_matrix(&self) -> BTreeMap<&V, BTreeMap<&V, usize>> { + let mut result = BTreeMap::new(); for node in self.nodes.iter() { - let mut distances = HashMap::new(); - let mut visited = HashSet::new(); + let mut distances = BTreeMap::new(); + let mut visited = BTreeSet::new(); let mut queue = VecDeque::from([node]); distances.insert(node, 0); @@ -190,7 +197,7 @@ where } pub fn compute_ccs(&self) -> Vec> { - let mut visited = HashSet::new(); + let mut visited = BTreeSet::new(); let mut result = Vec::new(); let op = self.opposite(); @@ -210,7 +217,7 @@ where continue; } - let mut cc: HashSet = HashSet::new(); + let mut cc: BTreeSet = BTreeSet::new(); let mut stack: Vec<&V> = vec![node]; while let Some(node) = stack.pop() { @@ -240,82 +247,82 @@ where result } - pub fn compute_ccs_2(&self) -> Vec> { - let mut cc: HashMap>>> = HashMap::new(); + // pub fn compute_ccs_2(&self) -> Vec> { + // let mut cc: BTreeMap>>> = BTreeMap::new(); - for node in self.nodes.iter() { - if cc.contains_key(&node) { - continue; - } + // for node in self.nodes.iter() { + // if cc.contains_key(&node) { + // continue; + // } - // println!("All CC: {:?}", cc); + // // println!("All CC: {:?}", cc); - let new_cc = Rc::new(RefCell::new(HashSet::new())); + // let new_cc = Rc::new(RefCell::new(HashSet::new())); - let mut stack: Vec<&V> = vec![node]; + // let mut stack: Vec<&V> = vec![node]; - while let Some(node) = stack.pop() { - // println!("New CC: {:?}", new_cc.borrow()); + // while let Some(node) = stack.pop() { + // // println!("New CC: {:?}", new_cc.borrow()); - if cc.contains_key(&node) { - // merge the two connected components and go to the next node + // if cc.contains_key(&node) { + // // merge the two connected components and go to the next node - let old_cc: &Rc>> = cc.get(&node).unwrap(); + // let old_cc: &Rc>> = cc.get(&node).unwrap(); - // println!( - // "Merging {:?} with {:?} due to link to {:?}", - // new_cc.borrow(), - // old_cc.borrow(), - // node - // ); + // // println!( + // // "Merging {:?} with {:?} due to link to {:?}", + // // new_cc.borrow(), + // // old_cc.borrow(), + // // node + // // ); - new_cc - .borrow_mut() - .extend(old_cc.borrow().iter().map(|x| x.to_owned())); + // new_cc + // .borrow_mut() + // .extend(old_cc.borrow().iter().map(|x| x.to_owned())); - break; - } + // break; + // } - if new_cc.borrow().contains(&node) { - continue; - } + // if new_cc.borrow().contains(&node) { + // continue; + // } - new_cc.borrow_mut().insert(node.clone()); + // new_cc.borrow_mut().insert(node.clone()); - if let Some(adjacencies) = self.get_adjacencies(&node) { - for adj in adjacencies { - stack.push(adj); - } - } - } + // if let Some(adjacencies) = self.get_adjacencies(&node) { + // for adj in adjacencies { + // stack.push(adj); + // } + // } + // } - for n in new_cc.borrow().iter() { - cc.insert(n.to_owned(), new_cc.clone()); - } - } + // for n in new_cc.borrow().iter() { + // cc.insert(n.to_owned(), new_cc.clone()); + // } + // } - // extract the unique connected components by pointers - let mut result = Vec::new(); - let mut seen = HashSet::new(); + // // extract the unique connected components by pointers + // let mut result = Vec::new(); + // let mut seen = HashSet::new(); - for node in self.nodes.iter() { - if seen.contains(node) { - continue; - } + // for node in self.nodes.iter() { + // if seen.contains(node) { + // continue; + // } - let cc = cc.get(node).unwrap(); - seen.extend(cc.borrow().iter().map(|x| x.to_owned())); + // let cc = cc.get(node).unwrap(); + // seen.extend(cc.borrow().iter().map(|x| x.to_owned())); - result.push(cc.borrow().iter().map(|x| x.to_owned()).collect()); - } + // result.push(cc.borrow().iter().map(|x| x.to_owned()).collect()); + // } - result - } + // result + // } /// This function prints the number of nodes, edges and a histogram of the degrees of the nodes /// in the graph (computing the degrees might take a long time) pub fn print_stats(&self) { - let mut vertices_degrees = HashMap::new(); + let mut vertices_degrees = BTreeMap::new(); for (from, tos) in self .adjacencies @@ -356,10 +363,10 @@ where impl UndirectedGraph where - V: Hash + Eq + Clone + Debug, + V: Ord + Eq + Clone + Debug, { pub fn connected_components(&self) -> Vec> { - let mut visited = HashSet::new(); + let mut visited = BTreeSet::new(); let mut result = Vec::new(); for node in self.graph.nodes.iter() { @@ -367,7 +374,7 @@ where continue; } - let mut cc: HashSet = HashSet::new(); + let mut cc: BTreeSet = BTreeSet::new(); let mut stack: Vec<&V> = vec![node]; while let Some(node) = stack.pop() { diff --git a/src/graph/edge_types.rs b/src/graph/edge_types.rs index a08ee5f..eb5310d 100644 --- a/src/graph/edge_types.rs +++ b/src/graph/edge_types.rs @@ -1,6 +1,6 @@ use std::{ cmp::Ordering, - collections::{HashMap, HashSet}, + collections::{BTreeMap, BTreeSet, HashMap, HashSet}, fmt::Debug, hash::Hash, }; @@ -17,106 +17,202 @@ pub enum EdgeType { CrossEdge, } -impl AdjacencyGraph +struct ClassifyState { + progress_bar: ProgressBar, + + edge_types: BTreeMap<(V, V), EdgeType>, + + visited: BTreeSet, + + start_times: BTreeMap, + finished_nodes: BTreeSet, + + time: i32, +} + +impl ClassifyState where - V: Hash + Eq + Clone + Debug, + V: Ord + Eq + Clone + Debug, { - pub fn compute_edge_types(&self) -> HashMap<(&V, &V), EdgeType> { - let mut edge_types = HashMap::new(); + pub fn classify_edges_rec(mut self, graph: &AdjacencyGraph) -> BTreeMap<(V, V), EdgeType> { + for start in graph.nodes().iter() { + if self.visited.contains(start) { + continue; + } - // TODO: ... + self.dfs(graph, start, None); + } - return edge_types; + self.progress_bar.finish(); + return self.edge_types; } - // pub fn compute_edge_types(&self) -> HashMap<(&V, &V), EdgeType> { - // /// To correctly compute the start and end times of the nodes in the - // /// graph, we need to keep do work before and after the recursion call - // enum RecurseState<'a, V> { - // Before(&'a V), - // BeforeNeighbor(&'a V, &'a V), - // AfterNeighbor(&'a V), - // } + pub fn dfs(&mut self, graph: &AdjacencyGraph, node: &V, parent: Option<&V>) { + if self.visited.contains(node) { + return; + } + + self.progress_bar.inc(1); + self.visited.insert(node.clone()); + self.time += 1; + self.start_times.insert(node.clone(), self.time); + + if let Some(parent) = parent { + self.edge_types + .insert((parent.clone(), node.clone()), EdgeType::TreeEdge); + } + + if let Some(adjacencies) = graph.get_adjacencies(node) { + for adj in adjacencies.iter() { + if !self.visited.contains(adj) { + self.dfs(graph, adj, Some(node)); + } else { + if !self.finished_nodes.contains(adj) { + self.edge_types + .insert((node.clone(), adj.clone()), EdgeType::BackEdge); + } else if self.start_times.get(node) < self.start_times.get(adj) { + self.edge_types + .insert((node.clone(), adj.clone()), EdgeType::ForwardEdge); + } else { + self.edge_types + .insert((node.clone(), adj.clone()), EdgeType::CrossEdge); + } + } + } + } + + self.time += 1; + self.finished_nodes.insert(node.clone()); + } +} - // let mut edge_types = HashMap::new(); +impl AdjacencyGraph +where + V: Ord + Eq + Clone + Debug, +{ + pub fn compute_edge_types_rec(&self) -> BTreeMap<(V, V), EdgeType> { + return ClassifyState { + progress_bar: ProgressBar::new(self.nodes().len() as u64), + + edge_types: BTreeMap::new(), + visited: BTreeSet::new(), + start_times: BTreeMap::new(), + finished_nodes: BTreeSet::new(), + time: 0, + } + .classify_edges_rec(self); + } + + // pub fn compute_edge_types(&self) -> BTreeMap<(V, V), EdgeType> { + // println!("{:?}", self); + + // let mut edge_types: BTreeMap<(V, V), EdgeType> = BTreeMap::new(); + // let mut visited: BTreeSet = BTreeSet::new(); - // let mut visited = HashSet::new(); - // let mut start_times = HashMap::new(); - // let mut finished_nodes = HashSet::new(); + // let mut start_times: BTreeMap = BTreeMap::new(); + // let mut finished_nodes: BTreeSet = BTreeSet::new(); + + // #[derive(Debug)] + // enum RecurseState { + // Visit { node: V, parent: Option }, + // End { node: V }, + // } // let mut time = 0; - // let progress_bar = ProgressBar::new(self.nodes().len() as u64); + // // let progress_bar = ProgressBar::new(self.nodes().len() as u64); - // for node in self.nodes().iter() { - // if visited.contains(node) { + // for start in self.nodes().iter() { + // if visited.contains(start) { // continue; // } - // let mut stack = Vec::new(); + // let mut stack: Vec> = Vec::new(); + + // // The first node does not have a parent + // stack.push(RecurseState::End { + // node: start.clone(), + // }); + // stack.push(RecurseState::Visit { + // node: start.clone(), + // parent: None, + // }); - // stack.push(RecurseState::Before(node)); + // println!("Starting DFS from {:?}", start); // while let Some(state) = stack.pop() { + // println!("Current: {:?}", state); + // println!("Finished Nodes: {:?}", finished_nodes); + // match state { - // RecurseState::Before(node) => { - // progress_bar.inc(1); - // visited.insert(node.clone()); - // start_times.insert(node, time); - // time += 1; + // RecurseState::Visit { node, parent } => { + // if visited.contains(&node) { + // // progress_bar.inc(1); + // } - // // it is extremely important that this before the adjacencies to correctly - // // iterate over the graph + // if let Some(parent) = parent.clone() { + // if !visited.contains(&node) { + // println!("{:?} => TreeEdge", (parent.clone(), node.clone())); + // edge_types + // .insert((parent.clone(), node.clone()), EdgeType::TreeEdge); + // } else { + // if !finished_nodes.contains(&parent) { + // println!("{:?} => BackEdge", (parent.clone(), node.clone())); + // edge_types + // .insert((node.clone(), parent.clone()), EdgeType::BackEdge); + // } else if start_times.get(&node) < start_times.get(&parent) { + // println!("{:?} => ForwardEdge", (parent.clone(), node.clone())); + // edge_types.insert( + // (node.clone(), parent.clone()), + // EdgeType::ForwardEdge, + // ); + // } else { + // println!("{:?} => CrossEdge", (parent.clone(), node.clone())); + // edge_types.insert( + // (node.clone(), parent.clone()), + // EdgeType::CrossEdge, + // ); + // } + // } + // } - // if let Some(adjacencies) = self.get_adjacencies(node) { - // for adj in adjacencies { - // println!("Node: {:?} Adj: {:?}", node, adj,); + // time += 1; + // start_times.insert(node.clone(), time); - // stack.push(RecurseState::AfterNeighbor(node)); + // visited.insert(node.clone()); - // if !visited.contains(adj) { - // edge_types.insert((node, adj), EdgeType::TreeEdge); - // stack.push(RecurseState::Before(adj)); - // } else { - // stack.push(RecurseState::BeforeNeighbor(node, adj)); + // // it is extremely important that this before the adjacencies to correctly + // // iterate over the graph + // // stack.push(RecurseState::AfterNeighbors { node }); + + // if let Some(adjacencies) = self.get_adjacencies(&node) { + // println!("adjacencies: {:?}", adjacencies); + // for adj in adjacencies.iter().rev() { + // if !visited.contains(&adj) { + // stack.push(RecurseState::End { node: adj.clone() }); + // stack.push(RecurseState::Visit { + // node: adj.clone(), + // parent: Some(node.clone()), + // }); // } // } // } // } - // RecurseState::AfterNeighbor(node) => { - // finished_nodes.insert(node); + // RecurseState::End { node } => { // time += 1; - // } - // RecurseState::BeforeNeighbor(node, adj) => { - // let start_time_node = start_times.get(node).unwrap(); - // let start_time_adj = start_times.get(adj).unwrap(); - // let end_time_node = finished_nodes.get(node).unwrap_or(&0); - // let end_time_adj = finished_nodes.get(adj).unwrap_or(&0); - - // println!( - // "Times: ({:?}, {:?}) ({:?}, {:?})", - // start_time_node, end_time_node, start_time_adj, end_time_adj - // ); - - // match ( - // start_time_node.cmp(start_time_adj), - // end_time_node.cmp(end_time_adj), - // ) { - // (Ordering::Less, Ordering::Greater) => { - // edge_types.insert((node, adj), EdgeType::ForwardEdge); - // } - // (Ordering::Greater, Ordering::Less) => { - // edge_types.insert((node, adj), EdgeType::BackEdge); - // } - // _ => { - // edge_types.insert((node, adj), EdgeType::CrossEdge); - // } - // } + // finished_nodes.insert(node.clone()); // } // } + + // println!(); + + // // println!("after:"); + // // println!("~> {:?}", stack); // } // } - // edge_types + // // progress_bar.finish(); + + // return edge_types; // } } diff --git a/src/graph/mod.rs b/src/graph/mod.rs index a8737c5..ad5c364 100644 --- a/src/graph/mod.rs +++ b/src/graph/mod.rs @@ -1,5 +1,5 @@ use std::{ - collections::{HashMap, HashSet}, + collections::{BTreeMap, BTreeSet, HashSet}, fmt::Debug, hash::Hash, }; @@ -7,15 +7,15 @@ use std::{ #[derive(Debug)] pub struct AdjacencyGraph where - V: Hash + Eq + Clone, + V: Clone, { - nodes: HashSet, - adjacencies: HashMap>, + nodes: BTreeSet, + adjacencies: BTreeMap>, } pub struct UndirectedGraph where - V: Hash + Eq + Clone, + V: Clone, { graph: AdjacencyGraph, } @@ -29,29 +29,97 @@ mod tests { use super::*; - #[test] - fn test_compute_edge_types() { - let mut g = AdjacencyGraph::new(); - - g.add_edge(1, 2); - g.add_edge(2, 3); - g.add_edge(3, 4); - g.add_edge(4, 1); - - let edge_types = g.compute_edge_types(); - let edge_type_dict = - edge_types - .iter() - .fold(BTreeMap::new(), |mut acc, (edge, edge_type)| { - acc.entry(edge_type).or_insert_with(Vec::new).push(edge); - acc - }); - - for (edge_type, edges) in edge_type_dict.iter() { - println!("- {:?}", edge_type); - for edge in edges { - println!("Edge: {:?}", edge); - } + fn print_edge_types(edge_types: &BTreeMap<(T, T), edge_types::EdgeType>) + where + T: Debug, + { + println!(""); + println!("Edge types:"); + + for (edge, edge_type) in edge_types { + println!("{:?} -> {:?}: {:?}", edge.0, edge.1, edge_type); } + + // for (edge_type, edges) in edge_types + // .iter() + // .fold(BTreeMap::new(), |mut acc, (edge, edge_type)| { + // acc.entry(edge_type).or_insert_with(Vec::new).push(edge); + // acc + // }) + // .iter() + // { + // println!("- {:?}", edge_type); + // for edge in edges { + // println!("{:?}", edge); + // } + // } + } + + #[test] + fn test_compute_edge_types_cycle() { + let g = AdjacencyGraph::from_edges(&[(0, 1), (1, 2), (2, 3), (3, 0)]); + + let edge_types = g.compute_edge_types_rec(); + print_edge_types(&edge_types); + + assert_eq!(edge_types.len(), 4); + assert_eq!(edge_types[&(0, 1)], edge_types::EdgeType::TreeEdge); + assert_eq!(edge_types[&(1, 2)], edge_types::EdgeType::TreeEdge); + assert_eq!(edge_types[&(2, 3)], edge_types::EdgeType::TreeEdge); + assert_eq!(edge_types[&(3, 0)], edge_types::EdgeType::BackEdge); + } + + #[test] + fn test_compute_edge_types_forward() { + let g = AdjacencyGraph::from_edges(&[(0, 1), (1, 2), (0, 2)]); + + let edge_types = g.compute_edge_types_rec(); + print_edge_types(&edge_types); + + assert_eq!(edge_types.len(), 3); + assert_eq!(edge_types[&(0, 1)], edge_types::EdgeType::TreeEdge); + assert_eq!(edge_types[&(1, 2)], edge_types::EdgeType::TreeEdge); + assert_eq!(edge_types[&(0, 2)], edge_types::EdgeType::ForwardEdge); + } + + #[test] + fn test_compute_edge_types_cross() { + let g = AdjacencyGraph::from_edges(&[(0, 1), (1, 2), (0, 3), (3, 4), (2, 4)]); + + let edge_types = g.compute_edge_types_rec(); + print_edge_types(&edge_types); + + assert_eq!(edge_types.len(), 5); + assert_eq!(edge_types[&(0, 1)], edge_types::EdgeType::TreeEdge); + assert_eq!(edge_types[&(1, 2)], edge_types::EdgeType::TreeEdge); + assert_eq!(edge_types[&(0, 3)], edge_types::EdgeType::TreeEdge); + assert_eq!(edge_types[&(2, 4)], edge_types::EdgeType::TreeEdge); + assert_eq!(edge_types[&(3, 4)], edge_types::EdgeType::CrossEdge); + } + + #[test] + fn test_compute_edge_types_all() { + let g = AdjacencyGraph::from_edges(&[ + // + ("u", "v"), + ("u", "x"), + ("v", "y"), + ("y", "x"), + ("x", "v"), + ("w", "y"), + ("w", "z"), + ]); + + let edge_types = g.compute_edge_types_rec(); + print_edge_types(&edge_types); + + assert_eq!(edge_types.len(), 7); + assert_eq!(edge_types[&("u", "v")], edge_types::EdgeType::TreeEdge); + assert_eq!(edge_types[&("u", "x")], edge_types::EdgeType::ForwardEdge); + assert_eq!(edge_types[&("v", "y")], edge_types::EdgeType::TreeEdge); + assert_eq!(edge_types[&("y", "x")], edge_types::EdgeType::TreeEdge); + assert_eq!(edge_types[&("x", "v")], edge_types::EdgeType::BackEdge); + assert_eq!(edge_types[&("w", "y")], edge_types::EdgeType::CrossEdge); + assert_eq!(edge_types[&("w", "z")], edge_types::EdgeType::TreeEdge); } } diff --git a/src/main.rs b/src/main.rs index 7cfda14..8faa2c0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -94,7 +94,7 @@ fn main() -> std::io::Result<()> { // println!("Graph has cycles: {}", graph.is_cyclic()); - let edge_types = graph.compute_edge_types(); + let edge_types = graph.compute_edge_types_rec(); let edge_type_histogram: BTreeMap<_, _> = edge_types .iter()