From f19862206e86bf4eed09808d669183f64c472551 Mon Sep 17 00:00:00 2001 From: Antonio De Lucreziis Date: Sat, 19 Oct 2024 02:04:26 +0200 Subject: [PATCH] working cycle detection and edge classification --- src/adv_graph.rs | 16 ++++ src/graph.rs | 214 ++++++++++++++++++++++++++++++++++++++++++++++- src/lib.rs | 3 + src/main.rs | 34 ++++++-- src/parser.rs | 20 ++++- src/utils.rs | 1 + 6 files changed, 277 insertions(+), 11 deletions(-) create mode 100644 src/adv_graph.rs create mode 100644 src/utils.rs diff --git a/src/adv_graph.rs b/src/adv_graph.rs new file mode 100644 index 0000000..9264504 --- /dev/null +++ b/src/adv_graph.rs @@ -0,0 +1,16 @@ +use std::{collections::HashMap, hash::Hash}; + +struct GraphEdge { + from: u32, + to: u32, +} + +pub struct Graph +where + V: Hash + Eq + Clone, +{ + nodes: HashMap, + edges: HashMap<(u32, u32), E>, + + adjacency_list: HashMap>, +} diff --git a/src/graph.rs b/src/graph.rs index 346fcd7..4b0144a 100644 --- a/src/graph.rs +++ b/src/graph.rs @@ -1,12 +1,20 @@ use std::{ cell::RefCell, - collections::{HashMap, HashSet, VecDeque}, + collections::{BTreeMap, HashMap, HashSet, VecDeque}, fmt::Debug, hash::Hash, rc::Rc, }; -use indicatif::ProgressIterator; +use indicatif::{ProgressBar, ProgressIterator}; + +#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub enum EdgeType { + TreeEdge, + BackEdge, + ForwardEdge, + CrossEdge, +} #[derive(Debug)] pub struct AdjacencyGraph @@ -109,10 +117,171 @@ where return Some(node.clone()); } + None }) } + /// This computes if this undirected graph is cyclic or not by searching for an oriented cycle in the graph + pub fn is_cyclic(&self) -> bool { + let mut remaining_nodes = self.nodes.iter().collect::>(); + + // let progress_bar = ProgressBar::new(self.nodes.len() as u64); + // let mut visited_count = 0; + + while !remaining_nodes.is_empty() { + let start: &V = remaining_nodes.iter().next().unwrap(); + + // visited_count += 1; + remaining_nodes.remove(start); + // progress_bar.inc(1); + + let mut dfs_visited = HashSet::new(); + let mut stack = VecDeque::new(); + stack.push_back(start); + + // start a new dfs from the current node + while let Some(node) = stack.pop_back() { + if dfs_visited.contains(node) { + // println!("Found cycle after {} nodes", visited_count); + // progress_bar.finish(); + return true; + } + + // visited_count += 1; + remaining_nodes.remove(node); + // progress_bar.inc(1); + + dfs_visited.insert(node.clone()); + + if let Some(adjacencies) = self.get_adjacencies(node) { + stack.extend(adjacencies); + } + } + } + + // println!("Found cycle after {} nodes", visited_count); + // progress_bar.finish(); + false + } + + pub fn compute_edge_types(&self) -> HashMap<(&V, &V), EdgeType> { + /// To correctly compute the start and end times of the nodes in the graph, we need to keep do work before and after the recursion + /// call + enum RecurseState { + Before, + AfterNeighbor, + } + + let mut edge_types = HashMap::new(); + + let mut visited = HashSet::new(); + let mut start_times = HashMap::new(); + let mut end_times = HashMap::new(); + + let mut time = 0; + + let progress_bar = ProgressBar::new(self.nodes.len() as u64); + + for node in self.nodes.iter() { + if visited.contains(node) { + continue; + } + + let mut stack = Vec::new(); + + stack.push((node, RecurseState::Before)); + + while let Some((node, state)) = stack.pop() { + match state { + RecurseState::Before => { + progress_bar.inc(1); + visited.insert(node.clone()); + start_times.insert(node, time); + time += 1; + + // this is extremely important that is before the adjacencies to correctly + // iterate over the graph + + if let Some(adjacencies) = self.get_adjacencies(node) { + for adj in adjacencies { + // if visited.contains(adj) { + // if start_times.get(adj) < start_times.get(node) { + // edge_types.insert((node, adj), EdgeType::BackEdge); + // } else { + // edge_types.insert((node, adj), EdgeType::CrossEdge); + // } + // } else { + // edge_types.insert((node, adj), EdgeType::ForwardEdge); + // stack.push((adj, RecurseState::Before)); + // } + + stack.push((node, RecurseState::AfterNeighbor)); + + if !visited.contains(adj) { + edge_types.insert((node, adj), EdgeType::TreeEdge); + stack.push((adj, RecurseState::Before)); + } else { + let start_time_node = start_times.get(node).unwrap(); + let start_time_adj = start_times.get(adj).unwrap(); + let end_time_node = end_times.get(node).unwrap_or(&0); + let end_time_adj = end_times.get(adj).unwrap_or(&0); + + if start_time_node < start_time_adj + && end_time_node > end_time_adj + { + edge_types.insert((node, adj), EdgeType::ForwardEdge); + } else if start_time_node > start_time_adj + && end_time_node < end_time_adj + { + edge_types.insert((node, adj), EdgeType::BackEdge); + // } else if start_time_node > start_time_adj + // && end_time_node > end_time_adj + // { + // edge_types.insert((node, adj), EdgeType::CrossEdge); + } else { + edge_types.insert((node, adj), EdgeType::CrossEdge); + } + } + } + } + } + RecurseState::AfterNeighbor => { + end_times.insert(node, time); + time += 1; + } + } + } + } + + // for node in self.nodes.iter() { + // let mut stack = Vec::new(); + + // if visited.contains(node) { + // continue; + // } + + // stack.push(node); + + // while let Some(node) = stack.pop() { + // visited.insert(node.clone()); + + // if let Some(adjacencies) = self.get_adjacencies(node) { + // for adj in adjacencies { + // if visited.contains(adj) { + // // ... + // } else { + // edge_types.insert((node, adj), EdgeType::TreeEdge); + // stack.push(adj); + // } + // } + // } + // } + // } + + edge_types + } + pub fn shortest_path_matrix(&self) -> HashMap<&V, HashMap<&V, usize>> { let mut result = HashMap::new(); @@ -270,4 +439,45 @@ where result } + + /// This function prints the number of nodes, edges and a histogram of the degrees of the nodes + /// in the graph (computing the degrees might take a long time) + pub fn print_stats(&self) { + let mut vertices_degrees = HashMap::new(); + + for (from, tos) in self + .adjacencies + .iter() + .progress() + .with_style( + indicatif::ProgressStyle::default_bar() + .template("{prefix} {spinner} [{elapsed_precise}] [{wide_bar}] {pos}/{len}") + .unwrap(), + ) + .with_prefix("computing nodes degrees") + { + *vertices_degrees.entry(from).or_insert(0) += tos.len(); + + for to in tos { + *vertices_degrees.entry(to).or_insert(0) += 1; + } + } + + let histogram: BTreeMap = vertices_degrees + .iter() + .map(|(_, degree)| *degree) + .fold(BTreeMap::new(), |mut acc, degree| { + *acc.entry(degree).or_insert(0) += 1; + acc + }); + + println!("Stats:"); + println!("Nodes: {}", self.nodes.len()); + println!("Edges: {}", self.edges().count()); + + println!("Histogram:"); + for (degree, count) in histogram.iter() { + println!("{}: {}", degree, count); + } + } } diff --git a/src/lib.rs b/src/lib.rs index d466829..6865f44 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,7 @@ +pub mod adv_graph; pub mod gfa; pub mod graph; pub mod graph_2; pub mod parser; + +mod utils; diff --git a/src/main.rs b/src/main.rs index b3f1ef0..8fa440a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,5 @@ use std::{ - collections::HashMap, + collections::{BTreeMap, HashMap}, io::{BufRead, BufReader}, }; @@ -16,12 +16,12 @@ mod parser; /// Strumento CLI per il progetto di Algoritmi e Strutture Dati 2024 struct CliTool { #[argh(subcommand)] - nested: MySubCommandEnum, + nested: CliSubcommands, } #[derive(FromArgs, PartialEq, Debug)] #[argh(subcommand)] -enum MySubCommandEnum { +enum CliSubcommands { Show(CommandShow), } @@ -38,10 +38,12 @@ fn main() -> std::io::Result<()> { let opts = argh::from_env::(); match opts.nested { - MySubCommandEnum::Show(show) => { + CliSubcommands::Show(show) => { let file_lines_count = BufReader::new(std::fs::File::open(&show.input)?) .lines() - .progress_with(indicatif::ProgressBar::new_spinner().with_message("counting lines")) + .progress_with( + indicatif::ProgressBar::new_spinner().with_message("estimating line count"), + ) .count() as u64; let file = std::fs::File::open(show.input)?; @@ -84,10 +86,28 @@ fn main() -> std::io::Result<()> { // ); // } - let cc = graph.compute_ccs(); + // let cc = graph.compute_ccs(); // println!("CCs: {:?}", cc); - println!("Number of connected components: {}", cc.len()); + // println!("Number of connected components: {}", cc.len()); + + // graph.print_stats(); + + println!("Graph has cycles: {}", graph.is_cyclic()); + + let edge_types = graph.compute_edge_types(); + + let edge_type_histogram: BTreeMap<_, _> = edge_types + .iter() + .map(|(_, edge_type)| edge_type) + .fold(BTreeMap::new(), |mut acc, edge_type| { + *acc.entry(edge_type).or_insert(0) += 1; + acc + }); + + println!("Edge types histogram: {:?}", edge_type_histogram); + + println!("Cleaning up..."); } } diff --git a/src/parser.rs b/src/parser.rs index 6a93672..5938c89 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -174,8 +174,24 @@ pub fn parse_source(reader: R, line_count: u64) -> io::Result 1 { + eprintln!("skipped {} lines of type: {}", count, s); + } else { + eprintln!("skipped line type: {}", s); + } } Ok(entries) diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/utils.rs @@ -0,0 +1 @@ +