chore: cleaned up rolling hash

main
Antonio De Lucreziis 4 weeks ago
parent 9f95680620
commit d5065151f1

@ -16,7 +16,7 @@ use gfa::{Entry, Orientation};
use graph::{AdjacencyGraph, DirectedAcyclicGraph, Graph}; use graph::{AdjacencyGraph, DirectedAcyclicGraph, Graph};
use indicatif::ProgressIterator; use indicatif::ProgressIterator;
use rand::seq::SliceRandom; use rand::seq::SliceRandom;
use rolling_hash::RollingHash; use rolling_hash::RollingHasher;
#[derive(FromArgs, PartialEq, Debug)] #[derive(FromArgs, PartialEq, Debug)]
/// Strumento CLI per il progetto di Algoritmi e Strutture Dati 2024 /// Strumento CLI per il progetto di Algoritmi e Strutture Dati 2024
@ -264,7 +264,7 @@ fn compute_sequence_occurrences_rolling_hash(sequence: &str, pattern: &str) -> V
let mut occurrences = vec![]; let mut occurrences = vec![];
let mut rl = RollingHash::new(3000, 5); let mut rl = RollingHasher::new(3000, 5);
// let mut rl = RollingHash::new(1_000_000, 5); // let mut rl = RollingHash::new(1_000_000, 5);
let pattern_hash = rl.hash_pattern(&pattern.chars().map(letter_to_number).collect::<Vec<_>>()); let pattern_hash = rl.hash_pattern(&pattern.chars().map(letter_to_number).collect::<Vec<_>>());

@ -1,6 +1,6 @@
use std::{collections::VecDeque, fmt::Debug}; use std::{collections::VecDeque, fmt::Debug};
pub struct RollingHash<T: Into<u64> + Clone> { pub struct RollingHasher<T: Into<u64> + Clone> {
modulus: u64, modulus: u64,
alphabet_size: u64, alphabet_size: u64,
@ -15,27 +15,12 @@ pub struct Hashed {
offset: u64, offset: u64,
} }
fn wrapping_pow_correct(a: u64, b: u64) -> u64 { impl<T> RollingHasher<T>
// // println!("Wrapping pow: {}^{}", a, b);
// let mut result = 1u64;
// for _ in 0..b {
// result = result.wrapping_mul(a);
// }
// // println!("=> {}", result);
// result
a.wrapping_pow(b as u32)
}
impl<T> RollingHash<T>
where where
T: Into<u64> + Clone + Debug, T: Into<u64> + Clone + Debug,
{ {
pub fn new(modulus: u64, alphabet_size: u64) -> Self { pub fn new(modulus: u64, alphabet_size: u64) -> Self {
RollingHash { RollingHasher {
modulus, modulus,
alphabet_size, alphabet_size,
@ -45,10 +30,6 @@ where
} }
} }
// pub fn hash(&self) -> u64 {
// self.hash % self.modulus
// }
pub fn hash(&self) -> Hashed { pub fn hash(&self) -> Hashed {
Hashed { Hashed {
hash: self.hash % self.modulus, hash: self.hash % self.modulus,
@ -57,8 +38,6 @@ where
} }
pub fn compare(&self, lhs: &Hashed, rhs: &Hashed) -> bool { pub fn compare(&self, lhs: &Hashed, rhs: &Hashed) -> bool {
// println!("Comparing: {:?} {:?}", lhs, rhs);
let (lhs, rhs) = if lhs.offset < rhs.offset { let (lhs, rhs) = if lhs.offset < rhs.offset {
(lhs, rhs) (lhs, rhs)
} else { } else {
@ -66,10 +45,10 @@ where
}; };
// Shift lhs to the right by the difference in offsets // Shift lhs to the right by the difference in offsets
let shifted_lhs = (lhs.hash.wrapping_mul(wrapping_pow_correct( let shifted_lhs = (lhs.hash.wrapping_mul(
self.alphabet_size, self.alphabet_size
rhs.offset - lhs.offset, .wrapping_pow((rhs.offset - lhs.offset) as u32),
))) % self.modulus; )) % self.modulus;
shifted_lhs == rhs.hash shifted_lhs == rhs.hash
} }
@ -78,8 +57,7 @@ where
let mut hash = 0; let mut hash = 0;
for (i, value) in pattern.iter().enumerate() { for (i, value) in pattern.iter().enumerate() {
let char_hash = let char_hash = value.clone().into() * self.alphabet_size.wrapping_pow(i as u32);
value.clone().into() * wrapping_pow_correct(self.alphabet_size, i as u64);
hash += char_hash; hash += char_hash;
} }
@ -91,20 +69,10 @@ where
self.current_word.push_back(value.clone()); self.current_word.push_back(value.clone());
let i = self.offset + (self.current_word.len() as u64) - 1; let i = self.offset + (self.current_word.len() as u64) - 1;
// println!("Alphabet size: {}", self.alphabet_size);
// println!("Index: {}", i);
// println!(
// "Adding: {:?} * {} to {}",
// value,
// wrapping_pow_correct(self.alphabet_size, i),
// self.hash
// );
self.hash = self.hash.wrapping_add( self.hash = self.hash.wrapping_add(
value value
.into() .into()
.wrapping_mul(wrapping_pow_correct(self.alphabet_size, i)), .wrapping_mul(self.alphabet_size.wrapping_pow(i as u32)),
); );
} }
@ -116,7 +84,7 @@ where
self.hash = self.hash.wrapping_sub( self.hash = self.hash.wrapping_sub(
value value
.into() .into()
.wrapping_mul(wrapping_pow_correct(self.alphabet_size, i)), .wrapping_mul(self.alphabet_size.wrapping_pow(i as u32)),
); );
self.offset += 1; self.offset += 1;
@ -137,7 +105,7 @@ where
panic!("Invalid position"); panic!("Invalid position");
} }
(hash * wrapping_pow_correct(self.alphabet_size, diff as u64)) % self.modulus (hash * self.alphabet_size.wrapping_pow(diff as u32)) % self.modulus
} }
pub fn hash_value_at_caret(&self, h: &Hashed) -> u64 { pub fn hash_value_at_caret(&self, h: &Hashed) -> u64 {
@ -156,7 +124,7 @@ mod tests {
let modulus = 42; let modulus = 42;
let alphabet_size = 4; let alphabet_size = 4;
let mut rh = RollingHash::<u64>::new(modulus, alphabet_size); let mut rh = RollingHasher::<u64>::new(modulus, alphabet_size);
let initial_pattern_hash = rh.hash_pattern(&[1, 2, 3, 4, 5]); let initial_pattern_hash = rh.hash_pattern(&[1, 2, 3, 4, 5]);
println!("Initial pattern hash: {:?}", initial_pattern_hash); println!("Initial pattern hash: {:?}", initial_pattern_hash);
@ -206,7 +174,7 @@ mod tests {
let modulus = 10_000_000; let modulus = 10_000_000;
let alphabet_size = 2; let alphabet_size = 2;
let mut rh = RollingHash::<u64>::new(modulus, alphabet_size); let mut rh = RollingHasher::<u64>::new(modulus, alphabet_size);
let initial_pattern_hash = rh.hash_pattern(&[1, 1, 1, 1]); let initial_pattern_hash = rh.hash_pattern(&[1, 1, 1, 1]);
@ -226,22 +194,4 @@ mod tests {
rh.hash_value_at(&initial_pattern_hash, rh.offset) rh.hash_value_at(&initial_pattern_hash, rh.offset)
); );
} }
#[test]
fn test_wrappping_pow() {
println!("Wrapping pow test");
let a = 2;
let b = 3;
let result = wrapping_pow_correct(a, b);
assert_eq!(result, 8);
let a = 3;
let b = 100;
let result = wrapping_pow_correct(a, b);
assert_ne!(result, 0);
}
} }

Loading…
Cancel
Save