mirror of https://github.com/aziis98/asd-2024.git
initial commit
commit
c897945e06
@ -0,0 +1,5 @@
|
||||
# Cargo
|
||||
/target
|
||||
|
||||
# Local files
|
||||
*.local*
|
@ -0,0 +1,96 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "argh"
|
||||
version = "0.1.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7af5ba06967ff7214ce4c7419c7d185be7ecd6cc4965a8f6e1d8ce0398aad219"
|
||||
dependencies = [
|
||||
"argh_derive",
|
||||
"argh_shared",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "argh_derive"
|
||||
version = "0.1.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "56df0aeedf6b7a2fc67d06db35b09684c3e8da0c95f8f27685cb17e08413d87a"
|
||||
dependencies = [
|
||||
"argh_shared",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "argh_shared"
|
||||
version = "0.1.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5693f39141bda5760ecc4111ab08da40565d1771038c4a0250f03457ec707531"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "asd-2024-gfa"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"argh",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.83"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.202"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.202"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.65"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
@ -0,0 +1,7 @@
|
||||
[package]
|
||||
name = "asd-2024-gfa"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
argh = "0.1.12"
|
@ -0,0 +1,48 @@
|
||||
# Progetto ASD 2023/2024
|
||||
|
||||
## Example GFA
|
||||
|
||||
```
|
||||
H VN:Z:1.0
|
||||
S 11 G
|
||||
S 12 A
|
||||
S 13 T
|
||||
S 14 T
|
||||
S 15 A
|
||||
S 16 C
|
||||
S 17 A
|
||||
S 21 G
|
||||
S 22 A
|
||||
S 23 T
|
||||
S 24 T
|
||||
S 25 A
|
||||
L 11 + 12 + *
|
||||
L 11 + 13 + *
|
||||
L 12 + 14 + *
|
||||
L 13 + 14 + *
|
||||
L 14 + 15 + *
|
||||
L 14 + 16 + *
|
||||
L 15 + 17 + *
|
||||
L 16 + 17 + *
|
||||
L 21 + 22 + *
|
||||
L 21 + 23 + *
|
||||
L 22 + 24 + *
|
||||
L 23 + 24 - *
|
||||
L 24 + 25 + *
|
||||
P A 11+,12+,14+,15+,17+ *,*,*,*
|
||||
P B 21+,22+,24+,25+ *,*,*
|
||||
W sample 1 A 0 5 >11>12>14>15>17
|
||||
W sample 2 A 0 5 >11>13>14>16>17
|
||||
W sample 1 B 0 5 >21>22>24<23<21
|
||||
W sample 2 B 0 4 >21>22>24>25
|
||||
```
|
||||
|
||||
## Note
|
||||
|
||||
- Documentazione del formato GFA: http://gfa-spec.github.io/GFA-spec/GFA1.html
|
||||
|
||||
- Data set:
|
||||
- [example.gfa](https://github.com/jltsiren/gbwt-rs/blob/main/test-data/example.gfa)
|
||||
- [drb1.gfa](https://github.com/pangenome/odgi/blob/master/test/DRB1-3123_unsorted.gfa)
|
||||
- [chrY.gfa](https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chrY.hprc-v1.0-pggb.gfa.gz)
|
||||
- [chrX.gfa](https://s3-us-west-2.amazonaws.com/human-pangenomics/pangenomes/freeze/freeze1/pggb/chroms/chrX.hprc-v1.0-pggb.gfa.gz)
|
@ -0,0 +1,32 @@
|
||||
H VN:Z:1.0
|
||||
S 11 G
|
||||
S 12 A
|
||||
S 13 T
|
||||
S 14 T
|
||||
S 15 A
|
||||
S 16 C
|
||||
S 17 A
|
||||
S 21 G
|
||||
S 22 A
|
||||
S 23 T
|
||||
S 24 T
|
||||
S 25 A
|
||||
L 11 + 12 + *
|
||||
L 11 + 13 + *
|
||||
L 12 + 14 + *
|
||||
L 13 + 14 + *
|
||||
L 14 + 15 + *
|
||||
L 14 + 16 + *
|
||||
L 15 + 17 + *
|
||||
L 16 + 17 + *
|
||||
L 21 + 22 + *
|
||||
L 21 + 23 + *
|
||||
L 22 + 24 + *
|
||||
L 23 + 24 - *
|
||||
L 24 + 25 + *
|
||||
P A 11+,12+,14+,15+,17+ *,*,*,*
|
||||
P B 21+,22+,24+,25+ *,*,*
|
||||
W sample 1 A 0 5 >11>12>14>15>17
|
||||
W sample 2 A 0 5 >11>13>14>16>17
|
||||
W sample 1 B 0 5 >21>22>24<23<21
|
||||
W sample 2 B 0 4 >21>22>24>25
|
@ -0,0 +1,40 @@
|
||||
use argh::FromArgs;
|
||||
|
||||
mod parser;
|
||||
|
||||
#[derive(FromArgs, PartialEq, Debug)]
|
||||
/// Strumento CLI per il progetto di Algoritmi e Strutture Dati 2024
|
||||
struct CliTool {
|
||||
#[argh(subcommand)]
|
||||
nested: MySubCommandEnum,
|
||||
}
|
||||
|
||||
#[derive(FromArgs, PartialEq, Debug)]
|
||||
#[argh(subcommand)]
|
||||
enum MySubCommandEnum {
|
||||
Show(CommandShow),
|
||||
}
|
||||
|
||||
#[derive(FromArgs, PartialEq, Debug)]
|
||||
/// Parse and show the content of a file
|
||||
#[argh(subcommand, name = "show")]
|
||||
struct CommandShow {
|
||||
#[argh(option, short = 'i')]
|
||||
/// file to read
|
||||
input: String,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let opts = argh::from_env::<CliTool>();
|
||||
|
||||
match opts.nested {
|
||||
MySubCommandEnum::Show(show) => {
|
||||
let file = std::fs::read_to_string(show.input).expect("cannot read file");
|
||||
let entries = parser::parse_source(file.as_str());
|
||||
|
||||
for entry in entries {
|
||||
println!("{:?}", entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,184 @@
|
||||
use std::str::FromStr;
|
||||
|
||||
/// Direction in which a segment is traversed by a link, path or walk.
///
/// The type is a plain two-state flag, so it is `Copy` and comparable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Orientation {
    /// Forward strand, written `+` or `>` in the source file.
    Forward,
    /// Reverse strand, written `-` or `<` in the source file.
    Reverse,
}
|
||||
|
||||
impl From<&str> for Orientation {
|
||||
fn from(s: &str) -> Self {
|
||||
match s {
|
||||
"+" => Orientation::Forward,
|
||||
">" => Orientation::Forward,
|
||||
"-" => Orientation::Reverse,
|
||||
"<" => Orientation::Reverse,
|
||||
_ => panic!("Invalid orientation: {}", s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Entry {
|
||||
Header {
|
||||
version: String,
|
||||
},
|
||||
Segment {
|
||||
id: String,
|
||||
sequence: String,
|
||||
},
|
||||
Link {
|
||||
from: String,
|
||||
from_orient: Orientation,
|
||||
to: String,
|
||||
to_orient: Orientation,
|
||||
},
|
||||
Path {
|
||||
name: String,
|
||||
segments: Vec<(String, Orientation)>,
|
||||
},
|
||||
Walk {
|
||||
sample: String,
|
||||
|
||||
haplotype_index: usize,
|
||||
seq_id: String,
|
||||
seq_start: usize,
|
||||
seq_end: usize,
|
||||
|
||||
segments: Vec<(String, Orientation)>,
|
||||
},
|
||||
}
|
||||
|
||||
/// Parse a line of the source file into a Header struct
|
||||
///
|
||||
/// ```txt
|
||||
/// H VN:Z:1.0
|
||||
/// ```
|
||||
///
|
||||
fn parse_header(line: &str) -> Entry {
|
||||
let columns: Vec<&str> = line.split(':').collect();
|
||||
|
||||
Entry::Header {
|
||||
version: columns[2].to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a line of the source file into a Segment struct
|
||||
///
|
||||
/// ```txt
|
||||
/// S 1 ACGT
|
||||
/// ```
|
||||
fn parse_segment(line: &str) -> Entry {
|
||||
let columns: Vec<&str> = line.split('\t').collect();
|
||||
|
||||
Entry::Segment {
|
||||
id: columns[1].to_string(),
|
||||
sequence: columns[2].to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a line of the source file into a Link struct
|
||||
///
|
||||
/// ```txt
|
||||
/// L 1 + 2 - 3M
|
||||
/// ```
|
||||
fn parse_link(line: &str) -> Entry {
|
||||
let columns: Vec<&str> = line.split('\t').collect();
|
||||
|
||||
Entry::Link {
|
||||
from: columns[1].to_string(),
|
||||
from_orient: columns[2].into(),
|
||||
to: columns[3].to_string(),
|
||||
to_orient: columns[4].into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a line of the source file into a Path struct
|
||||
///
|
||||
/// ```txt
|
||||
/// P A 11+,12+,14+,15-,17+ *,*,*,*
|
||||
/// ```
|
||||
fn parse_path(line: &str) -> Entry {
|
||||
let columns: Vec<&str> = line.split('\t').collect();
|
||||
|
||||
Entry::Path {
|
||||
name: columns[1].to_string(),
|
||||
segments: columns[2]
|
||||
.split(',')
|
||||
.map(|s| {
|
||||
let (name, orient) = s.split_at(s.len() - 1);
|
||||
(name.to_string(), orient.into())
|
||||
})
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_path_segments(s: &str) -> Vec<(String, Orientation)> {
|
||||
let mut result = Vec::new();
|
||||
let mut rest = s;
|
||||
|
||||
loop {
|
||||
println!("Rest: {}", rest);
|
||||
|
||||
let r = rest;
|
||||
|
||||
let (orient, r) = r.split_at(1);
|
||||
let (name, r) = r.split_at(r.find(['<', '>']).unwrap_or(r.len()));
|
||||
|
||||
rest = r;
|
||||
result.push((name.to_string(), orient.into()));
|
||||
|
||||
if rest.is_empty() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Parse a line of the source file into a Walk struct
|
||||
///
|
||||
/// ```txt
|
||||
/// W sample 1 A 0 5 >11>12>14>15>17
|
||||
/// ```
|
||||
fn parse_walk(line: &str) -> Entry {
|
||||
let columns: Vec<&str> = line.split('\t').collect();
|
||||
|
||||
Entry::Walk {
|
||||
sample: columns[1].to_string(),
|
||||
|
||||
haplotype_index: usize::from_str(columns[2]).unwrap(),
|
||||
seq_id: columns[3].to_string(),
|
||||
seq_start: usize::from_str(columns[4]).unwrap(),
|
||||
seq_end: usize::from_str(columns[5]).unwrap(),
|
||||
|
||||
segments: parse_path_segments(columns[6]),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_source(source: &str) -> Vec<Entry> {
|
||||
let mut entries = Vec::new();
|
||||
for line in source.lines() {
|
||||
let line = line.trim();
|
||||
if line.is_empty() || line.starts_with('#') {
|
||||
continue;
|
||||
}
|
||||
|
||||
println!("Parsing: {}", line);
|
||||
|
||||
let first_char = line.chars().next().unwrap();
|
||||
let entry = match first_char {
|
||||
'H' => parse_header(line),
|
||||
'S' => parse_segment(line),
|
||||
'L' => parse_link(line),
|
||||
'P' => parse_path(line),
|
||||
'W' => parse_walk(line),
|
||||
_ => {
|
||||
eprintln!("Unknown line type: {}", line);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
entries.push(entry);
|
||||
}
|
||||
entries
|
||||
}
|
Loading…
Reference in New Issue