diff --git a/Cargo.toml b/Cargo.toml index 268128f4..79bc8c20 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,7 +36,7 @@ log = "0.4" ordered-float = "3.4.0" rayon = "1.5" regex = "1.5.4" -rust-htslib = "0.43.1" +rust-htslib = "0.44.1" serde = {version = "1.0.104", features = ["derive"]} serde_json = "1.0.48" spin = "0.9.8" diff --git a/bamlift/Cargo.toml b/bamlift/Cargo.toml index ef0d74dc..932353d6 100644 --- a/bamlift/Cargo.toml +++ b/bamlift/Cargo.toml @@ -16,4 +16,4 @@ version = "0.1.0" #rayon = "1.5" env_logger = "0.9.0" log = "0.4" -rust-htslib = "0.43.1" +rust-htslib = "0.44.1" diff --git a/bio-io/Cargo.toml b/bio-io/Cargo.toml index 81f663dd..5d735432 100644 --- a/bio-io/Cargo.toml +++ b/bio-io/Cargo.toml @@ -20,4 +20,4 @@ lazy_static = "1.4.0" log = "0.4" niffler = {version = "2.5.0", default-features = false, features = ["gz"]} regex = "1.5.4" -rust-htslib = "0.43.1" +rust-htslib = "0.44.1" diff --git a/src/basemods/mod.rs b/src/basemods/mod.rs index e4798a47..22c47e5e 100644 --- a/src/basemods/mod.rs +++ b/src/basemods/mod.rs @@ -8,6 +8,7 @@ use rust_htslib::{ bam, bam::record::{Aux, AuxArray}, }; + use std::convert::TryFrom; #[derive(Eq, PartialEq, Debug)] @@ -96,6 +97,12 @@ pub struct BaseMods { impl BaseMods { pub fn new(record: &bam::Record, min_ml_score: u8) -> BaseMods { + //let new = BaseMods::new2(record, min_ml_score); + //assert_eq!(new, old); + BaseMods::my_mm_ml_parser(record, min_ml_score) + } + + pub fn my_mm_ml_parser(record: &bam::Record, min_ml_score: u8) -> BaseMods { // regex for matching the MM tag lazy_static! { static ref MM_RE: Regex = @@ -207,6 +214,88 @@ impl BaseMods { BaseMods { base_mods: rtn } } + /// this is actually way slower than my parser for some reason... + /// so I am not going to use it + fn _rust_htslib_mm_ml_parser(record: &bam::Record, min_ml_score: u8) -> BaseMods { + let mut rtn = vec![]; + log::debug!("{:?} {} ", record.qname(), record.is_reverse()); + + if let Ok(mods) = record.basemods_iter() { + // data from current iteration + let mut modified_bases_forward = vec![]; + let mut modified_probabilities_forward = vec![]; + let mut pre_mod_base = b' '; + let mut pre_mod_strand = ' '; + let mut pre_mod_type = ' '; + // loop over all the base mods base by base + for bp in mods { + let (mut pos, m) = bp.unwrap(); + if record.is_reverse() { + pos = record.seq().len() as i32 - pos - 1; + }; + let ml = m.qual as u8; + let mod_type = m.modified_base as u8 as char; + let mod_strand = if m.strand == 0 { '+' } else { '-' }; + let mod_base = m.canonical_base as u8; + // return a completed basemod + if (mod_type != pre_mod_type + || mod_strand != pre_mod_strand + || mod_base != pre_mod_base) + && pre_mod_type != ' ' + { + // need to flip if we are reversed + if record.is_reverse() { + modified_bases_forward.reverse(); + modified_probabilities_forward.reverse(); + } + let cur_mod = BaseMod::new( + record, + pre_mod_base, + pre_mod_strand, + pre_mod_type, + modified_bases_forward, + modified_probabilities_forward, + ); + // push the completed modifications if they are real + rtn.push(cur_mod); + // reset the current modification + modified_bases_forward = vec![]; + modified_probabilities_forward = vec![]; + } + // add the current position to the modification + if ml >= min_ml_score { + modified_bases_forward.push(pos as i64); + modified_probabilities_forward.push(ml); + } + // reset the previous modification + pre_mod_base = mod_base; + pre_mod_strand = mod_strand; + pre_mod_type = mod_type; + } + // push the last modification + if !modified_bases_forward.is_empty() { + // need to flip if we are reversed + if record.is_reverse() { + modified_bases_forward.reverse(); + modified_probabilities_forward.reverse(); + } + + let cur_mod = BaseMod::new( + record, + pre_mod_base, + pre_mod_strand, + pre_mod_type, + modified_bases_forward, + modified_probabilities_forward, + ); + rtn.push(cur_mod); + } + } else { + log::debug!("No MM tag found"); + }; + BaseMods { base_mods: rtn } + } + /// remove m6a base mods from the struct pub fn drop_m6a(&mut self) { self.base_mods.retain(|bm| !bm.is_m6a());