From dd8dcb3d217459a0578bee996729f12639b7894a Mon Sep 17 00:00:00 2001 From: Ryan Wick Date: Mon, 20 Jan 2025 11:26:51 +1100 Subject: [PATCH] Use bright colour for non-consentigs in cleaned graphs --- src/clean.rs | 2 +- src/cluster.rs | 2 +- src/combine.rs | 2 +- src/compress.rs | 2 +- src/graph_simplification.rs | 8 ++--- src/resolve.rs | 16 +++++---- src/table.rs | 6 ++-- src/tests.rs | 4 +-- src/trim.rs | 2 +- src/unitig.rs | 71 +++++++++++++++++++++---------------- src/unitig_graph.rs | 5 +-- 11 files changed, 66 insertions(+), 54 deletions(-) diff --git a/src/clean.rs b/src/clean.rs index 52ad8d3..717dbab 100644 --- a/src/clean.rs +++ b/src/clean.rs @@ -36,7 +36,7 @@ pub fn clean(in_gfa: PathBuf, out_gfa: PathBuf, remove: Option, duplicat duplicate_tigs(&mut graph, &duplicate); } merge_graph(&mut graph); - graph.save_gfa(&out_gfa, &vec![]).unwrap(); + graph.save_gfa(&out_gfa, &vec![], true).unwrap(); finished_message(&out_gfa); } diff --git a/src/cluster.rs b/src/cluster.rs index d7f2d6d..6ecbbc8 100644 --- a/src/cluster.rs +++ b/src/cluster.rs @@ -794,7 +794,7 @@ fn save_cluster_gfa(sequences: &[Sequence], cluster_num: u16, gfa_lines: &Vec, combined_gfa: &Path, combined_fasta: let circ = if unitig.is_isolated_and_circular() { " circular=true".to_string() } else { "".to_string() }; let depth_tag = format!("\tDP:f:{:.2}", unitig.depth); - let mut colour_tag = unitig.colour_tag(); + let mut colour_tag = unitig.colour_tag(true); if colour_tag.is_empty() { colour_tag = "\tCL:z:orangered".to_string(); } diff --git a/src/compress.rs b/src/compress.rs index 1032736..e7f32a6 100644 --- a/src/compress.rs +++ b/src/compress.rs @@ -43,7 +43,7 @@ pub fn compress(assemblies_dir: PathBuf, autocycler_dir: PathBuf, k_size: u32, t simplify_unitig_graph(&mut unitig_graph, &sequences); let out_gfa = autocycler_dir.join("input_assemblies.gfa"); let out_yaml = autocycler_dir.join("input_assemblies.yaml"); - unitig_graph.save_gfa(&out_gfa, &sequences).unwrap(); + unitig_graph.save_gfa(&out_gfa, &sequences, false).unwrap(); save_metrics(&mut metrics, assembly_count, &sequences, &unitig_graph, &out_yaml); finished_message(start_time, out_gfa, out_yaml); } diff --git a/src/graph_simplification.rs b/src/graph_simplification.rs index 4703c4a..4244ce0 100644 --- a/src/graph_simplification.rs +++ b/src/graph_simplification.rs @@ -19,7 +19,7 @@ use std::rc::Rc; use crate::misc::{reverse_complement, strand}; use crate::position::Position; use crate::sequence::Sequence; -use crate::unitig::{Unitig, UnitigStrand}; +use crate::unitig::{Unitig, UnitigStrand, UnitigType}; use crate::unitig_graph::UnitigGraph; @@ -438,8 +438,8 @@ fn merge_path(graph: &mut UnitigGraph, path: &Vec, new_unitig_numb ..Default::default() }; - if path.iter().any(|p| p.anchor()) { - unitig.set_as_consentig(); + if path.iter().any(|p| p.is_anchor() || p.is_consentig()) { + unitig.unitig_type = UnitigType::Consentig; } let unitig_rc = Rc::new(RefCell::new(unitig)); @@ -508,7 +508,7 @@ fn get_merge_path_depth(path: &Vec, forward_positions: &[Position] // If the path contains an anchor unitig, set the merged depth to the anchor's depth. for u in path { - if u.anchor() { + if u.is_anchor() { return u.depth(); } } diff --git a/src/resolve.rs b/src/resolve.rs index f707071..09bcc8e 100644 --- a/src/resolve.rs +++ b/src/resolve.rs @@ -24,7 +24,7 @@ use crate::log::{section_header, explanation}; use crate::misc::{check_if_dir_exists, check_if_file_exists, reverse_path, load_file_lines, sign_at_end, sign_at_end_vec}; use crate::sequence::Sequence; -use crate::unitig::Unitig; +use crate::unitig::{Unitig, UnitigType}; use crate::unitig_graph::UnitigGraph; @@ -49,9 +49,9 @@ pub fn resolve(cluster_dir: PathBuf, verbose: bool) { apply_unique_message(); apply_bridges(&mut unitig_graph, &bridges, bridge_depth); - unitig_graph.save_gfa(&bridged_gfa, &vec![]).unwrap(); + unitig_graph.save_gfa(&bridged_gfa, &vec![], false).unwrap(); merge_after_bridging(&mut unitig_graph); - unitig_graph.save_gfa(&merged_gfa, &vec![]).unwrap(); + unitig_graph.save_gfa(&merged_gfa, &vec![], false).unwrap(); let cull_count = cull_ambiguity(&mut bridges, verbose); if cull_count > 0 { @@ -62,7 +62,7 @@ pub fn resolve(cluster_dir: PathBuf, verbose: bool) { } else { eprintln!("All bridges were unique, no culling necessary.\n"); } - unitig_graph.save_gfa(&final_gfa, &vec![]).unwrap(); + unitig_graph.save_gfa(&final_gfa, &vec![], true).unwrap(); finished_message(&final_gfa); } @@ -118,7 +118,8 @@ fn load_graph(gfa_lines: &Vec, print_info: bool, let (unitig_graph, sequences) = UnitigGraph::from_gfa_lines(gfa_lines); if let Some(anchors) = anchors { for num in anchors { - unitig_graph.unitig_index.get(num).unwrap().borrow_mut().anchor = true; + unitig_graph.unitig_index.get(num).unwrap() + .borrow_mut().unitig_type = UnitigType::Anchor; } } if print_info { @@ -141,7 +142,7 @@ fn find_anchor_unitigs(graph: &mut UnitigGraph, sequences: &[Sequence]) -> Vec = unitig.forward_positions.iter().map(|p| p.seq_id()).collect(); forward_seq_ids.sort(); if forward_seq_ids == all_seq_ids { - unitig.anchor = true; + unitig.unitig_type = UnitigType::Anchor; anchor_ids.push(unitig.number); } } @@ -260,7 +261,8 @@ fn reduce_depths(graph: &mut UnitigGraph, bridge: &Bridge) { fn delete_unitigs_not_connected_to_anchor(graph: &mut UnitigGraph) { let to_delete: HashSet = graph.connected_components().into_iter() .filter_map(|component| { - if component.iter().all(|&num| !graph.unitig_index.get(&num).unwrap().borrow().anchor) { Some(component) } + if component.iter().all(|&num| graph.unitig_index.get(&num).unwrap().borrow() + .unitig_type != UnitigType::Anchor) { Some(component) } else { None } }) .flat_map(|component| component.into_iter()) .collect(); diff --git a/src/table.rs b/src/table.rs index 1be22dd..bd25ca4 100644 --- a/src/table.rs +++ b/src/table.rs @@ -132,7 +132,7 @@ fn visit_dirs_for_yaml_files(dir: &Path, yaml_files: &mut Vec) { let path = entry.path(); if path.is_dir() { visit_dirs_for_yaml_files(&path, yaml_files); - } else if path.extension().map_or(false, |ext| ext == "yaml") { + } else if path.extension().is_some_and(|ext| ext == "yaml") { yaml_files.push(path); } } @@ -144,7 +144,7 @@ fn get_one_copy_yaml(yaml_files: &[PathBuf], filename: &str) -> Option // Returns the YAML file in the given path with a matching filename. No match is okay and one // match is okay, but multiple matches will result in an error. let found_files = yaml_files.iter() - .filter(|path| path.file_name().map_or(false, |name| name == filename)).collect::>(); + .filter(|path| path.file_name().is_some_and(|name| name == filename)).collect::>(); match found_files.len() { 0 => None, 1 => Some(found_files[0].clone()), @@ -157,7 +157,7 @@ fn get_multi_copy_yaml(yaml_files: &[PathBuf], filename: &str) -> Vec { // Returns all YAML files in the given path with a matching filename, excluding those that are // in a qc_fail directory. yaml_files.iter().filter(|path| { - path.file_name().map_or(false, |name| name == filename) && + path.file_name().is_some_and(|name| name == filename) && !path.to_string_lossy().contains("/qc_fail/") }).cloned().collect() } diff --git a/src/tests.rs b/src/tests.rs index 3489f55..21c70c7 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -104,12 +104,12 @@ fn test_high_level(seq_a: &str, seq_b: &str, seq_c: &str, seq_d: &str, seq_e: &s let mut unitig_graph = UnitigGraph::from_kmer_graph(&kmer_graph); simplify_structure(&mut unitig_graph, &sequences); let gfa_1 = graph_dir.path().join("graph_1.gfa"); - unitig_graph.save_gfa(&gfa_1, &sequences).unwrap(); + unitig_graph.save_gfa(&gfa_1, &sequences, false).unwrap(); // Load the unitig graph from file, save it back to file and ensure the files are the same. let gfa_2 = graph_dir.path().join("graph_2.gfa"); let (unitig_graph, sequences) = UnitigGraph::from_gfa_file(&gfa_1); - unitig_graph.save_gfa(&gfa_2, &sequences).unwrap(); + unitig_graph.save_gfa(&gfa_2, &sequences, false).unwrap(); assert_same_content(&gfa_1, &gfa_2); // Reconstruct the sequences from the unitig graph. diff --git a/src/trim.rs b/src/trim.rs index 4a23ad3..0a78fcc 100644 --- a/src/trim.rs +++ b/src/trim.rs @@ -47,7 +47,7 @@ pub fn trim(cluster_dir: PathBuf, min_identity: f64, max_unitigs: usize, mad: f6 let sequences = choose_trim_type(start_end_results, hairpin_results, &mut graph, &sequences); let sequences = exclude_outliers_in_length(&mut graph, &sequences, mad); clean_up_graph(&mut graph, &sequences); - graph.save_gfa(&trimmed_gfa, &sequences).unwrap(); + graph.save_gfa(&trimmed_gfa, &sequences, false).unwrap(); save_metrics(&trimmed_yaml, &sequences); finished_message(&trimmed_gfa); } diff --git a/src/unitig.rs b/src/unitig.rs index d20fd4a..13d4db5 100644 --- a/src/unitig.rs +++ b/src/unitig.rs @@ -24,6 +24,7 @@ use crate::position::Position; static ANCHOR_COLOUR: &str = "forestgreen"; static BRIDGE_COLOUR: &str = "pink"; static CONSENTIG_COLOUR: &str = "steelblue"; +static OTHER_COLOUR: &str = "orangered"; #[derive(Clone, Default)] @@ -34,12 +35,7 @@ pub struct Unitig { pub forward_seq: Vec, pub reverse_seq: Vec, pub depth: f64, - - // TODO: I might want to drop anchor and bridge and instead create a unitig-type enum that - // can cover all options: anchor, bridge, consentig, etc. - pub anchor: bool, - pub bridge: bool, - + pub unitig_type: UnitigType, // anchor, bridge, consentig or other pub forward_positions: Vec, pub reverse_positions: Vec, pub forward_next: Vec, @@ -79,12 +75,17 @@ impl Unitig { quit_with_error("Could not find a depth tag (e.g. DP:f:10.00) in the GFA segment \ line.\nAre you sure this is an Autocycler-generated GFA file?"); }); - let anchor = parts.iter().any(|p| *p == format!("CL:z:{}", ANCHOR_COLOUR)) || - parts.iter().any(|p| *p == format!("CL:z:{}", CONSENTIG_COLOUR)); - let bridge = parts.iter().any(|p| *p == format!("CL:z:{}", BRIDGE_COLOUR)) || - parts.iter().any(|p| *p == format!("CL:z:{}", CONSENTIG_COLOUR)); + let unitig_type = if parts.iter().any(|p| *p == format!("CL:z:{}", CONSENTIG_COLOUR)) { + UnitigType::Consentig + } else if parts.iter().any(|p| *p == format!("CL:z:{}", ANCHOR_COLOUR)) { + UnitigType::Anchor + } else if parts.iter().any(|p| *p == format!("CL:z:{}", BRIDGE_COLOUR)) { + UnitigType::Bridge + } else { + UnitigType::Other + }; Unitig { - number, forward_seq, reverse_seq, depth, anchor, bridge, + number, forward_seq, reverse_seq, depth, unitig_type, ..Default::default() } } @@ -93,7 +94,7 @@ impl Unitig { // This constructor is for manually building a Unitig object when creating bridges. let reverse_seq = reverse_complement(&forward_seq); Unitig { - number, forward_seq, reverse_seq, depth, bridge: true, + number, forward_seq, reverse_seq, depth, unitig_type: UnitigType::Bridge, ..Default::default() } } @@ -163,16 +164,20 @@ impl Unitig { assert!(!self.forward_seq.is_empty()); } - pub fn gfa_segment_line(&self) -> String { + pub fn gfa_segment_line(&self, use_other_colour: bool) -> String { let seq_str = String::from_utf8_lossy(&self.forward_seq); - format!("S\t{}\t{}\tDP:f:{:.2}{}", self.number, seq_str, self.depth, self.colour_tag()) + format!("S\t{}\t{}\tDP:f:{:.2}{}", self.number, seq_str, self.depth, + self.colour_tag(use_other_colour)) } - pub fn colour_tag(&self) -> String { - if self.is_consentig() { format!("\tCL:z:{}", CONSENTIG_COLOUR) } - else if self.anchor { format!("\tCL:z:{}", ANCHOR_COLOUR) } - else if self.bridge { format!("\tCL:z:{}", BRIDGE_COLOUR) } - else { String::new() } + pub fn colour_tag(&self, use_other_colour: bool) -> String { + match self.unitig_type { + UnitigType::Consentig => format!("\tCL:z:{}", CONSENTIG_COLOUR), + UnitigType::Anchor => format!("\tCL:z:{}", ANCHOR_COLOUR), + UnitigType::Bridge => format!("\tCL:z:{}", BRIDGE_COLOUR), + UnitigType::Other => { if use_other_colour { format!("\tCL:z:{}", OTHER_COLOUR) } + else { String::new() } } + } } pub fn length(&self) -> u32 { @@ -277,16 +282,6 @@ impl Unitig { next.number() == self.number && next.strand && prev.number() == self.number && prev.strand } - fn is_consentig(&self) -> bool { - // A unitig is labelled as consentig by having both the anchor and bridge flags set. - self.anchor && self.bridge - } - - pub fn set_as_consentig(&mut self) { - self.anchor = true; - self.bridge = true; - } - pub fn clear_all_links(&mut self) { self.forward_next.clear(); self.forward_prev.clear(); @@ -353,8 +348,12 @@ impl UnitigStrand { self.unitig.borrow().get_seq(self.strand) } - pub fn anchor(&self) -> bool { - self.unitig.borrow().anchor + pub fn is_anchor(&self) -> bool { + self.unitig.borrow().unitig_type == UnitigType::Anchor + } + + pub fn is_consentig(&self) -> bool { + self.unitig.borrow().unitig_type == UnitigType::Consentig } } @@ -369,6 +368,16 @@ impl fmt::Debug for UnitigStrand { } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] +pub enum UnitigType { + Anchor, + Bridge, + Consentig, + #[default] + Other, +} + + #[cfg(test)] mod tests { use super::*; diff --git a/src/unitig_graph.rs b/src/unitig_graph.rs index 2b0a39a..446a68d 100644 --- a/src/unitig_graph.rs +++ b/src/unitig_graph.rs @@ -314,11 +314,12 @@ impl UnitigGraph { self.build_unitig_index(); } - pub fn save_gfa(&self, gfa_filename: &Path, sequences: &Vec) -> io::Result<()> { + pub fn save_gfa(&self, gfa_filename: &Path, sequences: &Vec, + use_other_colour: bool) -> io::Result<()> { let mut file = File::create(gfa_filename)?; writeln!(file, "H\tVN:Z:1.0\tKM:i:{}", self.k_size)?; for unitig in &self.unitigs { - writeln!(file, "{}", unitig.borrow().gfa_segment_line())?; + writeln!(file, "{}", unitig.borrow().gfa_segment_line(use_other_colour))?; } for (a, a_strand, b, b_strand) in self.get_links_for_gfa(0) { writeln!(file, "L\t{}\t{}\t{}\t{}\t0M", a, a_strand, b, b_strand)?;