Skip to content

Commit

Permalink
Simplify struct initialisation
Browse files Browse the repository at this point in the history
  • Loading branch information
johnlees committed Feb 7, 2023
1 parent fe63e22 commit 1c200e8
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 50 deletions.
9 changes: 3 additions & 6 deletions src/merge_ska_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,6 @@ impl MergeSkaArray {

/// Convert a dynamic [`MergeSkaDict`] to static array representation.
pub fn new(dynamic: &MergeSkaDict) -> Self {
let k = dynamic.kmer_len();
let rc = dynamic.rc();
let names = dynamic.names().clone();
let mut variants = Array2::zeros((0, dynamic.nsamples()));
let mut split_kmers: Vec<u64> = Vec::new();
split_kmers.reserve(dynamic.ksize());
Expand All @@ -116,9 +113,9 @@ impl MergeSkaArray {
}
variants.mapv_inplace(|b| u8::max(b, b'-')); // turns zeros to missing
Self {
k,
rc,
names,
k: dynamic.kmer_len(),
rc: dynamic.rc(),
names: dynamic.names().clone(),
split_kmers,
variants,
variant_count,
Expand Down
6 changes: 2 additions & 4 deletions src/merge_ska_dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,12 @@ impl MergeSkaDict {
/// Create an empty merged dictionary, to be used with [`MergeSkaDict::merge()`]
/// or [`MergeSkaDict::append()`].
pub fn new(k: usize, n_samples: usize, rc: bool) -> Self {
let names = vec!["".to_string(); n_samples];
let split_kmers = HashMap::default();
Self {
k,
rc,
n_samples,
names,
split_kmers,
names: vec!["".to_string(); n_samples],
split_kmers: HashMap::default(),
}
}

Expand Down
11 changes: 4 additions & 7 deletions src/ska_dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,17 +140,14 @@ impl SkaDict {
if !(5..=31).contains(&k) || k % 2 == 0 {
panic!("Invalid k-mer length");
}
// Default/empty structs
let name = name.to_string();
let split_kmers: HashMap<u64, u8> = HashMap::default();
let cm_filter = CountMin::empty(CM_WIDTH, CM_HEIGHT, min_count);

let mut sk_dict = Self {
k,
rc,
sample_idx,
name,
split_kmers,
cm_filter,
name: name.to_string(),
split_kmers: HashMap::default(),
cm_filter: CountMin::empty(CM_WIDTH, CM_HEIGHT, min_count),
};

// Check if we're working with reads, and initialise the CM filter if so
Expand Down
8 changes: 2 additions & 6 deletions src/ska_dict/count_min_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,13 @@ impl CountMin {
let width_shift = u64::BITS - width_bits as u32;
let mask = (width as u64 - 1) << width_shift;

// Reserve for these gets called by the vec! macro used in init
let hash_factory = Vec::new();
let counts = Vec::new();

Self {
width,
width_shift,
height,
hash_factory,
hash_factory: Vec::new(),
mask,
counts,
counts: Vec::new(),
min_count,
}
}
Expand Down
8 changes: 4 additions & 4 deletions src/ska_dict/split_kmer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ impl<'a> SplitKmer<'a> {
rc: bool,
min_qual: u8,
) -> Option<Self> {
let (mut index, rc_upper, rc_lower, rc_middle_base) = (0, 0, 0, 0);
let mut index = 0;
let first_kmer = Self::build(&seq, seq_len, qual, k, &mut index, min_qual);
if let Some((upper, lower, middle_base)) = first_kmer {
let (lower_mask, upper_mask) = generate_masks(k);
Expand All @@ -201,9 +201,9 @@ impl<'a> SplitKmer<'a> {
lower,
middle_base,
rc,
rc_upper,
rc_lower,
rc_middle_base,
rc_upper: 0,
rc_lower: 0,
rc_middle_base: 0,
index,
};
if rc {
Expand Down
9 changes: 3 additions & 6 deletions src/ska_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,17 +193,14 @@ impl RefSka {
panic!("{filename} has no valid sequence");
}

let mapped_variants = Array2::zeros((0, 0));
let mapped_pos = Vec::new();
let mapped_names = Vec::new();
Self {
k,
seq,
chrom_names,
split_kmer_pos,
mapped_pos,
mapped_variants,
mapped_names,
mapped_pos: Vec::new(),
mapped_variants: Array2::zeros((0, 0)),
mapped_names: Vec::new(),
}
}

Expand Down
41 changes: 24 additions & 17 deletions src/ska_ref/aln_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,37 +16,43 @@
/// each match.
#[derive(Clone)]
pub struct AlnWriter<'a> {
/// Next position where it is valid to write a match.
/// Initialised to `half_split_len` by [`AlnWriter::new()`].
next_pos: usize,
/// The current chromosome (starts at 0).
curr_chrom: usize,
/// The latest position where a base has been mapped (may not have been written).
last_mapped: usize,
/// The latest position in `seq_out` that has been written to.
last_written: usize,
/// An offset which is added to convert chromosome positions to concatenated position.
chrom_offset: usize,
/// The reference sequences, one `Vec<u8>` per sequence.
ref_seq: &'a Vec<Vec<u8>>,
/// The output alignment. Created by [`AlnWriter::new()`] filled with `b'-'`,
/// with a length equal to the summed lengths of all sequences in `ref_seq`.
seq_out: Vec<u8>,
/// The size of the flanking region for each split k-mer match,
/// computed as `(k - 1) / 2` in [`AlnWriter::new()`].
half_split_len: usize,
/// Whether the finalise function has been run, filling to the end of the
/// final contig.
finalised: bool,
}

impl<'a> AlnWriter<'a> {
/// Create a new [`AlnWriter`] taking the reference sequence mapped against
/// and the k-mer size used for the mapping
pub fn new(ref_seq: &'a Vec<Vec<u8>>, k: usize) -> Self {
let (curr_chrom, last_mapped, last_written, chrom_offset) = (0, 0, 0, 0);
let finalised = false;
let total_size = ref_seq.iter().map(|x| x.len()).sum();
let seq_out = vec![b'-'; total_size];
let half_split_len = (k - 1) / 2;
let next_pos = half_split_len;
Self {
next_pos,
curr_chrom,
last_mapped,
last_written,
chrom_offset,
next_pos: half_split_len,
curr_chrom: 0,
last_mapped: 0,
last_written: 0,
chrom_offset: 0,
ref_seq,
seq_out,
seq_out: vec![b'-'; total_size],
half_split_len,
finalised,
finalised: false,
}
}

Expand Down Expand Up @@ -129,16 +135,17 @@ impl<'a> AlnWriter<'a> {
/// Fills to the end of the final contig
/// Should only be called after the last call to [`AlnWriter::write_split_kmer()`].
pub fn finalise(&mut self) {
self.fill_contig();
self.finalised = true;
if !self.finalised {
while self.curr_chrom < self.ref_seq.len() {
self.fill_contig();
}
self.finalised = true;
}
}

/// Retrieve the written sequence. Calls [`AlnWriter::finalise()`] if not already called.
pub fn get_seq(&'a mut self) -> &'a [u8] {
if !self.finalised {
self.fill_contig();
self.finalised = true;
}
self.finalise();
self.seq_out.as_slice()
}
}
4 changes: 4 additions & 0 deletions src/ska_ref/idx_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
/// Builds the contig change indices and keeps track of the current chromosome.
#[derive(Debug)]
pub struct IdxCheck {
/// The end coordinates (last base) of each chromosome, one entry per chromosome.
end_coor: Vec<usize>,
}

Expand Down Expand Up @@ -38,8 +39,11 @@ impl IdxCheck {
///
/// Iterator as separate class so [`IdxCheck`] is not modified during iteration
pub struct IdxCheckIter<'a> {
/// Reference to the chromosome end coordinates
/// (presumably borrowed from the parent [`IdxCheck`] -- confirm against the constructor).
end_coor: &'a Vec<usize>,
/// Current chromosome
current_chr: usize,
/// Current absolute position
idx: usize,
}

Expand Down

0 comments on commit 1c200e8

Please sign in to comment.