Skip to content

Commit

Permalink
Merge pull request #2 from bacpop/map-test-fix
Browse files Browse the repository at this point in the history
Redo the alignment writer for ska map
  • Loading branch information
johnlees authored Feb 7, 2023
2 parents d91f624 + 1c200e8 commit 609327d
Show file tree
Hide file tree
Showing 29 changed files with 735 additions and 278 deletions.
8 changes: 4 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,15 +348,15 @@ pub fn main() {
let mut out_stream = set_ostream(output);
match format {
FileType::Aln => {
log::info!("Writing alignment");
log::info!("Generating alignment with {} threads", *threads);
ska_ref
.write_aln(&mut out_stream)
.write_aln(&mut out_stream, *threads)
.expect("Failed to write output alignment");
}
FileType::Vcf => {
log::info!("Writing VCF");
log::info!("Generating VCF with {} threads", *threads);
ska_ref
.write_vcf(&mut out_stream)
.write_vcf(&mut out_stream, *threads)
.expect("Failed to write output VCF");
}
}
Expand Down
17 changes: 8 additions & 9 deletions src/merge_ska_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ use crate::ska_ref::RefSka;
/// ```
/// use ska::merge_ska_array::MergeSkaArray;
/// use ska::io_utils::set_ostream;
/// use ska::ska_ref::RefSka;
///
/// // Load an array from file
/// let mut ska_array = MergeSkaArray::load(&"tests/test_files_in/merge.skf").expect("Could not open array");
Expand All @@ -53,6 +54,10 @@ use crate::ska_ref::RefSka;
///
/// // Delete a sample
/// ska_array.delete_samples(&[&"test_1"]);
///
/// // Delete k-mers
/// let ska_weed = RefSka::new(ska_array.kmer_len(), &"tests/test_files_in/weed.fa", ska_array.rc());
/// ska_array.weed(&ska_weed);
/// ```
#[derive(Serialize, Deserialize)]
pub struct MergeSkaArray {
Expand Down Expand Up @@ -97,9 +102,6 @@ impl MergeSkaArray {

/// Convert a dynamic [`MergeSkaDict`] to static array representation.
pub fn new(dynamic: &MergeSkaDict) -> Self {
let k = dynamic.kmer_len();
let rc = dynamic.rc();
let names = dynamic.names().clone();
let mut variants = Array2::zeros((0, dynamic.nsamples()));
let mut split_kmers: Vec<u64> = Vec::new();
split_kmers.reserve(dynamic.ksize());
Expand All @@ -111,9 +113,9 @@ impl MergeSkaArray {
}
variants.mapv_inplace(|b| u8::max(b, b'-')); // turns zeros to missing
Self {
k,
rc,
names,
k: dynamic.kmer_len(),
rc: dynamic.rc(),
names: dynamic.names().clone(),
split_kmers,
variants,
variant_count,
Expand Down Expand Up @@ -267,9 +269,6 @@ impl MergeSkaArray {
/// may be a multi-FASTA) generated with [`RefSka::new()`]
///
/// Used with `ska weed`.
///
/// # Examples
/// #TODO
pub fn weed(&mut self, weed_ref: &RefSka) {
let weed_kmers: HashSet<u64> = HashSet::from_iter(weed_ref.kmer_iter());

Expand Down
6 changes: 2 additions & 4 deletions src/merge_ska_dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,12 @@ impl MergeSkaDict {
/// Create an empty merged dictionary, to be used with [`MergeSkaDict::merge()`]
/// or [`MergeSkaDict::append()`].
pub fn new(k: usize, n_samples: usize, rc: bool) -> Self {
let names = vec!["".to_string(); n_samples];
let split_kmers = HashMap::default();
Self {
k,
rc,
n_samples,
names,
split_kmers,
names: vec!["".to_string(); n_samples],
split_kmers: HashMap::default(),
}
}

Expand Down
11 changes: 4 additions & 7 deletions src/ska_dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,17 +140,14 @@ impl SkaDict {
if !(5..=31).contains(&k) || k % 2 == 0 {
panic!("Invalid k-mer length");
}
// Default/empty structs
let name = name.to_string();
let split_kmers: HashMap<u64, u8> = HashMap::default();
let cm_filter = CountMin::empty(CM_WIDTH, CM_HEIGHT, min_count);

let mut sk_dict = Self {
k,
rc,
sample_idx,
name,
split_kmers,
cm_filter,
name: name.to_string(),
split_kmers: HashMap::default(),
cm_filter: CountMin::empty(CM_WIDTH, CM_HEIGHT, min_count),
};

// Check if we're working with reads, and initalise the CM filter if so
Expand Down
4 changes: 4 additions & 0 deletions src/ska_dict/bit_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,13 +89,17 @@ pub fn decode_kmer(k: usize, kmer: u64, upper_mask: u64, lower_mask: u64) -> (St
/// e.g. on construction or after skipping an N.
#[inline(always)]
pub fn revcomp64_v2(mut res: u64, k_size: usize) -> u64 {
// This part reverses the bases by shuffling them using an on/off pattern
// of bits
res = (res >> 2 & 0x3333333333333333) | (res & 0x3333333333333333) << 2;
res = (res >> 4 & 0x0F0F0F0F0F0F0F0F) | (res & 0x0F0F0F0F0F0F0F0F) << 4;
res = (res >> 8 & 0x00FF00FF00FF00FF) | (res & 0x00FF00FF00FF00FF) << 8;
res = (res >> 16 & 0x0000FFFF0000FFFF) | (res & 0x0000FFFF0000FFFF) << 16;
res = (res >> 32 & 0x00000000FFFFFFFF) | (res & 0x00000000FFFFFFFF) << 32;
// This reverse complements
res ^= 0xAAAAAAAAAAAAAAAA;

// Shifts so LSB is at the bottom
res >> (2 * (32 - k_size))
}

Expand Down
18 changes: 10 additions & 8 deletions src/ska_dict/count_min_filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ use ahash::RandomState;
pub struct CountMin {
/// Table width (estimated number of unique k-mers)
width: usize,
/// Number of bits to shift masked value to get table entry
width_shift: u32,
/// Table height (number of hashes)
height: usize,
/// Hash generators
Expand All @@ -60,18 +62,17 @@ impl CountMin {
// Consistent with consts used, but ensures a power of two
let width_bits: usize = f64::floor(f64::log2(width as f64)) as usize;
let width = 1 << (width_bits + 1);
let mask = width as u64 - 1;

// Reserve for these gets call by the vec! macro used in init
let hash_factory = Vec::new();
let counts = Vec::new();
// Use MSB rather than LSB
let width_shift = u64::BITS - width_bits as u32;
let mask = (width as u64 - 1) << width_shift;

Self {
width,
width_shift,
height,
hash_factory,
hash_factory: Vec::new(),
mask,
counts,
counts: Vec::new(),
min_count,
}
}
Expand Down Expand Up @@ -102,7 +103,8 @@ impl CountMin {
let mut count = 0;
for hash_it in self.hash_factory.iter().enumerate() {
let (row_idx, hash) = hash_it;
let table_idx = row_idx * self.width + ((hash.hash_one(kmer) & self.mask) as usize);
let table_idx = row_idx * self.width
+ (((hash.hash_one(kmer) & self.mask) >> self.width_shift) as usize);
self.counts[table_idx] = self.counts[table_idx].saturating_add(1);
if row_idx == 0 {
count = self.counts[table_idx];
Expand Down
8 changes: 4 additions & 4 deletions src/ska_dict/split_kmer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ impl<'a> SplitKmer<'a> {
rc: bool,
min_qual: u8,
) -> Option<Self> {
let (mut index, rc_upper, rc_lower, rc_middle_base) = (0, 0, 0, 0);
let mut index = 0;
let first_kmer = Self::build(&seq, seq_len, qual, k, &mut index, min_qual);
if let Some((upper, lower, middle_base)) = first_kmer {
let (lower_mask, upper_mask) = generate_masks(k);
Expand All @@ -201,9 +201,9 @@ impl<'a> SplitKmer<'a> {
lower,
middle_base,
rc,
rc_upper,
rc_lower,
rc_middle_base,
rc_upper: 0,
rc_lower: 0,
rc_middle_base: 0,
index,
};
if rc {
Expand Down
Loading

0 comments on commit 609327d

Please sign in to comment.