Skip to content

Commit

Permalink
Add test and fix for fastq map
Browse files Browse the repository at this point in the history
  • Loading branch information
johnlees committed Jan 22, 2023
1 parent 2fd3318 commit d91f624
Show file tree
Hide file tree
Showing 2 changed files with 90 additions and 6 deletions.
19 changes: 13 additions & 6 deletions src/ska_ref.rs
Original file line number Diff line number Diff line change
Expand Up @@ -466,17 +466,24 @@ impl RefSka {
);
seq.extend(vec![
b'-';
self.seq[curr_chrom].len() - (next_pos + half_split_len)
self.seq[curr_chrom].len() - (next_pos + half_split_len) // This shouldn't overflow
]);
curr_chrom += 1;
next_pos = 0;
}
if *map_pos > next_pos {
// Missing bases, if no k-mers mapped over a region
seq.extend(vec![b'-'; *map_pos - next_pos - half_split_len]);
seq.extend_from_slice(
&self.seq[curr_chrom][(*map_pos - half_split_len)..*map_pos],
);
if *map_pos > next_pos + half_split_len {
// Missing bases, then first flanking half of split k-mer
// if no k-mers mapped over a large region
seq.extend(vec![b'-'; *map_pos - next_pos - half_split_len]);
seq.extend_from_slice(
&self.seq[curr_chrom][(*map_pos - half_split_len)..*map_pos],
);
} else {
// Short missing region, which can happen with fastq input
// Reasonable to take bases from match
seq.extend_from_slice(&self.seq[curr_chrom][next_pos..*map_pos]);
}
}
next_pos = *map_pos + 1;
if *base == b'-' {
Expand Down
77 changes: 77 additions & 0 deletions tests/fastq_input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,83 @@ fn align_fastq() {
assert_eq!(var_hash(&fastq_align_out), var_hash(&fasta_align_out));
}

// Uses perfect reads with the same fasta input as merge.skf
#[test]
fn map_fastq() {
let sandbox = TestSetup::setup();
let rfile_name = sandbox.create_rfile(&"test", true);

Command::new(cargo_bin("ska"))
.current_dir(sandbox.get_wd())
.arg("build")
.arg("-f")
.arg(rfile_name)
.arg("-o")
.arg("reads")
.args(&["--min-count", "1", "-v", "-k", "9", "--min-qual", "2"])
.assert()
.success();

let fastq_map_out = Command::new(cargo_bin("ska"))
.current_dir(sandbox.get_wd())
.arg("map")
.arg(sandbox.file_string("test_ref.fa", TestDir::Input))
.arg("reads.skf")
.output()
.unwrap()
.stdout;

Command::new(cargo_bin("ska"))
.current_dir(sandbox.get_wd())
.arg("build")
.arg(sandbox.file_string("test_1.fa", TestDir::Input))
.arg(sandbox.file_string("test_2.fa", TestDir::Input))
.arg("-o")
.arg("assemblies")
.args(&["-v", "-k", "9"])
.assert()
.success();

let fasta_map_out = Command::new(cargo_bin("ska"))
.current_dir(sandbox.get_wd())
.arg("map")
.arg(sandbox.file_string("test_ref.fa", TestDir::Input))
.arg("assemblies.skf")
.output()
.unwrap()
.stdout;

assert_eq!(
String::from_utf8(fastq_map_out),
String::from_utf8(fasta_map_out)
);

let fastq_map_out_vcf = Command::new(cargo_bin("ska"))
.current_dir(sandbox.get_wd())
.arg("map")
.arg(sandbox.file_string("test_ref.fa", TestDir::Input))
.arg("reads.skf")
.args(&["-f", "vcf"])
.output()
.unwrap()
.stdout;

let fasta_map_out_vcf = Command::new(cargo_bin("ska"))
.current_dir(sandbox.get_wd())
.arg("map")
.arg(sandbox.file_string("test_ref.fa", TestDir::Input))
.arg("assemblies.skf")
.args(&["-f", "vcf"])
.output()
.unwrap()
.stdout;

assert_eq!(
String::from_utf8(fastq_map_out_vcf),
String::from_utf8(fasta_map_out_vcf)
);
}

// Add errors and low quality scores to split k-mers around
// CACT TTAA C,T
// zgrep 'TTAA.AGTG' test_1_fwd.fastq.gz
Expand Down

0 comments on commit d91f624

Please sign in to comment.