-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #25 from cauliyang/dev
feat: Add BAM to fastq conversion functionality
- Loading branch information
Showing
17 changed files
with
248 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
use anyhow::Result; | ||
use noodles::{bam, bgzf}; | ||
use rayon::prelude::*; | ||
use std::{fs::File, num::NonZeroUsize, path::Path, thread}; | ||
|
||
use noodles::fastq; | ||
|
||
// FIXME: The function has a bug since seq != qual | ||
|
||
pub fn bam2fq(bam: &Path, threads: Option<usize>) -> Result<Vec<fastq::Record>> { | ||
let file = File::open(bam)?; | ||
let worker_count = if let Some(threads) = threads { | ||
NonZeroUsize::new(threads) | ||
.unwrap() | ||
.min(thread::available_parallelism().unwrap_or(NonZeroUsize::MIN)) | ||
} else { | ||
thread::available_parallelism().unwrap_or(NonZeroUsize::MIN) | ||
}; | ||
|
||
let decoder = bgzf::MultithreadedReader::with_worker_count(worker_count, file); | ||
let mut reader = bam::io::Reader::from(decoder); | ||
let _header = reader.read_header()?; | ||
|
||
reader | ||
.records() | ||
.par_bridge() | ||
.map(|result| { | ||
let record = result.unwrap(); | ||
|
||
let seq = record.sequence().as_ref().to_vec(); | ||
let qual = record.quality_scores().as_ref().to_vec(); | ||
|
||
if seq.len() != qual.len() { | ||
let name = String::from_utf8_lossy(record.name().unwrap().as_ref()).to_string(); | ||
return Err(anyhow::anyhow!( | ||
"{} seq and qual length are not equal", | ||
name | ||
)); | ||
} | ||
|
||
let fq_record = fastq::Record::new( | ||
fastq::record::Definition::new(record.name().unwrap().to_vec(), ""), | ||
seq, | ||
qual, | ||
); | ||
Ok(fq_record) | ||
}) | ||
.collect::<Result<Vec<fastq::Record>>>() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
pub mod chimeric; | ||
pub mod cigar; | ||
pub mod io; | ||
|
||
#[cfg(feature = "python")] | ||
pub mod python; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
use anyhow::Result; | ||
use clap::Parser; | ||
use deepbiop_bam as bam; | ||
use deepbiop_fq as fq; | ||
use noodles::fastq; | ||
|
||
use std::path::{Path, PathBuf}; | ||
|
||
use super::set_up_threads; | ||
|
||
#[derive(Debug, Parser)] | ||
pub struct BamToFq { | ||
/// path to the bam file | ||
#[arg(value_name = "bam", action=clap::ArgAction::Append)] | ||
bam: Vec<PathBuf>, | ||
|
||
/// threads number | ||
#[arg(short, long, default_value = "2")] | ||
threads: Option<usize>, | ||
|
||
/// output bgzip compressed fastq file | ||
#[arg(short, long, action=clap::ArgAction::SetTrue)] | ||
compressed: bool, | ||
} | ||
|
||
fn write_fq<P: AsRef<Path>>(data: &[fastq::Record], path: P) -> Result<()> { | ||
let file = std::fs::File::create(path.as_ref())?; | ||
let mut writer = fastq::io::Writer::new(file); | ||
for record in data { | ||
writer.write_record(record)?; | ||
} | ||
Ok(()) | ||
} | ||
|
||
impl BamToFq { | ||
pub fn run(&self) -> Result<()> { | ||
set_up_threads(self.threads)?; | ||
|
||
for bam in &self.bam { | ||
let fq_records = bam::io::bam2fq(bam, self.threads)?; | ||
|
||
if self.compressed { | ||
let file_path = bam.with_extension("fq.bgz"); | ||
fq::io::write_fq_parallel_for_noodle_record(&fq_records, file_path, self.threads)?; | ||
} else { | ||
let file_path = bam.with_extension("fq"); | ||
write_fq(&fq_records, file_path)?; | ||
} | ||
} | ||
Ok(()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
use anyhow::Result; | ||
use clap::Parser; | ||
|
||
use noodles::fasta; | ||
use noodles::fastq; | ||
use std::path::PathBuf; | ||
|
||
use super::set_up_threads; | ||
|
||
#[derive(Debug, Parser)] | ||
pub struct FaToFq { | ||
/// path to the fa file | ||
#[arg(value_name = "fa", action=clap::ArgAction::Append)] | ||
fa: Vec<PathBuf>, | ||
|
||
/// threads number | ||
#[arg(short, long, default_value = "2")] | ||
threads: Option<usize>, | ||
} | ||
|
||
impl FaToFq { | ||
pub fn run(&self) -> Result<()> { | ||
set_up_threads(self.threads)?; | ||
|
||
for fa in &self.fa { | ||
let fq_file_path = fa.with_extension("fa"); | ||
|
||
let mut reader = fasta::io::reader::Builder.build_from_path(fa)?; | ||
|
||
let fq_writer_handle = std::fs::File::create(fq_file_path)?; | ||
let mut fq_writer = fastq::io::Writer::new(fq_writer_handle); | ||
|
||
for record in reader.records() { | ||
let record = record?; | ||
let name = record.name(); | ||
let sequence = record.sequence().as_ref().to_vec(); | ||
let quality = vec![b'@'; sequence.len()]; | ||
let fq_record = fastq::Record::new( | ||
fastq::record::Definition::new(name.to_vec(), ""), | ||
sequence, | ||
quality, | ||
); | ||
fq_writer.write_record(&fq_record)?; | ||
} | ||
} | ||
Ok(()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
use anyhow::Result; | ||
use clap::Parser; | ||
use deepbiop_fq as fq; | ||
|
||
use noodles::fasta; | ||
use std::path::PathBuf; | ||
|
||
use super::set_up_threads; | ||
|
||
#[derive(Debug, Parser)] | ||
pub struct FqToFa { | ||
/// path to the fq file | ||
#[arg(value_name = "fq", action=clap::ArgAction::Append)] | ||
fq: Vec<PathBuf>, | ||
|
||
/// threads number | ||
#[arg(short, long, default_value = "2")] | ||
threads: Option<usize>, | ||
} | ||
|
||
impl FqToFa { | ||
pub fn run(&self) -> Result<()> { | ||
set_up_threads(self.threads)?; | ||
|
||
for fq in &self.fq { | ||
let fq_records = fq::io::fastq_to_fasta(fq)?; | ||
let fa_file_path = fq.with_extension("fa"); | ||
let fa_file_handler = std::fs::File::create(&fa_file_path)?; | ||
let mut fa_writer = fasta::io::Writer::new(fa_file_handler); | ||
for record in fq_records { | ||
fa_writer.write_record(&record)?; | ||
} | ||
} | ||
Ok(()) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters