Skip to content

Commit

Permalink
Feat: adding code outline for footprinting tool
Browse files Browse the repository at this point in the history
  • Loading branch information
mrvollger committed Dec 20, 2023
1 parent a7f24d8 commit c733189
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ regex = "1.9.1"
rust-htslib = "0.43"
serde = {version = "1.0.104", features = ["derive"], optional = false}
serde_json = {version = "1.0.48", optional = false}
serde_yaml = "0.9"
spin = "0.9.8"
tch = {version = "0.14.0", optional = true}
tempfile = "3.3.0"
Expand Down
14 changes: 14 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ pub enum Commands {
AddNucleosomes(AddNucleosomeOptions),
/// Add FIREs (Fiber-seq Inferred Regulatory Elements) to a bam file with m6a predictions
Fire(FireOptions),
/// Add footprints to a bam file with m6a predictions
Footprint(FootprintOptions),
/// Extract fiberseq data into plain text files.
/// See https://fiberseq.github.io/fibertools-rs/docs/extract.html for a description of the outputs.
#[clap(visible_aliases = &["ex", "e"])]
Expand Down Expand Up @@ -361,3 +363,15 @@ pub struct DecoratorOptions {
#[clap(short, long, default_value = MIN_ML_SCORE)]
pub min_ml_score: u8,
}

#[derive(Args, Debug, PartialEq, Eq)]
pub struct FootprintOptions {
/// Indexed and aligned bam file with m6A and MSP calls
pub bam: String,
/// BED file with the regions to footprint. Should all contain the same motif with proper strand information, and ideally be ChIP-seq peaks.
pub bed: String,
/// yaml describing the modules of the footprint
pub yaml: String,
/// Output bam
pub out: String,
}
57 changes: 57 additions & 0 deletions src/footprint.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
use super::bam_writer;
use super::cli::FootprintOptions;
use super::fiber::*;
use anyhow;
use serde::{Deserialize, Serialize};
use serde_yaml;

#[derive(Serialize, Deserialize, PartialEq, Debug)]
pub struct FootprintYaml {
modules: Vec<(i64, i64)>,
}

impl FootprintYaml {
pub fn check_for_valid_input(&self) -> Result<(), anyhow::Error> {
if self.modules.is_empty() || self.modules.len() > 8 {
return Err(anyhow::anyhow!(
"YAML SPECIFICATION ERROR: Number of modules must be between 1 and 8: {self:?}"
));
}
let mut last_end = 0;
for (start, end) in &self.modules {
if *start < last_end {
return Err(anyhow::anyhow!(
"YAML SPECIFICATION ERROR: modules are not sorted: {self:?}"
));
}
if *start != last_end {
return Err(anyhow::anyhow!(
"YAML SPECIFICATION ERROR: modules are not contiguous or do not start at 0: {self:?}"
));
}
if *start > *end {
return Err(anyhow::anyhow!(
"YAML SPECIFICATION ERROR: start > end: {self:?}"
));
}
last_end = *end;
}
Ok(())
}
}

pub fn start_finding_footprints(opts: &FootprintOptions) -> Result<(), anyhow::Error> {
let yaml_buff = bio_io::buffer_from(&opts.yaml)?;
let yaml: FootprintYaml = serde_yaml::from_reader(yaml_buff)?;
yaml.check_for_valid_input()?;
log::debug!("YAML: {:?}", yaml);

let mut bam = bio_io::bam_reader(&opts.bam, 8);
let mut out = bam_writer(&opts.out, &bam, 8);
for mut rec in FiberseqRecords::new(&mut bam, 0) {
// TODO
rec.m6a.starts = vec![];
out.write(&rec.record)?;
}
Ok(())
}
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ pub mod fire;
/// add decorators
pub mod decorator;

pub mod footprint;

use anyhow::Result;
use bio_io::*;
#[cfg(feature = "predict")]
Expand Down
3 changes: 3 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,9 @@ pub fn main() -> Result<(), Error> {
Some(Commands::Fire(fire_opts)) => {
fibertools_rs::fire::add_fire_to_bam(fire_opts)?;
}
Some(Commands::Footprint(footprint_opts)) => {
fibertools_rs::footprint::start_finding_footprints(footprint_opts)?;
}
Some(Commands::TrackDecorators(decorator_opts)) => {
fibertools_rs::decorator::get_decorators_from_bam(decorator_opts)?;
}
Expand Down
10 changes: 10 additions & 0 deletions tests/data/ctcf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# modules of CTCF footprinting
# Will report using a bit flag if the module does not contain an m6A (is footprinted, 1) or does contain an m6A (is not footprinted, 0) for each module.
# modules must start at zero, be sorted, and be contiguous with one another.
# intervals are 0-based, half-open (like BED)
modules:
- [0, 8]
- [8, 16]
- [16, 23]
- [23, 29]
- [29, 34]

0 comments on commit c733189

Please sign in to comment.