Skip to content

Commit

Permalink
feat: allow user to select transcript source on CLI (#247)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe committed Nov 9, 2023
1 parent b69e80d commit 38ce0e1
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 10 deletions.
49 changes: 43 additions & 6 deletions src/annotate/seqvars/csq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,35 @@ pub struct VcfVariant {
pub alternative: String,
}

/// Selects the transcript source (ENSEMBL, RefSeq, or both).
///
/// Derives `clap::ValueEnum` and the serde (de)serialization traits in
/// addition to the usual value-type traits.  The derived `Default`
/// implementation yields `Both`, as marked by `#[default]` below.
// NOTE(review): clap's `ValueEnum` derive presumably uses the variant doc
// comments below as help text for the possible values -- confirm before
// rewording them.
#[derive(
Debug,
Clone,
Copy,
PartialEq,
Eq,
Default,
serde::Deserialize,
serde::Serialize,
clap::ValueEnum,
)]
pub enum TranscriptSource {
/// ENSEMBL
Ensembl,
/// RefSeq
RefSeq,
/// Both
#[default]
Both,
}

/// Configuration for consequence prediction.
#[derive(Debug, Clone, derive_builder::Builder)]
#[builder(pattern = "immutable")]
pub struct Config {
/// The transcript source to use.
#[builder(default = "TranscriptSource::Both")]
pub transcript_source: TranscriptSource,
/// Whether to report consequences for all picked transcripts.
#[builder(default = "true")]
pub report_all_transcripts: bool,
Expand Down Expand Up @@ -152,11 +177,10 @@ impl ConsequencePredictor {
self.provider
.get_tx_for_region(chrom_acc, ALT_ALN_METHOD, qry_start, qry_end)?;
txs.sort_by(|a, b| a.tx_ac.cmp(&b.tx_ac));
// Filter transcripts to the picked ones.
tracing::info!(" txs = {:#?}", &txs);
self.filter_picked_txs(txs)
// Filter transcripts to the picked ones from the selected
// transcript source.
self.filter_picked_sourced_txs(txs)
};
tracing::info!(" txs = {:#?}", &txs);

// Compute annotations for all (picked) transcripts first, skipping `None` results.
let anns_all_txs = txs
Expand All @@ -173,8 +197,21 @@ impl ConsequencePredictor {
Ok(Some(self.filter_ann_fields(anns_all_txs)))
}

// Filter transcripts to the picked ones.
fn filter_picked_txs(&self, txs: Vec<TxForRegionRecord>) -> Vec<TxForRegionRecord> {
// Filter transcripts to the picked ones from the selected transcript source.
fn filter_picked_sourced_txs(&self, txs: Vec<TxForRegionRecord>) -> Vec<TxForRegionRecord> {
fn is_ensembl(tx: &TxForRegionRecord) -> bool {
tx.tx_ac.starts_with("ENST")
}

let txs = match self.config.transcript_source {
TranscriptSource::Ensembl => txs.into_iter().filter(is_ensembl).collect::<Vec<_>>(),
TranscriptSource::RefSeq => txs
.into_iter()
.filter(|tx| !is_ensembl(tx))
.collect::<Vec<_>>(),
TranscriptSource::Both => txs,
};

// Short-circuit if transcript picking has been disabled.
if !self.provider.transcript_picking() {
return txs;
Expand Down
10 changes: 9 additions & 1 deletion src/annotate/seqvars/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ pub struct Args {
#[command(flatten)]
pub output: PathOutput,

/// The transcript source.
#[arg(long, value_enum, default_value_t = csq::TranscriptSource::Both)]
pub transcript_source: csq::TranscriptSource,
/// Whether to report for all picked transcripts.
#[arg(long, default_value_t = true)]
pub report_all_transcripts: bool,
Expand Down Expand Up @@ -1480,6 +1483,7 @@ fn run_with_writer(writer: &mut dyn AnnotatedVcfWriter, args: &Args) -> Result<(
assembly,
ConsequencePredictorConfigBuilder::default()
.report_all_transcripts(args.report_all_transcripts)
.transcript_source(args.transcript_source)
.build()
.unwrap(),
);
Expand Down Expand Up @@ -1661,7 +1665,7 @@ mod test {

use super::binning::bin_from_range;

use super::{run, Args, PathOutput};
use super::{csq::TranscriptSource, run, Args, PathOutput};

#[test]
fn smoke_test_output_vcf() -> Result<(), anyhow::Error> {
Expand All @@ -1674,6 +1678,7 @@ mod test {
let args = Args {
genome_release: None,
report_all_transcripts: false,
transcript_source: TranscriptSource::Both,
transcript_picking: false,
path_db: String::from("tests/data/annotate/db"),
path_input_vcf: String::from(
Expand Down Expand Up @@ -1711,6 +1716,7 @@ mod test {
let args = Args {
genome_release: None,
report_all_transcripts: true,
transcript_source: TranscriptSource::Both,
transcript_picking: false,
path_db: String::from("tests/data/annotate/db"),
path_input_vcf: String::from(
Expand Down Expand Up @@ -1760,6 +1766,7 @@ mod test {
let args = Args {
genome_release: None,
report_all_transcripts: true,
transcript_source: TranscriptSource::Both,
transcript_picking: false,
path_db: String::from("tests/data/annotate/db"),
path_input_vcf: String::from("tests/data/db/create/badly_formed_vcf_entry.vcf"),
Expand Down Expand Up @@ -1797,6 +1804,7 @@ mod test {
let args = Args {
genome_release: None,
report_all_transcripts: true,
transcript_source: TranscriptSource::Both,
transcript_picking: false,
path_db: String::from("tests/data/annotate/db"),
path_input_vcf: String::from("tests/data/db/create/mitochondrial_variants.vcf"),
Expand Down
7 changes: 4 additions & 3 deletions src/annotate/seqvars/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,10 @@ impl TxIntervalTrees {
#[derive(Debug, Clone, Default, derive_builder::Builder)]
#[builder(pattern = "immutable")]
pub struct Config {
/// * `transcript_picking` - Whether to use transcript picking. When
/// enabled, only use (a) ManeSelect+ManePlusClinical, (b) ManeSelect,
/// (c) longest transcript (the first available).
/// Whether to use transcript picking. When enabled, only use (a)
/// ManeSelect+ManePlusClinical, (b) ManeSelect, (c) longest transcript
/// (the first available).
#[builder(default = "false")]
pub transcript_picking: bool,
}

Expand Down

0 comments on commit 38ce0e1

Please sign in to comment.