Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow user to select transcript source on CLI (#247) #249

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 43 additions & 6 deletions src/annotate/seqvars/csq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,35 @@ pub struct VcfVariant {
pub alternative: String,
}

/// Enum that allows selecting the transcript source.
///
/// Consequence prediction uses this value to restrict the transcripts
/// overlapping a variant before any transcript picking is applied.
// NOTE(review): the variant doc comments below are left untouched on purpose;
// with `clap::ValueEnum` they presumably double as CLI help text -- confirm
// before rewording them.
#[derive(
Debug,
Clone,
Copy,
PartialEq,
Eq,
Default,
serde::Deserialize,
serde::Serialize,
clap::ValueEnum,
)]
pub enum TranscriptSource {
// Keeps only transcripts whose accession starts with `ENST`.
/// ENSEMBL
Ensembl,
// Keeps every transcript whose accession does NOT start with `ENST`.
/// RefSeq
RefSeq,
// Applies no source-based filtering; this is the `Default` variant.
/// Both
#[default]
Both,
}

/// Configuration for consequence prediction.
#[derive(Debug, Clone, derive_builder::Builder)]
#[builder(pattern = "immutable")]
pub struct Config {
/// The transcript source to use.
#[builder(default = "TranscriptSource::Both")]
pub transcript_source: TranscriptSource,
/// Whether to report consequences for all picked transcripts.
#[builder(default = "true")]
pub report_all_transcripts: bool,
Expand Down Expand Up @@ -152,11 +177,10 @@ impl ConsequencePredictor {
self.provider
.get_tx_for_region(chrom_acc, ALT_ALN_METHOD, qry_start, qry_end)?;
txs.sort_by(|a, b| a.tx_ac.cmp(&b.tx_ac));
// Filter transcripts to the picked ones.
tracing::info!(" txs = {:#?}", &txs);
self.filter_picked_txs(txs)
// Filter transcripts to the picked ones from the selected
// transcript source.
self.filter_picked_sourced_txs(txs)
};
tracing::info!(" txs = {:#?}", &txs);

// Compute annotations for all (picked) transcripts first, skipping `None` results.
let anns_all_txs = txs
Expand All @@ -173,8 +197,21 @@ impl ConsequencePredictor {
Ok(Some(self.filter_ann_fields(anns_all_txs)))
}

// Filter transcripts to the picked ones.
fn filter_picked_txs(&self, txs: Vec<TxForRegionRecord>) -> Vec<TxForRegionRecord> {
// Filter transcripts to the picked ones from the selected transcript source.
fn filter_picked_sourced_txs(&self, txs: Vec<TxForRegionRecord>) -> Vec<TxForRegionRecord> {
// Returns `true` if the transcript accession (`tx_ac`) starts with the
// `ENST` prefix. The check is purely prefix-based.
fn is_ensembl(tx: &TxForRegionRecord) -> bool {
tx.tx_ac.starts_with("ENST")
}

let txs = match self.config.transcript_source {
TranscriptSource::Ensembl => txs.into_iter().filter(is_ensembl).collect::<Vec<_>>(),
TranscriptSource::RefSeq => txs
.into_iter()
.filter(|tx| !is_ensembl(tx))
.collect::<Vec<_>>(),
TranscriptSource::Both => txs,
};

// Short-circuit if transcript picking has been disabled.
if !self.provider.transcript_picking() {
return txs;
Expand Down
10 changes: 9 additions & 1 deletion src/annotate/seqvars/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ pub struct Args {
#[command(flatten)]
pub output: PathOutput,

/// The transcript source.
#[arg(long, value_enum, default_value_t = csq::TranscriptSource::Both)]
pub transcript_source: csq::TranscriptSource,
/// Whether to report for all picked transcripts.
#[arg(long, default_value_t = true)]
pub report_all_transcripts: bool,
Expand Down Expand Up @@ -1480,6 +1483,7 @@ fn run_with_writer(writer: &mut dyn AnnotatedVcfWriter, args: &Args) -> Result<(
assembly,
ConsequencePredictorConfigBuilder::default()
.report_all_transcripts(args.report_all_transcripts)
.transcript_source(args.transcript_source)
.build()
.unwrap(),
);
Expand Down Expand Up @@ -1661,7 +1665,7 @@ mod test {

use super::binning::bin_from_range;

use super::{run, Args, PathOutput};
use super::{csq::TranscriptSource, run, Args, PathOutput};

#[test]
fn smoke_test_output_vcf() -> Result<(), anyhow::Error> {
Expand All @@ -1674,6 +1678,7 @@ mod test {
let args = Args {
genome_release: None,
report_all_transcripts: false,
transcript_source: TranscriptSource::Both,
transcript_picking: false,
path_db: String::from("tests/data/annotate/db"),
path_input_vcf: String::from(
Expand Down Expand Up @@ -1711,6 +1716,7 @@ mod test {
let args = Args {
genome_release: None,
report_all_transcripts: true,
transcript_source: TranscriptSource::Both,
transcript_picking: false,
path_db: String::from("tests/data/annotate/db"),
path_input_vcf: String::from(
Expand Down Expand Up @@ -1760,6 +1766,7 @@ mod test {
let args = Args {
genome_release: None,
report_all_transcripts: true,
transcript_source: TranscriptSource::Both,
transcript_picking: false,
path_db: String::from("tests/data/annotate/db"),
path_input_vcf: String::from("tests/data/db/create/badly_formed_vcf_entry.vcf"),
Expand Down Expand Up @@ -1797,6 +1804,7 @@ mod test {
let args = Args {
genome_release: None,
report_all_transcripts: true,
transcript_source: TranscriptSource::Both,
transcript_picking: false,
path_db: String::from("tests/data/annotate/db"),
path_input_vcf: String::from("tests/data/db/create/mitochondrial_variants.vcf"),
Expand Down
7 changes: 4 additions & 3 deletions src/annotate/seqvars/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,10 @@ impl TxIntervalTrees {
#[derive(Debug, Clone, Default, derive_builder::Builder)]
#[builder(pattern = "immutable")]
pub struct Config {
/// Whether to use transcript picking. When enabled, only use (a)
/// ManeSelect+ManePlusClinical, (b) ManeSelect, (c) longest transcript
/// (the first available).
///
/// The builder defaults this to `false`.
#[builder(default = "false")]
pub transcript_picking: bool,
}

Expand Down