Skip to content

Commit

Permalink
feat: update tx support to latest cdot with MANE label transfer (#245)
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Nov 9, 2023
1 parent 5f43d65 commit b69e80d
Show file tree
Hide file tree
Showing 86 changed files with 3,053 additions and 1,279 deletions.
367 changes: 261 additions & 106 deletions Cargo.lock

Large diffs are not rendered by default.

25 changes: 14 additions & 11 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,17 +33,18 @@ async-compression = { version = "0.4", features = ["tokio", "gzip"] }
bgzip = "0.3"
bio = "1.3"
biocommons-bioutils = "0.1.4"
byte-unit = "4.0"
byteorder = "1.4"
byte-unit = "4.0"
chrono = "0.4"
clap = { version = "4.4", features = ["derive"] }
clap-verbosity-flag = "2.0"
clap = { version = "4.4", features = ["derive"] }
csv = "1.3"
derivative = "2.2"
derive_builder = { version = "0.12", features = ["clippy"] }
env_logger = "0.10"
flate2 = "1.0"
futures = "0.3.29"
hgvs = "0.12"
futures = "0.3"
hgvs = "0.13"
indexmap = { version = "2.1", features = ["serde"] }
indicatif = "0.17"
jsonl = "4.0"
Expand All @@ -52,10 +53,10 @@ log = "0.4"
nom = "7.1"
noodles-bgzf = { version = "0.25", features = ["async"] }
noodles-core = "0.12"
noodles-csi = "0.25"
noodles-csi = "0.26"
noodles-fasta = "0.30"
noodles-tabix = "0.31"
noodles-vcf = { version = "0.43", features = ["async"] }
noodles-tabix = "0.32"
noodles-vcf = { version = "0.45", features = ["async"] }
parse-display = "0.8"
procfs = "0.16"
prost = "0.12"
Expand All @@ -64,16 +65,17 @@ rand = "0.8"
rand_core = "0.6"
rocksdb = { version = "0.21", features = ["multi-threaded-cf"] }
rustc-hash = "1.1"
seqrepo = "0.8"
serde = { version = "1.0", features = ["derive"] }
seqrepo = "0.9"
serde_json = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_with = { version = "3.3", features = ["indexmap_2"] }
serde_yaml = "0.9"
strum = { version = "0.25", features = ["derive"] }
tempfile = "3"
thousands = "0.2"
tokio = { version = "1.33", features = ["full"] }
tracing = { version = "0.1", features = ["log"] }
tracing-subscriber = "0.3"
tracing = { version = "0.1", features = ["log"] }
uuid = { version = "1.4", features = ["fast-rng", "serde"] }
zstd = "0.13"

Expand All @@ -83,8 +85,9 @@ prost-build = "0.12"
[dev-dependencies]
async-std = { version = "1.12", features = ["attributes"] }
csv = "1.3"
hxdmp = "0.2.1"
hxdmp = "0.2"
insta = { version = "1.34", features = ["yaml"] }
pretty_assertions = "1.4"
rstest = "0.18"
temp_testdir = "0.2"
tracing-test = "0.2"
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,9 @@ cargo run --release -- \
-v \
db create txs \
--path-out /tmp/txs-out.bin.zst \
--path-cdot-json ../cdot-0.2.12.ensembl.grch37_grch38.json.gz \
--path-cdot-json ../cdot-0.2.12.refseq.grch37_grch38.json.gz \
--path-lable-tsv PATH_TO_MANE_LABEL.tsv \
--path-cdot-json ../cdot-0.2.21.ensembl.grch37_grch38.json.gz \
--path-cdot-json ../cdot-0.2.21.refseq.grch37_grch38.json.gz \
--path-seqrepo-instance ../hgvs-rs-data/seqrepo-data/master/master
```

Expand Down
12 changes: 10 additions & 2 deletions build.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
// The custom build script, needed as we use prost.

fn main() {
println!("cargo:rerun-if-changed=src/db/create/txs/data.proto3");
prost_build::compile_protos(&["src/db/create/txs/data.proto3"], &["src/"]).unwrap();
println!("cargo:rerun-if-changed=src/proto/data.proto3");
prost_build::Config::new()
.protoc_arg("-Isrc/proto")
// Add serde serialization and deserialization to the generated code.
.type_attribute(".", "#[derive(serde::Serialize, serde::Deserialize)]")
// Skip serializing `None` values.
.type_attribute(".", "#[serde_with::skip_serializing_none]")
// Define the protobuf files to compile.
.compile_protos(&["src/proto/data.proto3"], &["src/"])
.unwrap();
}
4 changes: 2 additions & 2 deletions docs/db_build.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ $ mehari db create txs \
\
--path-seqrepo-instance path/to/seqrepo-data/master \
\
--path-cdot-json cdot-0.2.12.refseq.grch37_grch38.json \
--path-cdot-json cdot-0.2.12.ensembl.grch37_grch38.json \
--path-cdot-json cdot-0.2.21.refseq.grch37_grch38.json \
--path-cdot-json cdot-0.2.21.ensembl.grch37_grch38.json \
\
--path-seqrepo-instance path/to/seqrepo-data/master \
\
Expand Down
35 changes: 26 additions & 9 deletions src/annotate/seqvars/ann.rs
Original file line number Diff line number Diff line change
Expand Up @@ -361,16 +361,19 @@ impl FromStr for FeatureType {
strum::EnumIter,
)]
pub enum FeatureBiotype {
/// Is coding transcript.
Coding,
/// Is non-coding transcript.
Noncoding,
/// Is in MANE Select set.
ManeSelect,
/// Is in MANE Plus Clinical set.
ManePlusClinical,
}

impl FeatureBiotype {
pub fn is_coding(&self) -> bool {
match self {
FeatureBiotype::Coding => true,
FeatureBiotype::Noncoding => false,
}
matches!(self, FeatureBiotype::Coding)
}
}

Expand Down Expand Up @@ -509,7 +512,7 @@ pub struct AnnField {
/// The feature identifier.
pub feature_id: String,
/// The feature biotype.
pub feature_biotype: FeatureBiotype,
pub feature_biotype: Vec<FeatureBiotype>,
/// The exon / intron rank.
pub rank: Option<Rank>,
/// HGVS c. notation.
Expand Down Expand Up @@ -542,7 +545,7 @@ impl Default for AnnField {
term: SoFeature::Transcript,
},
feature_id: Default::default(),
feature_biotype: FeatureBiotype::Coding,
feature_biotype: vec![FeatureBiotype::Coding],
rank: Default::default(),
hgvs_t: Default::default(),
hgvs_p: Default::default(),
Expand Down Expand Up @@ -572,7 +575,13 @@ impl FromStr for AnnField {
let gene_id = fields.next().unwrap().to_string();
let feature_type = fields.next().unwrap().parse()?;
let feature_id = fields.next().unwrap().to_string();
let feature_biotype = fields.next().unwrap().parse()?;
let feature_biotype = fields
.next()
.unwrap()
.split('&')
.map(|s| s.parse())
.collect::<Result<Vec<_>, _>>()
.map_err(|e| anyhow::anyhow!("could not parse feature biotype: {}", e))?;
let rank = fields.next().unwrap();
let rank = if rank.is_empty() {
None
Expand Down Expand Up @@ -669,7 +678,15 @@ impl std::fmt::Display for AnnField {
write!(f, "|")?;
write!(f, "{}", self.feature_id)?;
write!(f, "|")?;
write!(f, "{}", self.feature_biotype)?;
write!(
f,
"{}",
self.feature_biotype
.iter()
.map(|t| format!("{}", t))
.collect::<Vec<_>>()
.join("&")
)?;
write!(f, "|")?;
if let Some(rank) = &self.rank {
write!(f, "{}", rank)?;
Expand Down Expand Up @@ -1082,7 +1099,7 @@ mod test {
term: SoFeature::Transcript,
},
feature_id: String::from("feature_id"),
feature_biotype: FeatureBiotype::Coding,
feature_biotype: vec![FeatureBiotype::Coding],
rank: Some(Rank { ord: 1, total: 2 }),
hgvs_t: Some(String::from("HGVS.c")),
hgvs_p: Some(String::from("HGVS.p")),
Expand Down
Loading

0 comments on commit b69e80d

Please sign in to comment.