Skip to content

Commit

Permalink
Merge pull request #36 from cauliyang/dev
Browse files Browse the repository at this point in the history
feat: Update dependencies versions and add new module
  • Loading branch information
cauliyang authored Jan 6, 2025
2 parents 0cd4013 + e3a4868 commit efff13f
Show file tree
Hide file tree
Showing 40 changed files with 1,562 additions and 217 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ jobs:
strategy:
matrix:
platform:
- runner: macos-12
- runner: macos-13
target: x86_64
- runner: macos-14
target: aarch64
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ jobs:
needs: Formatting
runs-on: ubuntu-latest
env:
MSRV_VERSION: 1.75.0
MSRV_VERSION: 1.83.0
steps:
- name: Checkout repository
uses: actions/checkout@v4
Expand Down
18 changes: 9 additions & 9 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,21 @@ default_language_version:

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.6.0
rev: v5.0.0
hooks:
- id: check-yaml
stages: [commit]
stages: [pre-commit]
exclude: "mkdocs.yml"
- id: check-json
stages: [commit]
stages: [pre-commit]
- id: check-toml
stages: [commit]
stages: [pre-commit]
- id: check-merge-conflict
stages: [commit]
stages: [pre-commit]
- id: check-case-conflict
stages: [commit]
stages: [pre-commit]
- id: detect-private-key
stages: [commit]
stages: [pre-commit]
- id: check-added-large-files
args: ["--maxkb=150000"]
- id: end-of-file-fixer
Expand All @@ -30,7 +30,7 @@ repos:
- id: cargo-check

- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: "v0.6.3"
rev: "v0.8.6"
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]
Expand Down Expand Up @@ -62,7 +62,7 @@ repos:
- id: shellcheck

- repo: https://github.com/DevinR528/cargo-sort
rev: v1.0.9
rev: v1.1.0
hooks:
- id: cargo-sort

Expand Down
33 changes: 17 additions & 16 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,19 @@ repository = "https://github.com/cauliyang/DeepBioP"
license = "Apache-2.0"

[workspace.dependencies]
pyo3 = { version = "0.21.0", features = [
pyo3 = { version = "0.21.2", features = [
"abi3-py39",
"extension-module",
"anyhow",
] }
pyo3-stub-gen = "0.6.0"
thiserror = "1.0"
pyo3-stub-gen = "0.6.2"
thiserror = "2.0"
anyhow = "1.0"
walkdir = { version = "2.4" }
rayon = { version = "1.8" }
walkdir = { version = "2.5" }
rayon = { version = "1.10" }
log = "0.4"
pyo3-log = "0.11"
noodles = { version = "0.82.0", features = [
noodles = { version = "0.87.0", features = [
"bgzf",
"core",
"csi",
Expand All @@ -35,7 +35,7 @@ noodles = { version = "0.82.0", features = [
] }

bio = "2.0"
needletail = "0.5"
needletail = "0.6"

ahash = "0.8.11"
numpy = "0.21"
Expand All @@ -48,22 +48,23 @@ rand_distr = "0.4"
bitvec = "1.0"
itertools = "0.13.0"
derive_builder = "0.20"
lexical = "6.1"
bstr = "1.9.1"
lazy_static = "1.4.0"
tempfile = "3.10"
parquet = "52.0.0"
arrow = "52.0"
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.6.0" }
colored = "2.1"
lexical = "7.0"
bstr = "1.11.3"
lazy_static = "1.5.0"
tempfile = "3.15"
parquet = "52.2.0"
arrow = "52.2"
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.8.1" }
colored = "2.2"
textwrap = "0.16"
flate2 = { version = "1.0.30", features = [
flate2 = { version = "1.0.35", features = [
"zlib-ng",
], default-features = false }

deepbiop-fq = { version = "0.1.11", path = "crates/deepbiop-fq" }
deepbiop-bam = { version = "0.1.11", path = "crates/deepbiop-bam" }
deepbiop-utils = { version = "0.1.11", path = "crates/deepbiop-utils" }
deepbiop-fa = { version = "0.1.11", path = "crates/deepbiop-fa" }

[profile.opt-dev]
inherits = "dev"
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ dbp -h
# Minimum Supported Rust Version (MSRV)

This project adheres to a Minimum Supported Rust Version (MSRV) policy.
The Minimum Supported Rust Version (MSRV) is 1.75.0.
The Minimum Supported Rust Version (MSRV) is 1.83.0.
We ensure that all code within the project is compatible with this version or newer to maintain stability and compatibility.

# Contribute 🤝
Expand Down
62 changes: 60 additions & 2 deletions crates/deepbiop-bam/src/chimeric/event.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use log::debug;
use pyo3::prelude::*;
use std::str::FromStr;

use super::is_retain_record;
use super::{is_chimeric_record, is_retain_record};

/// A chimeric event.
#[pyclass]
Expand Down Expand Up @@ -121,6 +121,41 @@ impl ChimericEvent {

Ok(chimeric_event)
}

/// Construct a ChimericEvent from a string chr:start-end,chr:start-end
/// # Example
/// ```
/// use deepbiop_bam as bam;
/// use bam::chimeric::ChimericEvent;
/// let value = "chr1:103959-104483,chr1:280386-280637";
/// let chimeric_event: ChimericEvent = ChimericEvent::parse_list_pos(value, "value").unwrap();
/// assert_eq!(chimeric_event.len(),2);
/// ```
pub fn parse_list_pos(s: &str, name: &str) -> Result<Self> {
let intervals = s
.par_split(',')
.map(|event| {
let mut splits = event.split(':');
let chr = splits.next().unwrap();
let positions: Vec<&str> = splits.next().unwrap().split('-').collect();
let start: usize = lexical::parse(positions[0]).unwrap();
let end: usize = lexical::parse(positions[1]).unwrap();
GenomicIntervalBuilder::default()
.chr(chr.into())
.start(start)
.end(end)
.build()
.unwrap()
})
.collect::<Vec<GenomicInterval>>();

Ok(ChimericEventBuilder::default()
.name(Some(name.into()))
.intervals(intervals)
.build()?)

// Ok(res)
}
}

impl FromStr for ChimericEvent {
Expand Down Expand Up @@ -169,7 +204,7 @@ where
.par_bridge()
.filter_map(|result| {
let record = result.unwrap();
if is_retain_record(&record) {
if is_retain_record(&record) && is_chimeric_record(&record) {
if let Some(predict_function) = &predict {
if predict_function(&record) {
Some(record)
Expand All @@ -186,3 +221,26 @@ where
.map(|record| ChimericEvent::parse_noodle_bam_record(&record, references))
.collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Reads the bundled chimeric-read BAM fixture and checks how many
    /// events are produced when every record passes the predicate.
    #[test]
    fn test_create_chimeric_events_from_bam() {
        // Fixture path, relative to the crate root.
        let bam_path = "tests/data/test_chimric_reads.bam";

        // Predicate that accepts every record it is given.
        let accept_all = |_record: &bam::Record| -> bool { true };

        let outcome = create_chimeric_events_from_bam(bam_path, Some(2), Some(accept_all));

        assert!(outcome.is_ok());
        assert_eq!(outcome.unwrap().len(), 100);
    }
}
Binary file not shown.
2 changes: 1 addition & 1 deletion crates/deepbiop-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ clap_complete = "4.5"
clap-verbosity-flag = "2.2"
ctrlc = "3.4"
human-panic = "2.0"
env_logger = "0.11.5"
env_logger = "0.11.6"

[[bin]]
path = "src/main.rs"
Expand Down
47 changes: 47 additions & 0 deletions crates/deepbiop-fa/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
[package]
name = "deepbiop-fa"
version = { workspace = true }
authors = { workspace = true }
edition = { workspace = true }
homepage = { workspace = true }
repository = { workspace = true }
keywords = ["parquet", "fasta", "deep-learning"]
license = { workspace = true }
readme = "../../README.md"
description = "Deep Learning Preprocessing Library for Fasta Format"

[dependencies]
thiserror = { workspace = true }
anyhow = { workspace = true }
noodles = { workspace = true }
rayon = { workspace = true }
log = { workspace = true }
needletail = { workspace = true }
ahash = { workspace = true }
numpy = { workspace = true }
ndarray = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
derive_builder = { workspace = true }
itertools = { workspace = true }
lexical = { workspace = true }
flate2 = { workspace = true }
bstr = { workspace = true }
walkdir = { workspace = true }

parquet = { workspace = true }
arrow = { workspace = true }
candle-core = { workspace = true }

pyo3 = { workspace = true }
pyo3-stub-gen = { workspace = true }

deepbiop-utils = { path = "../deepbiop-utils", version = "0.1" }

[dev-dependencies]
bio = { workspace = true }
tempfile = { workspace = true }


[features]
python = []
18 changes: 18 additions & 0 deletions crates/deepbiop-fa/src/encode.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
use ahash::HashMap;
use ndarray::{Array2, Array3};

/// Integer element type shared by the array and table aliases below.
pub type Element = i32;
/// Two-dimensional `ndarray` array of [`Element`].
pub type Matrix = Array2<Element>;
/// Three-dimensional `ndarray` array of [`Element`].
pub type Tensor = Array3<Element>;
/// Hash map from a byte sequence (a k-mer, per the alias name) to an [`Element`] id.
pub type Kmer2IdTable = HashMap<Vec<u8>, Element>;
/// Hash map from an [`Element`] id to a byte sequence (a k-mer, per the alias name).
pub type Id2KmerTable = HashMap<Element, Vec<u8>>;

mod option;
mod parquet;
mod record;
mod traits;

pub use option::*;
pub use parquet::*;
pub use record::*;
pub use traits::*;
50 changes: 50 additions & 0 deletions crates/deepbiop-fa/src/encode/option.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
use derive_builder::Builder;
use serde::{Deserialize, Serialize};
use std::fmt::{self, Display, Formatter};

use pyo3::prelude::*;
use pyo3_stub_gen::derive::*;

/// Base alphabet `A`, `T`, `C`, `G`, `N`; the builder default for `FaEncoderOption::bases`.
pub const BASES: &[u8] = b"ATCGN";
/// Builder default for `FaEncoderOption::qual_offset`
/// (presumably the Phred+33 ASCII offset — TODO confirm).
pub const QUAL_OFFSET: u8 = 33;

/// Encoder options (FASTA, per the type name), exposed to Python as a class
/// in module `deepbiop.fa`.
///
/// NOTE(review): `Default` is derived, so `FaEncoderOption::default()` yields
/// `qual_offset = 0`, empty `bases` and `threads = 0`, while the builder
/// defaults are `QUAL_OFFSET`, `BASES` and `2` — confirm the difference is intended.
#[gen_stub_pyclass]
#[pyclass(module = "deepbiop.fa")]
#[derive(Debug, Builder, Default, Clone, Serialize, Deserialize)]
pub struct FaEncoderOption {
/// Quality offset; the builder falls back to `QUAL_OFFSET` when it is not set.
#[pyo3(get, set)]
#[builder(default = "QUAL_OFFSET")]
pub qual_offset: u8,

/// Base alphabet as raw bytes; the builder falls back to `BASES` when it is not set.
#[pyo3(get, set)]
#[builder(default = "BASES.to_vec()")]
pub bases: Vec<u8>,

/// Thread count; the builder falls back to `2` when it is not set.
/// How the value is consumed is not visible in this file.
#[pyo3(get, set)]
#[builder(default = "2")]
pub threads: usize,
}

#[gen_stub_pymethods]
#[pymethods]
impl FaEncoderOption {
    /// Python constructor for `FaEncoderOption`.
    ///
    /// `bases` is taken as a string and stored as its raw bytes; `threads`
    /// falls back to `2` when `None` is passed.
    ///
    /// # Panics
    ///
    /// Panics if the builder fails to produce a value.
    #[new]
    fn py_new(qual_offset: u8, bases: String, threads: Option<usize>) -> Self {
        FaEncoderOptionBuilder::default()
            .qual_offset(qual_offset)
            // The `String` is owned, so reuse its buffer instead of copying it.
            .bases(bases.into_bytes())
            .threads(threads.unwrap_or(2))
            .build()
            .expect("Failed to build FaEncoderOption from Python arguments.")
    }
}

impl Display for FaEncoderOption {
    /// Renders the option as `FaEncoderOption { qual_offset: .., bases: ..}`,
    /// with `bases` printed through its `Debug` form. Other fields are not shown.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let Self {
            qual_offset, bases, ..
        } = self;
        write!(
            f,
            "FaEncoderOption {{ qual_offset: {}, bases: {:?}}}",
            qual_offset, bases
        )
    }
}
Loading

0 comments on commit efff13f

Please sign in to comment.