Skip to content

Commit

Permalink
Merge branch 'dev' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Rob Patro committed Dec 29, 2024
2 parents 0d107a8 + d127f02 commit 07fe08a
Show file tree
Hide file tree
Showing 10 changed files with 287 additions and 118 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ niffler = { version = "2.6.0", default-features = false, features = [
"gz_cloudflare_zlib",
] }
blake3 = { version = "1.5.5", features = ["mmap", "serde"] }
regex = { version = "1.11.1", default-features = false, features = ["perf", "std"] }

[profile.release]
lto = "thin"
Expand Down
1 change: 1 addition & 0 deletions src/atac/defaults.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/// default parameters for ATAC-seq
use crate::defaults::DefaultParams;

pub(super) struct AtacIndexParams;
Expand Down
1 change: 1 addition & 0 deletions src/defaults.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/// default parameters for RNA-seq
pub trait DefaultMappingParams {
const MAX_EC_CARD: u32;
const MAX_HIT_OCC: u32;
Expand Down
6 changes: 6 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,21 +78,27 @@ fn main() -> anyhow::Result<()> {
alevin_fry,
},
),

Commands::Chemistry(ChemistryCommand::Add(add_opts)) => {
add_chemistry(af_home_path, add_opts)
}

Commands::Chemistry(ChemistryCommand::Remove(rem_opts)) => {
remove_chemistry(af_home_path, rem_opts)
}

Commands::Chemistry(ChemistryCommand::Clean(clean_opts)) => {
clean_chemistries(af_home_path, clean_opts)
}

Commands::Chemistry(ChemistryCommand::Lookup(lookup_opts)) => {
lookup_chemistry(af_home_path, lookup_opts)
}

Commands::Chemistry(ChemistryCommand::Refresh(refresh_opts)) => {
refresh_chemistries(af_home_path, refresh_opts)
}

Commands::Chemistry(ChemistryCommand::Fetch(fetch_opts)) => {
fetch_chemistries(af_home_path, fetch_opts)
}
Expand Down
9 changes: 8 additions & 1 deletion src/simpleaf_commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -445,9 +445,12 @@ pub struct IndexOpts {
#[derive(Args, Clone, Debug)]
#[command(arg_required_else_help = true)]
pub struct ChemistryRemoveOpts {
/// the name of the chemistry you wish to remove
/// the name of the chemistry you wish to remove (can be a regex)
#[arg(short, long)]
pub name: String,
/// print out the action that would be taken rather than taking it
#[arg(short, long)]
pub dry_run: bool,
}

/// Download the corresponding permit lists for the chemistry/ies
Expand Down Expand Up @@ -520,6 +523,10 @@ pub struct ChemistryRefreshOpts {
/// overwrite an existing matched chemistry even if the version isn't newer
#[arg(short, long)]
pub force: bool,
/// report what would happen with a refresh without actually performing one on the
/// actual chemistry registry.
#[arg(short, long)]
pub dry_run: bool,
}

#[derive(Debug, Subcommand)]
Expand Down
167 changes: 130 additions & 37 deletions src/simpleaf_commands/chemistry.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use crate::utils::af_utils::*;
use crate::utils::chem_utils::{
custom_chem_hm_into_json, get_custom_chem_hm, get_single_custom_chem_from_file,
CustomChemistry, LOCAL_PL_PATH_KEY, REMOTE_PL_URL_KEY,
CustomChemistry, ExpectedOri, LOCAL_PL_PATH_KEY, REMOTE_PL_URL_KEY,
};
use crate::utils::constants::*;
use crate::utils::prog_utils::{self, download_to_file_compute_hash};
use regex::Regex;

use anyhow::{bail, Context, Result};
use semver::Version;
Expand Down Expand Up @@ -202,25 +203,57 @@ pub fn refresh_chemistries(
af_home: PathBuf,
refresh_opts: crate::simpleaf_commands::ChemistryRefreshOpts,
) -> Result<()> {
let dry_run = refresh_opts.dry_run;
let dry_run_pref = if dry_run { "[dry_run] : " } else { "" };
let dry_run_dir = af_home.join("plist_dryrun");

// if the old custom chem file exists, then warn the user about it
// but read it in and attempt to populate.
let custom_chem_file = af_home.join(CUSTOM_CHEMISTRIES_PATH);
let merge_custom_chem = if custom_chem_file.exists() {
warn!("The \"custom_chemistries.json\" file is deprecated, and in the future, these chemistries should be
warn!("{}The \"custom_chemistries.json\" file is deprecated, and in the future, these chemistries should be
regustered in the \"chemistries.json\" file instead. We will attempt to automatically migrate over the old
chemistries into the new file");
chemistries into the new file", dry_run_pref);
true
} else {
false
};

// check if the chemistry file is absent altogether
// if so, then download it
let chem_path = af_home.join(CHEMISTRIES_PATH);
if !chem_path.is_file() {
let fresh_download = if !chem_path.is_file() {
prog_utils::download_to_file(CHEMISTRIES_URL, &chem_path)?;
true
} else {
false
};

// if this is a dry run, work on a copy of the chemistry file placed in the
// dry-run directory rather than on the file in af_home
let chem_path = if dry_run {
std::fs::create_dir_all(&dry_run_dir).with_context(|| {
format!(
"could not create dry run directory {}",
dry_run_dir.display()
)
})?;
let dry_run_chem_path = dry_run_dir.join(CHEMISTRIES_PATH);
std::fs::copy(chem_path, &dry_run_chem_path)?;
dry_run_chem_path
} else {
af_home.join(CHEMISTRIES_PATH)
};

// if it's a dry-run, copy over the custom chems if we have one
let custom_chem_file = if merge_custom_chem && dry_run {
let p = dry_run_dir.join(CUSTOM_CHEMISTRIES_PATH);
std::fs::copy(custom_chem_file, &p)?;
p
} else {
let tmp_chem_path = af_home.join(CHEMISTRIES_PATH).with_extension("tmp.json");
custom_chem_file
};

if !fresh_download {
let tmp_chem_path = chem_path.with_extension("tmp.json");
prog_utils::download_to_file(CHEMISTRIES_URL, &tmp_chem_path)?;
if let Some(existing_chem) = parse_resource_json_file(&chem_path, None)?.as_object_mut() {
if let Some(new_chem) = parse_resource_json_file(&tmp_chem_path, None)?.as_object() {
Expand All @@ -243,7 +276,7 @@ pub fn refresh_chemistries(
.expect("version should be a string"),
)?;
if refresh_opts.force || new_ver > curr_ver {
info!("updating {}", k);
info!("{}updating {}", dry_run_pref, k);
existing_chem.insert(k.clone(), v.clone());
}
}
Expand Down Expand Up @@ -280,15 +313,15 @@ pub fn refresh_chemistries(
{
for (k, v) in old_custom_chem.iter() {
if new_chem.contains_key(k) {
warn!("The newly downloaded \"chemistries.json\" file already contained the key {}, skipping entry from the existing \"custom_chemistries.json\" file.", k);
warn!("{}The newly downloaded \"chemistries.json\" file already contained the key {}, skipping entry from the existing \"custom_chemistries.json\" file.", dry_run_pref, k);
} else {
let new_ent = json!({
"geometry": v,
"expected_ori": "both",
"version" : "0.1.0"
});
new_chem.insert(k.to_owned(), new_ent);
info!("successfully inserted {} from old custom chemistries file into the new chemistries registry", k);
info!("{}successfully inserted {} from old custom chemistries file into the new chemistries registry", dry_run_pref, k);
}
}

Expand All @@ -310,6 +343,17 @@ pub fn refresh_chemistries(
bail!("Could not parse newly downloaded \"chemistries.json\" file as a JSON object, something is wrong. Please report this on GitHub.");
}
}

// if it's a dry run, remove the whole directory we created
if dry_run {
std::fs::remove_dir_all(&dry_run_dir).with_context(|| {
format!(
"couldn't remove the dry run directory {}",
dry_run_dir.display()
)
})?;
}

Ok(())
}

Expand Down Expand Up @@ -355,7 +399,7 @@ pub fn clean_chemistries(
let rem_pls = &present_pls - &used_pls;
// check if the chemistry already exists and log
if dry_run {
info!("The following files in the permit list directory are unused and would be removed: {:#?}", rem_pls);
info!("[dry_run] : The following files in the permit list directory are unused and would be removed: {:#?}", rem_pls);
} else {
for pl in rem_pls {
info!("removing {}", pl.display());
Expand All @@ -366,7 +410,7 @@ pub fn clean_chemistries(
Ok(())
}

/// Remove the entry for the provided chemistry in `chemistries.json` if it is present.
/// Remove the entry (or entries matching the provided regex) for the provided chemistry in `chemistries.json` if it is present.
pub fn remove_chemistry(
af_home_path: PathBuf,
remove_opts: crate::simpleaf_commands::ChemistryRemoveOpts,
Expand All @@ -376,12 +420,37 @@ pub fn remove_chemistry(
let chem_p = af_home_path.join(CHEMISTRIES_PATH);

let mut chem_hm = get_custom_chem_hm(&chem_p)?;
let mut num_matched = 0;
let keys = chem_hm.keys().cloned().collect::<Vec<String>>();

if let Ok(name_re) = regex::Regex::new(&name) {
for k in keys {
if name_re.is_match(&k) {
num_matched += 1;
if remove_opts.dry_run {
info!(
"[dry_run] : would remove chemistry {} from the registry.",
k
);
} else {
info!("chemistry {} found in the registry; removing it!", k);
chem_hm.remove(&k);
}
}
}
} else {
bail!(
"The provided chemistry name {} was neither a valid chemistry name nor a valid regex",
name
);
}

// check if the chemistry already exists and log
if chem_hm.contains_key(&name) {
info!("chemistry {} found in the registry; removing it!", name);
chem_hm.remove(&name);

if num_matched == 0 {
info!(
"no chemistry with name {} (or matching this as a regex) was found in the registry; nothing to remove",
name
);
} else if !remove_opts.dry_run {
// convert the custom chemistry hashmap to json
let v = custom_chem_hm_into_json(chem_hm)?;

Expand All @@ -393,16 +462,13 @@ pub fn remove_chemistry(
custom_chem_file
.write_all(serde_json::to_string_pretty(&v).unwrap().as_bytes())
.with_context(|| format!("could not write {}", chem_p.display()))?;
} else {
info!(
"no chemistry with name {} was found in the registry; nothing to remove",
name
);
}

Ok(())
}

/// Lookup the chemistry, or the chemistries matching the provided regex in the
/// chemistry registry.
pub fn lookup_chemistry(
af_home_path: PathBuf,
lookup_opts: crate::simpleaf_commands::ChemistryLookupOpts,
Expand All @@ -420,26 +486,56 @@ pub fn lookup_chemistry(
println!("{:#?}", cc);
} else {
info!("no chemistry with name {} was found in the registry!", name);
info!(
"treating {} as a regex and searching for matching chemistries",
name
);
if let Ok(re) = Regex::new(&name) {
for (cname, cval) in chem_hm.iter() {
if re.is_match(cname) {
println!("chemistry name : {}", cname);
println!("==============");
println!("{:#?}", cval);
}
}
}
}

Ok(())
}

struct FetchSet<'a> {
pub m: HashSet<&'a String>,
pub fetch_all: bool,
pub re: Option<Regex>,
}

impl<'a> FetchSet<'a> {
pub fn from_re(s: &str) -> Result<Self> {
if let Ok(re) = regex::Regex::new(s) {
Ok(Self {
m: HashSet::new(),
re: Some(re),
})
} else {
bail!("could not compile regex : [{}]", s)
}
}

pub fn from_hash_set(m: HashSet<&'a String>) -> Self {
Self { m, re: None }
}

pub fn contains(&self, k: &String) -> bool {
if self.fetch_all {
true
if let Some(ref re) = self.re {
re.is_match(k)
} else {
self.m.contains(k)
}
}
}

/// Fetch the permit lists for the provided chemistry (or the chemistries matching the provided
/// regex) in the registry.
pub fn fetch_chemistries(
af_home: PathBuf,
refresh_opts: crate::simpleaf_commands::ChemistryFetchOpts,
Expand All @@ -463,20 +559,17 @@ pub fn fetch_chemistries(

if let Some(chem_obj) = parse_resource_json_file(&chem_path, None)?.as_object() {
// if the user used the special `*`, then we lookup all chemistries
let fetch_chems: FetchSet = if refresh_opts.chemistries.len() == 1
&& matches!(refresh_opts.chemistries.first(), Some(x) if x == "*")
{
FetchSet {
m: HashSet::new(),
fetch_all: true,
}
let fetch_chems: FetchSet = if refresh_opts.chemistries.len() == 1 {
FetchSet::from_re(
refresh_opts
.chemistries
.first()
.expect("first entry is valid"),
)?
} else {
// otherwise, collect just the set they requested
let hs = HashSet::from_iter(refresh_opts.chemistries.iter());
FetchSet {
m: hs,
fetch_all: false,
}
FetchSet::from_hash_set(hs)
};

for (k, v) in chem_obj.iter() {
Expand All @@ -491,7 +584,7 @@ pub fn fetch_chemistries(
if let Some(serde_json::Value::String(rpath)) = v.get(REMOTE_PL_URL_KEY) {
if refresh_opts.dry_run {
info!(
"fetch would fetch missing file {} for {} from {}",
"[dry_run] : fetch would fetch missing file {} for {} from {}",
pfile, k, rpath
);
} else {
Expand Down
1 change: 1 addition & 0 deletions src/simpleaf_commands/quant.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use std::time::{Duration, Instant};
use tracing::{error, info, warn};

use super::MapQuantOpts;
use crate::utils::chem_utils::ExpectedOri;
use crate::utils::constants::{CHEMISTRIES_PATH, NUM_SAMPLE_LINES};

fn get_generic_buf_reader(ipath: &PathBuf) -> anyhow::Result<impl BufRead> {
Expand Down
Loading

0 comments on commit 07fe08a

Please sign in to comment.