-
Notifications
You must be signed in to change notification settings - Fork 66
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add index functionality to rattler and create python bindings (#…
…436) Co-authored-by: Benjamin Lowry <[email protected]>
- Loading branch information
1 parent
c85cc84
commit 3bedecd
Showing
12 changed files
with
599 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
[package] | ||
name = "rattler_index" | ||
version.workspace = true | ||
edition.workspace = true | ||
authors = [] | ||
description = "A crate that indexes directories containing conda packages to create local conda channels" | ||
categories.workspace = true | ||
homepage.workspace = true | ||
repository.workspace = true | ||
license.workspace = true | ||
readme.workspace = true | ||
|
||
[dependencies] | ||
fs-err = "2.11.0" | ||
rattler_conda_types = { version = "0.14.0", path = "../rattler_conda_types", default-features = false } | ||
rattler_digest = { version = "0.14.0", path = "../rattler_digest", default-features = false } | ||
rattler_package_streaming = { version = "0.14.0", path = "../rattler_package_streaming", default-features = false } | ||
serde_json = "1.0.108" | ||
tracing = "0.1.40" | ||
walkdir = "2.4.0" | ||
|
||
[dev-dependencies] | ||
tempfile = "3.8.0" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
//! Indexing of packages in an output folder to create up-to-date `repodata.json` files | ||
#![deny(missing_docs)] | ||
|
||
use rattler_conda_types::package::ArchiveType; | ||
use rattler_conda_types::package::IndexJson; | ||
use rattler_conda_types::package::PackageFile; | ||
use rattler_conda_types::ChannelInfo; | ||
use rattler_conda_types::PackageRecord; | ||
use rattler_conda_types::Platform; | ||
use rattler_conda_types::RepoData; | ||
use rattler_package_streaming::read; | ||
use rattler_package_streaming::seek; | ||
|
||
use fs_err::File; | ||
use std::ffi::OsStr; | ||
use std::io::Read; | ||
use std::io::Write; | ||
use std::path::Path; | ||
use std::path::PathBuf; | ||
use walkdir::WalkDir; | ||
|
||
fn package_record_from_index_json<T: Read>( | ||
file: &Path, | ||
index_json_reader: &mut T, | ||
) -> Result<PackageRecord, std::io::Error> { | ||
let index = IndexJson::from_reader(index_json_reader)?; | ||
|
||
let sha256_result = rattler_digest::compute_file_digest::<rattler_digest::Sha256>(file)?; | ||
let md5_result = rattler_digest::compute_file_digest::<rattler_digest::Md5>(file)?; | ||
let size = std::fs::metadata(file)?.len(); | ||
|
||
let package_record = PackageRecord { | ||
name: index.name, | ||
version: index.version, | ||
build: index.build, | ||
build_number: index.build_number, | ||
subdir: index.subdir.unwrap_or_else(|| "unknown".to_string()), | ||
md5: Some(md5_result), | ||
sha256: Some(sha256_result), | ||
size: Some(size), | ||
arch: index.arch, | ||
platform: index.platform, | ||
depends: index.depends, | ||
constrains: index.constrains, | ||
track_features: index.track_features, | ||
features: index.features, | ||
noarch: index.noarch, | ||
license: index.license, | ||
license_family: index.license_family, | ||
timestamp: index.timestamp, | ||
legacy_bz2_md5: None, | ||
legacy_bz2_size: None, | ||
purls: Default::default(), | ||
}; | ||
Ok(package_record) | ||
} | ||
|
||
fn package_record_from_tar_bz2(file: &Path) -> Result<PackageRecord, std::io::Error> { | ||
let reader = std::fs::File::open(file)?; | ||
let mut archive = read::stream_tar_bz2(reader); | ||
for entry in archive.entries()?.flatten() { | ||
let mut entry = entry; | ||
let path = entry.path()?; | ||
if path.as_os_str().eq("info/index.json") { | ||
return package_record_from_index_json(file, &mut entry); | ||
} | ||
} | ||
Err(std::io::Error::new( | ||
std::io::ErrorKind::Other, | ||
"No index.json found", | ||
)) | ||
} | ||
|
||
fn package_record_from_conda(file: &Path) -> Result<PackageRecord, std::io::Error> { | ||
let reader = std::fs::File::open(file)?; | ||
let mut archive = seek::stream_conda_info(reader).expect("Could not open conda file"); | ||
|
||
for entry in archive.entries()?.flatten() { | ||
let mut entry = entry; | ||
let path = entry.path()?; | ||
if path.as_os_str().eq("info/index.json") { | ||
return package_record_from_index_json(file, &mut entry); | ||
} | ||
} | ||
Err(std::io::Error::new( | ||
std::io::ErrorKind::Other, | ||
"No index.json found", | ||
)) | ||
} | ||
|
||
/// Create a new `repodata.json` for all packages in the given output folder. If `target_platform` is | ||
/// `Some`, only that specific subdir is indexed. Otherwise indexes all subdirs and creates a | ||
/// `repodata.json` for each. | ||
pub fn index( | ||
output_folder: &Path, | ||
target_platform: Option<&Platform>, | ||
) -> Result<(), std::io::Error> { | ||
let entries = WalkDir::new(output_folder).into_iter(); | ||
let entries: Vec<(PathBuf, ArchiveType)> = entries | ||
.filter_entry(|e| e.depth() <= 2) | ||
.filter_map(|e| e.ok()) | ||
.filter_map(|e| { | ||
ArchiveType::split_str(e.path().to_string_lossy().as_ref()) | ||
.map(|(p, t)| (PathBuf::from(format!("{}{}", p, t.extension())), t)) | ||
}) | ||
.collect(); | ||
|
||
// find all subdirs | ||
let mut platforms = entries | ||
.iter() | ||
.filter_map(|(p, _)| { | ||
p.parent() | ||
.and_then(|parent| parent.file_name()) | ||
.and_then(|file_name| { | ||
let name = file_name.to_string_lossy().to_string(); | ||
if name != "src_cache" { | ||
Some(name) | ||
} else { | ||
None | ||
} | ||
}) | ||
}) | ||
.collect::<std::collections::HashSet<_>>(); | ||
|
||
// Always create noarch subdir | ||
if !output_folder.join("noarch").exists() { | ||
std::fs::create_dir(output_folder.join("noarch"))?; | ||
platforms.insert("noarch".to_string()); | ||
} | ||
|
||
// Create target platform dir if needed | ||
if let Some(target_platform) = target_platform { | ||
let platform_str = target_platform.to_string(); | ||
if !output_folder.join(&platform_str).exists() { | ||
std::fs::create_dir(output_folder.join(&platform_str))?; | ||
platforms.insert(platform_str); | ||
} | ||
} | ||
|
||
for platform in platforms { | ||
if let Some(target_platform) = target_platform { | ||
if platform != target_platform.to_string() { | ||
if platform != "noarch" { | ||
continue; | ||
} else { | ||
// check that noarch is already indexed if it is not the target platform | ||
if output_folder.join("noarch/repodata.json").exists() { | ||
continue; | ||
} | ||
} | ||
} | ||
} | ||
|
||
let mut repodata = RepoData { | ||
info: Some(ChannelInfo { | ||
subdir: platform.clone(), | ||
base_url: None, | ||
}), | ||
packages: Default::default(), | ||
conda_packages: Default::default(), | ||
removed: Default::default(), | ||
version: Some(2), | ||
}; | ||
|
||
for (p, t) in entries.iter().filter_map(|(p, t)| { | ||
p.parent().and_then(|parent| { | ||
parent.file_name().and_then(|file_name| { | ||
if file_name == OsStr::new(&platform) { | ||
// If the file_name is the platform we're looking for, return Some((p, t)) | ||
Some((p, t)) | ||
} else { | ||
// Otherwise, we return None to filter out this item | ||
None | ||
} | ||
}) | ||
}) | ||
}) { | ||
let record = match t { | ||
ArchiveType::TarBz2 => package_record_from_tar_bz2(p), | ||
ArchiveType::Conda => package_record_from_conda(p), | ||
}; | ||
let (Ok(record), Some(file_name)) = (record, p.file_name()) else { | ||
tracing::info!("Could not read package record from {:?}", p); | ||
continue; | ||
}; | ||
repodata | ||
.conda_packages | ||
.insert(file_name.to_string_lossy().to_string(), record); | ||
} | ||
let out_file = output_folder.join(platform).join("repodata.json"); | ||
File::create(&out_file)?.write_all(serde_json::to_string_pretty(&repodata)?.as_bytes())?; | ||
} | ||
|
||
Ok(()) | ||
} | ||
|
||
// TODO: write proper unit tests for above functions |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
use rattler_conda_types::Platform; | ||
use rattler_index::index; | ||
use serde_json::Value; | ||
use std::fs; | ||
use std::fs::File; | ||
use std::path::{Path, PathBuf}; | ||
|
||
fn test_data_dir() -> PathBuf { | ||
Path::new(env!("CARGO_MANIFEST_DIR")).join("../../test-data") | ||
} | ||
|
||
/// Indexes a channel that contains a single `win-64` `.conda` package and checks the generated
/// `repodata.json` against the expected record stored in the test data.
#[test]
fn test_index() {
    let temp_dir = tempfile::tempdir().unwrap();
    let subdir_path = Path::new("win-64");
    let file_path = Path::new("conda-22.11.1-py38haa244fe_1.conda");
    let index_json_path = Path::new("conda-22.11.1-py38haa244fe_1-index.json");

    let subdir = temp_dir.path().join(subdir_path);
    fs::create_dir(&subdir).unwrap();
    fs::copy(test_data_dir().join(file_path), subdir.join(file_path)).unwrap();

    // `expect` keeps the underlying I/O error in the failure message.
    index(temp_dir.path(), Some(&Platform::Win64)).expect("indexing the channel should succeed");

    let repodata_path = subdir.join("repodata.json");
    let repodata_json: Value = serde_json::from_reader(File::open(repodata_path).unwrap()).unwrap();

    let expected_repodata_entry: Value =
        serde_json::from_reader(File::open(test_data_dir().join(index_json_path)).unwrap())
            .unwrap();

    assert_eq!(
        repodata_json
            .get("info")
            .unwrap()
            .get("subdir")
            .unwrap()
            .as_str(),
        Some("win-64")
    );
    assert!(repodata_json.get("packages").is_some());
    assert_eq!(
        repodata_json
            .get("packages.conda")
            .unwrap()
            .get("conda-22.11.1-py38haa244fe_1.conda")
            .unwrap(),
        &expected_repodata_entry
    );
}
|
||
/// Indexing an empty channel directory succeeds and produces nothing but the `noarch` subdir
/// together with its `repodata.json`.
#[test]
fn test_index_empty_directory() {
    let temp_dir = tempfile::tempdir().unwrap();
    index(temp_dir.path(), None).expect("indexing an empty directory should succeed");

    // Borrow the path here. Passing `temp_dir` by value would drop it, and thereby delete the
    // directory, as soon as `read_dir` returns, so the listing would be taken on a removed
    // directory.
    let names: Vec<String> = fs::read_dir(temp_dir.path())
        .unwrap()
        .map(|entry| entry.unwrap().file_name().to_string_lossy().into_owned())
        .collect();
    assert_eq!(names, ["noarch"]);
    assert!(temp_dir
        .path()
        .join("noarch")
        .join("repodata.json")
        .is_file());
}
Oops, something went wrong.