Skip to content

Commit

Permalink
feat: add index functionality to rattler and create python bindings (#…
Browse files Browse the repository at this point in the history
…436)

Co-authored-by: Benjamin Lowry <[email protected]>
  • Loading branch information
BenjaminLowry and Benjamin Lowry authored Dec 15, 2023
1 parent c85cc84 commit 3bedecd
Show file tree
Hide file tree
Showing 12 changed files with 599 additions and 19 deletions.
23 changes: 23 additions & 0 deletions crates/rattler_index/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
[package]
name = "rattler_index"
version.workspace = true
edition.workspace = true
authors = []
description = "A crate that indexes directories containing conda packages to create local conda channels"
categories.workspace = true
homepage.workspace = true
repository.workspace = true
license.workspace = true
readme.workspace = true

[dependencies]
fs-err = "2.11.0"
rattler_conda_types = { version = "0.14.0", path = "../rattler_conda_types", default-features = false }
rattler_digest = { version = "0.14.0", path = "../rattler_digest", default-features = false }
rattler_package_streaming = { version = "0.14.0", path = "../rattler_package_streaming", default-features = false }
serde_json = "1.0.108"
tracing = "0.1.40"
walkdir = "2.4.0"

[dev-dependencies]
tempfile = "3.8.0"
197 changes: 197 additions & 0 deletions crates/rattler_index/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
//! Indexing of packages in an output folder to create up-to-date `repodata.json` files.
#![deny(missing_docs)]

use rattler_conda_types::package::ArchiveType;
use rattler_conda_types::package::IndexJson;
use rattler_conda_types::package::PackageFile;
use rattler_conda_types::ChannelInfo;
use rattler_conda_types::PackageRecord;
use rattler_conda_types::Platform;
use rattler_conda_types::RepoData;
use rattler_package_streaming::read;
use rattler_package_streaming::seek;

use fs_err::File;
use std::ffi::OsStr;
use std::io::Read;
use std::io::Write;
use std::path::Path;
use std::path::PathBuf;
use walkdir::WalkDir;

/// Builds a [`PackageRecord`] for the package archive at `file`.
///
/// The package metadata is parsed from `index_json_reader`, which must yield the contents of the
/// archive's `info/index.json`. The SHA256 digest, MD5 digest and byte size are computed from the
/// archive on disk at `file`.
///
/// # Errors
///
/// Returns an error if the index JSON cannot be parsed, or if hashing or stat-ing `file` fails.
fn package_record_from_index_json<T: Read>(
    file: &Path,
    index_json_reader: &mut T,
) -> Result<PackageRecord, std::io::Error> {
    let index_json = IndexJson::from_reader(index_json_reader)?;

    // Each digest is a separate pass over the archive on disk.
    let sha256 = rattler_digest::compute_file_digest::<rattler_digest::Sha256>(file)?;
    let md5 = rattler_digest::compute_file_digest::<rattler_digest::Md5>(file)?;
    let size_in_bytes = std::fs::metadata(file)?.len();

    Ok(PackageRecord {
        // Identity of the package.
        name: index_json.name,
        version: index_json.version,
        build: index_json.build,
        build_number: index_json.build_number,
        // Packages whose index.json carries no subdir are recorded as "unknown".
        subdir: index_json.subdir.unwrap_or_else(|| "unknown".to_string()),

        // Properties derived from the archive file itself.
        md5: Some(md5),
        sha256: Some(sha256),
        size: Some(size_in_bytes),

        // Remaining metadata is copied over from index.json unchanged.
        arch: index_json.arch,
        platform: index_json.platform,
        depends: index_json.depends,
        constrains: index_json.constrains,
        track_features: index_json.track_features,
        features: index_json.features,
        noarch: index_json.noarch,
        license: index_json.license,
        license_family: index_json.license_family,
        timestamp: index_json.timestamp,

        // Not available from the archive; left unset / empty.
        legacy_bz2_md5: None,
        legacy_bz2_size: None,
        purls: Default::default(),
    })
}

/// Reads the [`PackageRecord`] of a `.tar.bz2` package by locating `info/index.json` in the
/// archive.
///
/// Archive entries that fail to be read are skipped.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, if an entry path cannot be decoded, if the
/// record cannot be built from the index, or if the archive holds no `info/index.json`.
fn package_record_from_tar_bz2(file: &Path) -> Result<PackageRecord, std::io::Error> {
    let mut archive = read::stream_tar_bz2(std::fs::File::open(file)?);

    for mut entry in archive.entries()?.flatten() {
        let is_index_json = entry.path()?.as_os_str().eq("info/index.json");
        if is_index_json {
            return package_record_from_index_json(file, &mut entry);
        }
    }

    Err(std::io::Error::new(
        std::io::ErrorKind::Other,
        "No index.json found",
    ))
}

/// Reads the [`PackageRecord`] of a `.conda` package by locating `info/index.json` in the
/// archive's info section.
///
/// Archive entries that fail to be read are skipped.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, if the info section of the `.conda` archive
/// cannot be opened (e.g. the file is corrupt or not a conda archive), if an entry path cannot be
/// decoded, if the record cannot be built from the index, or if no `info/index.json` is present.
fn package_record_from_conda(file: &Path) -> Result<PackageRecord, std::io::Error> {
    let reader = std::fs::File::open(file)?;
    // A malformed archive is a recoverable, input-dependent condition: report it as an error
    // instead of panicking so callers can skip the package and carry on.
    let mut archive = seek::stream_conda_info(reader).map_err(|e| {
        std::io::Error::new(
            std::io::ErrorKind::Other,
            format!("Could not open conda file: {e}"),
        )
    })?;

    for entry in archive.entries()?.flatten() {
        let mut entry = entry;
        let path = entry.path()?;
        if path.as_os_str().eq("info/index.json") {
            return package_record_from_index_json(file, &mut entry);
        }
    }
    Err(std::io::Error::new(
        std::io::ErrorKind::Other,
        "No index.json found",
    ))
}

/// Create a new `repodata.json` for all packages in the given output folder. If `target_platform` is
/// `Some`, only that specific subdir is indexed (plus `noarch` when it has no `repodata.json`
/// yet). Otherwise indexes all subdirs and creates a `repodata.json` for each.
///
/// Packages are expected at `<output_folder>/<subdir>/<archive>`. `.tar.bz2` archives are listed
/// under `packages` and `.conda` archives under `packages.conda`. The `noarch` directory and the
/// target platform directory are created when missing, and each receives a `repodata.json` even
/// when it holds no packages. A directory named `src_cache` is never treated as a subdir.
///
/// Archives whose package record cannot be read are logged and skipped.
///
/// # Errors
///
/// Returns an error if a subdir cannot be created or a `repodata.json` cannot be serialized or
/// written.
pub fn index(
    output_folder: &Path,
    target_platform: Option<&Platform>,
) -> Result<(), std::io::Error> {
    let entries: Vec<(PathBuf, ArchiveType)> = WalkDir::new(output_folder)
        .into_iter()
        // Never descend below `<output_folder>/<subdir>/`.
        .filter_entry(|e| e.depth() <= 2)
        .filter_map(|e| e.ok())
        // Only archives that live inside a subdir belong to a channel; an archive placed directly
        // in `output_folder` would otherwise be attributed to a bogus subdir named after the
        // output folder itself.
        .filter(|e| e.depth() == 2)
        .filter_map(|e| {
            ArchiveType::split_str(e.path().to_string_lossy().as_ref())
                .map(|(p, t)| (PathBuf::from(format!("{}{}", p, t.extension())), t))
        })
        .collect();

    // Find all subdirs that contain at least one package archive.
    let mut platforms = entries
        .iter()
        .filter_map(|(p, _)| p.parent().and_then(Path::file_name))
        .map(|name| name.to_string_lossy().to_string())
        .filter(|name| name.as_str() != "src_cache")
        .collect::<std::collections::HashSet<_>>();

    // Always create the noarch subdir, and make sure it gets indexed when it has no
    // `repodata.json` yet, even if the directory already existed but is empty.
    let noarch_dir = output_folder.join("noarch");
    let noarch_repodata = noarch_dir.join("repodata.json");
    if !noarch_dir.exists() {
        std::fs::create_dir(&noarch_dir)?;
    }
    if !noarch_repodata.exists() {
        platforms.insert("noarch".to_string());
    }

    // Create the target platform dir if needed. The target platform is always indexed so that an
    // existing but empty subdir still ends up with a valid (empty) `repodata.json`.
    let target_subdir = target_platform.map(|platform| platform.to_string());
    if let Some(subdir) = &target_subdir {
        let subdir_path = output_folder.join(subdir);
        if !subdir_path.exists() {
            std::fs::create_dir(&subdir_path)?;
        }
        platforms.insert(subdir.clone());
    }

    for platform in platforms {
        if let Some(target_subdir) = &target_subdir {
            if &platform != target_subdir {
                // Besides the target platform only `noarch` is touched, and only when it has not
                // been indexed before.
                if platform != "noarch" || noarch_repodata.exists() {
                    continue;
                }
            }
        }

        let mut repodata = RepoData {
            info: Some(ChannelInfo {
                subdir: platform.clone(),
                base_url: None,
            }),
            packages: Default::default(),
            conda_packages: Default::default(),
            removed: Default::default(),
            version: Some(2),
        };

        // Only look at the archives whose parent directory is the current subdir.
        let archives_in_subdir = entries
            .iter()
            .filter(|(p, _)| p.parent().and_then(Path::file_name) == Some(OsStr::new(&platform)));

        for (p, t) in archives_in_subdir {
            let record = match t {
                ArchiveType::TarBz2 => package_record_from_tar_bz2(p),
                ArchiveType::Conda => package_record_from_conda(p),
            };
            let (Ok(record), Some(file_name)) = (record, p.file_name()) else {
                tracing::info!("Could not read package record from {:?}", p);
                continue;
            };
            let file_name = file_name.to_string_lossy().to_string();
            // `.tar.bz2` and `.conda` archives live in separate sections of repodata.json.
            match t {
                ArchiveType::TarBz2 => repodata.packages.insert(file_name, record),
                ArchiveType::Conda => repodata.conda_packages.insert(file_name, record),
            };
        }

        let out_file = output_folder.join(&platform).join("repodata.json");
        File::create(&out_file)?.write_all(serde_json::to_string_pretty(&repodata)?.as_bytes())?;
    }

    Ok(())
}

// TODO: write proper unit tests for above functions
61 changes: 61 additions & 0 deletions crates/rattler_index/tests/test_index.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
use rattler_conda_types::Platform;
use rattler_index::index;
use serde_json::Value;
use std::fs;
use std::fs::File;
use std::path::{Path, PathBuf};

fn test_data_dir() -> PathBuf {
Path::new(env!("CARGO_MANIFEST_DIR")).join("../../test-data")
}

/// Indexes a channel holding a single `.conda` package in `win-64` and checks that the generated
/// `repodata.json` names the subdir and lists the package with the expected record.
#[test]
fn test_index() {
    let temp_dir = tempfile::tempdir().unwrap();
    let subdir_path = Path::new("win-64");
    let file_path = Path::new("conda-22.11.1-py38haa244fe_1.conda");
    let index_json_path = Path::new("conda-22.11.1-py38haa244fe_1-index.json");

    let subdir = temp_dir.path().join(subdir_path);
    fs::create_dir(&subdir).unwrap();
    fs::copy(test_data_dir().join(file_path), subdir.join(file_path)).unwrap();

    // `expect` surfaces the underlying error on failure, unlike comparing `is_ok()` to `true`.
    index(temp_dir.path(), Some(&Platform::Win64)).expect("indexing the channel should succeed");

    let repodata_json: Value =
        serde_json::from_reader(File::open(subdir.join("repodata.json")).unwrap()).unwrap();

    let expected_repodata_entry: Value =
        serde_json::from_reader(File::open(test_data_dir().join(index_json_path)).unwrap())
            .unwrap();

    // Indexing a `Value` yields `Null` for missing keys, so a missing key fails the assertion.
    assert_eq!(repodata_json["info"]["subdir"].as_str(), Some("win-64"));
    assert!(repodata_json.get("packages").is_some());
    assert_eq!(
        repodata_json["packages.conda"]["conda-22.11.1-py38haa244fe_1.conda"],
        expected_repodata_entry
    );
}

/// Indexing an empty folder succeeds and leaves behind only the always-created `noarch` subdir
/// together with its `repodata.json`.
#[test]
fn test_index_empty_directory() {
    let temp_dir = tempfile::tempdir().unwrap();
    index(temp_dir.path(), None).expect("indexing an empty directory should succeed");

    // Borrow the path: passing `temp_dir` by value would move the `TempDir` into `read_dir` and
    // delete the directory before it is iterated, making any assertion on its contents vacuous.
    let entries: Vec<String> = fs::read_dir(temp_dir.path())
        .unwrap()
        .map(|entry| entry.unwrap().file_name().to_string_lossy().into_owned())
        .collect();
    assert_eq!(entries, vec!["noarch".to_string()]);
    assert!(temp_dir
        .path()
        .join("noarch")
        .join("repodata.json")
        .is_file());
}
Loading

0 comments on commit 3bedecd

Please sign in to comment.