Use globwalk
charliermarsh committed Sep 12, 2024
1 parent f22e5ef commit 3ad6a71
Showing 7 changed files with 72 additions and 75 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -91,6 +91,7 @@ fs-err = { version = "2.11.0" }
 fs2 = { version = "0.4.3" }
 futures = { version = "0.3.30" }
 glob = { version = "0.3.1" }
+globwalk = { version = "0.9.1" }
 goblin = { version = "0.8.2", default-features = false, features = ["std", "elf32", "elf64", "endian_fd"] }
 hex = { version = "0.4.3" }
 home = { version = "0.5.9" }
2 changes: 1 addition & 1 deletion crates/uv-cache-info/Cargo.toml
@@ -14,7 +14,7 @@ workspace = true
 
 [dependencies]
 fs-err = { workspace = true }
-glob = { workspace = true }
+globwalk = { workspace = true }
 schemars = { workspace = true, optional = true }
 serde = { workspace = true, features = ["derive"] }
 thiserror = { workspace = true }
134 changes: 65 additions & 69 deletions crates/uv-cache-info/src/cache_info.rs
@@ -1,13 +1,19 @@
 use crate::commit_info::CacheCommit;
 use crate::timestamp::Timestamp;
 
-use glob::MatchOptions;
 use serde::Deserialize;
 use std::cmp::max;
-use std::io;
 use std::path::{Path, PathBuf};
 use tracing::{debug, warn};
 
+#[derive(Debug, thiserror::Error)]
+pub enum CacheInfoError {
+    #[error("Failed to parse glob patterns for `cache-keys`: {0}")]
+    Glob(#[from] globwalk::GlobError),
+    #[error(transparent)]
+    Io(#[from] std::io::Error),
+}
+
 /// The information used to determine whether a built distribution is up-to-date, based on the
 /// timestamps of relevant files, the current commit of a repository, etc.
 #[derive(Default, Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
@@ -33,17 +39,17 @@ impl CacheInfo {
     }
 
     /// Compute the cache info for a given path, which may be a file or a directory.
-    pub fn from_path(path: &Path) -> io::Result<Self> {
+    pub fn from_path(path: &Path) -> Result<Self, CacheInfoError> {
         let metadata = fs_err::metadata(path)?;
         if metadata.is_file() {
-            Self::from_file(path)
+            Ok(Self::from_file(path)?)
         } else {
             Self::from_directory(path)
         }
     }
 
     /// Compute the cache info for a given directory.
-    pub fn from_directory(directory: &Path) -> io::Result<Self> {
+    pub fn from_directory(directory: &Path) -> Result<Self, CacheInfoError> {
         let mut commit = None;
         let mut timestamp = None;
 
@@ -71,75 +77,34 @@ impl CacheInfo {
             ]
         });
 
-        // Incorporate any additional timestamps or VCS information.
+        // Incorporate timestamps from any direct filepaths.
+        let mut globs = vec![];
         for cache_key in &cache_keys {
             match cache_key {
                 CacheKey::Path(file) | CacheKey::File { file } => {
-                    if file.chars().any(|c| matches!(c, '*' | '?' | '[')) {
-                        // Treat the path as a glob.
-                        let path = directory.join(file);
-                        let Some(pattern) = path.to_str() else {
-                            warn!("Failed to convert pattern to string: {}", path.display());
+                    if file.chars().any(|c| matches!(c, '*' | '?' | '[' | '{')) {
+                        // Defer globs to a separate pass.
+                        globs.push(file);
+                        continue;
+                    }
+
+                    // Treat the path as a file.
+                    let path = directory.join(file);
+                    let metadata = match path.metadata() {
+                        Ok(metadata) => metadata,
+                        Err(err) => {
+                            warn!("Failed to read metadata for file: {err}");
                             continue;
-                        };
-                        let paths = match glob::glob_with(
-                            pattern,
-                            MatchOptions {
-                                case_sensitive: true,
-                                require_literal_separator: true,
-                                require_literal_leading_dot: false,
-                            },
-                        ) {
-                            Ok(paths) => paths,
-                            Err(err) => {
-                                warn!("Failed to parse glob pattern: {err}");
-                                continue;
-                            }
-                        };
-                        for entry in paths {
-                            let entry = match entry {
-                                Ok(entry) => entry,
-                                Err(err) => {
-                                    warn!("Failed to read glob entry: {err}");
-                                    continue;
-                                }
-                            };
-                            let metadata = match entry.metadata() {
-                                Ok(metadata) => metadata,
-                                Err(err) => {
-                                    warn!("Failed to read metadata for glob entry: {err}");
-                                    continue;
-                                }
-                            };
-                            if metadata.is_file() {
-                                timestamp =
-                                    max(timestamp, Some(Timestamp::from_metadata(&metadata)));
-                            } else {
-                                warn!(
-                                    "Expected file for cache key, but found directory: `{}`",
-                                    entry.display()
-                                );
-                            }
-                        }
-                    } else {
-                        // Treat the path as a file.
-                        let path = directory.join(file);
-                        let metadata = match path.metadata() {
-                            Ok(metadata) => metadata,
-                            Err(err) => {
-                                warn!("Failed to read metadata for file: {err}");
-                                continue;
-                            }
-                        };
-                        if metadata.is_file() {
-                            timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata)));
-                        } else {
-                            warn!(
-                                "Expected file for cache key, but found directory: `{}`",
-                                path.display()
-                            );
                         }
                     };
+                    if !metadata.is_file() {
+                        warn!(
+                            "Expected file for cache key, but found directory: `{}`",
+                            path.display()
+                        );
+                        continue;
+                    }
+                    timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata)));
                 }
                 CacheKey::Git { git: true } => match CacheCommit::from_repository(directory) {
                     Ok(commit_info) => commit = Some(commit_info),
@@ -151,12 +116,43 @@ impl CacheInfo {
             }
         }
 
+        // If we have any globs, process them in a single pass.
+        if !globs.is_empty() {
+            let walker = globwalk::GlobWalkerBuilder::from_patterns(directory, &globs)
+                .file_type(globwalk::FileType::FILE | globwalk::FileType::SYMLINK)
+                .build()?;
+            for entry in walker {
+                let entry = match entry {
+                    Ok(entry) => entry,
+                    Err(err) => {
+                        warn!("Failed to read glob entry: {err}");
+                        continue;
+                    }
+                };
+                let metadata = match entry.metadata() {
+                    Ok(metadata) => metadata,
+                    Err(err) => {
+                        warn!("Failed to read metadata for glob entry: {err}");
+                        continue;
+                    }
+                };
+                if !metadata.is_file() {
+                    warn!(
+                        "Expected file for cache key, but found directory: `{}`",
+                        entry.path().display()
+                    );
+                    continue;
+                }
+                timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata)));
+            }
+        }
+
         Ok(Self { timestamp, commit })
     }
 
     /// Compute the cache info for a given file, assumed to be a binary or source distribution
     /// represented as (e.g.) a `.whl` or `.tar.gz` archive.
-    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, io::Error> {
+    pub fn from_file(path: impl AsRef<Path>) -> std::io::Result<Self> {
         let metadata = fs_err::metadata(path.as_ref())?;
         let timestamp = Timestamp::from_metadata(&metadata);
         Ok(Self {
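For context, the heart of this change is replacing per-pattern `glob::glob_with` calls with a single `globwalk` walk over all of the collected patterns. The following is a minimal, self-contained sketch of that usage pattern, mirroring the `globwalk` calls in the diff above; the base directory, the example patterns, and the `main` scaffolding are illustrative assumptions, not part of the uv code, and the sketch assumes `globwalk` as a dependency.

use std::cmp::max;
use std::time::SystemTime;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Patterns of the kind that would come from glob-style `cache-keys` entries.
    let patterns = ["**/*.toml", "src/**/*.rs"];

    // Build a single walker over the base directory, restricted to files and
    // symlinks, mirroring the call in `CacheInfo::from_directory` above.
    let walker = globwalk::GlobWalkerBuilder::from_patterns(".", &patterns)
        .file_type(globwalk::FileType::FILE | globwalk::FileType::SYMLINK)
        .build()?;

    // Track the newest modification time across all matched files, analogous
    // to how the commit folds matches into a single `Timestamp`.
    let mut newest: Option<SystemTime> = None;
    for entry in walker {
        let Ok(entry) = entry else { continue };
        let Ok(metadata) = entry.metadata() else { continue };
        if metadata.is_file() {
            newest = max(newest, Some(metadata.modified()?));
        }
    }
    println!("newest matched mtime: {newest:?}");
    Ok(())
}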
2 changes: 2 additions & 0 deletions crates/uv-distribution/src/error.rs
@@ -46,6 +46,8 @@ pub enum Error {
     CacheEncode(#[from] rmp_serde::encode::Error),
     #[error("Failed to walk the distribution cache")]
     CacheWalk(#[source] walkdir::Error),
+    #[error(transparent)]
+    CacheInfo(#[from] uv_cache_info::CacheInfoError),
 
     // Build error
     #[error(transparent)]
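The new variant above uses thiserror's `#[from]`, which generates a `From<uv_cache_info::CacheInfoError>` impl for `Error`; that conversion is what lets the call sites in the next two files replace `.map_err(Error::CacheRead)?` with a bare `?`. A minimal sketch of the mechanism with stand-in types (the names and the `metadata`-based body are illustrative, not the actual uv implementation; assumes `thiserror` as a dependency):

// Stand-ins for `uv_cache_info::CacheInfoError` and the distribution `Error`.
#[derive(Debug, thiserror::Error)]
enum CacheInfoError {
    #[error(transparent)]
    Io(#[from] std::io::Error),
}

#[derive(Debug, thiserror::Error)]
enum Error {
    // `#[from]` generates `impl From<CacheInfoError> for Error`, so `?`
    // converts the error automatically at call sites.
    #[error(transparent)]
    CacheInfo(#[from] CacheInfoError),
}

fn compute_cache_info(path: &std::path::Path) -> Result<u64, CacheInfoError> {
    // `?` converts `std::io::Error` into `CacheInfoError` via `#[from]`.
    Ok(std::fs::metadata(path)?.len())
}

fn build(path: &std::path::Path) -> Result<u64, Error> {
    // No `.map_err(...)` needed: `?` goes through the generated `From` impl.
    Ok(compute_cache_info(path)?)
}

fn main() {
    match build(std::path::Path::new("Cargo.toml")) {
        Ok(len) => println!("cache key input length: {len}"),
        Err(err) => eprintln!("error: {err}"),
    }
}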
3 changes: 1 addition & 2 deletions crates/uv-distribution/src/index/built_wheel_index.rs
@@ -132,8 +132,7 @@ impl<'a> BuiltWheelIndex<'a> {
         };
 
         // If the distribution is stale, omit it from the index.
-        let cache_info =
-            CacheInfo::from_directory(&source_dist.install_path).map_err(Error::CacheRead)?;
+        let cache_info = CacheInfo::from_directory(&source_dist.install_path)?;
 
         if cache_info != *pointer.cache_info() {
             return Ok(None);
3 changes: 1 addition & 2 deletions crates/uv-distribution/src/source/mod.rs
@@ -1112,8 +1112,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         }
 
         // Determine the last-modified time of the source distribution.
-        let cache_info =
-            CacheInfo::from_directory(&resource.install_path).map_err(Error::CacheRead)?;
+        let cache_info = CacheInfo::from_directory(&resource.install_path)?;
 
         // Read the existing metadata from the cache.
         let entry = cache_shard.entry(LOCAL_REVISION);
