Use globwalk for cache-keys matching #7337

Merged · 1 commit · Sep 12, 2024
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions Cargo.toml
@@ -91,6 +91,7 @@ fs-err = { version = "2.11.0" }
 fs2 = { version = "0.4.3" }
 futures = { version = "0.3.30" }
 glob = { version = "0.3.1" }
+globwalk = { version = "0.9.1" }
 goblin = { version = "0.8.2", default-features = false, features = ["std", "elf32", "elf64", "endian_fd"] }
 hex = { version = "0.4.3" }
 home = { version = "0.5.9" }
2 changes: 1 addition & 1 deletion crates/uv-cache-info/Cargo.toml
@@ -14,7 +14,7 @@ workspace = true
 
 [dependencies]
 fs-err = { workspace = true }
-glob = { workspace = true }
+globwalk = { workspace = true }
 schemars = { workspace = true, optional = true }
 serde = { workspace = true, features = ["derive"] }
 thiserror = { workspace = true }
134 changes: 65 additions & 69 deletions crates/uv-cache-info/src/cache_info.rs
@@ -1,13 +1,19 @@
 use crate::commit_info::CacheCommit;
 use crate::timestamp::Timestamp;
 
-use glob::MatchOptions;
 use serde::Deserialize;
 use std::cmp::max;
-use std::io;
 use std::path::{Path, PathBuf};
 use tracing::{debug, warn};
 
+#[derive(Debug, thiserror::Error)]
+pub enum CacheInfoError {
+    #[error("Failed to parse glob patterns for `cache-keys`: {0}")]
+    Glob(#[from] globwalk::GlobError),
+    #[error(transparent)]
+    Io(#[from] std::io::Error),
+}
+
 /// The information used to determine whether a built distribution is up-to-date, based on the
 /// timestamps of relevant files, the current commit of a repository, etc.
 #[derive(Default, Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
@@ -33,17 +39,17 @@ impl CacheInfo {
     }
 
     /// Compute the cache info for a given path, which may be a file or a directory.
-    pub fn from_path(path: &Path) -> io::Result<Self> {
+    pub fn from_path(path: &Path) -> Result<Self, CacheInfoError> {
         let metadata = fs_err::metadata(path)?;
         if metadata.is_file() {
-            Self::from_file(path)
+            Ok(Self::from_file(path)?)
         } else {
             Self::from_directory(path)
         }
     }
 
     /// Compute the cache info for a given directory.
-    pub fn from_directory(directory: &Path) -> io::Result<Self> {
+    pub fn from_directory(directory: &Path) -> Result<Self, CacheInfoError> {
         let mut commit = None;
         let mut timestamp = None;
 
@@ -71,75 +77,34 @@ impl CacheInfo {
             ]
         });
 
-        // Incorporate any additional timestamps or VCS information.
+        // Incorporate timestamps from any direct filepaths.
+        let mut globs = vec![];
         for cache_key in &cache_keys {
             match cache_key {
                 CacheKey::Path(file) | CacheKey::File { file } => {
-                    if file.chars().any(|c| matches!(c, '*' | '?' | '[')) {
-                        // Treat the path as a glob.
-                        let path = directory.join(file);
-                        let Some(pattern) = path.to_str() else {
-                            warn!("Failed to convert pattern to string: {}", path.display());
-                            continue;
-                        };
-                        let paths = match glob::glob_with(
-                            pattern,
-                            MatchOptions {
-                                case_sensitive: true,
-                                require_literal_separator: true,
-                                require_literal_leading_dot: false,
-                            },
-                        ) {
-                            Ok(paths) => paths,
-                            Err(err) => {
-                                warn!("Failed to parse glob pattern: {err}");
-                                continue;
-                            }
-                        };
-                        for entry in paths {
-                            let entry = match entry {
-                                Ok(entry) => entry,
-                                Err(err) => {
-                                    warn!("Failed to read glob entry: {err}");
-                                    continue;
-                                }
-                            };
-                            let metadata = match entry.metadata() {
-                                Ok(metadata) => metadata,
-                                Err(err) => {
-                                    warn!("Failed to read metadata for glob entry: {err}");
-                                    continue;
-                                }
-                            };
-                            if metadata.is_file() {
-                                timestamp =
-                                    max(timestamp, Some(Timestamp::from_metadata(&metadata)));
-                            } else {
-                                warn!(
-                                    "Expected file for cache key, but found directory: `{}`",
-                                    entry.display()
-                                );
-                            }
-                        }
-                    } else {
-                        // Treat the path as a file.
-                        let path = directory.join(file);
-                        let metadata = match path.metadata() {
-                            Ok(metadata) => metadata,
-                            Err(err) => {
-                                warn!("Failed to read metadata for file: {err}");
-                                continue;
-                            }
-                        };
-                        if metadata.is_file() {
-                            timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata)));
-                        } else {
-                            warn!(
-                                "Expected file for cache key, but found directory: `{}`",
-                                path.display()
-                            );
-                        }
-                    }
+                    if file.chars().any(|c| matches!(c, '*' | '?' | '[' | '{')) {
+                        // Defer globs to a separate pass.
+                        globs.push(file);
+                        continue;
+                    }
+
+                    // Treat the path as a file.
+                    let path = directory.join(file);
+                    let metadata = match path.metadata() {
+                        Ok(metadata) => metadata,
+                        Err(err) => {
+                            warn!("Failed to read metadata for file: {err}");
+                            continue;
+                        }
+                    };
+                    if !metadata.is_file() {
+                        warn!(
+                            "Expected file for cache key, but found directory: `{}`",
+                            path.display()
+                        );
+                        continue;
+                    }
+                    timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata)));
                 }
                 CacheKey::Git { git: true } => match CacheCommit::from_repository(directory) {
                     Ok(commit_info) => commit = Some(commit_info),
@@ -151,12 +116,43 @@ impl CacheInfo {
             }
         }
 
+        // If we have any globs, process them in a single pass.
+        if !globs.is_empty() {
+            let walker = globwalk::GlobWalkerBuilder::from_patterns(directory, &globs)
+                .file_type(globwalk::FileType::FILE | globwalk::FileType::SYMLINK)
+                .build()?;
+            for entry in walker {
+                let entry = match entry {
+                    Ok(entry) => entry,
+                    Err(err) => {
+                        warn!("Failed to read glob entry: {err}");
+                        continue;
+                    }
+                };
+                let metadata = match entry.metadata() {
+                    Ok(metadata) => metadata,
+                    Err(err) => {
+                        warn!("Failed to read metadata for glob entry: {err}");
+                        continue;
+                    }
+                };
+                if !metadata.is_file() {
+                    warn!(
+                        "Expected file for cache key, but found directory: `{}`",
+                        entry.path().display()
+                    );
+                    continue;
+                }
+                timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata)));
+            }
+        }
+
         Ok(Self { timestamp, commit })
     }
 
     /// Compute the cache info for a given file, assumed to be a binary or source distribution
     /// represented as (e.g.) a `.whl` or `.tar.gz` archive.
-    pub fn from_file(path: impl AsRef<Path>) -> Result<Self, io::Error> {
+    pub fn from_file(path: impl AsRef<Path>) -> std::io::Result<Self> {
         let metadata = fs_err::metadata(path.as_ref())?;
         let timestamp = Timestamp::from_metadata(&metadata);
         Ok(Self {
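For readers unfamiliar with globwalk, here is a minimal, self-contained sketch (not uv's code; the function name and return type are illustrative) of the pattern the new pass above adopts: all deferred `cache-keys` glob patterns are matched against a base directory in a single filesystem walk, rather than one `glob::glob_with` call per pattern.

use std::path::{Path, PathBuf};

fn walk_patterns(base: &Path, patterns: &[&str]) -> Result<Vec<PathBuf>, globwalk::GlobError> {
    // Build a single walker over all patterns, rooted at `base`.
    let walker = globwalk::GlobWalkerBuilder::from_patterns(base, patterns)
        .file_type(globwalk::FileType::FILE | globwalk::FileType::SYMLINK)
        .build()?;
    // Skip unreadable entries, mirroring the warn-and-continue handling in the diff above.
    Ok(walker.flatten().map(|entry| entry.path().to_path_buf()).collect())
}

Walking once keeps the traversal cost roughly proportional to the directory tree rather than to the number of patterns.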
2 changes: 2 additions & 0 deletions crates/uv-distribution/src/error.rs
@@ -46,6 +46,8 @@ pub enum Error {
     CacheEncode(#[from] rmp_serde::encode::Error),
     #[error("Failed to walk the distribution cache")]
     CacheWalk(#[source] walkdir::Error),
+    #[error(transparent)]
+    CacheInfo(#[from] uv_cache_info::CacheInfoError),
 
     // Build error
     #[error(transparent)]
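The new `CacheInfo(#[from] uv_cache_info::CacheInfoError)` variant is what lets the call sites below drop their explicit `.map_err(Error::CacheRead)`. A toy, self-contained illustration of that thiserror pattern (the types here are invented, not uv's):

use std::path::Path;

#[derive(Debug, thiserror::Error)]
enum CacheError {
    #[error(transparent)]
    Io(#[from] std::io::Error),
}

#[derive(Debug, thiserror::Error)]
enum DistError {
    #[error(transparent)]
    Cache(#[from] CacheError),
}

fn mtime(path: &Path) -> Result<std::time::SystemTime, CacheError> {
    // `?` converts `std::io::Error` into `CacheError` via the generated `From` impl.
    Ok(std::fs::metadata(path)?.modified()?)
}

fn check(path: &Path) -> Result<std::time::SystemTime, DistError> {
    // Likewise, `?` converts `CacheError` into `DistError`, so no `map_err` is needed.
    Ok(mtime(path)?)
}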
3 changes: 1 addition & 2 deletions crates/uv-distribution/src/index/built_wheel_index.rs
@@ -132,8 +132,7 @@ impl<'a> BuiltWheelIndex<'a> {
         };
 
         // If the distribution is stale, omit it from the index.
-        let cache_info =
-            CacheInfo::from_directory(&source_dist.install_path).map_err(Error::CacheRead)?;
+        let cache_info = CacheInfo::from_directory(&source_dist.install_path)?;
 
         if cache_info != *pointer.cache_info() {
             return Ok(None);
3 changes: 1 addition & 2 deletions crates/uv-distribution/src/source/mod.rs
@@ -1112,8 +1112,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> {
         }
 
         // Determine the last-modified time of the source distribution.
-        let cache_info =
-            CacheInfo::from_directory(&resource.install_path).map_err(Error::CacheRead)?;
+        let cache_info = CacheInfo::from_directory(&resource.install_path)?;
 
         // Read the existing metadata from the cache.
         let entry = cache_shard.entry(LOCAL_REVISION);