-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support globs as cache keys in tool.uv.cache-keys
#7268
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,12 @@ | ||
use crate::commit_info::CacheCommit; | ||
use crate::timestamp::Timestamp; | ||
|
||
use glob::MatchOptions; | ||
use serde::Deserialize; | ||
use std::cmp::max; | ||
use std::io; | ||
use std::path::{Path, PathBuf}; | ||
use tracing::debug; | ||
use tracing::{debug, warn}; | ||
|
||
/// The information used to determine whether a built distribution is up-to-date, based on the | ||
/// timestamps of relevant files, the current commit of a repository, etc. | ||
|
@@ -64,24 +65,81 @@ impl CacheInfo { | |
// If no cache keys were defined, use the defaults. | ||
let cache_keys = cache_keys.unwrap_or_else(|| { | ||
vec![ | ||
CacheKey::Path(directory.join("pyproject.toml")), | ||
CacheKey::Path(directory.join("setup.py")), | ||
CacheKey::Path(directory.join("setup.cfg")), | ||
CacheKey::Path("pyproject.toml".to_string()), | ||
CacheKey::Path("setup.py".to_string()), | ||
CacheKey::Path("setup.cfg".to_string()), | ||
] | ||
}); | ||
|
||
// Incorporate any additional timestamps or VCS information. | ||
for cache_key in &cache_keys { | ||
match cache_key { | ||
CacheKey::Path(file) | CacheKey::File { file } => { | ||
timestamp = max( | ||
timestamp, | ||
file.metadata() | ||
.ok() | ||
.filter(std::fs::Metadata::is_file) | ||
.as_ref() | ||
.map(Timestamp::from_metadata), | ||
); | ||
if file.chars().any(|c| matches!(c, '*' | '?' | '[')) { | ||
// Treat the path as a glob. | ||
let path = directory.join(file); | ||
let Some(pattern) = path.to_str() else { | ||
warn!("Failed to convert pattern to string: {}", path.display()); | ||
continue; | ||
}; | ||
let paths = match glob::glob_with( | ||
pattern, | ||
MatchOptions { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the motivation for these? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @BurntSushi would know though. Without There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that since |
||
case_sensitive: true, | ||
require_literal_separator: true, | ||
require_literal_leading_dot: false, | ||
}, | ||
) { | ||
Ok(paths) => paths, | ||
Err(err) => { | ||
warn!("Failed to parse glob pattern: {err}"); | ||
continue; | ||
} | ||
}; | ||
for entry in paths { | ||
let entry = match entry { | ||
Ok(entry) => entry, | ||
Err(err) => { | ||
warn!("Failed to read glob entry: {err}"); | ||
continue; | ||
} | ||
}; | ||
let metadata = match entry.metadata() { | ||
Ok(metadata) => metadata, | ||
Err(err) => { | ||
warn!("Failed to read metadata for glob entry: {err}"); | ||
continue; | ||
} | ||
}; | ||
if metadata.is_file() { | ||
timestamp = | ||
max(timestamp, Some(Timestamp::from_metadata(&metadata))); | ||
} else { | ||
warn!( | ||
"Expected file for cache key, but found directory: `{}`", | ||
entry.display() | ||
); | ||
} | ||
} | ||
} else { | ||
// Treat the path as a file. | ||
let path = directory.join(file); | ||
let metadata = match path.metadata() { | ||
Ok(metadata) => metadata, | ||
Err(err) => { | ||
warn!("Failed to read metadata for file: {err}"); | ||
continue; | ||
} | ||
}; | ||
if metadata.is_file() { | ||
timestamp = max(timestamp, Some(Timestamp::from_metadata(&metadata))); | ||
} else { | ||
warn!( | ||
"Expected file for cache key, but found directory: `{}`", | ||
path.display() | ||
); | ||
} | ||
} | ||
} | ||
CacheKey::Git { git: true } => match CacheCommit::from_repository(directory) { | ||
Ok(commit_info) => commit = Some(commit_info), | ||
|
@@ -165,10 +223,15 @@ struct ToolUv { | |
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] | ||
#[serde(untagged, rename_all = "kebab-case", deny_unknown_fields)] | ||
pub enum CacheKey { | ||
/// Ex) `"Cargo.lock"` | ||
Path(PathBuf), | ||
/// Ex) `{ file = "Cargo.lock" }` | ||
File { file: PathBuf }, | ||
/// Ex) `"Cargo.lock"` or `"**/*.toml"` | ||
Path(String), | ||
/// Ex) `{ file = "Cargo.lock" }` or `{ file = "**/*.toml" }` | ||
File { file: String }, | ||
/// Ex) `{ git = true }` | ||
Git { git: bool }, | ||
} | ||
|
||
pub enum FilePattern { | ||
Glob(String), | ||
Path(PathBuf), | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,13 +58,20 @@ pub struct Options { | |
/// to ensure that the project is rebuilt whenever the `requirements.txt` file is modified (in | ||
/// addition to watching the `pyproject.toml`). | ||
/// | ||
/// Globs are supported, following the syntax of the [`glob`](https://docs.rs/glob/0.3.1/glob/struct.Pattern.html) | ||
/// crate. For example, to invalidate the cache whenever a `.toml` file in the project directory | ||
/// or any of its subdirectories is modified, you can specify `cache-keys = [{ file = "**/*.toml" }]`. | ||
/// Note that the use of globs can be expensive, as uv may need to walk the filesystem to | ||
/// determine whether any files have changed. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
/// | ||
/// Cache keys can also include version control information. For example, if a project uses | ||
/// `setuptools_scm` to read its version from a Git tag, you can specify `cache-keys = [{ git = true }, { file = "pyproject.toml" }]` | ||
/// to include the current Git commit hash in the cache key (in addition to the | ||
/// `pyproject.toml`). | ||
/// | ||
/// Cache keys only affect the project defined by the `pyproject.toml` in which they're | ||
/// specified (as opposed to, e.g., affecting all members in a workspace). | ||
/// specified (as opposed to, e.g., affecting all members in a workspace), and all paths and | ||
/// globs are interpreted as relative to the project directory. | ||
#[option( | ||
default = r#"[{ file = "pyproject.toml" }, { file = "setup.py" }, { file = "setup.cfg" }]"#, | ||
value_type = "list[dict]", | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How come you ended up going this route instead of
globwalk
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seemed a bit simpler given that we tend to have one glob if any. What do you think?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Aside from this needing to do a directory traversal for each glob (which would be mitigated I suppose if only one glob is used, but I'm not sure if we know that's the common case), my main gripe would be that
glob
doesn't support curly brackets. And switching to another glob implementation could have subtle breaking changes in the future.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are probably super rare cases, but e.g.,
foo{a,b}bar
is interpreted literally byglob
(I believe), where asglobset
will treat it asfooabar OR
foobbar`.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok I'm gonna switch it up. But probably in a separate PR.