From 4c027bccd8db6c400808e8211c44420f4c5e35cf Mon Sep 17 00:00:00 2001 From: konstin Date: Tue, 19 Nov 2024 17:01:02 +0100 Subject: [PATCH 1/3] Fix license file globbing early stop --- crates/uv-build-backend/src/metadata.rs | 32 ++++++++++++++++--------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/crates/uv-build-backend/src/metadata.rs b/crates/uv-build-backend/src/metadata.rs index 18f72dad9abc..d5d547a3f261 100644 --- a/crates/uv-build-backend/src/metadata.rs +++ b/crates/uv-build-backend/src/metadata.rs @@ -1,5 +1,5 @@ use crate::Error; -use globset::{Glob, GlobSetBuilder}; +use globset::Glob; use itertools::Itertools; use serde::Deserialize; use std::collections::{BTreeMap, Bound}; @@ -8,7 +8,7 @@ use std::path::{Path, PathBuf}; use std::str::FromStr; use tracing::{debug, trace}; use uv_fs::Simplified; -use uv_globfilter::parse_portable_glob; +use uv_globfilter::{parse_portable_glob, GlobDirFilter}; use uv_normalize::{ExtraName, PackageName}; use uv_pep440::{Version, VersionSpecifiers}; use uv_pep508::{Requirement, VersionOrUrl}; @@ -328,7 +328,7 @@ impl PyProjectToml { }; let mut license_files = Vec::new(); - let mut license_glob_builder = GlobSetBuilder::new(); + let mut license_globs_parsed = Vec::new(); for license_glob in license_globs { let pep639_glob = parse_portable_glob(license_glob).map_err(|err| Error::PortableGlob { @@ -341,28 +341,38 @@ impl PyProjectToml { .join(pep639_glob.to_string()) .to_string_lossy() .to_string(); - license_glob_builder.add(Glob::new(&absolute_glob).map_err(|err| { + license_globs_parsed.push(Glob::new(&absolute_glob).map_err(|err| { Error::GlobSet { field: "project.license-files".to_string(), err, } })?); } - let license_globs = license_glob_builder.build().map_err(|err| Error::GlobSet { - field: "project.license-files".to_string(), - err, - })?; + let license_globs = + GlobDirFilter::from_globs(&license_globs_parsed).map_err(|err| { + Error::GlobSetTooLarge { + field: "tool.uv.source-dist.include".to_string(), + source: err, + } + })?; - for entry in WalkDir::new(".") { + for entry in WalkDir::new(root).into_iter().filter_entry(|entry| { + license_globs.match_directory( + entry + .path() + .strip_prefix(root) + .expect("walkdir starts with root"), + ) + }) { let entry = entry.map_err(|err| Error::WalkDir { root: PathBuf::from("."), err, })?; let relative = entry .path() - .strip_prefix("./") + .strip_prefix(root) .expect("walkdir starts with root"); - if !license_globs.is_match(relative) { + if !license_globs.match_path(relative) { trace!("Not a license files match: `{}`", relative.user_display()); continue; } From 95c3bd6b895740542c86c791ec69e5ed18399654 Mon Sep 17 00:00:00 2001 From: konstin Date: Tue, 19 Nov 2024 22:52:43 +0100 Subject: [PATCH 2/3] Improve logging --- crates/uv-build-backend/src/lib.rs | 25 +++++++++++++++---------- crates/uv-globfilter/src/main.rs | 2 +- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/crates/uv-build-backend/src/lib.rs b/crates/uv-build-backend/src/lib.rs index ddaae8998b29..19a3880c0b64 100644 --- a/crates/uv-build-backend/src/lib.rs +++ b/crates/uv-build-backend/src/lib.rs @@ -484,7 +484,12 @@ fn wheel_subdir_from_globs( let license_files_globs: Vec<_> = globs .iter() .map(|license_files| { - trace!("Including license files at: `{license_files}`"); + trace!( + "Including {} at `{}` with `{}`", + globs_field, + src.user_display(), + license_files + ); parse_portable_glob(license_files) }) .collect::>() @@ -521,7 +526,7 @@ fn wheel_subdir_from_globs( .expect("walkdir starts with root"); if !license_files_matcher.match_path(relative) { - trace!("Excluding {}", relative.user_display()); + trace!("Excluding: `{}`", relative.user_display()); continue; }; @@ -735,7 +740,7 @@ pub fn build_source_dist( .expect("walkdir starts with root"); if !include_matcher.match_path(relative) || exclude_matcher.is_match(relative) { - trace!("Excluding {}", relative.user_display()); + trace!("Excluding: `{}`", relative.user_display()); continue; }; @@ -1159,13 +1164,6 @@ mod tests { ) .unwrap(); - // Check that we write deterministic wheels. - let wheel_filename = "built_by_uv-0.1.0-py3-none-any.whl"; - assert_eq!( - fs_err::read(direct_output_dir.path().join(wheel_filename)).unwrap(), - fs_err::read(indirect_output_dir.path().join(wheel_filename)).unwrap() - ); - // Check the contained files and directories assert_snapshot!(source_dist_contents.iter().map(|path| path.replace('\\', "/")).join("\n"), @r" built_by_uv-0.1.0/LICENSE-APACHE @@ -1220,5 +1218,12 @@ mod tests { built_by_uv/arithmetic/circle.py built_by_uv/arithmetic/pi.txt "); + + // Check that we write deterministic wheels. + let wheel_filename = "built_by_uv-0.1.0-py3-none-any.whl"; + assert_eq!( + fs_err::read(direct_output_dir.path().join(wheel_filename)).unwrap(), + fs_err::read(indirect_output_dir.path().join(wheel_filename)).unwrap() + ); } } diff --git a/crates/uv-globfilter/src/main.rs b/crates/uv-globfilter/src/main.rs index 9cdba3dadb5f..30a12b665152 100644 --- a/crates/uv-globfilter/src/main.rs +++ b/crates/uv-globfilter/src/main.rs @@ -54,7 +54,7 @@ fn main() { .to_path_buf(); if !include_matcher.match_path(&relative) || exclude_matcher.is_match(&relative) { - trace!("Excluding: {}", relative.display()); + trace!("Excluding: `{}`", relative.display()); continue; }; println!("{}", relative.display()); From 98199960167007c09296752459331198eebc7a25 Mon Sep 17 00:00:00 2001 From: konstin Date: Tue, 19 Nov 2024 23:20:26 +0100 Subject: [PATCH 3/3] Add wheel excludes --- crates/uv-build-backend/src/lib.rs | 89 +++++++++++++++++-------- crates/uv-build-backend/src/metadata.rs | 11 +++ 2 files changed, 71 insertions(+), 29 deletions(-) diff --git a/crates/uv-build-backend/src/lib.rs b/crates/uv-build-backend/src/lib.rs index 19a3880c0b64..12a3e1b09592 100644 --- a/crates/uv-build-backend/src/lib.rs +++ b/crates/uv-build-backend/src/lib.rs @@ -4,7 +4,7 @@ use crate::metadata::{PyProjectToml, ValidationError}; use flate2::write::GzEncoder; use flate2::Compression; use fs_err::File; -use globset::{Glob, GlobSetBuilder}; +use globset::{Glob, GlobSet, GlobSetBuilder}; use itertools::Itertools; use sha2::{Digest, Sha256}; use std::fs::FileType; @@ -318,6 +318,20 @@ pub fn build_wheel( debug!("Writing wheel at {}", wheel_path.user_display()); let mut wheel_writer = ZipDirectoryWriter::new_wheel(File::create(&wheel_path)?); + // Wheel excludes + // TODO(konstin): The must be stronger than the source dist excludes, otherwise we can get more + // files in source tree -> wheel than for source tree -> source dist -> wheel. + let default_excludes: &[String] = &[ + "__pycache__".to_string(), + "*.pyc".to_string(), + "*.pyo".to_string(), + ]; + let excludes = pyproject_toml + .wheel_settings() + .and_then(|settings| settings.exclude.as_deref()) + .unwrap_or(default_excludes); + let exclude_matcher = build_exclude_matcher(excludes)?; + debug!("Adding content files to {}", wheel_path.user_display()); let module_root = pyproject_toml .wheel_settings() @@ -331,7 +345,10 @@ pub fn build_wheel( if !module_root.join("__init__.py").is_file() { return Err(Error::MissingModule(module_root)); } - for entry in WalkDir::new(module_root) { + for entry in WalkDir::new(module_root) + .into_iter() + .filter_entry(|entry| !exclude_matcher.is_match(entry.path())) + { let entry = entry.map_err(|err| Error::WalkDir { root: source_tree.to_path_buf(), err, @@ -340,9 +357,11 @@ pub fn build_wheel( let relative_path = entry .path() .strip_prefix(&strip_root) - .expect("walkdir starts with root") - .user_display() - .to_string(); + .expect("walkdir starts with root"); + if exclude_matcher.is_match(relative_path) { + trace!("Excluding from module: `{}`", relative_path.user_display()); + } + let relative_path = relative_path.user_display().to_string(); debug!("Adding to wheel: `{relative_path}`"); @@ -497,7 +516,7 @@ fn wheel_subdir_from_globs( field: globs_field.to_string(), source: err, })?; - let license_files_matcher = + let matcher = GlobDirFilter::from_globs(&license_files_globs).map_err(|err| Error::GlobSetTooLarge { field: globs_field.to_string(), source: err, @@ -513,7 +532,7 @@ fn wheel_subdir_from_globs( .expect("walkdir starts with root"); // Fast path: Don't descend into a directory that can't be included. - license_files_matcher.match_directory(relative) + matcher.match_directory(relative) }) { let entry = entry.map_err(|err| Error::WalkDir { root: src.to_path_buf(), @@ -525,8 +544,8 @@ fn wheel_subdir_from_globs( .strip_prefix(src) .expect("walkdir starts with root"); - if !license_files_matcher.match_path(relative) { - trace!("Excluding: `{}`", relative.user_display()); + if !matcher.match_path(relative) { + trace!("Excluding {}: `{}`", globs_field, relative.user_display()); continue; }; @@ -691,26 +710,7 @@ pub fn build_source_dist( source: err, })?; - let mut exclude_builder = GlobSetBuilder::new(); - for exclude in settings.exclude { - // Excludes are unanchored - let exclude = if let Some(exclude) = exclude.strip_prefix("/") { - exclude.to_string() - } else { - format!("**/{exclude}").to_string() - }; - let glob = parse_portable_glob(&exclude).map_err(|err| Error::PortableGlob { - field: "tool.uv.source-dist.exclude".to_string(), - source: err, - })?; - exclude_builder.add(glob); - } - let exclude_matcher = exclude_builder - .build() - .map_err(|err| Error::GlobSetTooLarge { - field: "tool.uv.source-dist.exclude".to_string(), - source: err, - })?; + let exclude_matcher = build_exclude_matcher(&settings.exclude)?; // TODO(konsti): Add files linked by pyproject.toml @@ -753,6 +753,31 @@ pub fn build_source_dist( Ok(filename) } +/// Build a globset matcher for excludes. +fn build_exclude_matcher(excludes: &[String]) -> Result { + let mut exclude_builder = GlobSetBuilder::new(); + for exclude in excludes { + // Excludes are unanchored + let exclude = if let Some(exclude) = exclude.strip_prefix("/") { + exclude.to_string() + } else { + format!("**/{exclude}").to_string() + }; + let glob = parse_portable_glob(&exclude).map_err(|err| Error::PortableGlob { + field: "tool.uv.source-dist.exclude".to_string(), + source: err, + })?; + exclude_builder.add(glob); + } + let exclude_matcher = exclude_builder + .build() + .map_err(|err| Error::GlobSetTooLarge { + field: "tool.uv.source-dist.exclude".to_string(), + source: err, + })?; + Ok(exclude_matcher) +} + /// Add a file or a directory to a source distribution. fn add_source_dist_entry( tar: &mut tar::Builder>, @@ -1112,6 +1137,12 @@ mod tests { fs_err::copy(built_by_uv.join(dir), src.path().join(dir)).unwrap(); } + // Add some files to be excluded + let module_root = src.path().join("src").join("built_by_uv"); + fs_err::create_dir_all(module_root.join("__pycache__")).unwrap(); + File::create(module_root.join("__pycache__").join("compiled.pyc")).unwrap(); + File::create(module_root.join("arithmetic").join("circle.pyc")).unwrap(); + // Build a wheel from the source tree let direct_output_dir = TempDir::new().unwrap(); build_wheel(src.path(), direct_output_dir.path(), None, "1.0.0+test").unwrap(); diff --git a/crates/uv-build-backend/src/metadata.rs b/crates/uv-build-backend/src/metadata.rs index d5d547a3f261..7886639e78a8 100644 --- a/crates/uv-build-backend/src/metadata.rs +++ b/crates/uv-build-backend/src/metadata.rs @@ -721,6 +721,17 @@ pub(crate) struct WheelSettings { /// The directory that contains the module directory, usually `src`, or an empty path when /// using the flat layout over the src layout. pub(crate) module_root: Option, + + /// Glob expressions which files and directories to exclude from the previous source + /// distribution includes. + /// + /// Excludes are not anchored, which means that `__pycache__` excludes all directories named + /// `__pycache__` and it's children anywhere. To anchor a directory, use a `/` prefix, e.g., + /// `/dist` will exclude only `/dist`. + /// + /// The glob syntax is the reduced portable glob from + /// [PEP 639](https://peps.python.org/pep-0639/#add-license-FILES-key). + pub(crate) exclude: Option>, /// Data includes for wheels. pub(crate) data: Option, }