From c77745982465d09587a9f5e6153753a19033bed0 Mon Sep 17 00:00:00 2001 From: konstin Date: Thu, 28 Nov 2024 14:43:10 +0100 Subject: [PATCH] Build backend: Revamp include/exclude When building the source distribution, we always need to include `pyproject.toml` and the module; when building the wheel, we always include the module but nothing else at top level. Since we only allow a single module per wheel, that means that there are no specific wheel includes. This means we have source includes, source excludes, wheel excludes, but no wheel includes: The wheel contents are instead defined by the module root, plus the metadata files and data directories separately. Extra source dist includes are currently unused (they can't end up in the wheel currently), but it makes sense to model them here; they will be needed for any sort of procedural build step. This results in the following fields being relevant for inclusion and exclusion: * project.readme: PEP 621 * project.license-files: PEP 639 * module_root: `Path` * source_include: `Vec` * source_exclude: `Vec` * wheel_exclude: `Vec` * data: `Map` An opinionated choice is that the wheel excludes always contain the source excludes: Otherwise you could have a path A in the source tree that gets included when building the wheel directly from the source tree, but not when going through the source dist as intermediary, because A is in source excludes, but not in the wheel excludes. This has been a source of errors previously. In the process, I fixed a bug where we would skip directories and only include the files, and where we were missing license files due to absolute globs. 
--- crates/uv-build-backend/src/lib.rs | 308 +++++++++--------- crates/uv-build-backend/src/metadata.rs | 131 ++++++-- crates/uv-globfilter/src/glob_dir_filter.rs | 11 +- crates/uv-settings/src/settings.rs | 7 +- crates/uv/src/commands/build_backend.rs | 2 - crates/uv/tests/it/pip_install.rs | 2 +- crates/uv/tests/it/show_settings.rs | 2 +- .../built-by-uv/data-dir/build-script.py | 1 + scripts/packages/built-by-uv/pyproject.toml | 10 +- .../built-by-uv/src/built_by_uv/build-only.h | 4 + .../src/built_by_uv/not-packaged.txt | 1 + 11 files changed, 272 insertions(+), 207 deletions(-) create mode 100644 scripts/packages/built-by-uv/data-dir/build-script.py create mode 100644 scripts/packages/built-by-uv/src/built_by_uv/build-only.h create mode 100644 scripts/packages/built-by-uv/src/built_by_uv/not-packaged.txt diff --git a/crates/uv-build-backend/src/lib.rs b/crates/uv-build-backend/src/lib.rs index 21c864b8db948..616669bf2229a 100644 --- a/crates/uv-build-backend/src/lib.rs +++ b/crates/uv-build-backend/src/lib.rs @@ -1,6 +1,6 @@ mod metadata; -use crate::metadata::{PyProjectToml, ValidationError}; +use crate::metadata::{BuildBackendSettings, PyProjectToml, ValidationError}; use flate2::write::GzEncoder; use flate2::Compression; use fs_err::File; @@ -49,6 +49,8 @@ pub enum Error { #[source] err: globset::Error, }, + #[error("`pyproject.toml` must not be excluded from source distribution build")] + PyprojectTomlExcluded, #[error("Failed to walk source tree: `{}`", root.user_display())] WalkDir { root: PathBuf, @@ -303,6 +305,10 @@ pub fn build_wheel( let contents = fs_err::read_to_string(source_tree.join("pyproject.toml"))?; let pyproject_toml = PyProjectToml::parse(&contents)?; pyproject_toml.check_build_system("1.0.0+test"); + let settings = pyproject_toml + .settings() + .cloned() + .unwrap_or_else(BuildBackendSettings::default); check_metadata_directory(source_tree, metadata_directory, &pyproject_toml)?; @@ -320,28 +326,23 @@ pub fn build_wheel( let mut 
wheel_writer = ZipDirectoryWriter::new_wheel(File::create(&wheel_path)?); // Wheel excludes - // TODO(konstin): The must be stronger than the source dist excludes, otherwise we can get more - // files in source tree -> wheel than for source tree -> source dist -> wheel. - let default_excludes: &[String] = &[ - "__pycache__".to_string(), - "*.pyc".to_string(), - "*.pyo".to_string(), - ]; - let excludes = pyproject_toml - .wheel_settings() - .and_then(|settings| settings.exclude.as_deref()) - .unwrap_or(default_excludes); + let mut excludes: Vec = settings.wheel_exclude; + // The wheel must not include any files excluded by the source distribution (at least until we + // have files generated in the source dist -> wheel build step). + for exclude in settings.source_exclude { + // Avoid duplicate entries. + if !excludes.contains(&exclude) { + excludes.push(exclude); + } + } + debug!("Wheel excludes: {:?}", excludes); let exclude_matcher = build_exclude_matcher(excludes)?; debug!("Adding content files to {}", wheel_path.user_display()); - let module_root = pyproject_toml - .wheel_settings() - .and_then(|wheel_settings| wheel_settings.module_root.as_deref()) - .unwrap_or_else(|| Path::new("src")); - if module_root.is_absolute() { - return Err(Error::AbsoluteModuleRoot(module_root.to_path_buf())); + if settings.module_root.is_absolute() { + return Err(Error::AbsoluteModuleRoot(settings.module_root.clone())); } - let strip_root = source_tree.join(module_root); + let strip_root = source_tree.join(settings.module_root); let module_root = strip_root.join(pyproject_toml.name().as_dist_info_name().as_ref()); if !module_root.join("__init__.py").is_file() { return Err(Error::MissingModule(module_root)); @@ -364,21 +365,28 @@ pub fn build_wheel( ); } - let relative_path = entry + // We only want to take the module root, but since excludes start at the source tree root, + // we strip higher than we iterate. 
+ let match_path = entry + .path() + .strip_prefix(source_tree) + .expect("walkdir starts with root"); + let wheel_path = entry .path() .strip_prefix(&strip_root) .expect("walkdir starts with root"); - if exclude_matcher.is_match(relative_path) { - trace!("Excluding from module: `{}`", relative_path.user_display()); + if exclude_matcher.is_match(match_path) { + trace!("Excluding from module: `{}`", match_path.user_display()); + continue; } - let relative_path = relative_path.user_display().to_string(); + let wheel_path = wheel_path.user_display().to_string(); - debug!("Adding to wheel: `{relative_path}`"); + debug!("Adding to wheel: `{wheel_path}`"); if entry.file_type().is_dir() { - wheel_writer.write_directory(&relative_path)?; + wheel_writer.write_directory(&wheel_path)?; } else if entry.file_type().is_file() { - wheel_writer.write_file(&relative_path, entry.path())?; + wheel_writer.write_file(&wheel_path, entry.path())?; } else { // TODO(konsti): We may want to support symlinks, there is support for installing them. 
return Err(Error::UnsupportedFileType( @@ -408,12 +416,7 @@ pub fn build_wheel( } // Add the data files - for (name, directory) in pyproject_toml - .wheel_settings() - .and_then(|wheel_settings| wheel_settings.data.clone()) - .unwrap_or_default() - .iter() - { + for (name, directory) in settings.data.iter() { debug!("Adding {name} data files from: `{directory}`"); let data_dir = format!( "{}-{}.data/{}/", @@ -427,7 +430,7 @@ pub fn build_wheel( &data_dir, &["**".to_string()], &mut wheel_writer, - &format!("tool.uv.wheel.data.{name}"), + &format!("tool.uv.build-backend.data.{name}"), )?; } @@ -454,6 +457,10 @@ pub fn build_editable( let contents = fs_err::read_to_string(source_tree.join("pyproject.toml"))?; let pyproject_toml = PyProjectToml::parse(&contents)?; pyproject_toml.check_build_system("1.0.0+test"); + let settings = pyproject_toml + .settings() + .cloned() + .unwrap_or_else(BuildBackendSettings::default); check_metadata_directory(source_tree, metadata_directory, &pyproject_toml)?; @@ -471,14 +478,10 @@ pub fn build_editable( let mut wheel_writer = ZipDirectoryWriter::new_wheel(File::create(&wheel_path)?); debug!("Adding pth file to {}", wheel_path.user_display()); - let module_root = pyproject_toml - .wheel_settings() - .and_then(|wheel_settings| wheel_settings.module_root.as_deref()) - .unwrap_or_else(|| Path::new("src")); - if module_root.is_absolute() { - return Err(Error::AbsoluteModuleRoot(module_root.to_path_buf())); + if settings.module_root.is_absolute() { + return Err(Error::AbsoluteModuleRoot(settings.module_root.clone())); } - let src_root = source_tree.join(module_root); + let src_root = source_tree.join(settings.module_root); let module_root = src_root.join(pyproject_toml.name().as_dist_info_name().as_ref()); if !module_root.join("__init__.py").is_file() { return Err(Error::MissingModule(module_root)); @@ -581,57 +584,19 @@ fn wheel_subdir_from_globs( Ok(()) } -/// TODO(konsti): Wire this up with actual settings and remove this struct. 
-/// -/// To select which files to include in the source distribution, we first add the includes, then -/// remove the excludes from that. -pub struct SourceDistSettings { - /// Glob expressions which files and directories to include in the source distribution. - /// - /// Includes are anchored, which means that `pyproject.toml` includes only - /// `/pyproject.toml`. Use for example `assets/**/sample.csv` to include for all - /// `sample.csv` files in `/assets` or any child directory. To recursively include - /// all files under a directory, use a `/**` suffix, e.g. `src/**`. For performance and - /// reproducibility, avoid unanchored matches such as `**/sample.csv`. - /// - /// The glob syntax is the reduced portable glob from - /// [PEP 639](https://peps.python.org/pep-0639/#add-license-FILES-key). - include: Vec, - /// Glob expressions which files and directories to exclude from the previous source - /// distribution includes. - /// - /// Excludes are not anchored, which means that `__pycache__` excludes all directories named - /// `__pycache__` and it's children anywhere. To anchor a directory, use a `/` prefix, e.g., - /// `/dist` will exclude only `/dist`. - /// - /// The glob syntax is the reduced portable glob from - /// [PEP 639](https://peps.python.org/pep-0639/#add-license-FILES-key). - exclude: Vec, -} - -impl Default for SourceDistSettings { - fn default() -> Self { - Self { - include: vec!["src/**".to_string(), "pyproject.toml".to_string()], - exclude: vec![ - "__pycache__".to_string(), - "*.pyc".to_string(), - "*.pyo".to_string(), - ], - } - } -} - /// Build a source distribution from the source tree and place it in the output directory. 
pub fn build_source_dist( source_tree: &Path, source_dist_directory: &Path, - settings: SourceDistSettings, uv_version: &str, ) -> Result { let contents = fs_err::read_to_string(source_tree.join("pyproject.toml"))?; let pyproject_toml = PyProjectToml::parse(&contents)?; pyproject_toml.check_build_system(uv_version); + let settings = pyproject_toml + .settings() + .cloned() + .unwrap_or_else(BuildBackendSettings::default); let filename = SourceDistFilename { name: pyproject_toml.name().clone(), @@ -664,11 +629,22 @@ pub fn build_source_dist( ) .map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?; - // The user (or default) includes + // File and directories to include in the source directory let mut include_globs = Vec::new(); - for include in settings.include { + let mut includes: Vec = settings.source_include; + // pyproject.toml is always included. + includes.push(globset::escape("pyproject.toml")); + // The wheel must not include any files included by the source distribution (at least until we + // have files generated in the source dist -> wheel build step). 
+ let import_path = &settings + .module_root + .join(pyproject_toml.name().as_dist_info_name().as_ref()) + .simplified_display() + .to_string(); + includes.push(format!("{}/**", globset::escape(import_path))); + for include in includes { let glob = parse_portable_glob(&include).map_err(|err| Error::PortableGlob { - field: "tool.uv.source-dist.include".to_string(), + field: "tool.uv.build-backend.source-include".to_string(), source: err, })?; include_globs.push(glob.clone()); @@ -698,16 +674,11 @@ pub fn build_source_dist( } // Include the data files - for (name, directory) in pyproject_toml - .wheel_settings() - .and_then(|wheel_settings| wheel_settings.data.clone()) - .unwrap_or_default() - .iter() - { + for (name, directory) in settings.data.iter() { let glob = parse_portable_glob(&format!("{}/**", globset::escape(directory))).map_err(|err| { Error::PortableGlob { - field: format!("tool.uv.wheel.data.{name}"), + field: format!("tool.uv.build-backend.data.{name}"), source: err, } })?; @@ -717,11 +688,17 @@ pub fn build_source_dist( let include_matcher = GlobDirFilter::from_globs(&include_globs).map_err(|err| Error::GlobSetTooLarge { - field: "tool.uv.source-dist.include".to_string(), + field: "tool.uv.build-backend.source-include".to_string(), source: err, })?; - let exclude_matcher = build_exclude_matcher(&settings.exclude)?; + let mut excludes: Vec = Vec::new(); + excludes.extend(settings.source_exclude); + debug!("Source dist excludes: {:?}", excludes); + let exclude_matcher = build_exclude_matcher(excludes)?; + if exclude_matcher.is_match("pyproject.toml") { + return Err(Error::PyprojectTomlExcluded); + } let mut files_visited = 0; for entry in WalkDir::new(source_tree).into_iter().filter_entry(|entry| { @@ -773,9 +750,12 @@ pub fn build_source_dist( } /// Build a globset matcher for excludes. 
-fn build_exclude_matcher(excludes: &[String]) -> Result { +fn build_exclude_matcher( + excludes: impl IntoIterator>, +) -> Result { let mut exclude_builder = GlobSetBuilder::new(); for exclude in excludes { + let exclude = exclude.as_ref(); // Excludes are unanchored let exclude = if let Some(exclude) = exclude.strip_prefix("/") { exclude.to_string() @@ -783,7 +763,7 @@ fn build_exclude_matcher(excludes: &[String]) -> Result { format!("**/{exclude}").to_string() }; let glob = parse_portable_glob(&exclude).map_err(|err| Error::PortableGlob { - field: "tool.uv.source-dist.exclude".to_string(), + field: "tool.uv.build-backend.*-exclude".to_string(), source: err, })?; exclude_builder.add(glob); @@ -791,7 +771,7 @@ fn build_exclude_matcher(excludes: &[String]) -> Result { let exclude_matcher = exclude_builder .build() .map_err(|err| Error::GlobSetTooLarge { - field: "tool.uv.source-dist.exclude".to_string(), + field: "tool.uv.build-backend.*-exclude".to_string(), source: err, })?; Ok(exclude_matcher) @@ -1096,27 +1076,30 @@ mod tests { .path() .join("built_by_uv-0.1.0.dist-info/METADATA"); assert_snapshot!(fs_err::read_to_string(metadata_file).unwrap(), @r###" - Metadata-Version: 2.4 - Name: built-by-uv - Version: 0.1.0 - Summary: A package to be built with the uv build backend that uses all features exposed by the build backend - Requires-Dist: anyio>=4,<5 - Requires-Python: >=3.12 - Description-Content-Type: text/markdown - - # built_by_uv - - A package to be built with the uv build backend that uses all features exposed by the build backend. 
- "###); + Metadata-Version: 2.4 + Name: built-by-uv + Version: 0.1.0 + Summary: A package to be built with the uv build backend that uses all features exposed by the build backend + License-File: LICENSE-APACHE + License-File: LICENSE-MIT + License-File: third-party-licenses/PEP-401.txt + Requires-Dist: anyio>=4,<5 + Requires-Python: >=3.12 + Description-Content-Type: text/markdown + + # built_by_uv + + A package to be built with the uv build backend that uses all features exposed by the build backend. + "###); let record_file = metadata_dir .path() .join("built_by_uv-0.1.0.dist-info/RECORD"); assert_snapshot!(fs_err::read_to_string(record_file).unwrap(), @r###" - built_by_uv-0.1.0.dist-info/WHEEL,sha256=3da1bfa0e8fd1b6cc246aa0b2b44a35815596c600cb485c39a6f8c106c3d5a8d,83 - built_by_uv-0.1.0.dist-info/METADATA,sha256=acb91f5a18cb53fa57b45eb4590ea13195a774c856a9dd8cf27cc5435d6451b6,372 - built_by_uv-0.1.0.dist-info/RECORD,, - "###); + built_by_uv-0.1.0.dist-info/WHEEL,sha256=3da1bfa0e8fd1b6cc246aa0b2b44a35815596c600cb485c39a6f8c106c3d5a8d,83 + built_by_uv-0.1.0.dist-info/METADATA,sha256=9ba12456f2ab1a6ab1e376ff551e392c70f7ec86713d80b4348e90c7dfd45cb1,474 + built_by_uv-0.1.0.dist-info/RECORD,, + "###); let wheel_file = metadata_dir .path() @@ -1180,13 +1163,7 @@ mod tests { // Build a source dist from the source tree let source_dist_dir = TempDir::new().unwrap(); - build_source_dist( - src.path(), - source_dist_dir.path(), - SourceDistSettings::default(), - "1.0.0+test", - ) - .unwrap(); + build_source_dist(src.path(), source_dist_dir.path(), "1.0.0+test").unwrap(); // Build a wheel from the source dist let sdist_tree = TempDir::new().unwrap(); @@ -1213,26 +1190,6 @@ mod tests { "1.0.0+test", ) .unwrap(); - - // Check the contained files and directories - assert_snapshot!(source_dist_contents.iter().map(|path| path.replace('\\', "/")).join("\n"), @r" - built_by_uv-0.1.0/LICENSE-APACHE - built_by_uv-0.1.0/LICENSE-MIT - built_by_uv-0.1.0/PKG-INFO - 
built_by_uv-0.1.0/README.md - built_by_uv-0.1.0/assets/data.csv - built_by_uv-0.1.0/header/built_by_uv.h - built_by_uv-0.1.0/pyproject.toml - built_by_uv-0.1.0/scripts/whoami.sh - built_by_uv-0.1.0/src/built_by_uv - built_by_uv-0.1.0/src/built_by_uv/__init__.py - built_by_uv-0.1.0/src/built_by_uv/arithmetic - built_by_uv-0.1.0/src/built_by_uv/arithmetic/__init__.py - built_by_uv-0.1.0/src/built_by_uv/arithmetic/circle.py - built_by_uv-0.1.0/src/built_by_uv/arithmetic/pi.txt - built_by_uv-0.1.0/third-party-licenses/PEP-401.txt - "); - let wheel = zip::ZipArchive::new( File::open( indirect_output_dir @@ -1246,28 +1203,55 @@ mod tests { indirect_wheel_contents.sort_unstable(); assert_eq!(indirect_wheel_contents, direct_wheel_contents); - assert_snapshot!(indirect_wheel_contents.iter().map(|path| path.replace('\\', "/")).join("\n"), @r" - built_by_uv-0.1.0.data/data/ - built_by_uv-0.1.0.data/data/data.csv - built_by_uv-0.1.0.data/headers/ - built_by_uv-0.1.0.data/headers/built_by_uv.h - built_by_uv-0.1.0.data/scripts/ - built_by_uv-0.1.0.data/scripts/whoami.sh - built_by_uv-0.1.0.dist-info/ - built_by_uv-0.1.0.dist-info/METADATA - built_by_uv-0.1.0.dist-info/RECORD - built_by_uv-0.1.0.dist-info/WHEEL - built_by_uv-0.1.0.dist-info/licenses/ - built_by_uv-0.1.0.dist-info/licenses/LICENSE-APACHE - built_by_uv-0.1.0.dist-info/licenses/LICENSE-MIT - built_by_uv-0.1.0.dist-info/licenses/third-party-licenses/PEP-401.txt - built_by_uv/ - built_by_uv/__init__.py - built_by_uv/arithmetic/ - built_by_uv/arithmetic/__init__.py - built_by_uv/arithmetic/circle.py - built_by_uv/arithmetic/pi.txt - "); + // Check the contained files and directories + assert_snapshot!(source_dist_contents.iter().map(|path| path.replace('\\', "/")).join("\n"), @r###" + built_by_uv-0.1.0/ + built_by_uv-0.1.0/LICENSE-APACHE + built_by_uv-0.1.0/LICENSE-MIT + built_by_uv-0.1.0/PKG-INFO + built_by_uv-0.1.0/README.md + built_by_uv-0.1.0/assets + built_by_uv-0.1.0/assets/data.csv + built_by_uv-0.1.0/header + 
built_by_uv-0.1.0/header/built_by_uv.h + built_by_uv-0.1.0/pyproject.toml + built_by_uv-0.1.0/scripts + built_by_uv-0.1.0/scripts/whoami.sh + built_by_uv-0.1.0/src + built_by_uv-0.1.0/src/built_by_uv + built_by_uv-0.1.0/src/built_by_uv/__init__.py + built_by_uv-0.1.0/src/built_by_uv/arithmetic + built_by_uv-0.1.0/src/built_by_uv/arithmetic/__init__.py + built_by_uv-0.1.0/src/built_by_uv/arithmetic/circle.py + built_by_uv-0.1.0/src/built_by_uv/arithmetic/pi.txt + built_by_uv-0.1.0/src/built_by_uv/build-only.h + built_by_uv-0.1.0/third-party-licenses + built_by_uv-0.1.0/third-party-licenses/PEP-401.txt + "###); + + assert_snapshot!(indirect_wheel_contents.iter().map(|path| path.replace('\\', "/")).join("\n"), @r###" + built_by_uv-0.1.0.data/data/ + built_by_uv-0.1.0.data/data/data.csv + built_by_uv-0.1.0.data/headers/ + built_by_uv-0.1.0.data/headers/built_by_uv.h + built_by_uv-0.1.0.data/scripts/ + built_by_uv-0.1.0.data/scripts/whoami.sh + built_by_uv-0.1.0.dist-info/ + built_by_uv-0.1.0.dist-info/METADATA + built_by_uv-0.1.0.dist-info/RECORD + built_by_uv-0.1.0.dist-info/WHEEL + built_by_uv-0.1.0.dist-info/licenses/ + built_by_uv-0.1.0.dist-info/licenses/LICENSE-APACHE + built_by_uv-0.1.0.dist-info/licenses/LICENSE-MIT + built_by_uv-0.1.0.dist-info/licenses/third-party-licenses/ + built_by_uv-0.1.0.dist-info/licenses/third-party-licenses/PEP-401.txt + built_by_uv/ + built_by_uv/__init__.py + built_by_uv/arithmetic/ + built_by_uv/arithmetic/__init__.py + built_by_uv/arithmetic/circle.py + built_by_uv/arithmetic/pi.txt + "###); // Check that we write deterministic wheels. 
let wheel_filename = "built_by_uv-0.1.0-py3-none-any.whl"; diff --git a/crates/uv-build-backend/src/metadata.rs b/crates/uv-build-backend/src/metadata.rs index 7886639e78a8f..a4476e01138d9 100644 --- a/crates/uv-build-backend/src/metadata.rs +++ b/crates/uv-build-backend/src/metadata.rs @@ -1,5 +1,4 @@ use crate::Error; -use globset::Glob; use itertools::Itertools; use serde::Deserialize; use std::collections::{BTreeMap, Bound}; @@ -17,6 +16,9 @@ use uv_warnings::warn_user_once; use version_ranges::Ranges; use walkdir::WalkDir; +/// By default, we ignore generated python files. +const DEFAULT_EXCLUDES: &[&str] = &["__pycache__", "*.pyc", "*.pyo"]; + #[derive(Debug, Error)] pub enum ValidationError { /// The spec isn't clear about what the values in that field would be, and we only support the @@ -86,8 +88,8 @@ impl PyProjectToml { self.project.license_files.as_deref() } - pub(crate) fn wheel_settings(&self) -> Option<&WheelSettings> { - self.tool.as_ref()?.uv.as_ref()?.wheel.as_ref() + pub(crate) fn settings(&self) -> Option<&BuildBackendSettings> { + self.tool.as_ref()?.uv.as_ref()?.build_backend.as_ref() } /// Warn if the `[build-system]` table looks suspicious. 
@@ -335,23 +337,12 @@ impl PyProjectToml { field: license_glob.to_string(), source: err, })?; - let absolute_glob = PathBuf::from(globset::escape( - root.simplified().to_string_lossy().as_ref(), - )) - .join(pep639_glob.to_string()) - .to_string_lossy() - .to_string(); - license_globs_parsed.push(Glob::new(&absolute_glob).map_err(|err| { - Error::GlobSet { - field: "project.license-files".to_string(), - err, - } - })?); + license_globs_parsed.push(pep639_glob); } let license_globs = GlobDirFilter::from_globs(&license_globs_parsed).map_err(|err| { Error::GlobSetTooLarge { - field: "tool.uv.source-dist.include".to_string(), + field: "tool.uv.build-backend.source-include".to_string(), source: err, } })?; @@ -365,7 +356,7 @@ impl PyProjectToml { ) }) { let entry = entry.map_err(|err| Error::WalkDir { - root: PathBuf::from("."), + root: root.to_path_buf(), err, })?; let relative = entry @@ -376,13 +367,18 @@ impl PyProjectToml { trace!("Not a license files match: `{}`", relative.user_display()); continue; } + if !entry.file_type().is_file() { + trace!( + "Not a file in license files match: `{}`", + relative.user_display() + ); + continue; + } debug!("License files match: `{}`", relative.user_display()); let license_file = relative.to_string_lossy().to_string(); - if !license_files.contains(&license_file) { - license_files.push(license_file); - } + license_files.push(license_file); } // The glob order may be unstable @@ -707,23 +703,74 @@ pub(crate) struct Tool { #[derive(Deserialize, Debug, Clone)] #[serde(rename_all = "kebab-case")] pub(crate) struct ToolUv { - /// Configuration for building source dists with the uv build backend - #[allow(dead_code)] - source_dist: Option, - /// Configuration for building wheels with the uv build backend - wheel: Option, + /// Configuration for building source distributions and wheels with the uv build backend + build_backend: Option, } -/// The `tool.uv.wheel` section with wheel build configuration. 
+/// To select which files to include in the source distribution, we first add the includes, then +/// remove the excludes from that. +/// +/// When building the source distribution, the following files and directories are included: +/// * `pyproject.toml` +/// * The module under `tool.uv.build-backend.module-root`, by default +/// `src//**`. +/// * `project.license-files` and `project.readme`. +/// * All directories under `tool.uv.build-backend.data`. +/// * All patterns from `tool.uv.build-backend.source-include`. +/// +/// From these, we remove the `tool.uv.build-backend.source-exclude` matches. +/// +/// When building the wheel, the following files and directories are included: +/// * The module under `tool.uv.build-backend.module-root`, by default +/// `src//**`. +/// * `project.license-files` and `project.readme`, as part of the project metadata. +/// * Each directory under `tool.uv.build-backend.data`, as data directories. +/// +/// From these, we remove the `tool.uv.build-backend.source-exclude` and +/// `tool.uv.build-backend.wheel-exclude` matches. The source dist excludes are applied to avoid +/// source tree -> wheel source including more files than +/// source tree -> source distribution -> wheel. +/// +/// There are no specific wheel includes. There must only be one top level module, and all data +/// files must either be under the module root or in a data directory. Most packages store small +/// data in the module root alongside the source code. #[derive(Deserialize, Debug, Clone)] -#[serde(rename_all = "kebab-case")] -pub(crate) struct WheelSettings { +#[serde(default, rename_all = "kebab-case")] +pub(crate) struct BuildBackendSettings { /// The directory that contains the module directory, usually `src`, or an empty path when /// using the flat layout over the src layout. 
- pub(crate) module_root: Option, + pub(crate) module_root: PathBuf, + + /// Glob expressions which files and directories to additionally include in the source + /// distribution. + /// + /// `pyproject.toml` and the contents of the module directory are always included. + /// + /// Includes are anchored, which means that `pyproject.toml` includes only + /// `/pyproject.toml`. Use for example `assets/**/sample.csv` to include for all + /// `sample.csv` files in `/assets` or any child directory. To recursively include + /// all files under a directory, use a `/**` suffix, e.g. `src/**`. For performance and + /// reproducibility, avoid unanchored matches such as `**/sample.csv`. + /// + /// The glob syntax is the reduced portable glob from + /// [PEP 639](https://peps.python.org/pep-0639/#add-license-FILES-key). + pub(crate) source_include: Vec, - /// Glob expressions which files and directories to exclude from the previous source - /// distribution includes. + /// Glob expressions which files and directories to exclude from the source distribution. + /// + /// Default: `__pycache__`, `*.pyc`, and `*.pyo`. + /// + /// Excludes are not anchored, which means that `__pycache__` excludes all directories named + /// `__pycache__` and it's children anywhere. To anchor a directory, use a `/` prefix, e.g., + /// `/dist` will exclude only `/dist`. + /// + /// The glob syntax is the reduced portable glob from + /// [PEP 639](https://peps.python.org/pep-0639/#add-license-FILES-key). + pub(crate) source_exclude: Vec, + + /// Glob expressions which files and directories to exclude from the wheel. + /// + /// Default: `__pycache__`, `*.pyc`, and `*.pyo`. /// /// Excludes are not anchored, which means that `__pycache__` excludes all directories named /// `__pycache__` and it's children anywhere. 
To anchor a directory, use a `/` prefix, e.g., @@ -731,9 +778,25 @@ pub(crate) struct WheelSettings { /// /// The glob syntax is the reduced portable glob from /// [PEP 639](https://peps.python.org/pep-0639/#add-license-FILES-key). - pub(crate) exclude: Option>, + pub(crate) wheel_exclude: Vec, + /// Data includes for wheels. - pub(crate) data: Option, + /// + /// The directories included here are also included in the source distribution. They are copied + /// to the right wheel subdirectory on build. + pub(crate) data: WheelDataIncludes, +} + +impl Default for BuildBackendSettings { + fn default() -> Self { + Self { + module_root: PathBuf::from("src"), + source_include: Vec::new(), + source_exclude: DEFAULT_EXCLUDES.iter().map(ToString::to_string).collect(), + wheel_exclude: DEFAULT_EXCLUDES.iter().map(ToString::to_string).collect(), + data: WheelDataIncludes::default(), + } + } } /// Data includes for wheels. @@ -754,7 +817,7 @@ pub(crate) struct WheelSettings { /// uses these two options. #[derive(Default, Deserialize, Debug, Clone)] // `deny_unknown_fields` to catch typos such as `header` vs `headers`. -#[serde(rename_all = "kebab-case", deny_unknown_fields)] +#[serde(default, rename_all = "kebab-case", deny_unknown_fields)] pub(crate) struct WheelDataIncludes { purelib: Option, platlib: Option, diff --git a/crates/uv-globfilter/src/glob_dir_filter.rs b/crates/uv-globfilter/src/glob_dir_filter.rs index 0aa3364b2efbd..9ed022fafe7c9 100644 --- a/crates/uv-globfilter/src/glob_dir_filter.rs +++ b/crates/uv-globfilter/src/glob_dir_filter.rs @@ -74,8 +74,10 @@ impl GlobDirFilter { } /// Whether the path (file or directory) matches any of the globs. + /// + /// We include a directory if we are potentially including files it contains. pub fn match_path(&self, path: &Path) -> bool { - self.glob_set.is_match(path) + self.match_directory(path) || self.glob_set.is_match(path) } /// Check whether a directory or any of its children can be matched by any of the globs. 
@@ -261,9 +263,16 @@ mod tests { assert_eq!( matches, [ + "", + "path1", "path1/dir1", + "path2", "path2/dir2", + "path3", + "path3/dir3", + "path3/dir3/subdir", "path3/dir3/subdir/a.txt", + "path4", "path4/dir4", "path4/dir4/subdir", "path4/dir4/subdir/a.txt", diff --git a/crates/uv-settings/src/settings.rs b/crates/uv-settings/src/settings.rs index 7d898d46af609..9c9ffe77f3750 100644 --- a/crates/uv-settings/src/settings.rs +++ b/crates/uv-settings/src/settings.rs @@ -1646,9 +1646,7 @@ pub struct OptionsWire { // Build backend #[allow(dead_code)] - source_dist: Option, - #[allow(dead_code)] - wheel: Option, + build_backend: Option, } impl From for Options { @@ -1707,8 +1705,7 @@ impl From for Options { managed, package, // Used by the build backend - source_dist: _, - wheel: _, + build_backend: _, } = value; Self { diff --git a/crates/uv/src/commands/build_backend.rs b/crates/uv/src/commands/build_backend.rs index b23c17855f9ee..e71d4f2a90392 100644 --- a/crates/uv/src/commands/build_backend.rs +++ b/crates/uv/src/commands/build_backend.rs @@ -5,14 +5,12 @@ use anyhow::{Context, Result}; use std::env; use std::io::Write; use std::path::Path; -use uv_build_backend::SourceDistSettings; /// PEP 517 hook to build a source distribution. 
pub(crate) fn build_sdist(sdist_directory: &Path) -> Result { let filename = uv_build_backend::build_source_dist( &env::current_dir()?, sdist_directory, - SourceDistSettings::default(), uv_version::version(), )?; // Tell the build frontend about the name of the artifact we built diff --git a/crates/uv/tests/it/pip_install.rs b/crates/uv/tests/it/pip_install.rs index 78e17d569e061..9a64581bdaea7 100644 --- a/crates/uv/tests/it/pip_install.rs +++ b/crates/uv/tests/it/pip_install.rs @@ -191,7 +191,7 @@ fn invalid_pyproject_toml_option_unknown_field() -> Result<()> { | 2 | unknown = "field" | ^^^^^^^ - unknown field `unknown`, expected one of `native-tls`, `offline`, `no-cache`, `cache-dir`, `preview`, `python-preference`, `python-downloads`, `concurrent-downloads`, `concurrent-builds`, `concurrent-installs`, `index`, `index-url`, `extra-index-url`, `no-index`, `find-links`, `index-strategy`, `keyring-provider`, `allow-insecure-host`, `resolution`, `prerelease`, `dependency-metadata`, `config-settings`, `no-build-isolation`, `no-build-isolation-package`, `exclude-newer`, `link-mode`, `compile-bytecode`, `no-sources`, `upgrade`, `upgrade-package`, `reinstall`, `reinstall-package`, `no-build`, `no-build-package`, `no-binary`, `no-binary-package`, `python-install-mirror`, `pypy-install-mirror`, `publish-url`, `trusted-publishing`, `pip`, `cache-keys`, `override-dependencies`, `constraint-dependencies`, `environments`, `conflicts`, `workspace`, `sources`, `managed`, `package`, `default-groups`, `dev-dependencies`, `source-dist`, `wheel` + unknown field `unknown`, expected one of `native-tls`, `offline`, `no-cache`, `cache-dir`, `preview`, `python-preference`, `python-downloads`, `concurrent-downloads`, `concurrent-builds`, `concurrent-installs`, `index`, `index-url`, `extra-index-url`, `no-index`, `find-links`, `index-strategy`, `keyring-provider`, `allow-insecure-host`, `resolution`, `prerelease`, `dependency-metadata`, `config-settings`, `no-build-isolation`, 
`no-build-isolation-package`, `exclude-newer`, `link-mode`, `compile-bytecode`, `no-sources`, `upgrade`, `upgrade-package`, `reinstall`, `reinstall-package`, `no-build`, `no-build-package`, `no-binary`, `no-binary-package`, `python-install-mirror`, `pypy-install-mirror`, `publish-url`, `trusted-publishing`, `pip`, `cache-keys`, `override-dependencies`, `constraint-dependencies`, `environments`, `conflicts`, `workspace`, `sources`, `managed`, `package`, `default-groups`, `dev-dependencies`, `build-backend` Resolved in [TIME] Audited in [TIME] diff --git a/crates/uv/tests/it/show_settings.rs b/crates/uv/tests/it/show_settings.rs index 3b5ab3dc9ca1c..9f36667335622 100644 --- a/crates/uv/tests/it/show_settings.rs +++ b/crates/uv/tests/it/show_settings.rs @@ -3443,7 +3443,7 @@ fn resolve_config_file() -> anyhow::Result<()> { | 1 | [project] | ^^^^^^^ - unknown field `project`, expected one of `native-tls`, `offline`, `no-cache`, `cache-dir`, `preview`, `python-preference`, `python-downloads`, `concurrent-downloads`, `concurrent-builds`, `concurrent-installs`, `index`, `index-url`, `extra-index-url`, `no-index`, `find-links`, `index-strategy`, `keyring-provider`, `allow-insecure-host`, `resolution`, `prerelease`, `dependency-metadata`, `config-settings`, `no-build-isolation`, `no-build-isolation-package`, `exclude-newer`, `link-mode`, `compile-bytecode`, `no-sources`, `upgrade`, `upgrade-package`, `reinstall`, `reinstall-package`, `no-build`, `no-build-package`, `no-binary`, `no-binary-package`, `python-install-mirror`, `pypy-install-mirror`, `publish-url`, `trusted-publishing`, `pip`, `cache-keys`, `override-dependencies`, `constraint-dependencies`, `environments`, `conflicts`, `workspace`, `sources`, `managed`, `package`, `default-groups`, `dev-dependencies`, `source-dist`, `wheel` + unknown field `project`, expected one of `native-tls`, `offline`, `no-cache`, `cache-dir`, `preview`, `python-preference`, `python-downloads`, `concurrent-downloads`, `concurrent-builds`, 
`concurrent-installs`, `index`, `index-url`, `extra-index-url`, `no-index`, `find-links`, `index-strategy`, `keyring-provider`, `allow-insecure-host`, `resolution`, `prerelease`, `dependency-metadata`, `config-settings`, `no-build-isolation`, `no-build-isolation-package`, `exclude-newer`, `link-mode`, `compile-bytecode`, `no-sources`, `upgrade`, `upgrade-package`, `reinstall`, `reinstall-package`, `no-build`, `no-build-package`, `no-binary`, `no-binary-package`, `python-install-mirror`, `pypy-install-mirror`, `publish-url`, `trusted-publishing`, `pip`, `cache-keys`, `override-dependencies`, `constraint-dependencies`, `environments`, `conflicts`, `workspace`, `sources`, `managed`, `package`, `default-groups`, `dev-dependencies`, `build-backend` "### ); diff --git a/scripts/packages/built-by-uv/data-dir/build-script.py b/scripts/packages/built-by-uv/data-dir/build-script.py new file mode 100644 index 0000000000000..055cec0d822a7 --- /dev/null +++ b/scripts/packages/built-by-uv/data-dir/build-script.py @@ -0,0 +1 @@ +print("Build script (currently unused)") diff --git a/scripts/packages/built-by-uv/pyproject.toml b/scripts/packages/built-by-uv/pyproject.toml index e106d64ff0406..852db280c68fb 100644 --- a/scripts/packages/built-by-uv/pyproject.toml +++ b/scripts/packages/built-by-uv/pyproject.toml @@ -7,7 +7,15 @@ requires-python = ">=3.12" dependencies = ["anyio>=4,<5"] license-files = ["LICENSE*", "third-party-licenses/*"] -[tool.uv.wheel.data] +[tool.uv.build-backend] +# A file we need for the source dist -> wheel step, but not in the wheel itself (currently unused) +source-include = ["data/build-script.py"] +# A temporary or generated file we want to ignore +source-exclude = ["/src/built_by_uv/not-packaged.txt", "__pycache__", "*.pyc", "*.pyo"] +# Headers are build-only +wheel-exclude = ["build-*.h", "__pycache__", "*.pyc", "*.pyo"] + +[tool.uv.build-backend.data] scripts = "scripts" data = "assets" headers = "header" diff --git 
a/scripts/packages/built-by-uv/src/built_by_uv/build-only.h b/scripts/packages/built-by-uv/src/built_by_uv/build-only.h new file mode 100644 index 0000000000000..ce57ea36148bc --- /dev/null +++ b/scripts/packages/built-by-uv/src/built_by_uv/build-only.h @@ -0,0 +1,4 @@ +// There is no build step yet, but we're already modelling the basis for it by allowing files only in the source dist, +// but not in the wheel. + +#include diff --git a/scripts/packages/built-by-uv/src/built_by_uv/not-packaged.txt b/scripts/packages/built-by-uv/src/built_by_uv/not-packaged.txt new file mode 100644 index 0000000000000..d45d86ba401e6 --- /dev/null +++ b/scripts/packages/built-by-uv/src/built_by_uv/not-packaged.txt @@ -0,0 +1 @@ +This file should only exist locally.