Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build backend: Include readme and license files #9149

Merged
merged 11 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 118 additions & 19 deletions crates/uv-build-backend/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@ use crate::metadata::{PyProjectToml, ValidationError};
use flate2::write::GzEncoder;
use flate2::Compression;
use fs_err::File;
use globset::GlobSetBuilder;
use globset::{Glob, GlobSetBuilder};
use itertools::Itertools;
use sha2::{Digest, Sha256};
use std::fs::FileType;
use std::io::{BufReader, Cursor, Read, Write};
use std::path::{Path, PathBuf, StripPrefixError};
use std::{io, mem};
use tar::{Builder, EntryType, Header};
use tar::{EntryType, Header};
use thiserror::Error;
use tracing::{debug, trace};
use uv_distribution_filename::{SourceDistExtension, SourceDistFilename, WheelFilename};
Expand Down Expand Up @@ -54,8 +54,6 @@ pub enum Error {
#[source]
err: walkdir::Error,
},
#[error("Non-UTF-8 paths are not supported: `{}`", _0.user_display())]
NotUtf8Path(PathBuf),
#[error("Failed to walk source tree")]
StripPrefix(#[from] StripPrefixError),
#[error("Unsupported file type {1:?}: `{}`", _0.user_display())]
Expand Down Expand Up @@ -356,17 +354,16 @@ pub fn build_wheel(
let relative_path = entry
.path()
.strip_prefix(&strip_root)
.expect("walkdir starts with root");
let relative_path_str = relative_path
.to_str()
.ok_or_else(|| Error::NotUtf8Path(relative_path.to_path_buf()))?;
.expect("walkdir starts with root")
.user_display()
.to_string();

debug!("Adding to wheel: `{relative_path_str}`");
debug!("Adding to wheel: `{relative_path}`");

if entry.file_type().is_dir() {
wheel_writer.write_directory(relative_path_str)?;
wheel_writer.write_directory(&relative_path)?;
} else if entry.file_type().is_file() {
wheel_writer.write_file(relative_path_str, entry.path())?;
wheel_writer.write_file(&relative_path, entry.path())?;
} else {
// TODO(konsti): We may want to support symlinks, there is support for installing them.
return Err(Error::UnsupportedFileType(
Expand All @@ -378,7 +375,80 @@ pub fn build_wheel(
entry.path();
}

debug!("Adding metadata files to {}", wheel_path.user_display());
if let Some(license_files) = &pyproject_toml.project().license_files {
let license_files_globs: Vec<_> = license_files
.iter()
.map(|license_files| {
trace!("Including license files at: `{license_files}`");
parse_portable_glob(license_files)
})
.collect::<Result<_, _>>()
.map_err(|err| Error::PortableGlob {
field: "project.license-files".to_string(),
source: err,
})?;
let license_files_matcher =
GlobDirFilter::from_globs(&license_files_globs).map_err(|err| {
Error::GlobSetTooLarge {
field: "project.license-files".to_string(),
source: err,
}
})?;

let license_dir = format!(
"{}-{}.dist-info/licenses/",
pyproject_toml.name().as_dist_info_name(),
pyproject_toml.version()
);

wheel_writer.write_directory(&license_dir)?;

for entry in WalkDir::new(source_tree).into_iter().filter_entry(|entry| {
// TODO(konsti): This should be prettier.
let relative = entry
.path()
.strip_prefix(source_tree)
.expect("walkdir starts with root");

// Fast path: Don't descend into a directory that can't be included.
license_files_matcher.match_directory(relative)
}) {
let entry = entry.map_err(|err| Error::WalkDir {
root: source_tree.to_path_buf(),
err,
})?;
// TODO(konsti): This should be prettier.
let relative = entry
.path()
.strip_prefix(source_tree)
.expect("walkdir starts with root");

if !license_files_matcher.match_path(relative) {
trace!("Excluding {}", relative.user_display());
continue;
};

let relative_licenses = Path::new(&license_dir)
.join(relative)
.portable_display()
.to_string();

if entry.file_type().is_dir() {
wheel_writer.write_directory(&relative_licenses)?;
} else if entry.file_type().is_file() {
debug!("Adding license file: `{}`", relative.user_display());
wheel_writer.write_file(&relative_licenses, entry.path())?;
} else {
// TODO(konsti): We may want to support symlinks, there is support for installing them.
return Err(Error::UnsupportedFileType(
entry.path().to_path_buf(),
entry.file_type(),
));
}
}
}

debug!("Adding metadata files to: `{}`", wheel_path.user_display());
let dist_info_dir = write_dist_info(
&mut wheel_writer,
&pyproject_toml,
Expand Down Expand Up @@ -449,28 +519,32 @@ pub fn build_source_dist(
extension: SourceDistExtension::TarGz,
};

let top_level = format!("{}-{}", pyproject_toml.name(), pyproject_toml.version());
let top_level = format!(
"{}-{}",
pyproject_toml.name().as_dist_info_name(),
pyproject_toml.version()
);

let source_dist_path = source_dist_directory.join(filename.to_string());
let tar_gz = File::create(&source_dist_path)?;
let enc = GzEncoder::new(tar_gz, Compression::default());
let mut tar = tar::Builder::new(enc);

let metadata = pyproject_toml
.to_metadata(source_tree)?
.core_metadata_format();
let metadata = pyproject_toml.to_metadata(source_tree)?;
let metadata_email = metadata.core_metadata_format();

let mut header = Header::new_gnu();
header.set_size(metadata.bytes().len() as u64);
header.set_size(metadata_email.bytes().len() as u64);
header.set_mode(0o644);
header.set_cksum();
tar.append_data(
&mut header,
Path::new(&top_level).join("PKG-INFO"),
Cursor::new(metadata),
Cursor::new(metadata_email),
)
.map_err(|err| Error::TarWrite(source_dist_path.clone(), err))?;

// The user (or default) includes
let mut include_globs = Vec::new();
for include in settings.include {
let glob = parse_portable_glob(&include).map_err(|err| Error::PortableGlob {
Expand All @@ -479,6 +553,31 @@ pub fn build_source_dist(
})?;
include_globs.push(glob.clone());
}

// Include the Readme
if let Some(readme) = pyproject_toml
.project()
.readme
.as_ref()
.and_then(|readme| readme.path())
{
trace!("Including readme at: `{}`", readme.user_display());
include_globs.push(
Glob::new(&globset::escape(&readme.portable_display().to_string()))
.expect("escaped globset is parseable"),
);
}

// Include the license files
for license_files in pyproject_toml.project().license_files.iter().flatten() {
trace!("Including license files at: `{license_files}`");
let glob = parse_portable_glob(license_files).map_err(|err| Error::PortableGlob {
field: "project.license-files".to_string(),
source: err,
})?;
include_globs.push(glob);
}

let include_matcher =
GlobDirFilter::from_globs(&include_globs).map_err(|err| Error::GlobSetTooLarge {
field: "tool.uv.source-dist.include".to_string(),
Expand Down Expand Up @@ -549,7 +648,7 @@ pub fn build_source_dist(

/// Add a file or a directory to a source distribution.
fn add_source_dist_entry(
tar: &mut Builder<GzEncoder<File>>,
tar: &mut tar::Builder<GzEncoder<File>>,
entry: &DirEntry,
top_level: &str,
source_dist_path: &Path,
Expand Down
59 changes: 37 additions & 22 deletions crates/uv-build-backend/src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ impl PyProjectToml {
Ok(toml::from_str(contents)?)
}

/// Returns a shared reference to the `project` field of this `PyProjectToml`.
///
/// Crate-internal accessor: it lets other modules in the crate read the parsed
/// `Project` data (for example `readme` and `license_files`) without taking ownership.
pub(crate) fn project(&self) -> &Project {
&self.project
}

/// Warn if the `[build-system]` table looks suspicious.
///
/// Example of a valid table:
Expand Down Expand Up @@ -530,68 +534,68 @@ impl PyProjectToml {
/// should update the shared schema instead.
#[derive(Deserialize, Debug, Clone)]
#[serde(rename_all = "kebab-case")]
struct Project {
pub(crate) struct Project {
/// The name of the project.
name: PackageName,
pub(crate) name: PackageName,
/// The version of the project.
version: Version,
pub(crate) version: Version,
/// The summary description of the project in one line.
description: Option<String>,
pub(crate) description: Option<String>,
/// The full description of the project (i.e. the README).
readme: Option<Readme>,
pub(crate) readme: Option<Readme>,
/// The Python version requirements of the project.
requires_python: Option<VersionSpecifiers>,
pub(crate) requires_python: Option<VersionSpecifiers>,
/// The license under which the project is distributed.
///
/// Supports both the current standard and the provisional PEP 639.
license: Option<License>,
pub(crate) license: Option<License>,
/// The paths to files containing licenses and other legal notices to be distributed with the
/// project.
///
/// From the provisional PEP 639
license_files: Option<Vec<String>>,
pub(crate) license_files: Option<Vec<String>>,
/// The people or organizations considered to be the "authors" of the project.
authors: Option<Vec<Contact>>,
pub(crate) authors: Option<Vec<Contact>>,
/// The people or organizations considered to be the "maintainers" of the project.
maintainers: Option<Vec<Contact>>,
pub(crate) maintainers: Option<Vec<Contact>>,
/// The keywords for the project.
keywords: Option<Vec<String>>,
pub(crate) keywords: Option<Vec<String>>,
/// Trove classifiers which apply to the project.
classifiers: Option<Vec<String>>,
pub(crate) classifiers: Option<Vec<String>>,
/// A table of URLs where the key is the URL label and the value is the URL itself.
///
/// PyPI shows all URLs with their name. For some known patterns, they add favicons.
/// main: <https://github.com/pypi/warehouse/blob/main/warehouse/templates/packaging/detail.html>
/// archived: <https://github.com/pypi/warehouse/blob/e3bd3c3805ff47fff32b67a899c1ce11c16f3c31/warehouse/templates/packaging/detail.html>
urls: Option<BTreeMap<String, String>>,
pub(crate) urls: Option<BTreeMap<String, String>>,
/// The console entrypoints of the project.
///
/// The key of the table is the name of the entry point and the value is the object reference.
scripts: Option<BTreeMap<String, String>>,
pub(crate) scripts: Option<BTreeMap<String, String>>,
/// The GUI entrypoints of the project.
///
/// The key of the table is the name of the entry point and the value is the object reference.
gui_scripts: Option<BTreeMap<String, String>>,
pub(crate) gui_scripts: Option<BTreeMap<String, String>>,
/// Entrypoints groups of the project.
///
/// The key of the table is the name of the entry point and the value is the object reference.
entry_points: Option<BTreeMap<String, BTreeMap<String, String>>>,
pub(crate) entry_points: Option<BTreeMap<String, BTreeMap<String, String>>>,
/// The dependencies of the project.
dependencies: Option<Vec<Requirement>>,
pub(crate) dependencies: Option<Vec<Requirement>>,
/// The optional dependencies of the project.
optional_dependencies: Option<BTreeMap<ExtraName, Vec<Requirement>>>,
pub(crate) optional_dependencies: Option<BTreeMap<ExtraName, Vec<Requirement>>>,
/// Specifies which fields listed by PEP 621 were intentionally unspecified so another tool
/// can/will provide such metadata dynamically.
///
/// Not supported, an error if anything but the default empty list.
dynamic: Option<Vec<String>>,
pub(crate) dynamic: Option<Vec<String>>,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you need access to everything? Would it be reasonable to expose only what you need through methods?

(I suggest this for abstract "encapsulation is good" reasons, and nothing in particular that is concrete. And I do sympathize with just making internals crate-public as that can often be easier.)

}

/// The optional `project.readme` key in a pyproject.toml as specified in
/// <https://packaging.python.org/en/latest/specifications/pyproject-toml/#readme>.
#[derive(Deserialize, Debug, Clone)]
#[serde(untagged, rename_all = "kebab-case")]
enum Readme {
pub(crate) enum Readme {
/// Relative path to the README.
String(PathBuf),
/// Relative path to the README.
Expand All @@ -608,11 +612,22 @@ enum Readme {
},
}

impl Readme {
/// If the readme is a file, return the path to the file.
pub(crate) fn path(&self) -> Option<&Path> {
match self {
Readme::String(path) => Some(path),
Readme::File { file, .. } => Some(file),
Readme::Text { .. } => None,
}
}
}

/// The optional `project.license` key in a pyproject.toml as specified in
/// <https://packaging.python.org/en/latest/specifications/pyproject-toml/#license>.
#[derive(Deserialize, Debug, Clone)]
#[serde(untagged)]
enum License {
pub(crate) enum License {
/// An SPDX Expression.
///
/// From the provisional PEP 639.
Expand All @@ -639,7 +654,7 @@ enum License {
deny_unknown_fields,
expecting = "a table with 'name' and/or 'email' keys"
)]
enum Contact {
pub(crate) enum Contact {
/// TODO(konsti): RFC 822 validation.
NameEmail { name: String, email: String },
/// TODO(konsti): RFC 822 validation.
Expand Down
Loading
Loading