From ec5c1f6df6cfcfa3e417eadec0b9a603c39ff69f Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Thu, 8 Aug 2024 21:39:47 -0400 Subject: [PATCH] Enforce extension validity at parse time (#5888) This PR adds a `DistExtension` field to some of our distribution types, which requires that we validate that the file type is known and supported when parsing (rather than when attempting to unzip). It removes a bunch of extension parsing from the code too, in favor of doing it once upfront. Closes https://github.com/astral-sh/uv/issues/5858. --- Cargo.lock | 3 + crates/distribution-filename/src/extension.rs | 99 +++++++ crates/distribution-filename/src/lib.rs | 23 +- .../distribution-filename/src/source_dist.rs | 115 +++----- crates/distribution-types/src/buildable.rs | 5 + crates/distribution-types/src/lib.rs | 120 ++++---- crates/distribution-types/src/resolution.rs | 8 +- crates/pep508-rs/src/lib.rs | 7 +- crates/pypi-types/Cargo.toml | 1 + crates/pypi-types/src/parsed_url.rs | 66 +++-- crates/pypi-types/src/requirement.rs | 43 ++- crates/requirements-txt/src/lib.rs | 37 ++- ...xt__test__line-endings-whitespace.txt.snap | 40 +-- ...ments_txt__test__parse-whitespace.txt.snap | 40 +-- .../test-data/requirements-txt/whitespace.txt | 2 +- crates/uv-cache/src/lib.rs | 2 +- crates/uv-client/src/registry_client.rs | 48 ++-- crates/uv-distribution/src/error.rs | 4 +- .../uv-distribution/src/metadata/lowering.rs | 16 +- crates/uv-distribution/src/source/mod.rs | 63 +++-- crates/uv-extract/Cargo.toml | 1 + crates/uv-extract/src/stream.rs | 92 ++---- crates/uv-installer/src/plan.rs | 263 +++++++++--------- crates/uv-installer/src/satisfies.rs | 2 + crates/uv-python/Cargo.toml | 3 +- crates/uv-python/src/downloads.rs | 12 +- crates/uv-requirements/src/lookahead.rs | 4 + crates/uv-requirements/src/unnamed.rs | 31 ++- crates/uv-resolver/src/error.rs | 3 + crates/uv-resolver/src/flat_index.rs | 1 + crates/uv-resolver/src/lock.rs | 20 +- .../uv-resolver/src/pubgrub/dependencies.rs | 4 + crates/uv-resolver/src/version_map.rs | 1 + crates/uv/tests/cache_clean.rs | 4 +- crates/uv/tests/pip_compile.rs | 62 ++++- crates/uv/tests/pip_install.rs | 42 ++- 36 files changed, 805 insertions(+), 482 deletions(-) create mode 100644 crates/distribution-filename/src/extension.rs diff --git a/Cargo.lock b/Cargo.lock index a6c18b383f618..569fcb18cfcf2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2812,6 +2812,7 @@ version = "0.0.1" dependencies = [ "anyhow", "chrono", + "distribution-filename", "indexmap", "itertools 0.13.0", "mailparse", @@ -4816,6 +4817,7 @@ version = "0.0.1" dependencies = [ "async-compression", "async_zip", + "distribution-filename", "fs-err", "futures", "md-5", @@ -4945,6 +4947,7 @@ dependencies = [ "cache-key", "clap", "configparser", + "distribution-filename", "fs-err", "futures", "indoc", diff --git a/crates/distribution-filename/src/extension.rs b/crates/distribution-filename/src/extension.rs new file mode 100644 index 0000000000000..5c06cff994179 --- /dev/null +++ b/crates/distribution-filename/src/extension.rs @@ -0,0 +1,99 @@ +use std::fmt::{Display, Formatter}; +use std::path::Path; + +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum DistExtension { + Wheel, + Source(SourceDistExtension), +} + +#[derive( + Clone, + Copy, + Debug, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + Serialize, + Deserialize, + rkyv::Archive, + rkyv::Deserialize, + rkyv::Serialize, +)] +#[archive(check_bytes)] +#[archive_attr(derive(Debug))] +pub enum SourceDistExtension { + Zip, + TarGz, + TarBz2, + TarXz, + TarZst, +} + +impl DistExtension { + /// Extract the [`DistExtension`] from a path. + pub fn from_path(path: impl AsRef) -> Result { + let Some(extension) = path.as_ref().extension().and_then(|ext| ext.to_str()) else { + return Err(ExtensionError::Dist); + }; + + match extension { + "whl" => Ok(Self::Wheel), + _ => SourceDistExtension::from_path(path) + .map(Self::Source) + .map_err(|_| ExtensionError::Dist), + } + } +} + +impl SourceDistExtension { + /// Extract the [`SourceDistExtension`] from a path. + pub fn from_path(path: impl AsRef) -> Result { + /// Returns true if the path is a tar file (e.g., `.tar.gz`). + fn is_tar(path: &Path) -> bool { + path.file_stem().is_some_and(|stem| { + Path::new(stem) + .extension() + .is_some_and(|ext| ext.eq_ignore_ascii_case("tar")) + }) + } + + let Some(extension) = path.as_ref().extension().and_then(|ext| ext.to_str()) else { + return Err(ExtensionError::SourceDist); + }; + + match extension { + "zip" => Ok(Self::Zip), + "gz" if is_tar(path.as_ref()) => Ok(Self::TarGz), + "bz2" if is_tar(path.as_ref()) => Ok(Self::TarBz2), + "xz" if is_tar(path.as_ref()) => Ok(Self::TarXz), + "zst" if is_tar(path.as_ref()) => Ok(Self::TarZst), + _ => Err(ExtensionError::SourceDist), + } + } +} + +impl Display for SourceDistExtension { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Self::Zip => f.write_str("zip"), + Self::TarGz => f.write_str("tar.gz"), + Self::TarBz2 => f.write_str("tar.bz2"), + Self::TarXz => f.write_str("tar.xz"), + Self::TarZst => f.write_str("tar.zst"), + } + } +} + +#[derive(Error, Debug)] +pub enum ExtensionError { + #[error("`.whl`, `.zip`, `.tar.gz`, `.tar.bz2`, `.tar.xz`, or `.tar.zst`")] + Dist, + #[error("`.zip`, `.tar.gz`, `.tar.bz2`, `.tar.xz`, or `.tar.zst`")] + SourceDist, +} diff --git a/crates/distribution-filename/src/lib.rs b/crates/distribution-filename/src/lib.rs index 0736e5914658e..dd06a89363b83 100644 --- a/crates/distribution-filename/src/lib.rs +++ b/crates/distribution-filename/src/lib.rs @@ -5,11 +5,13 @@ use uv_normalize::PackageName; pub use build_tag::{BuildTag, BuildTagError}; pub use egg::{EggInfoFilename, EggInfoFilenameError}; -pub use source_dist::{SourceDistExtension, SourceDistFilename, SourceDistFilenameError}; +pub use extension::{DistExtension, ExtensionError, SourceDistExtension}; +pub use source_dist::SourceDistFilename; pub use wheel::{WheelFilename, WheelFilenameError}; mod build_tag; mod egg; +mod extension; mod source_dist; mod wheel; @@ -22,13 +24,20 @@ pub enum DistFilename { impl DistFilename { /// Parse a filename as wheel or source dist name. pub fn try_from_filename(filename: &str, package_name: &PackageName) -> Option { - if let Ok(filename) = WheelFilename::from_str(filename) { - Some(Self::WheelFilename(filename)) - } else if let Ok(filename) = SourceDistFilename::parse(filename, package_name) { - Some(Self::SourceDistFilename(filename)) - } else { - None + match DistExtension::from_path(filename) { + Ok(DistExtension::Wheel) => { + if let Ok(filename) = WheelFilename::from_str(filename) { + return Some(Self::WheelFilename(filename)); + } + } + Ok(DistExtension::Source(extension)) => { + if let Ok(filename) = SourceDistFilename::parse(filename, extension, package_name) { + return Some(Self::SourceDistFilename(filename)); + } + } + Err(_) => {} } + None } /// Like [`DistFilename::try_from_normalized_filename`], but without knowing the package name. diff --git a/crates/distribution-filename/src/source_dist.rs b/crates/distribution-filename/src/source_dist.rs index e9f22926ba3da..99fecbb4fc38a 100644 --- a/crates/distribution-filename/src/source_dist.rs +++ b/crates/distribution-filename/src/source_dist.rs @@ -1,69 +1,12 @@ use std::fmt::{Display, Formatter}; use std::str::FromStr; +use crate::SourceDistExtension; +use pep440_rs::{Version, VersionParseError}; use serde::{Deserialize, Serialize}; use thiserror::Error; - -use pep440_rs::{Version, VersionParseError}; use uv_normalize::{InvalidNameError, PackageName}; -#[derive( - Clone, - Debug, - PartialEq, - Eq, - Serialize, - Deserialize, - rkyv::Archive, - rkyv::Deserialize, - rkyv::Serialize, -)] -#[archive(check_bytes)] -#[archive_attr(derive(Debug))] -pub enum SourceDistExtension { - Zip, - TarGz, - TarBz2, -} - -impl FromStr for SourceDistExtension { - type Err = String; - - fn from_str(s: &str) -> Result { - Ok(match s { - "zip" => Self::Zip, - "tar.gz" => Self::TarGz, - "tar.bz2" => Self::TarBz2, - other => return Err(other.to_string()), - }) - } -} - -impl Display for SourceDistExtension { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Self::Zip => f.write_str("zip"), - Self::TarGz => f.write_str("tar.gz"), - Self::TarBz2 => f.write_str("tar.bz2"), - } - } -} - -impl SourceDistExtension { - pub fn from_filename(filename: &str) -> Option<(&str, Self)> { - if let Some(stem) = filename.strip_suffix(".zip") { - return Some((stem, Self::Zip)); - } - if let Some(stem) = filename.strip_suffix(".tar.gz") { - return Some((stem, Self::TarGz)); - } - if let Some(stem) = filename.strip_suffix(".tar.bz2") { - return Some((stem, Self::TarBz2)); - } - None - } -} - /// Note that this is a normalized and not an exact representation, keep the original string if you /// need the latter. #[derive( @@ -90,14 +33,18 @@ impl SourceDistFilename { /// these (consider e.g. `a-1-1.zip`) pub fn parse( filename: &str, + extension: SourceDistExtension, package_name: &PackageName, ) -> Result { - let Some((stem, extension)) = SourceDistExtension::from_filename(filename) else { + // Drop the extension (e.g., given `tar.gz`, drop `.tar.gz`). + if filename.len() <= extension.to_string().len() + 1 { return Err(SourceDistFilenameError { filename: filename.to_string(), kind: SourceDistFilenameErrorKind::Extension, }); - }; + } + + let stem = &filename[..(filename.len() - (extension.to_string().len() + 1))]; if stem.len() <= package_name.as_ref().len() + "-".len() { return Err(SourceDistFilenameError { @@ -138,13 +85,23 @@ impl SourceDistFilename { /// Source dist filenames can be ambiguous, e.g. `a-1-1.tar.gz`. Without knowing the package name, we assume that /// source dist filename version doesn't contain minus (the version is normalized). pub fn parsed_normalized_filename(filename: &str) -> Result { - let Some((stem, extension)) = SourceDistExtension::from_filename(filename) else { + let Ok(extension) = SourceDistExtension::from_path(filename) else { return Err(SourceDistFilenameError { filename: filename.to_string(), kind: SourceDistFilenameErrorKind::Extension, }); }; + // Drop the extension (e.g., given `tar.gz`, drop `.tar.gz`). + if filename.len() <= extension.to_string().len() + 1 { + return Err(SourceDistFilenameError { + filename: filename.to_string(), + kind: SourceDistFilenameErrorKind::Extension, + }); + } + + let stem = &filename[..(filename.len() - (extension.to_string().len() + 1))]; + let Some((package_name, version)) = stem.rsplit_once('-') else { return Err(SourceDistFilenameError { filename: filename.to_string(), @@ -197,7 +154,7 @@ impl Display for SourceDistFilenameError { enum SourceDistFilenameErrorKind { #[error("Name doesn't start with package name {0}")] Filename(PackageName), - #[error("Source distributions filenames must end with .zip, .tar.gz, or .tar.bz2")] + #[error("File extension is invalid")] Extension, #[error("Version section is invalid")] Version(#[from] VersionParseError), @@ -213,7 +170,7 @@ mod tests { use uv_normalize::PackageName; - use crate::SourceDistFilename; + use crate::{SourceDistExtension, SourceDistFilename}; /// Only test already normalized names since the parsing is lossy #[test] @@ -223,11 +180,17 @@ mod tests { "foo-lib-1.2.3a3.zip", "foo-lib-1.2.3.tar.gz", "foo-lib-1.2.3.tar.bz2", + "foo-lib-1.2.3.tar.zst", ] { + let ext = SourceDistExtension::from_path(normalized).unwrap(); assert_eq!( - SourceDistFilename::parse(normalized, &PackageName::from_str("foo_lib").unwrap()) - .unwrap() - .to_string(), + SourceDistFilename::parse( + normalized, + ext, + &PackageName::from_str("foo_lib").unwrap() + ) + .unwrap() + .to_string(), normalized ); } @@ -235,18 +198,22 @@ mod tests { #[test] fn errors() { - for invalid in ["b-1.2.3.zip", "a-1.2.3-gamma.3.zip", "a-1.2.3.tar.zstd"] { + for invalid in ["b-1.2.3.zip", "a-1.2.3-gamma.3.zip"] { + let ext = SourceDistExtension::from_path(invalid).unwrap(); assert!( - SourceDistFilename::parse(invalid, &PackageName::from_str("a").unwrap()).is_err() + SourceDistFilename::parse(invalid, ext, &PackageName::from_str("a").unwrap()) + .is_err() ); } } #[test] - fn name_to_long() { - assert!( - SourceDistFilename::parse("foo.zip", &PackageName::from_str("foo-lib").unwrap()) - .is_err() - ); + fn name_too_long() { + assert!(SourceDistFilename::parse( + "foo.zip", + SourceDistExtension::Zip, + &PackageName::from_str("foo-lib").unwrap() + ) + .is_err()); } } diff --git a/crates/distribution-types/src/buildable.rs b/crates/distribution-types/src/buildable.rs index bf681325fbb82..61a37407e9225 100644 --- a/crates/distribution-types/src/buildable.rs +++ b/crates/distribution-types/src/buildable.rs @@ -1,6 +1,7 @@ use std::borrow::Cow; use std::path::Path; +use distribution_filename::SourceDistExtension; use pep440_rs::Version; use pep508_rs::VerbatimUrl; use url::Url; @@ -109,6 +110,8 @@ impl std::fmt::Display for SourceUrl<'_> { #[derive(Debug, Clone)] pub struct DirectSourceUrl<'a> { pub url: &'a Url, + pub subdirectory: Option<&'a Path>, + pub ext: SourceDistExtension, } impl std::fmt::Display for DirectSourceUrl<'_> { @@ -146,6 +149,7 @@ impl<'a> From<&'a GitSourceDist> for GitSourceUrl<'a> { pub struct PathSourceUrl<'a> { pub url: &'a Url, pub path: Cow<'a, Path>, + pub ext: SourceDistExtension, } impl std::fmt::Display for PathSourceUrl<'_> { @@ -159,6 +163,7 @@ impl<'a> From<&'a PathSourceDist> for PathSourceUrl<'a> { Self { url: &dist.url, path: Cow::Borrowed(&dist.install_path), + ext: dist.ext, } } } diff --git a/crates/distribution-types/src/lib.rs b/crates/distribution-types/src/lib.rs index 7b8937ecfcff1..5ef1567b8c7e8 100644 --- a/crates/distribution-types/src/lib.rs +++ b/crates/distribution-types/src/lib.rs @@ -38,7 +38,7 @@ use std::str::FromStr; use url::Url; -use distribution_filename::WheelFilename; +use distribution_filename::{DistExtension, SourceDistExtension, WheelFilename}; use pep440_rs::Version; use pep508_rs::{Pep508Url, VerbatimUrl}; use pypi_types::{ParsedUrl, VerbatimParsedUrl}; @@ -228,6 +228,8 @@ pub struct RegistrySourceDist { pub name: PackageName, pub version: Version, pub file: Box, + /// The file extension, e.g. `tar.gz`, `zip`, etc. + pub ext: SourceDistExtension, pub index: IndexUrl, /// When an sdist is selected, it may be the case that there were /// available wheels too. There are many reasons why a wheel might not @@ -249,6 +251,8 @@ pub struct DirectUrlSourceDist { pub location: Url, /// The subdirectory within the archive in which the source distribution is located. pub subdirectory: Option, + /// The file extension, e.g. `tar.gz`, `zip`, etc. + pub ext: SourceDistExtension, /// The URL as it was provided by the user, including the subdirectory fragment. pub url: VerbatimUrl, } @@ -275,6 +279,8 @@ pub struct PathSourceDist { /// which we use for locking. Unlike `given` on the verbatim URL all environment variables /// are resolved, and unlike the install path, we did not yet join it on the base directory. pub lock_path: PathBuf, + /// The file extension, e.g. `tar.gz`, `zip`, etc. + pub ext: SourceDistExtension, /// The URL as it was provided by the user. pub url: VerbatimUrl, } @@ -303,33 +309,35 @@ impl Dist { url: VerbatimUrl, location: Url, subdirectory: Option, + ext: DistExtension, ) -> Result { - if Path::new(url.path()) - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) - { - // Validate that the name in the wheel matches that of the requirement. - let filename = WheelFilename::from_str(&url.filename()?)?; - if filename.name != name { - return Err(Error::PackageNameMismatch( + match ext { + DistExtension::Wheel => { + // Validate that the name in the wheel matches that of the requirement. + let filename = WheelFilename::from_str(&url.filename()?)?; + if filename.name != name { + return Err(Error::PackageNameMismatch( + name, + filename.name, + url.verbatim().to_string(), + )); + } + + Ok(Self::Built(BuiltDist::DirectUrl(DirectUrlBuiltDist { + filename, + location, + url, + }))) + } + DistExtension::Source(ext) => { + Ok(Self::Source(SourceDist::DirectUrl(DirectUrlSourceDist { name, - filename.name, - url.verbatim().to_string(), - )); + location, + subdirectory, + ext, + url, + }))) } - - Ok(Self::Built(BuiltDist::DirectUrl(DirectUrlBuiltDist { - filename, - location, - url, - }))) - } else { - Ok(Self::Source(SourceDist::DirectUrl(DirectUrlSourceDist { - name, - location, - subdirectory, - url, - }))) } } @@ -339,6 +347,7 @@ impl Dist { url: VerbatimUrl, install_path: &Path, lock_path: &Path, + ext: DistExtension, ) -> Result { // Store the canonicalized path, which also serves to validate that it exists. let canonicalized_path = match install_path.canonicalize() { @@ -350,31 +359,30 @@ impl Dist { }; // Determine whether the path represents a built or source distribution. - if canonicalized_path - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) - { - // Validate that the name in the wheel matches that of the requirement. - let filename = WheelFilename::from_str(&url.filename()?)?; - if filename.name != name { - return Err(Error::PackageNameMismatch( - name, - filename.name, - url.verbatim().to_string(), - )); + match ext { + DistExtension::Wheel => { + // Validate that the name in the wheel matches that of the requirement. + let filename = WheelFilename::from_str(&url.filename()?)?; + if filename.name != name { + return Err(Error::PackageNameMismatch( + name, + filename.name, + url.verbatim().to_string(), + )); + } + Ok(Self::Built(BuiltDist::Path(PathBuiltDist { + filename, + path: canonicalized_path, + url, + }))) } - Ok(Self::Built(BuiltDist::Path(PathBuiltDist { - filename, - path: canonicalized_path, - url, - }))) - } else { - Ok(Self::Source(SourceDist::Path(PathSourceDist { + DistExtension::Source(ext) => Ok(Self::Source(SourceDist::Path(PathSourceDist { name, install_path: canonicalized_path.clone(), lock_path: lock_path.to_path_buf(), + ext, url, - }))) + }))), } } @@ -423,12 +431,20 @@ impl Dist { /// Create a [`Dist`] for a URL-based distribution. pub fn from_url(name: PackageName, url: VerbatimParsedUrl) -> Result { match url.parsed_url { - ParsedUrl::Archive(archive) => { - Self::from_http_url(name, url.verbatim, archive.url, archive.subdirectory) - } - ParsedUrl::Path(file) => { - Self::from_file_url(name, url.verbatim, &file.install_path, &file.lock_path) - } + ParsedUrl::Archive(archive) => Self::from_http_url( + name, + url.verbatim, + archive.url, + archive.subdirectory, + archive.ext, + ), + ParsedUrl::Path(file) => Self::from_file_url( + name, + url.verbatim, + &file.install_path, + &file.lock_path, + file.ext, + ), ParsedUrl::Directory(directory) => Self::from_directory_url( name, url.verbatim, @@ -1262,7 +1278,7 @@ mod test { std::mem::size_of::() ); assert!( - std::mem::size_of::() <= 256, + std::mem::size_of::() <= 264, "{}", std::mem::size_of::() ); diff --git a/crates/distribution-types/src/resolution.rs b/crates/distribution-types/src/resolution.rs index b2191b147f5b5..9ff450ee86f14 100644 --- a/crates/distribution-types/src/resolution.rs +++ b/crates/distribution-types/src/resolution.rs @@ -1,6 +1,6 @@ -use std::collections::BTreeMap; - +use distribution_filename::DistExtension; use pypi_types::{HashDigest, Requirement, RequirementSource}; +use std::collections::BTreeMap; use uv_normalize::{ExtraName, GroupName, PackageName}; use crate::{BuiltDist, Diagnostic, Dist, Name, ResolvedDist, SourceDist}; @@ -143,12 +143,14 @@ impl From<&ResolvedDist> for Requirement { url: wheel.url.clone(), location, subdirectory: None, + ext: DistExtension::Wheel, } } Dist::Built(BuiltDist::Path(wheel)) => RequirementSource::Path { install_path: wheel.path.clone(), lock_path: wheel.path.clone(), url: wheel.url.clone(), + ext: DistExtension::Wheel, }, Dist::Source(SourceDist::Registry(sdist)) => RequirementSource::Registry { specifier: pep440_rs::VersionSpecifiers::from( @@ -163,6 +165,7 @@ impl From<&ResolvedDist> for Requirement { url: sdist.url.clone(), location, subdirectory: sdist.subdirectory.clone(), + ext: DistExtension::Source(sdist.ext), } } Dist::Source(SourceDist::Git(sdist)) => RequirementSource::Git { @@ -176,6 +179,7 @@ impl From<&ResolvedDist> for Requirement { install_path: sdist.install_path.clone(), lock_path: sdist.lock_path.clone(), url: sdist.url.clone(), + ext: DistExtension::Source(sdist.ext), }, Dist::Source(SourceDist::Directory(sdist)) => RequirementSource::Directory { install_path: sdist.install_path.clone(), diff --git a/crates/pep508-rs/src/lib.rs b/crates/pep508-rs/src/lib.rs index 79135804cf0c2..0d5b2c5c3f0e0 100644 --- a/crates/pep508-rs/src/lib.rs +++ b/crates/pep508-rs/src/lib.rs @@ -693,6 +693,11 @@ fn looks_like_unnamed_requirement(cursor: &mut Cursor) -> bool { return true; } + // Ex) `foo/bar` + if expanded.contains('/') || expanded.contains('\\') { + return true; + } + false } @@ -1010,7 +1015,7 @@ fn parse_pep508_requirement( // a package name. pip supports this in `requirements.txt`, but it doesn't adhere to // the PEP 508 grammar. let mut clone = cursor.clone().at(start); - return if parse_url::(&mut clone, working_dir).is_ok() { + return if looks_like_unnamed_requirement(&mut clone) { Err(Pep508Error { message: Pep508ErrorSource::UnsupportedRequirement("URL requirement must be preceded by a package name. Add the name of the package before the URL (e.g., `package_name @ https://...`).".to_string()), start, diff --git a/crates/pypi-types/Cargo.toml b/crates/pypi-types/Cargo.toml index 547d313f43832..32ebce84f40c8 100644 --- a/crates/pypi-types/Cargo.toml +++ b/crates/pypi-types/Cargo.toml @@ -13,6 +13,7 @@ license = { workspace = true } workspace = true [dependencies] +distribution-filename = { workspace = true } pep440_rs = { workspace = true } pep508_rs = { workspace = true } uv-fs = { workspace = true, features = ["serde"] } diff --git a/crates/pypi-types/src/parsed_url.rs b/crates/pypi-types/src/parsed_url.rs index f562886b740f3..0915750658958 100644 --- a/crates/pypi-types/src/parsed_url.rs +++ b/crates/pypi-types/src/parsed_url.rs @@ -1,9 +1,9 @@ +use distribution_filename::{DistExtension, ExtensionError}; +use pep508_rs::{Pep508Url, UnnamedRequirementUrl, VerbatimUrl, VerbatimUrlError}; use std::fmt::{Display, Formatter}; use std::path::{Path, PathBuf}; use thiserror::Error; use url::{ParseError, Url}; - -use pep508_rs::{Pep508Url, UnnamedRequirementUrl, VerbatimUrl, VerbatimUrlError}; use uv_git::{GitReference, GitSha, GitUrl, OidParseError}; use crate::{ArchiveInfo, DirInfo, DirectUrl, VcsInfo, VcsKind}; @@ -13,17 +13,21 @@ pub enum ParsedUrlError { #[error("Unsupported URL prefix `{prefix}` in URL: `{url}` ({message})")] UnsupportedUrlPrefix { prefix: String, - url: Url, + url: String, message: &'static str, }, #[error("Invalid path in file URL: `{0}`")] - InvalidFileUrl(Url), + InvalidFileUrl(String), #[error("Failed to parse Git reference from URL: `{0}`")] - GitShaParse(Url, #[source] OidParseError), + GitShaParse(String, #[source] OidParseError), #[error("Not a valid URL: `{0}`")] UrlParse(String, #[source] ParseError), #[error(transparent)] VerbatimUrl(#[from] VerbatimUrlError), + #[error("Expected direct URL (`{0}`) to end in a supported file extension: {1}")] + MissingExtensionUrl(String, ExtensionError), + #[error("Expected path (`{0}`) to end in a supported file extension: {1}")] + MissingExtensionPath(PathBuf, ExtensionError), } #[derive(Debug, Clone, Hash, PartialEq, PartialOrd, Eq, Ord)] @@ -75,6 +79,9 @@ impl UnnamedRequirementUrl for VerbatimParsedUrl { url: verbatim.to_url(), install_path: verbatim.as_path()?, lock_path: path.as_ref().to_path_buf(), + ext: DistExtension::from_path(&path).map_err(|err| { + ParsedUrlError::MissingExtensionPath(path.as_ref().to_path_buf(), err) + })?, }) }; Ok(Self { @@ -103,6 +110,9 @@ impl UnnamedRequirementUrl for VerbatimParsedUrl { url: verbatim.to_url(), install_path: verbatim.as_path()?, lock_path: path.as_ref().to_path_buf(), + ext: DistExtension::from_path(&path).map_err(|err| { + ParsedUrlError::MissingExtensionPath(path.as_ref().to_path_buf(), err) + })?, }) }; Ok(Self { @@ -181,15 +191,23 @@ pub struct ParsedPathUrl { /// which we use for locking. Unlike `given` on the verbatim URL all environment variables /// are resolved, and unlike the install path, we did not yet join it on the base directory. pub lock_path: PathBuf, + /// The file extension, e.g. `tar.gz`, `zip`, etc. + pub ext: DistExtension, } impl ParsedPathUrl { /// Construct a [`ParsedPathUrl`] from a path requirement source. - pub fn from_source(install_path: PathBuf, lock_path: PathBuf, url: Url) -> Self { + pub fn from_source( + install_path: PathBuf, + lock_path: PathBuf, + ext: DistExtension, + url: Url, + ) -> Self { Self { url, install_path, lock_path, + ext, } } } @@ -258,7 +276,7 @@ impl ParsedGitUrl { impl TryFrom for ParsedGitUrl { type Error = ParsedUrlError; - /// Supports URLS with and without the `git+` prefix. + /// Supports URLs with and without the `git+` prefix. /// /// When the URL includes a prefix, it's presumed to come from a PEP 508 requirement; when it's /// excluded, it's presumed to come from `tool.uv.sources`. @@ -271,7 +289,7 @@ impl TryFrom for ParsedGitUrl { .unwrap_or(url_in.as_str()); let url = Url::parse(url).map_err(|err| ParsedUrlError::UrlParse(url.to_string(), err))?; let url = GitUrl::try_from(url) - .map_err(|err| ParsedUrlError::GitShaParse(url_in.clone(), err))?; + .map_err(|err| ParsedUrlError::GitShaParse(url_in.to_string(), err))?; Ok(Self { url, subdirectory }) } } @@ -286,22 +304,32 @@ impl TryFrom for ParsedGitUrl { pub struct ParsedArchiveUrl { pub url: Url, pub subdirectory: Option, + pub ext: DistExtension, } impl ParsedArchiveUrl { /// Construct a [`ParsedArchiveUrl`] from a URL requirement source. - pub fn from_source(location: Url, subdirectory: Option) -> Self { + pub fn from_source(location: Url, subdirectory: Option, ext: DistExtension) -> Self { Self { url: location, subdirectory, + ext, } } } -impl From for ParsedArchiveUrl { - fn from(url: Url) -> Self { +impl TryFrom for ParsedArchiveUrl { + type Error = ParsedUrlError; + + fn try_from(url: Url) -> Result { let subdirectory = get_subdirectory(&url); - Self { url, subdirectory } + let ext = DistExtension::from_path(url.path()) + .map_err(|err| ParsedUrlError::MissingExtensionUrl(url.to_string(), err))?; + Ok(Self { + url, + subdirectory, + ext, + }) } } @@ -328,22 +356,22 @@ impl TryFrom for ParsedUrl { "git" => Ok(Self::Git(ParsedGitUrl::try_from(url)?)), "bzr" => Err(ParsedUrlError::UnsupportedUrlPrefix { prefix: prefix.to_string(), - url: url.clone(), + url: url.to_string(), message: "Bazaar is not supported", }), "hg" => Err(ParsedUrlError::UnsupportedUrlPrefix { prefix: prefix.to_string(), - url: url.clone(), + url: url.to_string(), message: "Mercurial is not supported", }), "svn" => Err(ParsedUrlError::UnsupportedUrlPrefix { prefix: prefix.to_string(), - url: url.clone(), + url: url.to_string(), message: "Subversion is not supported", }), _ => Err(ParsedUrlError::UnsupportedUrlPrefix { prefix: prefix.to_string(), - url: url.clone(), + url: url.to_string(), message: "Unknown scheme", }), } @@ -355,7 +383,7 @@ impl TryFrom for ParsedUrl { } else if url.scheme().eq_ignore_ascii_case("file") { let path = url .to_file_path() - .map_err(|()| ParsedUrlError::InvalidFileUrl(url.clone()))?; + .map_err(|()| ParsedUrlError::InvalidFileUrl(url.to_string()))?; let is_dir = if let Ok(metadata) = path.metadata() { metadata.is_dir() } else { @@ -371,12 +399,14 @@ impl TryFrom for ParsedUrl { } else { Ok(Self::Path(ParsedPathUrl { url, + ext: DistExtension::from_path(&path) + .map_err(|err| ParsedUrlError::MissingExtensionPath(path.clone(), err))?, install_path: path.clone(), lock_path: path, })) } } else { - Ok(Self::Archive(ParsedArchiveUrl::from(url))) + Ok(Self::Archive(ParsedArchiveUrl::try_from(url)?)) } } } diff --git a/crates/pypi-types/src/requirement.rs b/crates/pypi-types/src/requirement.rs index 64daba4e6d736..23162631b9384 100644 --- a/crates/pypi-types/src/requirement.rs +++ b/crates/pypi-types/src/requirement.rs @@ -2,17 +2,18 @@ use std::fmt::{Display, Formatter}; use std::path::{Path, PathBuf}; use std::str::FromStr; -use thiserror::Error; -use url::Url; - +use distribution_filename::DistExtension; use pep440_rs::VersionSpecifiers; use pep508_rs::{MarkerEnvironment, MarkerTree, RequirementOrigin, VerbatimUrl, VersionOrUrl}; +use thiserror::Error; +use url::Url; use uv_fs::PortablePathBuf; use uv_git::{GitReference, GitSha, GitUrl}; use uv_normalize::{ExtraName, PackageName}; use crate::{ - ParsedArchiveUrl, ParsedDirectoryUrl, ParsedGitUrl, ParsedPathUrl, ParsedUrl, VerbatimParsedUrl, + ParsedArchiveUrl, ParsedDirectoryUrl, ParsedGitUrl, ParsedPathUrl, ParsedUrl, ParsedUrlError, + VerbatimParsedUrl, }; #[derive(Debug, Error)] @@ -20,6 +21,8 @@ pub enum RequirementError { #[error(transparent)] VerbatimUrlError(#[from] pep508_rs::VerbatimUrlError), #[error(transparent)] + ParsedUrlError(#[from] ParsedUrlError), + #[error(transparent)] UrlParseError(#[from] url::ParseError), #[error(transparent)] OidParseError(#[from] uv_git::OidParseError), @@ -95,13 +98,15 @@ impl From for pep508_rs::Requirement { Some(VersionOrUrl::VersionSpecifier(specifier)) } RequirementSource::Url { - subdirectory, location, + subdirectory, + ext, url, } => Some(VersionOrUrl::Url(VerbatimParsedUrl { parsed_url: ParsedUrl::Archive(ParsedArchiveUrl { url: location, subdirectory, + ext, }), verbatim: url, })), @@ -128,12 +133,14 @@ impl From for pep508_rs::Requirement { RequirementSource::Path { install_path, lock_path, + ext, url, } => Some(VersionOrUrl::Url(VerbatimParsedUrl { parsed_url: ParsedUrl::Path(ParsedPathUrl { url: url.to_url(), install_path, lock_path, + ext, }), verbatim: url, })), @@ -259,11 +266,13 @@ pub enum RequirementSource { /// e.g. `foo @ https://example.org/foo-1.0-py3-none-any.whl`, or a source distribution, /// e.g.`foo @ https://example.org/foo-1.0.zip`. Url { + /// The remote location of the archive file, without subdirectory fragment. + location: Url, /// For source distributions, the path to the distribution if it is not in the archive /// root. subdirectory: Option, - /// The remote location of the archive file, without subdirectory fragment. - location: Url, + /// The file extension, e.g. `tar.gz`, `zip`, etc. + ext: DistExtension, /// The PEP 508 style URL in the format /// `:///#subdirectory=`. url: VerbatimUrl, @@ -292,6 +301,8 @@ pub enum RequirementSource { /// which we use for locking. Unlike `given` on the verbatim URL all environment variables /// are resolved, and unlike the install path, we did not yet join it on the base directory. lock_path: PathBuf, + /// The file extension, e.g. `tar.gz`, `zip`, etc. + ext: DistExtension, /// The PEP 508 style URL in the format /// `file:///#subdirectory=`. url: VerbatimUrl, @@ -321,6 +332,7 @@ impl RequirementSource { ParsedUrl::Path(local_file) => RequirementSource::Path { install_path: local_file.install_path.clone(), lock_path: local_file.lock_path.clone(), + ext: local_file.ext, url, }, ParsedUrl::Directory(directory) => RequirementSource::Directory { @@ -340,6 +352,7 @@ impl RequirementSource { url, location: archive.url, subdirectory: archive.subdirectory, + ext: archive.ext, }, } } @@ -347,7 +360,7 @@ impl RequirementSource { /// Construct a [`RequirementSource`] for a URL source, given a URL parsed into components. pub fn from_verbatim_parsed_url(parsed_url: ParsedUrl) -> Self { let verbatim_url = VerbatimUrl::from_url(Url::from(parsed_url.clone())); - RequirementSource::from_parsed_url(parsed_url, verbatim_url) + Self::from_parsed_url(parsed_url, verbatim_url) } /// Convert the source to a [`VerbatimParsedUrl`], if it's a URL source. @@ -355,24 +368,28 @@ impl RequirementSource { match &self { Self::Registry { .. } => None, Self::Url { - subdirectory, location, + subdirectory, + ext, url, } => Some(VerbatimParsedUrl { parsed_url: ParsedUrl::Archive(ParsedArchiveUrl::from_source( location.clone(), subdirectory.clone(), + *ext, )), verbatim: url.clone(), }), Self::Path { install_path, lock_path, + ext, url, } => Some(VerbatimParsedUrl { parsed_url: ParsedUrl::Path(ParsedPathUrl::from_source( install_path.clone(), lock_path.clone(), + *ext, url.to_url(), )), verbatim: url.clone(), @@ -504,6 +521,7 @@ impl From for RequirementSourceWire { RequirementSource::Url { subdirectory, location, + ext: _, url: _, } => Self::Direct { url: location, @@ -564,6 +582,7 @@ impl From for RequirementSourceWire { RequirementSource::Path { install_path, lock_path: _, + ext: _, url: _, } => Self::Path { path: PortablePathBuf::from(install_path), @@ -626,13 +645,17 @@ impl TryFrom for RequirementSource { } RequirementSourceWire::Direct { url, subdirectory } => Ok(Self::Url { url: VerbatimUrl::from_url(url.clone()), - subdirectory: subdirectory.map(PathBuf::from), location: url.clone(), + subdirectory: subdirectory.map(PathBuf::from), + ext: DistExtension::from_path(url.path()) + .map_err(|err| ParsedUrlError::MissingExtensionUrl(url.to_string(), err))?, }), RequirementSourceWire::Path { path } => { let path = PathBuf::from(path); Ok(Self::Path { url: VerbatimUrl::from_path(path.as_path())?, + ext: DistExtension::from_path(path.as_path()) + .map_err(|err| ParsedUrlError::MissingExtensionPath(path.clone(), err))?, install_path: path.clone(), lock_path: path, }) diff --git a/crates/requirements-txt/src/lib.rs b/crates/requirements-txt/src/lib.rs index 4fe254dfa77ba..7c5a17716847d 100644 --- a/crates/requirements-txt/src/lib.rs +++ b/crates/requirements-txt/src/lib.rs @@ -1459,7 +1459,40 @@ mod test { let temp_dir = assert_fs::TempDir::new()?; let requirements_txt = temp_dir.child("requirements.txt"); requirements_txt.write_str(indoc! {" - -e http://localhost:8080/ + -e https://localhost:8080/ + "})?; + + let error = RequirementsTxt::parse( + requirements_txt.path(), + temp_dir.path(), + &BaseClientBuilder::new(), + ) + .await + .unwrap_err(); + let errors = anyhow::Error::new(error).chain().join("\n"); + + let requirement_txt = regex::escape(&requirements_txt.path().user_display().to_string()); + let filters = vec![(requirement_txt.as_str(), "")]; + insta::with_settings!({ + filters => filters + }, { + insta::assert_snapshot!(errors, @r###" + Couldn't parse requirement in `` at position 3 + Expected direct URL (`https://localhost:8080/`) to end in a supported file extension: `.whl`, `.zip`, `.tar.gz`, `.tar.bz2`, `.tar.xz`, or `.tar.zst` + https://localhost:8080/ + ^^^^^^^^^^^^^^^^^^^^^^^ + "###); + }); + + Ok(()) + } + + #[tokio::test] + async fn unsupported_editable_extension() -> Result<()> { + let temp_dir = assert_fs::TempDir::new()?; + let requirements_txt = temp_dir.child("requirements.txt"); + requirements_txt.write_str(indoc! {" + -e https://files.pythonhosted.org/packages/f7/69/96766da2cdb5605e6a31ef2734aff0be17901cefb385b885c2ab88896d76/ruff-0.5.6.tar.gz "})?; let error = RequirementsTxt::parse( @@ -1478,7 +1511,7 @@ mod test { }, { insta::assert_snapshot!(errors, @r###" Unsupported editable requirement in `` - Editable must refer to a local directory, not an HTTPS URL: `http://localhost:8080/` + Editable must refer to a local directory, not an HTTPS URL: `https://files.pythonhosted.org/packages/f7/69/96766da2cdb5605e6a31ef2734aff0be17901cefb385b885c2ab88896d76/ruff-0.5.6.tar.gz` "###); }); diff --git a/crates/requirements-txt/src/snapshots/requirements_txt__test__line-endings-whitespace.txt.snap b/crates/requirements-txt/src/snapshots/requirements_txt__test__line-endings-whitespace.txt.snap index 81071f2364d59..181e74dbe6e51 100644 --- a/crates/requirements-txt/src/snapshots/requirements_txt__test__line-endings-whitespace.txt.snap +++ b/crates/requirements-txt/src/snapshots/requirements_txt__test__line-endings-whitespace.txt.snap @@ -36,29 +36,33 @@ RequirementsTxt { version_or_url: Some( Url( VerbatimParsedUrl { - parsed_url: Archive( - ParsedArchiveUrl { - url: Url { - scheme: "https", - cannot_be_a_base: false, - username: "", - password: None, - host: Some( - Domain( - "github.com", + parsed_url: Git( + ParsedGitUrl { + url: GitUrl { + repository: Url { + scheme: "https", + cannot_be_a_base: false, + username: "", + password: None, + host: Some( + Domain( + "github.com", + ), ), - ), - port: None, - path: "/pandas-dev/pandas", - query: None, - fragment: None, + port: None, + path: "/pandas-dev/pandas.git", + query: None, + fragment: None, + }, + reference: DefaultBranch, + precise: None, }, subdirectory: None, }, ), verbatim: VerbatimUrl { url: Url { - scheme: "https", + scheme: "git+https", cannot_be_a_base: false, username: "", password: None, @@ -68,12 +72,12 @@ RequirementsTxt { ), ), port: None, - path: "/pandas-dev/pandas", + path: "/pandas-dev/pandas.git", query: None, fragment: None, }, given: Some( - "https://github.com/pandas-dev/pandas", + "git+https://github.com/pandas-dev/pandas.git", ), }, }, diff --git a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-whitespace.txt.snap b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-whitespace.txt.snap index 81071f2364d59..181e74dbe6e51 100644 --- a/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-whitespace.txt.snap +++ b/crates/requirements-txt/src/snapshots/requirements_txt__test__parse-whitespace.txt.snap @@ -36,29 +36,33 @@ RequirementsTxt { version_or_url: Some( Url( VerbatimParsedUrl { - parsed_url: Archive( - ParsedArchiveUrl { - url: Url { - scheme: "https", - cannot_be_a_base: false, - username: "", - password: None, - host: Some( - Domain( - "github.com", + parsed_url: Git( + ParsedGitUrl { + url: GitUrl { + repository: Url { + scheme: "https", + cannot_be_a_base: false, + username: "", + password: None, + host: Some( + Domain( + "github.com", + ), ), - ), - port: None, - path: "/pandas-dev/pandas", - query: None, - fragment: None, + port: None, + path: "/pandas-dev/pandas.git", + query: None, + fragment: None, + }, + reference: DefaultBranch, + precise: None, }, subdirectory: None, }, ), verbatim: VerbatimUrl { url: Url { - scheme: "https", + scheme: "git+https", cannot_be_a_base: false, username: "", password: None, @@ -68,12 +72,12 @@ RequirementsTxt { ), ), port: None, - path: "/pandas-dev/pandas", + path: "/pandas-dev/pandas.git", query: None, fragment: None, }, given: Some( - "https://github.com/pandas-dev/pandas", + "git+https://github.com/pandas-dev/pandas.git", ), }, }, diff --git a/crates/requirements-txt/test-data/requirements-txt/whitespace.txt b/crates/requirements-txt/test-data/requirements-txt/whitespace.txt index c1e99017e28a7..63a4a8ba5c1d0 100644 --- a/crates/requirements-txt/test-data/requirements-txt/whitespace.txt +++ b/crates/requirements-txt/test-data/requirements-txt/whitespace.txt @@ -16,7 +16,7 @@ \ -pandas [tabulate] @ https://github.com/pandas-dev/pandas \ +pandas [tabulate] @ git+https://github.com/pandas-dev/pandas.git \ # üh diff --git a/crates/uv-cache/src/lib.rs b/crates/uv-cache/src/lib.rs index eb2c981737063..060c3a8fbf94a 100644 --- a/crates/uv-cache/src/lib.rs +++ b/crates/uv-cache/src/lib.rs @@ -691,7 +691,7 @@ impl CacheBucket { Self::Interpreter => "interpreter-v2", // Note that when bumping this, you'll also need to bump it // in crates/uv/tests/cache_clean.rs. - Self::Simple => "simple-v11", + Self::Simple => "simple-v12", Self::Wheels => "wheels-v1", Self::Archive => "archive-v0", Self::Builds => "builds-v0", diff --git a/crates/uv-client/src/registry_client.rs b/crates/uv-client/src/registry_client.rs index bb6da9c5d02c5..8eb16f8d05f8d 100644 --- a/crates/uv-client/src/registry_client.rs +++ b/crates/uv-client/src/registry_client.rs @@ -718,30 +718,32 @@ impl SimpleMetadata { // Group the distributions by version and kind for file in files { - if let Some(filename) = + let Some(filename) = DistFilename::try_from_filename(file.filename.as_str(), package_name) - { - let version = match filename { - DistFilename::SourceDistFilename(ref inner) => &inner.version, - DistFilename::WheelFilename(ref inner) => &inner.version, - }; - let file = match File::try_from(file, base) { - Ok(file) => file, - Err(err) => { - // Ignore files with unparsable version specifiers. - warn!("Skipping file for {package_name}: {err}"); - continue; - } - }; - match map.entry(version.clone()) { - std::collections::btree_map::Entry::Occupied(mut entry) => { - entry.get_mut().push(filename, file); - } - std::collections::btree_map::Entry::Vacant(entry) => { - let mut files = VersionFiles::default(); - files.push(filename, file); - entry.insert(files); - } + else { + warn!("Skipping file for {package_name}: {}", file.filename); + continue; + }; + let version = match filename { + DistFilename::SourceDistFilename(ref inner) => &inner.version, + DistFilename::WheelFilename(ref inner) => &inner.version, + }; + let file = match File::try_from(file, base) { + Ok(file) => file, + Err(err) => { + // Ignore files with unparsable version specifiers. + warn!("Skipping file for {package_name}: {err}"); + continue; + } + }; + match map.entry(version.clone()) { + std::collections::btree_map::Entry::Occupied(mut entry) => { + entry.get_mut().push(filename, file); + } + std::collections::btree_map::Entry::Vacant(entry) => { + let mut files = VersionFiles::default(); + files.push(filename, file); + entry.insert(files); } } } diff --git a/crates/uv-distribution/src/error.rs b/crates/uv-distribution/src/error.rs index eb64f9292834e..a0497f52af218 100644 --- a/crates/uv-distribution/src/error.rs +++ b/crates/uv-distribution/src/error.rs @@ -7,7 +7,7 @@ use zip::result::ZipError; use crate::metadata::MetadataError; use distribution_filename::WheelFilenameError; use pep440_rs::Version; -use pypi_types::HashDigest; +use pypi_types::{HashDigest, ParsedUrlError}; use uv_client::WrappedReqwestError; use uv_fs::Simplified; use uv_normalize::PackageName; @@ -23,6 +23,8 @@ pub enum Error { #[error("Expected an absolute path, but received: {}", _0.user_display())] RelativePath(PathBuf), #[error(transparent)] + ParsedUrl(#[from] ParsedUrlError), + #[error(transparent)] JoinRelativeUrl(#[from] pypi_types::JoinRelativeError), #[error("Expected a file URL, but received: {0}")] NonFileUrl(Url), diff --git a/crates/uv-distribution/src/metadata/lowering.rs b/crates/uv-distribution/src/metadata/lowering.rs index 0d93fbd5a2c0b..99fe85cde2b55 100644 --- a/crates/uv-distribution/src/metadata/lowering.rs +++ b/crates/uv-distribution/src/metadata/lowering.rs @@ -2,13 +2,13 @@ use std::collections::BTreeMap; use std::io; use std::path::{Path, PathBuf}; +use distribution_filename::DistExtension; use path_absolutize::Absolutize; -use thiserror::Error; -use url::Url; - use pep440_rs::VersionSpecifiers; use pep508_rs::{VerbatimUrl, VersionOrUrl}; -use pypi_types::{Requirement, RequirementSource, VerbatimParsedUrl}; +use pypi_types::{ParsedUrlError, Requirement, RequirementSource, VerbatimParsedUrl}; +use thiserror::Error; +use url::Url; use uv_configuration::PreviewMode; use uv_fs::{relative_to, Simplified}; use uv_git::GitReference; @@ -41,6 +41,8 @@ pub enum LoweringError { WorkspaceFalse, #[error("Editable must refer to a local directory, not a file: `{0}`")] EditableFile(String), + #[error(transparent)] + ParsedUrl(#[from] ParsedUrlError), #[error(transparent)] // Function attaches the context RelativeTo(io::Error), } @@ -155,10 +157,14 @@ pub(crate) fn lower_requirement( verbatim_url.set_fragment(Some(subdirectory)); } + let ext = DistExtension::from_path(url.path()) + .map_err(|err| ParsedUrlError::MissingExtensionUrl(url.to_string(), err))?; + let verbatim_url = VerbatimUrl::from_url(verbatim_url); RequirementSource::Url { location: url, subdirectory: subdirectory.map(PathBuf::from), + ext, url: verbatim_url, } } @@ -290,6 +296,8 @@ fn path_source( Ok(RequirementSource::Path { install_path: absolute_path, lock_path: relative_to_workspace, + ext: DistExtension::from_path(path) + .map_err(|err| ParsedUrlError::MissingExtensionPath(path.to_path_buf(), err))?, url, }) } diff --git a/crates/uv-distribution/src/source/mod.rs b/crates/uv-distribution/src/source/mod.rs index 744e8e8a9602a..54f1d4ed875ba 100644 --- a/crates/uv-distribution/src/source/mod.rs +++ b/crates/uv-distribution/src/source/mod.rs @@ -13,14 +13,14 @@ use tracing::{debug, info_span, instrument, Instrument}; use url::Url; use zip::ZipArchive; -use distribution_filename::WheelFilename; +use distribution_filename::{SourceDistExtension, WheelFilename}; use distribution_types::{ BuildableSource, DirectorySourceUrl, FileLocation, GitSourceUrl, HashPolicy, Hashed, PathSourceUrl, RemoteSource, SourceDist, SourceUrl, }; use install_wheel_rs::metadata::read_archive_metadata; use platform_tags::Tags; -use pypi_types::{HashDigest, Metadata23, ParsedArchiveUrl}; +use pypi_types::{HashDigest, Metadata23}; use uv_cache::{ ArchiveTimestamp, CacheBucket, CacheEntry, CacheShard, CachedByTimestamp, Timestamp, WheelCache, }; @@ -111,6 +111,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &PathSourceUrl { url: &url, path: Cow::Borrowed(path), + ext: dist.ext, }, &cache_shard, tags, @@ -132,6 +133,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &PathSourceUrl { url: &url, path: Cow::Owned(path), + ext: dist.ext, }, &cache_shard, tags, @@ -147,6 +149,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &url, &cache_shard, None, + dist.ext, tags, hashes, client, @@ -156,21 +159,20 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { } BuildableSource::Dist(SourceDist::DirectUrl(dist)) => { let filename = dist.filename().expect("Distribution must have a filename"); - let ParsedArchiveUrl { url, subdirectory } = - ParsedArchiveUrl::from(dist.url.to_url()); // For direct URLs, cache directly under the hash of the URL itself. let cache_shard = self.build_context.cache().shard( CacheBucket::SourceDistributions, - WheelCache::Url(&url).root(), + WheelCache::Url(&dist.url).root(), ); self.url( source, &filename, - &url, + &dist.url, &cache_shard, - subdirectory.as_deref(), + dist.subdirectory.as_deref(), + dist.ext, tags, hashes, client, @@ -208,21 +210,20 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { .url .filename() .expect("Distribution must have a filename"); - let ParsedArchiveUrl { url, subdirectory } = - ParsedArchiveUrl::from(resource.url.clone()); // For direct URLs, cache directly under the hash of the URL itself. let cache_shard = self.build_context.cache().shard( CacheBucket::SourceDistributions, - WheelCache::Url(&url).root(), + WheelCache::Url(resource.url).root(), ); self.url( source, &filename, - &url, + resource.url, &cache_shard, - subdirectory.as_deref(), + resource.subdirectory, + resource.ext, tags, hashes, client, @@ -287,6 +288,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &PathSourceUrl { url: &url, path: Cow::Borrowed(path), + ext: dist.ext, }, &cache_shard, hashes, @@ -307,6 +309,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &PathSourceUrl { url: &url, path: Cow::Owned(path), + ext: dist.ext, }, &cache_shard, hashes, @@ -321,6 +324,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &url, &cache_shard, None, + dist.ext, hashes, client, ) @@ -329,21 +333,20 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { } BuildableSource::Dist(SourceDist::DirectUrl(dist)) => { let filename = dist.filename().expect("Distribution must have a filename"); - let ParsedArchiveUrl { url, subdirectory } = - ParsedArchiveUrl::from(dist.url.to_url()); // For direct URLs, cache directly under the hash of the URL itself. let cache_shard = self.build_context.cache().shard( CacheBucket::SourceDistributions, - WheelCache::Url(&url).root(), + WheelCache::Url(&dist.url).root(), ); self.url_metadata( source, &filename, - &url, + &dist.url, &cache_shard, - subdirectory.as_deref(), + dist.subdirectory.as_deref(), + dist.ext, hashes, client, ) @@ -374,21 +377,20 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { .url .filename() .expect("Distribution must have a filename"); - let ParsedArchiveUrl { url, subdirectory } = - ParsedArchiveUrl::from(resource.url.clone()); // For direct URLs, cache directly under the hash of the URL itself. let cache_shard = self.build_context.cache().shard( CacheBucket::SourceDistributions, - WheelCache::Url(&url).root(), + WheelCache::Url(resource.url).root(), ); self.url_metadata( source, &filename, - &url, + resource.url, &cache_shard, - subdirectory.as_deref(), + resource.subdirectory, + resource.ext, hashes, client, ) @@ -441,6 +443,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { url: &'data Url, cache_shard: &CacheShard, subdirectory: Option<&'data Path>, + ext: SourceDistExtension, tags: &Tags, hashes: HashPolicy<'_>, client: &ManagedClient<'_>, @@ -449,7 +452,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Fetch the revision for the source distribution. let revision = self - .url_revision(source, filename, url, cache_shard, hashes, client) + .url_revision(source, filename, ext, url, cache_shard, hashes, client) .await?; // Before running the build, check that the hashes match. @@ -512,6 +515,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { url: &'data Url, cache_shard: &CacheShard, subdirectory: Option<&'data Path>, + ext: SourceDistExtension, hashes: HashPolicy<'_>, client: &ManagedClient<'_>, ) -> Result { @@ -519,7 +523,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Fetch the revision for the source distribution. let revision = self - .url_revision(source, filename, url, cache_shard, hashes, client) + .url_revision(source, filename, ext, url, cache_shard, hashes, client) .await?; // Before running the build, check that the hashes match. @@ -600,6 +604,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { &self, source: &BuildableSource<'_>, filename: &str, + ext: SourceDistExtension, url: &Url, cache_shard: &CacheShard, hashes: HashPolicy<'_>, @@ -626,7 +631,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { debug!("Downloading source distribution: {source}"); let entry = cache_shard.shard(revision.id()).entry(filename); let hashes = self - .download_archive(response, source, filename, entry.path(), hashes) + .download_archive(response, source, filename, ext, entry.path(), hashes) .await?; Ok(revision.with_hashes(hashes)) @@ -859,7 +864,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { debug!("Unpacking source distribution: {source}"); let entry = cache_shard.shard(revision.id()).entry("source"); let hashes = self - .persist_archive(&resource.path, entry.path(), hashes) + .persist_archive(&resource.path, resource.ext, entry.path(), hashes) .await?; let revision = revision.with_hashes(hashes); @@ -1306,6 +1311,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { response: Response, source: &BuildableSource<'_>, filename: &str, + ext: SourceDistExtension, target: &Path, hashes: HashPolicy<'_>, ) -> Result, Error> { @@ -1327,7 +1333,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { // Download and unzip the source distribution into a temporary directory. let span = info_span!("download_source_dist", filename = filename, source_dist = %source); - uv_extract::stream::archive(&mut hasher, filename, temp_dir.path()).await?; + uv_extract::stream::archive(&mut hasher, ext, temp_dir.path()).await?; drop(span); // If necessary, exhaust the reader to compute the hash. @@ -1359,6 +1365,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { async fn persist_archive( &self, path: &Path, + ext: SourceDistExtension, target: &Path, hashes: HashPolicy<'_>, ) -> Result, Error> { @@ -1380,7 +1387,7 @@ impl<'a, T: BuildContext> SourceDistributionBuilder<'a, T> { let mut hasher = uv_extract::hash::HashReader::new(reader, &mut hashers); // Unzip the archive into a temporary directory. - uv_extract::stream::archive(&mut hasher, path, &temp_dir.path()).await?; + uv_extract::stream::archive(&mut hasher, ext, &temp_dir.path()).await?; // If necessary, exhaust the reader to compute the hash. if !hashes.is_none() { diff --git a/crates/uv-extract/Cargo.toml b/crates/uv-extract/Cargo.toml index baf86ffd621c2..70bbb825fc2a7 100644 --- a/crates/uv-extract/Cargo.toml +++ b/crates/uv-extract/Cargo.toml @@ -13,6 +13,7 @@ license = { workspace = true } workspace = true [dependencies] +distribution-filename = { workspace = true } pypi-types = { workspace = true } async-compression = { workspace = true, features = ["bzip2", "gzip", "zstd", "xz"] } diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index ca4eefd358642..b3886beadb6f7 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -1,13 +1,13 @@ use std::path::Path; use std::pin::Pin; +use crate::Error; +use distribution_filename::SourceDistExtension; use futures::StreamExt; use rustc_hash::FxHashSet; use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; use tracing::warn; -use crate::Error; - const DEFAULT_BUF_SIZE: usize = 128 * 1024; /// Unzip a `.zip` archive into the target directory, without requiring `Seek`. @@ -231,77 +231,25 @@ pub async fn untar_xz( /// without requiring `Seek`. pub async fn archive( reader: R, - source: impl AsRef, + ext: SourceDistExtension, target: impl AsRef, ) -> Result<(), Error> { - // `.zip` - if source - .as_ref() - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("zip")) - { - unzip(reader, target).await?; - return Ok(()); - } - - // `.tar.gz` - if source - .as_ref() - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("gz")) - && source.as_ref().file_stem().is_some_and(|stem| { - Path::new(stem) - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("tar")) - }) - { - untar_gz(reader, target).await?; - return Ok(()); - } - - // `.tar.bz2` - if source - .as_ref() - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("bz2")) - && source.as_ref().file_stem().is_some_and(|stem| { - Path::new(stem) - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("tar")) - }) - { - untar_bz2(reader, target).await?; - return Ok(()); - } - // `.tar.zst` - if source - .as_ref() - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("zst")) - && source.as_ref().file_stem().is_some_and(|stem| { - Path::new(stem) - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("tar")) - }) - { - untar_zst(reader, target).await?; - return Ok(()); - } - - // `.tar.xz` - if source - .as_ref() - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("xz")) - && source.as_ref().file_stem().is_some_and(|stem| { - Path::new(stem) - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("tar")) - }) - { - untar_xz(reader, target).await?; - return Ok(()); + match ext { + SourceDistExtension::Zip => { + unzip(reader, target).await?; + } + SourceDistExtension::TarGz => { + untar_gz(reader, target).await?; + } + SourceDistExtension::TarBz2 => { + untar_bz2(reader, target).await?; + } + SourceDistExtension::TarXz => { + untar_xz(reader, target).await?; + } + SourceDistExtension::TarZst => { + untar_zst(reader, target).await?; + } } - - Err(Error::UnsupportedArchive(source.as_ref().to_path_buf())) + Ok(()) } diff --git a/crates/uv-installer/src/plan.rs b/crates/uv-installer/src/plan.rs index 5c20738849409..e02274edd9a39 100644 --- a/crates/uv-installer/src/plan.rs +++ b/crates/uv-installer/src/plan.rs @@ -1,12 +1,11 @@ use std::collections::hash_map::Entry; -use std::path::Path; use std::str::FromStr; use anyhow::{bail, Result}; use rustc_hash::{FxBuildHasher, FxHashMap}; use tracing::debug; -use distribution_filename::WheelFilename; +use distribution_filename::{DistExtension, WheelFilename}; use distribution_types::{ CachedDirectUrlDist, CachedDist, DirectUrlBuiltDist, DirectUrlSourceDist, DirectorySourceDist, Error, GitSourceDist, Hashed, IndexLocations, InstalledDist, Name, PathBuiltDist, @@ -152,86 +151,87 @@ impl<'a> Planner<'a> { } } RequirementSource::Url { - subdirectory, location, + subdirectory, + ext, url, } => { - // Check if we have a wheel or a source distribution. - if Path::new(url.path()) - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) - { - // Validate that the name in the wheel matches that of the requirement. - let filename = WheelFilename::from_str(&url.filename()?)?; - if filename.name != requirement.name { - return Err(Error::PackageNameMismatch( - requirement.name.clone(), - filename.name, - url.verbatim().to_string(), - ) - .into()); - } + match ext { + DistExtension::Wheel => { + // Validate that the name in the wheel matches that of the requirement. + let filename = WheelFilename::from_str(&url.filename()?)?; + if filename.name != requirement.name { + return Err(Error::PackageNameMismatch( + requirement.name.clone(), + filename.name, + url.verbatim().to_string(), + ) + .into()); + } - let wheel = DirectUrlBuiltDist { - filename, - location: location.clone(), - url: url.clone(), - }; + let wheel = DirectUrlBuiltDist { + filename, + location: location.clone(), + url: url.clone(), + }; - if !wheel.filename.is_compatible(tags) { - bail!( + if !wheel.filename.is_compatible(tags) { + bail!( "A URL dependency is incompatible with the current platform: {}", wheel.url ); - } + } - if no_binary { - bail!( + if no_binary { + bail!( "A URL dependency points to a wheel which conflicts with `--no-binary`: {}", wheel.url ); - } + } + + // Find the exact wheel from the cache, since we know the filename in + // advance. + let cache_entry = cache + .shard( + CacheBucket::Wheels, + WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()), + ) + .entry(format!("{}.http", wheel.filename.stem())); + + // Read the HTTP pointer. + if let Some(pointer) = HttpArchivePointer::read_from(&cache_entry)? { + let archive = pointer.into_archive(); + if archive.satisfies(hasher.get(&wheel)) { + let cached_dist = CachedDirectUrlDist::from_url( + wheel.filename, + wheel.url, + archive.hashes, + cache.archive(&archive.id), + ); - // Find the exact wheel from the cache, since we know the filename in - // advance. - let cache_entry = cache - .shard( - CacheBucket::Wheels, - WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()), - ) - .entry(format!("{}.http", wheel.filename.stem())); - - // Read the HTTP pointer. - if let Some(pointer) = HttpArchivePointer::read_from(&cache_entry)? { - let archive = pointer.into_archive(); - if archive.satisfies(hasher.get(&wheel)) { - let cached_dist = CachedDirectUrlDist::from_url( - wheel.filename, - wheel.url, - archive.hashes, - cache.archive(&archive.id), - ); - - debug!("URL wheel requirement already cached: {cached_dist}"); + debug!("URL wheel requirement already cached: {cached_dist}"); + cached.push(CachedDist::Url(cached_dist)); + continue; + } + } + } + DistExtension::Source(ext) => { + let sdist = DirectUrlSourceDist { + name: requirement.name.clone(), + location: location.clone(), + subdirectory: subdirectory.clone(), + ext: *ext, + url: url.clone(), + }; + // Find the most-compatible wheel from the cache, since we don't know + // the filename in advance. + if let Some(wheel) = built_index.url(&sdist)? { + let cached_dist = wheel.into_url_dist(url.clone()); + debug!("URL source requirement already cached: {cached_dist}"); cached.push(CachedDist::Url(cached_dist)); continue; } } - } else { - let sdist = DirectUrlSourceDist { - name: requirement.name.clone(), - location: location.clone(), - subdirectory: subdirectory.clone(), - url: url.clone(), - }; - // Find the most-compatible wheel from the cache, since we don't know - // the filename in advance. - if let Some(wheel) = built_index.url(&sdist)? { - let cached_dist = wheel.into_url_dist(url.clone()); - debug!("URL source requirement already cached: {cached_dist}"); - cached.push(CachedDist::Url(cached_dist)); - continue; - } } } RequirementSource::Git { @@ -300,6 +300,7 @@ impl<'a> Planner<'a> { } RequirementSource::Path { + ext, url, install_path, lock_path, @@ -313,84 +314,86 @@ impl<'a> Planner<'a> { Err(err) => return Err(err.into()), }; - // Check if we have a wheel or a source distribution. - if path - .extension() - .is_some_and(|ext| ext.eq_ignore_ascii_case("whl")) - { - // Validate that the name in the wheel matches that of the requirement. - let filename = WheelFilename::from_str(&url.filename()?)?; - if filename.name != requirement.name { - return Err(Error::PackageNameMismatch( - requirement.name.clone(), - filename.name, - url.verbatim().to_string(), - ) - .into()); - } + match ext { + DistExtension::Wheel => { + // Validate that the name in the wheel matches that of the requirement. + let filename = WheelFilename::from_str(&url.filename()?)?; + if filename.name != requirement.name { + return Err(Error::PackageNameMismatch( + requirement.name.clone(), + filename.name, + url.verbatim().to_string(), + ) + .into()); + } - let wheel = PathBuiltDist { - filename, - url: url.clone(), - path, - }; + let wheel = PathBuiltDist { + filename, + url: url.clone(), + path, + }; - if !wheel.filename.is_compatible(tags) { - bail!( + if !wheel.filename.is_compatible(tags) { + bail!( "A path dependency is incompatible with the current platform: {}", wheel.path.user_display() ); - } + } - if no_binary { - bail!( + if no_binary { + bail!( "A path dependency points to a wheel which conflicts with `--no-binary`: {}", wheel.url ); - } - - // Find the exact wheel from the cache, since we know the filename in - // advance. - let cache_entry = cache - .shard( - CacheBucket::Wheels, - WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()), - ) - .entry(format!("{}.rev", wheel.filename.stem())); - - if let Some(pointer) = LocalArchivePointer::read_from(&cache_entry)? { - let timestamp = ArchiveTimestamp::from_file(&wheel.path)?; - if pointer.is_up_to_date(timestamp) { - let archive = pointer.into_archive(); - if archive.satisfies(hasher.get(&wheel)) { - let cached_dist = CachedDirectUrlDist::from_url( - wheel.filename, - wheel.url, - archive.hashes, - cache.archive(&archive.id), - ); + } - debug!("Path wheel requirement already cached: {cached_dist}"); - cached.push(CachedDist::Url(cached_dist)); - continue; + // Find the exact wheel from the cache, since we know the filename in + // advance. + let cache_entry = cache + .shard( + CacheBucket::Wheels, + WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()), + ) + .entry(format!("{}.rev", wheel.filename.stem())); + + if let Some(pointer) = LocalArchivePointer::read_from(&cache_entry)? { + let timestamp = ArchiveTimestamp::from_file(&wheel.path)?; + if pointer.is_up_to_date(timestamp) { + let archive = pointer.into_archive(); + if archive.satisfies(hasher.get(&wheel)) { + let cached_dist = CachedDirectUrlDist::from_url( + wheel.filename, + wheel.url, + archive.hashes, + cache.archive(&archive.id), + ); + + debug!( + "Path wheel requirement already cached: {cached_dist}" + ); + cached.push(CachedDist::Url(cached_dist)); + continue; + } } } } - } else { - let sdist = PathSourceDist { - name: requirement.name.clone(), - url: url.clone(), - install_path: path, - lock_path: lock_path.clone(), - }; - - // Find the most-compatible wheel from the cache, since we don't know - // the filename in advance. - if let Some(wheel) = built_index.path(&sdist)? { - let cached_dist = wheel.into_url_dist(url.clone()); - debug!("Path source requirement already cached: {cached_dist}"); - cached.push(CachedDist::Url(cached_dist)); - continue; + DistExtension::Source(ext) => { + let sdist = PathSourceDist { + name: requirement.name.clone(), + url: url.clone(), + install_path: path, + lock_path: lock_path.clone(), + ext: *ext, + }; + + // Find the most-compatible wheel from the cache, since we don't know + // the filename in advance. + if let Some(wheel) = built_index.path(&sdist)? { + let cached_dist = wheel.into_url_dist(url.clone()); + debug!("Path source requirement already cached: {cached_dist}"); + cached.push(CachedDist::Url(cached_dist)); + continue; + } } } } diff --git a/crates/uv-installer/src/satisfies.rs b/crates/uv-installer/src/satisfies.rs index a0d83af0ac67a..1a5aac961fbdc 100644 --- a/crates/uv-installer/src/satisfies.rs +++ b/crates/uv-installer/src/satisfies.rs @@ -44,6 +44,7 @@ impl RequirementSatisfaction { // records `"url": "https://github.com/tqdm/tqdm"` in `direct_url.json`. location: requested_url, subdirectory: requested_subdirectory, + ext: _, url: _, } => { let InstalledDist::Url(InstalledDirectUrlDist { @@ -150,6 +151,7 @@ impl RequirementSatisfaction { RequirementSource::Path { install_path: requested_path, lock_path: _, + ext: _, url: _, } => { let InstalledDist::Url(InstalledDirectUrlDist { direct_url, .. }) = &distribution diff --git a/crates/uv-python/Cargo.toml b/crates/uv-python/Cargo.toml index 7aeb7727c6d45..90d3496ed6e1a 100644 --- a/crates/uv-python/Cargo.toml +++ b/crates/uv-python/Cargo.toml @@ -14,14 +14,15 @@ workspace = true [dependencies] cache-key = { workspace = true } +distribution-filename = { workspace = true } install-wheel-rs = { workspace = true } pep440_rs = { workspace = true } pep508_rs = { workspace = true } platform-tags = { workspace = true } pypi-types = { workspace = true } uv-cache = { workspace = true } -uv-configuration = { workspace = true } uv-client = { workspace = true } +uv-configuration = { workspace = true } uv-extract = { workspace = true } uv-fs = { workspace = true } uv-state = { workspace = true } diff --git a/crates/uv-python/src/downloads.rs b/crates/uv-python/src/downloads.rs index ffea6b7b8348e..eb6a0bff74bf4 100644 --- a/crates/uv-python/src/downloads.rs +++ b/crates/uv-python/src/downloads.rs @@ -5,15 +5,15 @@ use std::pin::Pin; use std::str::FromStr; use std::task::{Context, Poll}; +use distribution_filename::{ExtensionError, SourceDistExtension}; use futures::TryStreamExt; use owo_colors::OwoColorize; +use pypi_types::{HashAlgorithm, HashDigest}; use thiserror::Error; use tokio::io::{AsyncRead, ReadBuf}; use tokio_util::compat::FuturesAsyncReadCompatExt; use tracing::{debug, instrument}; use url::Url; - -use pypi_types::{HashAlgorithm, HashDigest}; use uv_cache::Cache; use uv_client::WrappedReqwestError; use uv_extract::hash::Hasher; @@ -32,6 +32,8 @@ pub enum Error { Io(#[from] io::Error), #[error(transparent)] ImplementationError(#[from] ImplementationError), + #[error("Expected download URL (`{0}`) to end in a supported file extension: {1}")] + MissingExtension(String, ExtensionError), #[error("Invalid Python version: {0}")] InvalidPythonVersion(String), #[error("Invalid request key (too many parts): {0}")] @@ -423,6 +425,8 @@ impl ManagedPythonDownload { } let filename = url.path_segments().unwrap().last().unwrap(); + let ext = SourceDistExtension::from_path(filename) + .map_err(|err| Error::MissingExtension(url.to_string(), err))?; let response = client.get(url.clone()).send().await?; // Ensure the request was successful. @@ -458,12 +462,12 @@ impl ManagedPythonDownload { match progress { Some((&reporter, progress)) => { let mut reader = ProgressReader::new(&mut hasher, progress, reporter); - uv_extract::stream::archive(&mut reader, filename, temp_dir.path()) + uv_extract::stream::archive(&mut reader, ext, temp_dir.path()) .await .map_err(|err| Error::ExtractError(filename.to_string(), err))?; } None => { - uv_extract::stream::archive(&mut hasher, filename, temp_dir.path()) + uv_extract::stream::archive(&mut hasher, ext, temp_dir.path()) .await .map_err(|err| Error::ExtractError(filename.to_string(), err))?; } diff --git a/crates/uv-requirements/src/lookahead.rs b/crates/uv-requirements/src/lookahead.rs index 5e50b02374997..ad171433b411d 100644 --- a/crates/uv-requirements/src/lookahead.rs +++ b/crates/uv-requirements/src/lookahead.rs @@ -247,12 +247,14 @@ fn required_dist(requirement: &Requirement) -> Result, distribution RequirementSource::Url { subdirectory, location, + ext, url, } => Dist::from_http_url( requirement.name.clone(), url.clone(), location.clone(), subdirectory.clone(), + *ext, )?, RequirementSource::Git { repository, @@ -276,12 +278,14 @@ fn required_dist(requirement: &Requirement) -> Result, distribution RequirementSource::Path { install_path, lock_path, + ext, url, } => Dist::from_file_url( requirement.name.clone(), url.clone(), install_path, lock_path, + *ext, )?, RequirementSource::Directory { install_path, diff --git a/crates/uv-requirements/src/unnamed.rs b/crates/uv-requirements/src/unnamed.rs index 8690899b2ac34..0cc2a994c0054 100644 --- a/crates/uv-requirements/src/unnamed.rs +++ b/crates/uv-requirements/src/unnamed.rs @@ -9,7 +9,7 @@ use serde::Deserialize; use tracing::debug; use url::Host; -use distribution_filename::{SourceDistFilename, WheelFilename}; +use distribution_filename::{DistExtension, SourceDistFilename, WheelFilename}; use distribution_types::{ BuildableSource, DirectSourceUrl, DirectorySourceUrl, GitSourceUrl, PathSourceUrl, RemoteSource, SourceUrl, UnresolvedRequirement, UnresolvedRequirementSpecification, VersionId, @@ -260,13 +260,28 @@ impl<'a, Context: BuildContext> NamedRequirementsResolver<'a, Context> { editable: parsed_directory_url.editable, }) } - ParsedUrl::Path(parsed_path_url) => SourceUrl::Path(PathSourceUrl { - url: &requirement.url.verbatim, - path: Cow::Borrowed(&parsed_path_url.install_path), - }), - ParsedUrl::Archive(parsed_archive_url) => SourceUrl::Direct(DirectSourceUrl { - url: &parsed_archive_url.url, - }), + ParsedUrl::Path(parsed_path_url) => { + let ext = match parsed_path_url.ext { + DistExtension::Source(ext) => ext, + DistExtension::Wheel => unreachable!(), + }; + SourceUrl::Path(PathSourceUrl { + url: &requirement.url.verbatim, + path: Cow::Borrowed(&parsed_path_url.install_path), + ext, + }) + } + ParsedUrl::Archive(parsed_archive_url) => { + let ext = match parsed_archive_url.ext { + DistExtension::Source(ext) => ext, + DistExtension::Wheel => unreachable!(), + }; + SourceUrl::Direct(DirectSourceUrl { + url: &parsed_archive_url.url, + subdirectory: parsed_archive_url.subdirectory.as_deref(), + ext, + }) + } ParsedUrl::Git(parsed_git_url) => SourceUrl::Git(GitSourceUrl { url: &requirement.url.verbatim, git: &parsed_git_url.url, diff --git a/crates/uv-resolver/src/error.rs b/crates/uv-resolver/src/error.rs index d011b46d3f056..e8676befb4568 100644 --- a/crates/uv-resolver/src/error.rs +++ b/crates/uv-resolver/src/error.rs @@ -65,6 +65,9 @@ pub enum ResolveError { #[error(transparent)] DistributionType(#[from] distribution_types::Error), + #[error(transparent)] + ParsedUrl(#[from] pypi_types::ParsedUrlError), + #[error("Failed to download `{0}`")] Fetch(Box, #[source] uv_distribution::Error), diff --git a/crates/uv-resolver/src/flat_index.rs b/crates/uv-resolver/src/flat_index.rs index 7fa07fc6b0fc5..2025b37d24ebb 100644 --- a/crates/uv-resolver/src/flat_index.rs +++ b/crates/uv-resolver/src/flat_index.rs @@ -96,6 +96,7 @@ impl FlatIndex { let dist = RegistrySourceDist { name: filename.name.clone(), version: filename.version.clone(), + ext: filename.extension, file: Box::new(file), index, wheels: vec![], diff --git a/crates/uv-resolver/src/lock.rs b/crates/uv-resolver/src/lock.rs index d3c50cca9ff52..82ae1a7b1376f 100644 --- a/crates/uv-resolver/src/lock.rs +++ b/crates/uv-resolver/src/lock.rs @@ -15,7 +15,7 @@ use toml_edit::{value, Array, ArrayOfTables, InlineTable, Item, Table, Value}; use url::Url; use cache_key::RepositoryUrl; -use distribution_filename::WheelFilename; +use distribution_filename::{DistExtension, ExtensionError, SourceDistExtension, WheelFilename}; use distribution_types::{ BuiltDist, DirectUrlBuiltDist, DirectUrlSourceDist, DirectorySourceDist, Dist, DistributionMetadata, FileLocation, GitSourceDist, HashComparison, IndexUrl, Name, @@ -791,6 +791,7 @@ impl Package { let url = Url::from(ParsedArchiveUrl { url: url.to_url(), subdirectory: direct.subdirectory.as_ref().map(PathBuf::from), + ext: DistExtension::Wheel, }); let direct_dist = DirectUrlBuiltDist { filename, @@ -843,6 +844,7 @@ impl Package { url: verbatim_url(workspace_root.join(path), &self.id)?, install_path: workspace_root.join(path), lock_path: path.clone(), + ext: SourceDistExtension::from_path(path)?, }; distribution_types::SourceDist::Path(path_dist) } @@ -895,14 +897,18 @@ impl Package { distribution_types::SourceDist::Git(git_dist) } Source::Direct(url, direct) => { + let ext = SourceDistExtension::from_path(url.as_ref())?; + let subdirectory = direct.subdirectory.as_ref().map(PathBuf::from); let url = Url::from(ParsedArchiveUrl { url: url.to_url(), - subdirectory: direct.subdirectory.as_ref().map(PathBuf::from), + subdirectory: subdirectory.clone(), + ext: DistExtension::Source(ext), }); let direct_dist = DirectUrlSourceDist { name: self.id.name.clone(), location: url.clone(), - subdirectory: direct.subdirectory.as_ref().map(PathBuf::from), + subdirectory: subdirectory.clone(), + ext, url: VerbatimUrl::from_url(url), }; distribution_types::SourceDist::DirectUrl(direct_dist) @@ -920,6 +926,7 @@ impl Package { .ok_or_else(|| LockErrorKind::MissingFilename { id: self.id.clone(), })?; + let ext = SourceDistExtension::from_path(filename.as_ref())?; let file = Box::new(distribution_types::File { dist_info_metadata: false, filename: filename.to_string(), @@ -939,6 +946,7 @@ impl Package { name: self.id.name.clone(), version: self.id.version.clone(), file, + ext, index, wheels: vec![], }; @@ -2232,6 +2240,7 @@ impl Dependency { let parsed_url = ParsedUrl::Archive(ParsedArchiveUrl { url: url.to_url(), subdirectory: direct.subdirectory.as_ref().map(PathBuf::from), + ext: DistExtension::from_path(url.as_ref())?, }); RequirementSource::from_verbatim_parsed_url(parsed_url) } @@ -2239,6 +2248,7 @@ impl Dependency { lock_path: path.clone(), install_path: workspace_root.join(path), url: verbatim_url(workspace_root.join(path), &self.package_id)?, + ext: DistExtension::from_path(path)?, }, Source::Directory(ref path) => RequirementSource::Directory { editable: false, @@ -2459,6 +2469,10 @@ enum LockErrorKind { #[source] ToUrlError, ), + /// An error that occurs when the extension can't be determined + /// for a given wheel or source distribution. + #[error("failed to parse file extension; expected one of: {0}")] + MissingExtension(#[from] ExtensionError), /// Failed to parse a git source URL. #[error("failed to parse source git URL")] InvalidGitSourceUrl( diff --git a/crates/uv-resolver/src/pubgrub/dependencies.rs b/crates/uv-resolver/src/pubgrub/dependencies.rs index 72a0a59320350..ae59ac98ff7ac 100644 --- a/crates/uv-resolver/src/pubgrub/dependencies.rs +++ b/crates/uv-resolver/src/pubgrub/dependencies.rs @@ -106,11 +106,13 @@ impl PubGrubRequirement { RequirementSource::Url { subdirectory, location, + ext, url, } => { let parsed_url = ParsedUrl::Archive(ParsedArchiveUrl::from_source( location.clone(), subdirectory.clone(), + *ext, )); (url, parsed_url) } @@ -130,6 +132,7 @@ impl PubGrubRequirement { (url, parsed_url) } RequirementSource::Path { + ext, url, install_path, lock_path, @@ -137,6 +140,7 @@ impl PubGrubRequirement { let parsed_url = ParsedUrl::Path(ParsedPathUrl::from_source( install_path.clone(), lock_path.clone(), + *ext, url.to_url(), )); (url, parsed_url) diff --git a/crates/uv-resolver/src/version_map.rs b/crates/uv-resolver/src/version_map.rs index 0b8f3cd972a37..990e353f444c2 100644 --- a/crates/uv-resolver/src/version_map.rs +++ b/crates/uv-resolver/src/version_map.rs @@ -391,6 +391,7 @@ impl VersionMapLazy { let dist = RegistrySourceDist { name: filename.name.clone(), version: filename.version.clone(), + ext: filename.extension, file: Box::new(file), index: self.index.clone(), wheels: vec![], diff --git a/crates/uv/tests/cache_clean.rs b/crates/uv/tests/cache_clean.rs index fea96cb60c009..ca3da3741265d 100644 --- a/crates/uv/tests/cache_clean.rs +++ b/crates/uv/tests/cache_clean.rs @@ -57,7 +57,7 @@ fn clean_package_pypi() -> Result<()> { // Assert that the `.rkyv` file is created for `iniconfig`. let rkyv = context .cache_dir - .child("simple-v11") + .child("simple-v12") .child("pypi") .child("iniconfig.rkyv"); assert!( @@ -104,7 +104,7 @@ fn clean_package_index() -> Result<()> { // Assert that the `.rkyv` file is created for `iniconfig`. let rkyv = context .cache_dir - .child("simple-v11") + .child("simple-v12") .child("index") .child("e8208120cae3ba69") .child("iniconfig.rkyv"); diff --git a/crates/uv/tests/pip_compile.rs b/crates/uv/tests/pip_compile.rs index 3de1c8b86a623..3df9e076edfed 100644 --- a/crates/uv/tests/pip_compile.rs +++ b/crates/uv/tests/pip_compile.rs @@ -11482,7 +11482,7 @@ fn tool_uv_sources() -> Result<()> { "boltons==24.0.0" ] dont_install_me = [ - "broken @ https://example.org/does/not/exist" + "broken @ https://example.org/does/not/exist.tar.gz" ] [tool.uv.sources] @@ -11540,6 +11540,66 @@ fn tool_uv_sources() -> Result<()> { Ok(()) } +#[test] +fn invalid_tool_uv_sources() -> Result<()> { + let context = TestContext::new("3.12"); + + // Write an invalid extension on a PEP 508 URL. + let pyproject_toml = context.temp_dir.child("pyproject.toml"); + pyproject_toml.write_str(indoc! {r#" + [project] + name = "project" + version = "0.0.0" + dependencies = [ + "urllib3 @ https://files.pythonhosted.org/packages/a2/73/a68704750a7679d0b6d3ad7aa8d4da8e14e151ae82e6fee774e6e0d05ec8/urllib3-2.2.1-py3-none-any.tar.baz", + ] + "#})?; + + uv_snapshot!(context.filters(), context.pip_compile() + .arg("--preview") + .arg(context.temp_dir.join("pyproject.toml")), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: Failed to parse metadata from built wheel + Caused by: Expected direct URL (`https://files.pythonhosted.org/packages/a2/73/a68704750a7679d0b6d3ad7aa8d4da8e14e151ae82e6fee774e6e0d05ec8/urllib3-2.2.1-py3-none-any.tar.baz`) to end in a supported file extension: `.whl`, `.zip`, `.tar.gz`, `.tar.bz2`, `.tar.xz`, or `.tar.zst` + urllib3 @ https://files.pythonhosted.org/packages/a2/73/a68704750a7679d0b6d3ad7aa8d4da8e14e151ae82e6fee774e6e0d05ec8/urllib3-2.2.1-py3-none-any.tar.baz + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + "### + ); + + // Write an invalid extension on a source. + let pyproject_toml = context.temp_dir.child("pyproject.toml"); + pyproject_toml.write_str(indoc! {r#" + [project] + name = "project" + version = "0.0.0" + dependencies = [ + "urllib3", + ] + + [tool.uv.sources] + urllib3 = { url = "https://files.pythonhosted.org/packages/a2/73/a68704750a7679d0b6d3ad7aa8d4da8e14e151ae82e6fee774e6e0d05ec8/urllib3-2.2.1-py3-none-any.tar.baz" } + "#})?; + + uv_snapshot!(context.filters(), context.pip_compile() + .arg("--preview") + .arg(context.temp_dir.join("pyproject.toml")), @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: Failed to parse entry for: `urllib3` + Caused by: Expected direct URL (`https://files.pythonhosted.org/packages/a2/73/a68704750a7679d0b6d3ad7aa8d4da8e14e151ae82e6fee774e6e0d05ec8/urllib3-2.2.1-py3-none-any.tar.baz`) to end in a supported file extension: `.whl`, `.zip`, `.tar.gz`, `.tar.bz2`, `.tar.xz`, or `.tar.zst` + "### + ); + + Ok(()) +} + /// Check that a dynamic `pyproject.toml` is supported a compile input file. #[test] fn dynamic_pyproject_toml() -> Result<()> { diff --git a/crates/uv/tests/pip_install.rs b/crates/uv/tests/pip_install.rs index 433ff0a97a5fd..fa7b4716e3b30 100644 --- a/crates/uv/tests/pip_install.rs +++ b/crates/uv/tests/pip_install.rs @@ -5676,7 +5676,7 @@ fn tool_uv_sources() -> Result<()> { "boltons==24.0.0" ] dont_install_me = [ - "broken @ https://example.org/does/not/exist" + "broken @ https://example.org/does/not/exist.tar.gz" ] [tool.uv.sources] @@ -6417,3 +6417,43 @@ fn install_build_isolation_package() -> Result<()> { Ok(()) } + +/// Install a package with an unsupported extension. +#[test] +fn invalid_extension() { + let context = TestContext::new("3.8"); + + uv_snapshot!(context.filters(), context.pip_install() + .arg("ruff @ https://files.pythonhosted.org/packages/f7/69/96766da2cdb5605e6a31ef2734aff0be17901cefb385b885c2ab88896d76/ruff-0.5.6.tar.baz") + , @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: Failed to parse: `ruff @ https://files.pythonhosted.org/packages/f7/69/96766da2cdb5605e6a31ef2734aff0be17901cefb385b885c2ab88896d76/ruff-0.5.6.tar.baz` + Caused by: Expected direct URL (`https://files.pythonhosted.org/packages/f7/69/96766da2cdb5605e6a31ef2734aff0be17901cefb385b885c2ab88896d76/ruff-0.5.6.tar.baz`) to end in a supported file extension: `.whl`, `.zip`, `.tar.gz`, `.tar.bz2`, `.tar.xz`, or `.tar.zst` + ruff @ https://files.pythonhosted.org/packages/f7/69/96766da2cdb5605e6a31ef2734aff0be17901cefb385b885c2ab88896d76/ruff-0.5.6.tar.baz + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + "###); +} + +/// Install a package without unsupported extension. +#[test] +fn no_extension() { + let context = TestContext::new("3.8"); + + uv_snapshot!(context.filters(), context.pip_install() + .arg("ruff @ https://files.pythonhosted.org/packages/f7/69/96766da2cdb5605e6a31ef2734aff0be17901cefb385b885c2ab88896d76/ruff-0.5.6") + , @r###" + success: false + exit_code: 2 + ----- stdout ----- + + ----- stderr ----- + error: Failed to parse: `ruff @ https://files.pythonhosted.org/packages/f7/69/96766da2cdb5605e6a31ef2734aff0be17901cefb385b885c2ab88896d76/ruff-0.5.6` + Caused by: Expected direct URL (`https://files.pythonhosted.org/packages/f7/69/96766da2cdb5605e6a31ef2734aff0be17901cefb385b885c2ab88896d76/ruff-0.5.6`) to end in a supported file extension: `.whl`, `.zip`, `.tar.gz`, `.tar.bz2`, `.tar.xz`, or `.tar.zst` + ruff @ https://files.pythonhosted.org/packages/f7/69/96766da2cdb5605e6a31ef2734aff0be17901cefb385b885c2ab88896d76/ruff-0.5.6 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + "###); +}