diff --git a/crates/install-wheel-rs/src/metadata.rs b/crates/install-wheel-rs/src/metadata.rs index 5129ad097a54..383fea2207de 100644 --- a/crates/install-wheel-rs/src/metadata.rs +++ b/crates/install-wheel-rs/src/metadata.rs @@ -7,7 +7,7 @@ use zip::ZipArchive; use distribution_filename::WheelFilename; use pep440_rs::Version; -use uv_normalize::PackageName; +use uv_normalize::DistInfoName; use crate::Error; @@ -50,16 +50,19 @@ pub fn find_archive_dist_info<'a, T: Copy>( // Like `pip`, validate that the `.dist-info` directory is prefixed with the canonical // package name, but only warn if the version is not the normalized version. - let Some((name, version)) = dist_info_prefix.rsplit_once('-') else { - return Err(Error::MissingDistInfoSegments(dist_info_prefix.to_string())); - }; - if PackageName::from_str(name)? != filename.name { + let normalized_prefix = DistInfoName::new(dist_info_prefix); + let Some(rest) = normalized_prefix + .as_ref() + .strip_prefix(filename.name.as_str()) + else { return Err(Error::MissingDistInfoPackageName( dist_info_prefix.to_string(), filename.name.to_string(), )); - } - if !Version::from_str(version).is_ok_and(|version| version == filename.version) { + }; + if !rest.strip_prefix('-').is_some_and(|version| { + Version::from_str(version).is_ok_and(|version| version == filename.version) + }) { warn!( "{}", Error::MissingDistInfoVersion( @@ -87,16 +90,19 @@ pub fn is_metadata_entry(path: &str, filename: &WheelFilename) -> Result +#[derive(Debug, Default, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct DistInfoName<'a>(Cow<'a, str>); + +impl<'a> DistInfoName<'a> { + /// Create a normalized `.dist-info` name, borrowing `name` when it is already normalized. + pub fn new(name: &'a str) -> Self { + if Self::is_normalized(name) { + Self(Cow::Borrowed(name)) + } else { + Self(Cow::Owned(Self::normalize(name))) + } + } + + /// Normalize a `.dist-info` name, converting it to lowercase and collapsing runs + /// of `-`, `_`, and `.` down to a single `-`. 
+ fn normalize(name: impl AsRef) -> String { + let mut normalized = String::with_capacity(name.as_ref().len()); + let mut last = None; + for char in name.as_ref().bytes() { + match char { + b'A'..=b'Z' => { + normalized.push(char.to_ascii_lowercase() as char); + } + b'-' | b'_' | b'.' => { + if matches!(last, Some(b'-' | b'_' | b'.')) { + continue; + } + normalized.push('-'); + } + _ => { + normalized.push(char as char); + } + } + last = Some(char); + } + normalized + } + + /// Returns `true` if the name is already normalized. + fn is_normalized(name: impl AsRef) -> bool { + let mut last = None; + for char in name.as_ref().bytes() { + match char { + b'A'..=b'Z' => { + // Uppercase characters need to be converted to lowercase. + return false; + } + b'_' | b'.' => { + // `_` and `.` are normalized to `-`. + return false; + } + b'-' => { + if matches!(last, Some(b'-')) { + // Runs of `-` are normalized to a single `-`. + return false; + } + } + _ => {} + } + last = Some(char); + } + true + } +} + +impl Display for DistInfoName<'_> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +impl AsRef for DistInfoName<'_> { + fn as_ref(&self) -> &str { + &self.0 + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn normalize() { + let inputs = [ + "friendly-bard", + "Friendly-Bard", + "FRIENDLY-BARD", + "friendly.bard", + "friendly_bard", + "friendly--bard", + "friendly-.bard", + "FrIeNdLy-._.-bArD", + ]; + for input in inputs { + assert_eq!(DistInfoName::normalize(input), "friendly-bard"); + } + } +} diff --git a/crates/uv-normalize/src/lib.rs b/crates/uv-normalize/src/lib.rs index 38fdb5d51a6e..0360a5a1a161 100644 --- a/crates/uv-normalize/src/lib.rs +++ b/crates/uv-normalize/src/lib.rs @@ -1,10 +1,12 @@ use std::error::Error; use std::fmt::{Display, Formatter}; +pub use dist_info_name::DistInfoName; pub use extra_name::ExtraName; pub use group_name::{GroupName, DEV_DEPENDENCIES}; pub use package_name::PackageName; +mod 
dist_info_name; mod extra_name; mod group_name; mod package_name; @@ -22,10 +24,11 @@ pub(crate) fn validate_and_normalize_owned(name: String) -> Result, ) -> Result { - let mut normalized = String::with_capacity(name.as_ref().len()); + let name = name.as_ref(); + let mut normalized = String::with_capacity(name.len()); let mut last = None; - for char in name.as_ref().bytes() { + for char in name.bytes() { match char { b'A'..=b'Z' => { normalized.push(char.to_ascii_lowercase() as char); @@ -36,19 +39,19 @@ pub(crate) fn validate_and_normalize_ref( b'-' | b'_' | b'.' => { match last { // Names can't start with punctuation. - None => return Err(InvalidNameError(name.as_ref().to_string())), + None => return Err(InvalidNameError(name.to_string())), Some(b'-' | b'_' | b'.') => {} Some(_) => normalized.push('-'), } } - _ => return Err(InvalidNameError(name.as_ref().to_string())), + _ => return Err(InvalidNameError(name.to_string())), } last = Some(char); } // Names can't end with punctuation. if matches!(last, Some(b'-' | b'_' | b'.')) { - return Err(InvalidNameError(name.as_ref().to_string())); + return Err(InvalidNameError(name.to_string())); } Ok(normalized)