From b6ab919945bea94210d07de92bcb17c3fd344523 Mon Sep 17 00:00:00 2001 From: Charlie Marsh <charlie.r.marsh@gmail.com> Date: Wed, 27 Mar 2024 20:06:25 -0400 Subject: [PATCH] Make tags non-required for fetching wheel metadata (#2700) ## Summary This looks like a big change but it really isn't. Rather, I just split `get_or_build_wheel` into separate `get_wheel` and `build_wheel` methods internally, which made `get_or_build_wheel_metadata` capable of _not_ relying on `Tags`, which in turn makes it easier for us to use the `DistributionDatabase` in various places without having it coupled to an interpreter or environment (something we already did for `SourceDistributionBuilder`). --- .../src/distribution_database.rs | 293 +++++++++--------- crates/uv-installer/src/downloader.rs | 15 +- crates/uv-resolver/src/resolver/mod.rs | 2 +- 3 files changed, 152 insertions(+), 158 deletions(-) diff --git a/crates/uv-distribution/src/distribution_database.rs b/crates/uv-distribution/src/distribution_database.rs index 6f9d1be0b501..8d92e8db25f2 100644 --- a/crates/uv-distribution/src/distribution_database.rs +++ b/crates/uv-distribution/src/distribution_database.rs @@ -11,7 +11,7 @@ use url::Url; use distribution_filename::WheelFilename; use distribution_types::{ - BuildableSource, BuiltDist, Dist, FileLocation, IndexLocations, LocalEditable, Name, + BuildableSource, BuiltDist, Dist, FileLocation, IndexLocations, LocalEditable, Name, SourceDist, }; use platform_tags::Tags; use pypi_types::Metadata23; @@ -40,24 +40,17 @@ pub struct DistributionDatabase<'a, Context: BuildContext + Send + Sync> { cache: &'a Cache, reporter: Option<Arc<dyn Reporter>>, locks: Arc<Locks>, - tags: &'a Tags, client: &'a RegistryClient, build_context: &'a Context, builder: SourceDistCachedBuilder<'a, Context>, } impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> { - pub fn new( - cache: &'a Cache, - tags: &'a Tags, - client: &'a RegistryClient, - build_context: &'a Context, - ) -> Self { + pub fn new(cache: &'a Cache, client: &'a RegistryClient, build_context: &'a Context) -> Self { Self { cache, reporter: None, locks: Arc::new(Locks::default()), - tags, client, build_context, builder: SourceDistCachedBuilder::new(build_context, client), @@ -94,18 +87,110 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> /// If `no_remote_wheel` is set, the wheel will be built from a source distribution /// even if compatible pre-built wheels are available. #[instrument(skip(self))] - pub async fn get_or_build_wheel(&self, dist: Dist) -> Result<LocalWheel, Error> { + pub async fn get_or_build_wheel(&self, dist: &Dist, tags: &Tags) -> Result<LocalWheel, Error> { + match dist { + Dist::Built(built) => self.get_wheel(built).await, + Dist::Source(source) => self.build_wheel(source, tags).await, + } + } + + /// Either fetch the only wheel metadata (directly from the index or with range requests) or + /// fetch and build the source distribution. + /// + /// Returns the [`Metadata23`], along with a "precise" URL for the source distribution, if + /// possible. For example, given a Git dependency with a reference to a branch or tag, return a + /// URL with a precise reference to the current commit of that branch or tag. + #[instrument(skip_all, fields(%dist))] + pub async fn get_or_build_wheel_metadata( + &self, + dist: &Dist, + ) -> Result<(Metadata23, Option<Url>), Error> { + match dist { + Dist::Built(built_dist) => { + match self.client.wheel_metadata(built_dist).boxed().await { + Ok(metadata) => Ok((metadata, None)), + Err(err) if err.is_http_streaming_unsupported() => { + warn!("Streaming unsupported when fetching metadata for {built_dist}; downloading wheel directly ({err})"); + + // If the request failed due to an error that could be resolved by + // downloading the wheel directly, try that. + let wheel = self.get_wheel(built_dist).await?; + Ok((wheel.metadata()?, None)) + } + Err(err) => Err(err.into()), + } + } + Dist::Source(source_dist) => { + let no_build = match self.build_context.no_build() { + NoBuild::All => true, + NoBuild::None => false, + NoBuild::Packages(packages) => packages.contains(source_dist.name()), + }; + + // Optimization: Skip source dist download when we must not build them anyway. + if no_build { + return Err(Error::NoBuild); + } + + let lock = self.locks.acquire(dist).await; + let _guard = lock.lock().await; + + // Insert the `precise` URL, if it exists. + let precise = resolve_precise( + source_dist, + self.build_context.cache(), + self.reporter.as_ref(), + ) + .await?; + + let source_dist = match precise.as_ref() { + Some(url) => Cow::Owned(source_dist.clone().with_url(url.clone())), + None => Cow::Borrowed(source_dist), + }; + + let metadata = self + .builder + .download_and_build_metadata(&BuildableSource::Dist(&source_dist)) + .boxed() + .await?; + Ok((metadata, precise)) + } + } + } + + /// Build a directory into an editable wheel. + pub async fn build_wheel_editable( + &self, + editable: &LocalEditable, + editable_wheel_dir: &Path, + ) -> Result<(LocalWheel, Metadata23), Error> { + let (dist, disk_filename, filename, metadata) = self + .builder + .build_editable(editable, editable_wheel_dir) + .await?; + + let built_wheel = BuiltWheel { + dist, + filename, + path: editable_wheel_dir.join(disk_filename), + target: editable_wheel_dir.join(cache_key::digest(&editable.path)), + }; + Ok((LocalWheel::Built(built_wheel), metadata)) + } + + /// Fetch a wheel from the cache or download it from the index. + async fn get_wheel(&self, dist: &BuiltDist) -> Result<LocalWheel, Error> { let no_binary = match self.build_context.no_binary() { NoBinary::None => false, NoBinary::All => true, NoBinary::Packages(packages) => packages.contains(dist.name()), }; - match &dist { - Dist::Built(BuiltDist::Registry(wheel)) => { - if no_binary { - return Err(Error::NoBinary); - } + if no_binary { + return Err(Error::NoBinary); + } + match dist { + BuiltDist::Registry(wheel) => { let url = match &wheel.file.url { FileLocation::RelativeUrl(base, url) => { pypi_types::base_url_join_relative(base, url)? @@ -132,7 +217,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> .map_err(Error::CacheRead)? { return Ok(LocalWheel::Unzipped(UnzippedWheel { - dist: dist.clone(), + dist: Dist::Built(dist.clone()), archive, filename: wheel.filename.clone(), })); @@ -144,7 +229,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // Otherwise, unzip the file. return Ok(LocalWheel::Disk(DiskWheel { - dist: dist.clone(), + dist: Dist::Built(dist.clone()), path: path.clone(), target: cache_entry.into_path_buf(), filename: wheel.filename.clone(), @@ -161,11 +246,11 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // Download and unzip. match self - .stream_wheel(url.clone(), &wheel.filename, &wheel_entry, &dist) + .stream_wheel(url.clone(), &wheel.filename, &wheel_entry, dist) .await { Ok(archive) => Ok(LocalWheel::Unzipped(UnzippedWheel { - dist: dist.clone(), + dist: Dist::Built(dist.clone()), archive, filename: wheel.filename.clone(), })), @@ -177,10 +262,10 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // If the request failed because streaming is unsupported, download the // wheel directly. let archive = self - .download_wheel(url, &wheel.filename, &wheel_entry, &dist) + .download_wheel(url, &wheel.filename, &wheel_entry, dist) .await?; Ok(LocalWheel::Unzipped(UnzippedWheel { - dist: dist.clone(), + dist: Dist::Built(dist.clone()), archive, filename: wheel.filename.clone(), })) @@ -189,11 +274,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> } } - Dist::Built(BuiltDist::DirectUrl(wheel)) => { - if no_binary { - return Err(Error::NoBinary); - } - + BuiltDist::DirectUrl(wheel) => { // Create a cache entry for the wheel. let wheel_entry = self.cache.entry( CacheBucket::Wheels, @@ -203,16 +284,11 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> // Download and unzip. match self - .stream_wheel( - wheel.url.raw().clone(), - &wheel.filename, - &wheel_entry, - &dist, - ) + .stream_wheel(wheel.url.raw().clone(), &wheel.filename, &wheel_entry, dist) .await { Ok(archive) => Ok(LocalWheel::Unzipped(UnzippedWheel { - dist: dist.clone(), + dist: Dist::Built(dist.clone()), archive, filename: wheel.filename.clone(), })), @@ -228,11 +304,11 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> wheel.url.raw().clone(), &wheel.filename, &wheel_entry, - &dist, + dist, ) .await?; Ok(LocalWheel::Unzipped(UnzippedWheel { - dist: dist.clone(), + dist: Dist::Built(dist.clone()), archive, filename: wheel.filename.clone(), })) @@ -241,11 +317,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> } } - Dist::Built(BuiltDist::Path(wheel)) => { - if no_binary { - return Err(Error::NoBinary); - } - + BuiltDist::Path(wheel) => { let cache_entry = self.cache.entry( CacheBucket::Wheels, WheelCache::Url(&wheel.url).wheel_dir(wheel.name().as_ref()), @@ -263,7 +335,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> .map_err(Error::CacheRead)? { return Ok(LocalWheel::Unzipped(UnzippedWheel { - dist: dist.clone(), + dist: Dist::Built(dist.clone()), archive, filename: wheel.filename.clone(), })); @@ -274,126 +346,45 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> } Ok(LocalWheel::Disk(DiskWheel { - dist: dist.clone(), + dist: Dist::Built(dist.clone()), path: wheel.path.clone(), target: cache_entry.into_path_buf(), filename: wheel.filename.clone(), })) } - - Dist::Source(source_dist) => { - let lock = self.locks.acquire(&dist).await; - let _guard = lock.lock().await; - - let built_wheel = self - .builder - .download_and_build(&BuildableSource::Dist(source_dist), self.tags) - .boxed() - .await?; - - // If the wheel was unzipped previously, respect it. Source distributions are - // cached under a unique build ID, so unzipped directories are never stale. - match built_wheel.target.canonicalize() { - Ok(archive) => Ok(LocalWheel::Unzipped(UnzippedWheel { - dist: dist.clone(), - archive, - filename: built_wheel.filename, - })), - Err(err) if err.kind() == io::ErrorKind::NotFound => { - Ok(LocalWheel::Built(BuiltWheel { - dist: dist.clone(), - path: built_wheel.path, - target: built_wheel.target, - filename: built_wheel.filename, - })) - } - Err(err) => return Err(Error::CacheRead(err)), - } - } } } - /// Either fetch the only wheel metadata (directly from the index or with range requests) or - /// fetch and build the source distribution. - /// - /// Returns the [`Metadata23`], along with a "precise" URL for the source distribution, if - /// possible. For example, given a Git dependency with a reference to a branch or tag, return a - /// URL with a precise reference to the current commit of that branch or tag. - #[instrument(skip_all, fields(%dist))] - pub async fn get_or_build_wheel_metadata( - &self, - dist: &Dist, - ) -> Result<(Metadata23, Option<Url>), Error> { - match dist { - Dist::Built(built_dist) => { - match self.client.wheel_metadata(built_dist).boxed().await { - Ok(metadata) => Ok((metadata, None)), - Err(err) if err.is_http_streaming_unsupported() => { - warn!("Streaming unsupported when fetching metadata for {dist}; downloading wheel directly ({err})"); - - // If the request failed due to an error that could be resolved by - // downloading the wheel directly, try that. - let wheel = self.get_or_build_wheel(dist.clone()).await?; - Ok((wheel.metadata()?, None)) - } - Err(err) => Err(err.into()), - } - } - Dist::Source(source_dist) => { - let no_build = match self.build_context.no_build() { - NoBuild::All => true, - NoBuild::None => false, - NoBuild::Packages(packages) => packages.contains(source_dist.name()), - }; - // Optimization: Skip source dist download when we must not build them anyway. - if no_build { - return Err(Error::NoBuild); - } - - let lock = self.locks.acquire(dist).await; - let _guard = lock.lock().await; - - // Insert the `precise` URL, if it exists. - let precise = resolve_precise( - source_dist, - self.build_context.cache(), - self.reporter.as_ref(), - ) - .await?; - - let source_dist = match precise.as_ref() { - Some(url) => Cow::Owned(source_dist.clone().with_url(url.clone())), - None => Cow::Borrowed(source_dist), - }; + /// Convert a source distribution into a wheel, fetching it from the cache or building it if + /// necessary. + async fn build_wheel(&self, dist: &SourceDist, tags: &Tags) -> Result<LocalWheel, Error> { + let lock = self.locks.acquire(&Dist::Source(dist.clone())).await; + let _guard = lock.lock().await; - let metadata = self - .builder - .download_and_build_metadata(&BuildableSource::Dist(&source_dist)) - .boxed() - .await?; - Ok((metadata, precise)) - } - } - } - - /// Build a directory into an editable wheel. - pub async fn build_wheel_editable( - &self, - editable: &LocalEditable, - editable_wheel_dir: &Path, - ) -> Result<(LocalWheel, Metadata23), Error> { - let (dist, disk_filename, filename, metadata) = self + let built_wheel = self .builder - .build_editable(editable, editable_wheel_dir) + .download_and_build(&BuildableSource::Dist(dist), tags) + .boxed() .await?; - let built_wheel = BuiltWheel { - dist, - filename, - path: editable_wheel_dir.join(disk_filename), - target: editable_wheel_dir.join(cache_key::digest(&editable.path)), - }; - Ok((LocalWheel::Built(built_wheel), metadata)) + // If the wheel was unzipped previously, respect it. Source distributions are + // cached under a unique build ID, so unzipped directories are never stale. + match built_wheel.target.canonicalize() { + Ok(archive) => Ok(LocalWheel::Unzipped(UnzippedWheel { + dist: Dist::Source(dist.clone()), + archive, + filename: built_wheel.filename, + })), + Err(err) if err.kind() == io::ErrorKind::NotFound => { + Ok(LocalWheel::Built(BuiltWheel { + dist: Dist::Source(dist.clone()), + path: built_wheel.path, + target: built_wheel.target, + filename: built_wheel.filename, + })) + } + Err(err) => Err(Error::CacheRead(err)), + } } /// Stream a wheel from a URL, unzipping it into the cache as it's downloaded. @@ -402,7 +393,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> url: Url, filename: &WheelFilename, wheel_entry: &CacheEntry, - dist: &Dist, + dist: &BuiltDist, ) -> Result<PathBuf, Error> { // Create an entry for the HTTP cache. let http_entry = wheel_entry.with_file(format!("{}.http", filename.stem())); @@ -470,7 +461,7 @@ impl<'a, Context: BuildContext + Send + Sync> DistributionDatabase<'a, Context> url: Url, filename: &WheelFilename, wheel_entry: &CacheEntry, - dist: &Dist, + dist: &BuiltDist, ) -> Result<PathBuf, Error> { // Create an entry for the HTTP cache. let http_entry = wheel_entry.with_file(format!("{}.http", filename.stem())); diff --git a/crates/uv-installer/src/downloader.rs b/crates/uv-installer/src/downloader.rs index 8e140d2418c3..7ee10205e91c 100644 --- a/crates/uv-installer/src/downloader.rs +++ b/crates/uv-installer/src/downloader.rs @@ -38,8 +38,9 @@ pub enum Error { /// Download, build, and unzip a set of distributions. pub struct Downloader<'a, Context: BuildContext + Send + Sync> { - database: DistributionDatabase<'a, Context>, + tags: &'a Tags, cache: &'a Cache, + database: DistributionDatabase<'a, Context>, reporter: Option<Arc<dyn Reporter>>, } @@ -51,9 +52,10 @@ impl<'a, Context: BuildContext + Send + Sync> Downloader<'a, Context> { build_context: &'a Context, ) -> Self { Self { - database: DistributionDatabase::new(cache, tags, client, build_context), - reporter: None, + tags, cache, + database: DistributionDatabase::new(cache, client, build_context), + reporter: None, } } @@ -62,9 +64,10 @@ impl<'a, Context: BuildContext + Send + Sync> Downloader<'a, Context> { pub fn with_reporter(self, reporter: impl Reporter + 'static) -> Self { let reporter: Arc<dyn Reporter> = Arc::new(reporter); Self { - reporter: Some(reporter.clone()), - database: self.database.with_reporter(Facade::from(reporter.clone())), + tags: self.tags, cache: self.cache, + database: self.database.with_reporter(Facade::from(reporter.clone())), + reporter: Some(reporter.clone()), } } @@ -165,7 +168,7 @@ impl<'a, Context: BuildContext + Send + Sync> Downloader<'a, Context> { if in_flight.downloads.register(id.clone()) { let download: LocalWheel = self .database - .get_or_build_wheel(dist.clone()) + .get_or_build_wheel(&dist, self.tags) .boxed() .map_err(|err| Error::Fetch(dist.clone(), err)) .await?; diff --git a/crates/uv-resolver/src/resolver/mod.rs b/crates/uv-resolver/src/resolver/mod.rs index 229ef6489d5b..e7d717eabd8c 100644 --- a/crates/uv-resolver/src/resolver/mod.rs +++ b/crates/uv-resolver/src/resolver/mod.rs @@ -129,7 +129,7 @@ impl<'a, Context: BuildContext + Send + Sync> Resolver<'a, DefaultResolverProvid ) -> Result<Self, ResolveError> { let provider = DefaultResolverProvider::new( client, - DistributionDatabase::new(build_context.cache(), tags, client, build_context), + DistributionDatabase::new(build_context.cache(), client, build_context), flat_index, tags, PythonRequirement::new(interpreter, markers),