Skip to content

Commit

Permalink
Introduce BuiltWheelDir and prune these.
Browse files Browse the repository at this point in the history
This should be the last of the associated deps to prune based on
reachability / implication from old zipapps and venvs.
  • Loading branch information
jsirois committed Oct 20, 2024
1 parent c2c8904 commit b12e409
Show file tree
Hide file tree
Showing 3 changed files with 166 additions and 60 deletions.
132 changes: 130 additions & 2 deletions pex/cache/dirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from pex.pep_440 import Version
from pex.pep_503 import ProjectName
from pex.pip.version import PipVersionValue
from pex.targets import Target


class CacheDir(Enum["CacheDir.Value"]):
Expand Down Expand Up @@ -614,8 +615,135 @@ def version(self):
# type: () -> Version
return self._pnav.canonicalized_version


class BuiltWheelDir(AtomicCacheDir):
@classmethod
def iter_all(cls, pex_root=ENV):
    # type: (Union[str, Variables]) -> Iterator[BuiltWheelDir]
    """Iterate over the built wheel directories found in the built wheels cache.

    :param pex_root: The PEX root whose built wheels cache should be scanned.
    :return: An iterator of `BuiltWheelDir` entries for both the `sdists` and the
        `local_projects` sub-trees of the built wheels cache.
    """

    from pex.dist_metadata import ProjectNameAndVersion, UnrecognizedDistributionFormat

    # N.B.: `pex_root` has to be handed to every cache path lookup. Without it the globs would
    # not be tied to the root this method was asked to scan (and that `create` is told to use).
    for path in glob.glob(CacheDir.BUILT_WHEELS.path("sdists", "*", "*", pex_root=pex_root)):
        # Each match is split into its parent (the sdist-named directory) and its last
        # component (the fingerprint).
        sdist, fingerprint = os.path.split(path)
        try:
            pnav = ProjectNameAndVersion.from_filename(sdist)
            yield BuiltWheelDir.create(
                sdist=sdist, fingerprint=fingerprint, pnav=pnav, pex_root=pex_root
            )
        except UnrecognizedDistributionFormat:
            # This is a source distribution that does not follow sdist naming patterns / is not
            # distributed via PyPI; e.g.: a GitHub source tarball or zip.
            for built_wheel in glob.glob(os.path.join(path, "*", "*")):
                file_name = os.path.basename(built_wheel)
                dist_dir = os.path.dirname(built_wheel)
                yield BuiltWheelDir(path=dist_dir, dist_dir=dist_dir, file_name=file_name)

    # Local project builds are matched four levels below `local_projects`; the containing
    # directory of each match serves as both the cache dir path and the dist dir.
    for built_wheel in glob.glob(
        CacheDir.BUILT_WHEELS.path("local_projects", "*", "*", "*", "*", pex_root=pex_root)
    ):
        file_name = os.path.basename(built_wheel)
        dist_dir = os.path.dirname(built_wheel)
        yield BuiltWheelDir(path=dist_dir, dist_dir=dist_dir, file_name=file_name)

@classmethod
def create(
    cls,
    sdist,  # type: str
    fingerprint=None,  # type: Optional[str]
    pnav=None,  # type: Optional[ProjectNameAndVersion]
    target=None,  # type: Optional[Target]
    pex_root=ENV,  # type: Union[str, Variables]
):
    # type: (...) -> BuiltWheelDir
    """Create the `BuiltWheelDir` that describes where a wheel built from `sdist` is cached.

    :param sdist: Path of the source to build; either an sdist (per `is_sdist`) or else it is
        treated as a local project.
    :param fingerprint: The fingerprint of the source; calculated with SHA-256 when not given.
    :param pnav: The project name and version of the source, if already known.
    :param target: The target the wheel is built for; the current target when not given.
    :param pex_root: The PEX root containing the built wheels cache.
    :return: The built wheel dir for the given source, fingerprint and target.
    """

    import hashlib

    from pex import targets
    from pex.dist_metadata import is_sdist
    from pex.util import CacheHelper

    source_is_sdist = is_sdist(sdist)
    dist_type = "sdists" if source_is_sdist else "local_projects"
    if not fingerprint:
        # Sdists are hashed as a file and local projects as a directory.
        fingerprint_source = CacheHelper.hash if source_is_sdist else CacheHelper.dir_hash
        fingerprint = fingerprint_source(sdist, hasher=hashlib.sha256)
    # Only an sdist has a file name known up front.
    file_name = os.path.basename(sdist) if source_is_sdist else None

    # Ideally a built wheel would be uniqued by just its wheel name, which is unique on the host
    # os up to the python and abi tags; i.e.: CPython 2.7.6 and CPython 2.7.18 builds should be
    # interchangeable when the interpreters have matching abis.
    #
    # Two scenarios defeat this though:
    # 1. A vm / container with shared storage mounted can introduce a different platform on
    #    the host.
    # 2. On macOS the same host can report / use different OS versions (c.f.: the
    #    MACOSX_DEPLOYMENT_TARGET environment variable and the 10.16 / 11.0 macOS Big Sur
    #    transitional case in particular).
    #
    # So be pessimistic and assume the wheel is platform specific to the full extent possible.
    identity = (target or targets.current()).get_interpreter().identity
    target_tags = "{python_tag}-{abi_tag}-{platform_tag}".format(
        python_tag=identity.python_tag,
        abi_tag=identity.abi_tag,
        platform_tag=identity.platform_tag,
    )

    sdist_dir = CacheDir.BUILT_WHEELS.path(
        dist_type, os.path.basename(sdist), pex_root=pex_root
    )
    dist_dir = os.path.join(sdist_dir, fingerprint, target_tags)

    # The cache dir path is the sdist-named directory for sdists and the dist dir itself for
    # local projects.
    return cls(
        path=sdist_dir if source_is_sdist else dist_dir,
        dist_dir=dist_dir,
        file_name=file_name,
        pnav=pnav,
    )

def __init__(
    self,
    path,  # type: str
    dist_dir,  # type: str
    file_name=None,  # type: Optional[str]
    pnav=None,  # type: Optional[ProjectNameAndVersion]
):
    # type: (...) -> None
    """Record the location of a built wheel in the cache.

    :param path: The cache directory path handed to the base class.
    :param dist_dir: The directory containing the built wheel.
    :param file_name: The file name of the built wheel, if known.
    :param pnav: The project name and version of the built wheel, if known.
    """
    super(BuiltWheelDir, self).__init__(path)
    # Both of these may be `None`; the corresponding properties fill them in on first use.
    self.__pnav = pnav
    self._file_name = file_name
    self.dist_dir = dist_dir

# The file name of the wheel in `dist_dir`; looked up on first access when no file name was
# given to the constructor, then cached in `_file_name`.
# NOTE(review): the `def is_wheel(self):` line below and the `return is_wheel(self.file_name)`
# line further down look like removed-side lines of the diff interleaved with the added
# `file_name` property -- TODO confirm against the committed file.
@property
def is_wheel(self):
def file_name(self):
# type: () -> str
from pex.dist_metadata import is_wheel

return is_wheel(self.file_name)
if self._file_name is None:
# Candidates are the non-directory entries of `dist_dir` that `is_wheel` accepts.
potential_file_names = [
file_name
for file_name in os.listdir(self.dist_dir)
if not os.path.isdir(os.path.join(self.dist_dir, file_name)) and is_wheel(file_name)
]
# Exactly one candidate is expected; anything else fails the assertion.
production_assert(len(potential_file_names) == 1)
self._file_name = potential_file_names[0]
return self._file_name

@property
def _pnav(self):
    # type: () -> ProjectNameAndVersion
    """The project name and version; parsed from `file_name` on first use if not supplied."""
    known = self.__pnav
    if known is not None:
        return known

    from pex.dist_metadata import ProjectNameAndVersion

    # Remember the parsed result so later accesses skip the parse.
    self.__pnav = ProjectNameAndVersion.from_filename(self.file_name)
    return self.__pnav

@property
def project_name(self):
    # type: () -> ProjectName
    """The canonicalized project name taken from this dir's project name and version."""
    pnav = self._pnav
    return pnav.canonicalized_project_name

@property
def version(self):
    # type: () -> Version
    """The canonicalized version taken from this dir's project name and version."""
    pnav = self._pnav
    return pnav.canonicalized_version
24 changes: 20 additions & 4 deletions pex/cli/commands/cache/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import absolute_import, print_function

import functools
import itertools
import os
import re
from argparse import Action, ArgumentError, _ActionsContainer
Expand All @@ -15,6 +16,7 @@
from pex.cache.dirs import (
AtomicCacheDir,
BootstrapDir,
BuiltWheelDir,
CacheDir,
DownloadDir,
InstalledWheelDir,
Expand All @@ -40,7 +42,18 @@

if TYPE_CHECKING:
import typing
from typing import IO, DefaultDict, Dict, Iterable, List, Mapping, Optional, Tuple, Union
from typing import (
IO,
DefaultDict,
Dict,
Iterable,
Iterator,
List,
Mapping,
Optional,
Tuple,
Union,
)

import attr # vendor:skip
else:
Expand Down Expand Up @@ -527,10 +540,13 @@ def _prune(self):
additional_cache_dirs_by_project_name_and_version = defaultdict(
list
) # type: DefaultDict[Tuple[ProjectName, Version], List[AtomicCacheDir]]
for download_dir in DownloadDir.iter_all():
cached_artifact_dirs = itertools.chain(
BuiltWheelDir.iter_all(), DownloadDir.iter_all()
) # type: Iterator[Union[BuiltWheelDir, DownloadDir]]
for cache_dir in cached_artifact_dirs:
additional_cache_dirs_by_project_name_and_version[
(download_dir.project_name, download_dir.version)
].append(download_dir)
(cache_dir.project_name, cache_dir.version)
].append(cache_dir)

prune_cache_dir = functools.partial(
self._prune_cache_dir, additional_cache_dirs_by_project_name_and_version
Expand Down
70 changes: 16 additions & 54 deletions pex/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from pex import targets
from pex.atomic_directory import AtomicDirectory, atomic_directory
from pex.auth import PasswordEntry
from pex.cache.dirs import CacheDir
from pex.cache.dirs import BuiltWheelDir, CacheDir
from pex.common import pluralize, safe_mkdir, safe_mkdtemp, safe_open
from pex.compatibility import url_unquote, urlparse
from pex.dependency_configuration import DependencyConfiguration
Expand Down Expand Up @@ -304,51 +304,22 @@ def create(
source_path = attr.ib() # type: str
fingerprint = attr.ib() # type: str

def result(self, dist_root):
# type: (str) -> BuildResult
return BuildResult.from_request(self, dist_root=dist_root)
def result(self):
# type: () -> BuildResult
return BuildResult.from_request(self)


@attr.s(frozen=True)
class BuildResult(object):
@classmethod
def from_request(
cls,
build_request, # type: BuildRequest
dist_root, # type: str
):
# type: (...) -> BuildResult
dist_type = "sdists" if os.path.isfile(build_request.source_path) else "local_projects"

# For the purposes of building a wheel from source, the product should be uniqued by the
# wheel name which is unique on the host os up to the python and abi tags. In other words,
# the product of a CPython 2.7.6 wheel build and a CPython 2.7.18 wheel build should be
# functionally interchangeable if the two CPython interpreters have matching abis.
#
# However, this is foiled by at least two scenarios:
# 1. Running a vm / container with shared storage mounted. This can introduce a different
# platform on the host.
# 2. On macOS the same host can report / use different OS versions (c.f.: the
# MACOSX_DEPLOYMENT_TARGET environment variable and the 10.16 / 11.0 macOS Big Sur
# transitional case in particular).
#
# As such, we must be pessimistic and assume the wheel will be platform specific to the
# full extent possible.
interpreter = build_request.target.get_interpreter()
target_tags = "{python_tag}-{abi_tag}-{platform_tag}".format(
python_tag=interpreter.identity.python_tag,
abi_tag=interpreter.identity.abi_tag,
platform_tag=interpreter.identity.platform_tag,
def from_request(cls, build_request):
# type: (BuildRequest) -> BuildResult
built_wheel = BuiltWheelDir.create(
sdist=build_request.source_path,
fingerprint=build_request.fingerprint,
target=build_request.target,
)

dist_dir = os.path.join(
dist_root,
dist_type,
os.path.basename(build_request.source_path),
build_request.fingerprint,
target_tags,
)
return cls(request=build_request, atomic_dir=AtomicDirectory(dist_dir))
return cls(request=build_request, atomic_dir=AtomicDirectory(built_wheel.dist_dir))

request = attr.ib() # type: BuildRequest
_atomic_dir = attr.ib() # type: AtomicDirectory
Expand Down Expand Up @@ -584,7 +555,6 @@ def __init__(
@staticmethod
def _categorize_build_requests(
build_requests, # type: Iterable[BuildRequest]
dist_root, # type: str
check_compatible=True, # type: bool
):
# type: (...) -> Tuple[Iterable[BuildRequest], DefaultDict[str, OrderedSet[InstallRequest]]]
Expand All @@ -593,7 +563,7 @@ def _categorize_build_requests(
OrderedSet
) # type: DefaultDict[str, OrderedSet[InstallRequest]]
for build_request in build_requests:
build_result = build_request.result(dist_root)
build_result = build_request.result()
if not build_result.is_built:
TRACER.log(
"Building {} to {}".format(build_request.source_path, build_result.dist_dir)
Expand All @@ -610,13 +580,9 @@ def _categorize_build_requests(
)
return unsatisfied_build_requests, build_results

def _spawn_wheel_build(
self,
built_wheels_dir, # type: str
build_request, # type: BuildRequest
):
# type: (...) -> SpawnedJob[BuildResult]
build_result = build_request.result(built_wheels_dir)
def _spawn_wheel_build(self, build_request):
# type: (BuildRequest) -> SpawnedJob[BuildResult]
build_result = build_request.result()
build_job = get_pip(
interpreter=build_request.target.get_interpreter(),
version=self._pip_version,
Expand Down Expand Up @@ -648,21 +614,17 @@ def build_wheels(
# Nothing to build or install.
return {}

built_wheels_dir = CacheDir.BUILT_WHEELS.path()
spawn_wheel_build = functools.partial(self._spawn_wheel_build, built_wheels_dir)

with TRACER.timed(
"Building distributions for:" "\n {}".format("\n ".join(map(str, build_requests)))
):
build_requests, build_results = self._categorize_build_requests(
build_requests=build_requests,
dist_root=built_wheels_dir,
check_compatible=check_compatible,
)

for build_result in execute_parallel(
inputs=build_requests,
spawn_func=spawn_wheel_build,
spawn_func=self._spawn_wheel_build,
error_handler=Raise[BuildRequest, BuildResult](Untranslatable),
max_jobs=max_parallel_jobs,
):
Expand Down

0 comments on commit b12e409

Please sign in to comment.