diff --git a/CHANGES.md b/CHANGES.md index 334a87fad..d14881a94 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,15 @@ # Release Notes +## 2.24.1 + +This release fixes `pex3 cache prune` handling of cached Pips. +Previously, performing a `pip3 cache prune` would bump the last access +of all un-pruned cached Pips artificially. If you ran `pip3 cache prune` +in a daily or weekly cron job, this would mean Pips would never be +pruned. + +* Fix `pex3 cache prune` handling of cached Pips. (#2589) + ## 2.24.0 This release adds `pex3 cache prune` as a likely more useful Pex cache diff --git a/pex/cache/access.py b/pex/cache/access.py index faa85232d..c572d0c18 100644 --- a/pex/cache/access.py +++ b/pex/cache/access.py @@ -16,7 +16,7 @@ if TYPE_CHECKING: from typing import Iterator, Optional, Tuple, Union - from pex.cache.dirs import AtomicCacheDir, UnzipDir, VenvDirs # noqa + from pex.cache.dirs import AtomicCacheDir, UnzipDir, VenvDir, VenvDirs # noqa # N.B.: The lock file path is last in the lock state tuple to allow for a simple encoding scheme in @@ -105,18 +105,20 @@ def await_delete_lock(): _lock(exclusive=True) +def _last_access_file(pex_dir): + # type: (Union[UnzipDir, VenvDir, VenvDirs]) -> str + return os.path.join(pex_dir.path, ".last-access") + + def record_access( - atomic_cache_dir, # type: AtomicCacheDir + pex_dir, # type: Union[UnzipDir, VenvDir, VenvDirs] last_access=None, # type: Optional[float] ): # type: (...) -> None - # N.B.: We explicitly set atime and do not rely on the filesystem implicitly setting it when the - # directory is read since filesystems may be mounted noatime, nodiratime or relatime on Linux - # and similar toggles exist, at least in part, for some macOS file systems. atime = last_access or time.time() - mtime = os.stat(atomic_cache_dir.path).st_mtime - os.utime(atomic_cache_dir.path, (atime, mtime)) + with open(_last_access_file(pex_dir), "w") as fp: + os.utime(fp.name, (atime, atime)) def iter_all_cached_pex_dirs(): @@ -128,5 +130,5 @@ def iter_all_cached_pex_dirs(): UnzipDir.iter_all(), VenvDirs.iter_all() ) # type: Iterator[Union[UnzipDir, VenvDirs]] for pex_dir in pex_dirs: - last_access = os.stat(pex_dir.path).st_atime + last_access = os.stat(_last_access_file(pex_dir)).st_atime yield pex_dir, last_access diff --git a/pex/cache/dirs.py b/pex/cache/dirs.py index a43c85674..15cfa9389 100644 --- a/pex/cache/dirs.py +++ b/pex/cache/dirs.py @@ -189,7 +189,7 @@ def iter_transitive_dependents(self): UNZIPPED_PEXES = Value( "unzipped_pexes", - version=0, + version=1, name="Unzipped PEXes", description="The unzipped PEX files executed on this machine.", dependencies=[BOOTSTRAPS, USER_CODE, INSTALLED_WHEELS], @@ -197,7 +197,7 @@ def iter_transitive_dependents(self): VENVS = Value( "venvs", - version=0, + version=1, name="Virtual Environments", description="Virtual environments generated at runtime for `--venv` mode PEXes.", dependencies=[INSTALLED_WHEELS], diff --git a/pex/cache/prunable.py b/pex/cache/prunable.py index ea544aa29..ad63195b1 100644 --- a/pex/cache/prunable.py +++ b/pex/cache/prunable.py @@ -41,31 +41,24 @@ from pex.third_party import attr -@attr.s(frozen=True) -class PrunablePipCache(object): - pip = attr.ib() # type: Pip - pex_dir = attr.ib() # type: Union[UnzipDir, VenvDirs] - last_access = attr.ib() # type: float - - @attr.s(frozen=True) class Pips(object): @classmethod def scan(cls, pex_dirs_by_hash): - # type: (Mapping[str, Tuple[Union[UnzipDir, VenvDirs], float, bool]]) -> Pips + # type: (Mapping[str, Tuple[Union[UnzipDir, VenvDirs], bool]]) -> Pips # True to prune the Pip version completely, False to just prune the Pip PEX. pips_to_prune = OrderedDict() # type: OrderedDict[Pip, bool] # N.B.: We just need 1 Pip per version (really per paired cache). Whether a Pip has # extra requirements installed does not affect cache management. - pip_caches_to_prune = OrderedDict() # type: OrderedDict[PipVersionValue, PrunablePipCache] - for pip in iter_all_pips(): - pex_dir, last_access, prunable = pex_dirs_by_hash[pip.pex_hash] + pip_caches_to_prune = OrderedDict() # type: OrderedDict[PipVersionValue, Pip] + for pip in iter_all_pips(record_access=False): + pex_dir, prunable = pex_dirs_by_hash[pip.pex_hash] if prunable: pips_to_prune[pip] = False else: - pip_caches_to_prune[pip.version] = PrunablePipCache(pip, pex_dir, last_access) + pip_caches_to_prune[pip.version] = pip for pip in pips_to_prune: if pip.version not in pip_caches_to_prune: pips_to_prune[pip] = True @@ -74,10 +67,10 @@ def scan(cls, pex_dirs_by_hash): (pip.pex_dir.base_dir if prune_version else pip.pex_dir.path) for pip, prune_version in pips_to_prune.items() ) - return cls(paths=pip_paths_to_prune, caches=tuple(pip_caches_to_prune.values())) + return cls(paths=pip_paths_to_prune, pips=tuple(pip_caches_to_prune.values())) paths = attr.ib() # type: Tuple[str, ...] - caches = attr.ib() # type: Tuple[PrunablePipCache, ...] + pips = attr.ib() # type: Tuple[Pip, ...] @attr.s(frozen=True) @@ -107,7 +100,7 @@ def scan(cls, cutoff): OrderedSet() ) # type: OrderedSet[Union[BootstrapDir, UserCodeDir, InstalledWheelDir]] unprunable_deps = [] # type: List[Union[BootstrapDir, UserCodeDir, InstalledWheelDir]] - pex_dirs_by_hash = {} # type: Dict[str, Tuple[Union[UnzipDir, VenvDirs], float, bool]] + pex_dirs_by_hash = {} # type: Dict[str, Tuple[Union[UnzipDir, VenvDirs], bool]] for pex_dir, last_access in access.iter_all_cached_pex_dirs(): prunable = pex_dir in prunable_pex_dirs if prunable: @@ -115,7 +108,7 @@ def scan(cls, cutoff): pex_deps.update(pex_dir.iter_deps()) else: unprunable_deps.extend(pex_dir.iter_deps()) - pex_dirs_by_hash[pex_dir.pex_hash] = pex_dir, last_access, prunable + pex_dirs_by_hash[pex_dir.pex_hash] = pex_dir, prunable pips = Pips.scan(pex_dirs_by_hash) return cls( diff --git a/pex/cli/commands/cache/command.py b/pex/cli/commands/cache/command.py index 3b4030ce6..d1f47e14a 100644 --- a/pex/cli/commands/cache/command.py +++ b/pex/cli/commands/cache/command.py @@ -21,7 +21,7 @@ InstalledWheelDir, VenvDirs, ) -from pex.cache.prunable import Prunable, PrunablePipCache +from pex.cache.prunable import Prunable from pex.cli.command import BuildTimeCommand from pex.cli.commands.cache.bytes import ByteAmount, ByteUnits from pex.cli.commands.cache.du import DiskUsage @@ -33,6 +33,7 @@ from pex.orderedset import OrderedSet from pex.pep_440 import Version from pex.pep_503 import ProjectName +from pex.pip.tool import Pip from pex.result import Error, Ok, Result from pex.typing import TYPE_CHECKING from pex.variables import ENV @@ -629,10 +630,10 @@ def prune_pip_caches(): if not prunable_wheels: return - def spawn_list(prunable_pip_cache): - # type: (PrunablePipCache) -> SpawnedJob[Tuple[ProjectNameAndVersion, ...]] + def spawn_list(pip): + # type: (Pip) -> SpawnedJob[Tuple[ProjectNameAndVersion, ...]] return SpawnedJob.stdout( - job=prunable_pip_cache.pip.spawn_cache_list(), + job=pip.spawn_cache_list(), result_func=lambda stdout: tuple( ProjectNameAndVersion.from_filename(wheel_file) for wheel_file in stdout.decode("utf-8").splitlines() @@ -640,10 +641,10 @@ def spawn_list(prunable_pip_cache): ), ) - pip_removes = [] # type: List[Tuple[PrunablePipCache, str]] - for prunable_pip_cache, project_name_and_versions in zip( - prunable.pips.caches, - execute_parallel(inputs=prunable.pips.caches, spawn_func=spawn_list), + pip_removes = [] # type: List[Tuple[Pip, str]] + for pip, project_name_and_versions in zip( + prunable.pips.pips, + execute_parallel(inputs=prunable.pips.pips, spawn_func=spawn_list), ): for pnav in project_name_and_versions: if ( @@ -652,7 +653,7 @@ def spawn_list(prunable_pip_cache): ) in prunable_wheels: pip_removes.append( ( - prunable_pip_cache, + pip, "{project_name}-{version}*".format( project_name=pnav.project_name, version=pnav.version ), @@ -673,22 +674,19 @@ def parse_remove(stdout): return 0 def spawn_remove(args): - # type: (Tuple[PrunablePipCache, str]) -> SpawnedJob[int] - prunable_pip_cache, wheel_name_glob = args + # type: (Tuple[Pip, str]) -> SpawnedJob[int] + pip, wheel_name_glob = args return SpawnedJob.stdout( - job=prunable_pip_cache.pip.spawn_cache_remove(wheel_name_glob), + job=pip.spawn_cache_remove(wheel_name_glob), result_func=parse_remove, ) removes_by_pip = Counter() # type: typing.Counter[str] - for prunable_pip_cache, remove_count in zip( - [prunable_pip_cache for prunable_pip_cache, _ in pip_removes], + for pip, remove_count in zip( + [pip for pip, _ in pip_removes], execute_parallel(inputs=pip_removes, spawn_func=spawn_remove), ): - removes_by_pip[prunable_pip_cache.pip.version.value] += remove_count - cache_access.record_access( - prunable_pip_cache.pex_dir, last_access=prunable_pip_cache.last_access - ) + removes_by_pip[pip.version.value] += remove_count if removes_by_pip: total = sum(removes_by_pip.values()) print( diff --git a/pex/layout.py b/pex/layout.py index 30724e73b..87ea707b0 100644 --- a/pex/layout.py +++ b/pex/layout.py @@ -320,8 +320,6 @@ def _ensure_installed( if not os.path.exists(install_to): with ENV.patch(PEX_ROOT=pex_root): cache_access.read_write() - else: - cache_access.record_access(install_to) with atomic_directory(install_to) as chroot: if not chroot.is_finalized(): with ENV.patch(PEX_ROOT=pex_root), TRACER.timed( @@ -374,6 +372,7 @@ def _ensure_installed( layout.extract_pex_info(chroot.work_dir) layout.extract_main(chroot.work_dir) layout.record(chroot.work_dir) + cache_access.record_access(install_to) return install_to diff --git a/pex/pex_bootstrapper.py b/pex/pex_bootstrapper.py index 0c0bfa680..d4fcda49a 100644 --- a/pex/pex_bootstrapper.py +++ b/pex/pex_bootstrapper.py @@ -507,6 +507,7 @@ def ensure_venv( pex, # type: PEX collisions_ok=True, # type: bool copy_mode=None, # type: Optional[CopyMode.Value] + record_access=True, # type: bool ): # type: (...) -> VenvPex pex_info = pex.pex_info() @@ -524,8 +525,6 @@ def ensure_venv( if not os.path.exists(venv_dir): with ENV.patch(PEX_ROOT=pex_info.pex_root): cache_access.read_write() - else: - cache_access.record_access(venv_dir) with atomic_directory(venv_dir) as venv: if not venv.is_finalized(): from pex.venv.virtualenv import Virtualenv @@ -626,7 +625,8 @@ def ensure_venv( ) break - + if record_access: + cache_access.record_access(venv_dir) return VenvPex(venv_dir, hermetic_scripts=pex_info.venv_hermetic_scripts) diff --git a/pex/pip/installation.py b/pex/pip/installation.py index 1fc282257..4bff7af87 100644 --- a/pex/pip/installation.py +++ b/pex/pip/installation.py @@ -45,6 +45,7 @@ def _create_pip( pip_pex, # type: PipPexDir interpreter=None, # type: Optional[PythonInterpreter] use_system_time=False, # type: bool + record_access=True, # type: bool ): # type: (...) -> Pip @@ -52,7 +53,7 @@ def _create_pip( pip_interpreter = interpreter or PythonInterpreter.get() pex = PEX(pip_pex.path, interpreter=pip_interpreter) - venv_pex = ensure_venv(pex, copy_mode=CopyMode.SYMLINK) + venv_pex = ensure_venv(pex, copy_mode=CopyMode.SYMLINK, record_access=record_access) pex_hash = pex.pex_info().pex_hash production_assert(pex_hash is not None) pip_venv = PipVenv( @@ -512,8 +513,14 @@ def iter_all( interpreter=None, # type: Optional[PythonInterpreter] use_system_time=False, # type: bool pex_root=ENV, # type: Union[str, Variables] + record_access=True, # type: bool ): # type: (...) -> Iterator[Pip] for pip_pex in PipPexDir.iter_all(pex_root=pex_root): - yield _create_pip(pip_pex, interpreter=interpreter, use_system_time=use_system_time) + yield _create_pip( + pip_pex, + interpreter=interpreter, + use_system_time=use_system_time, + record_access=record_access, + ) diff --git a/pex/version.py b/pex/version.py index c56fb3667..a2dc23914 100644 --- a/pex/version.py +++ b/pex/version.py @@ -1,4 +1,4 @@ # Copyright 2015 Pex project contributors. # Licensed under the Apache License, Version 2.0 (see LICENSE). -__version__ = "2.24.0" +__version__ = "2.24.1" diff --git a/tests/integration/cli/commands/test_cache_prune.py b/tests/integration/cli/commands/test_cache_prune.py index a76a6082e..4049c012a 100644 --- a/tests/integration/cli/commands/test_cache_prune.py +++ b/tests/integration/cli/commands/test_cache_prune.py @@ -9,7 +9,7 @@ import time from datetime import datetime, timedelta from textwrap import dedent -from typing import Dict, Tuple +from typing import Dict, Tuple, Union import attr # vendor:skip import colors # vendor:skip @@ -61,6 +61,13 @@ def lock(tmpdir): return tmpdir.join("lock.json") +def du(cache_dir): + # type: (Union[CacheDir.Value, str]) -> DiskUsage + return DiskUsage.collect( + cache_dir.path() if isinstance(cache_dir, CacheDir.Value) else cache_dir + ) + + def test_nothing_prunable( pex, # type: str pex_root, # type: str @@ -71,7 +78,7 @@ def test_nothing_prunable( pex_size = os.path.getsize(pex) subprocess.check_call(args=[pex, "-c", ""]) - pre_prune_du = DiskUsage.collect(pex_root) + pre_prune_du = du(pex_root) assert ( pre_prune_du.size > pex_size ), "Expected the unzipped PEX to be larger than the zipped pex." @@ -79,24 +86,24 @@ def test_nothing_prunable( # The default prune threshold should be high enough to never trigger in a test run (it's 2 # weeks old at the time of writing). run_pex3("cache", "prune").assert_success() - assert pre_prune_du == DiskUsage.collect(pex_root) + assert pre_prune_du == du(pex_root) def test_installed_wheel_prune_build_time(pex): # type: (str) -> None run_pex_command(args=["ansicolors==1.1.8", "-o", pex]).assert_success() - installed_wheels_size = DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size + installed_wheels_size = du(CacheDir.INSTALLED_WHEELS).size assert installed_wheels_size > 0 - assert 0 == DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size - assert 0 == DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size - assert 0 == DiskUsage.collect(CacheDir.USER_CODE.path()).size + assert 0 == du(CacheDir.UNZIPPED_PEXES).size + assert 0 == du(CacheDir.BOOTSTRAPS).size + assert 0 == du(CacheDir.USER_CODE).size run_pex3("cache", "prune", "--older-than", "0 seconds").assert_success() - assert 0 == DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size - assert 0 == DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size - assert 0 == DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size - assert 0 == DiskUsage.collect(CacheDir.USER_CODE.path()).size + assert 0 == du(CacheDir.UNZIPPED_PEXES).size + assert 0 == du(CacheDir.INSTALLED_WHEELS).size + assert 0 == du(CacheDir.BOOTSTRAPS).size + assert 0 == du(CacheDir.USER_CODE).size def test_installed_wheel_prune_run_time( @@ -109,25 +116,23 @@ def test_installed_wheel_prune_run_time( pex_size = os.path.getsize(pex) shutil.rmtree(pex_root) - assert 0 == DiskUsage.collect(pex_root).size + assert 0 == du(pex_root).size assert b"| Moo! |" in subprocess.check_output(args=[pex, "Moo!"]) - pre_prune_du = DiskUsage.collect(pex_root) - assert DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size > 0 - assert DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size > 0 - assert DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size > 0 - assert ( - 0 == DiskUsage.collect(CacheDir.USER_CODE.path()).size - ), "There is no user code in the PEX." + pre_prune_du = du(pex_root) + assert du(CacheDir.INSTALLED_WHEELS).size > 0 + assert du(CacheDir.UNZIPPED_PEXES).size > 0 + assert du(CacheDir.BOOTSTRAPS).size > 0 + assert 0 == du(CacheDir.USER_CODE).size, "There is no user code in the PEX." assert ( pre_prune_du.size > pex_size ), "Expected the unzipped PEX to be larger than the zipped pex." run_pex3("cache", "prune", "--older-than", "0 seconds").assert_success() - assert 0 == DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size - assert 0 == DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size - assert 0 == DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size - assert 0 == DiskUsage.collect(CacheDir.USER_CODE.path()).size + assert 0 == du(CacheDir.UNZIPPED_PEXES).size + assert 0 == du(CacheDir.INSTALLED_WHEELS).size + assert 0 == du(CacheDir.BOOTSTRAPS).size + assert 0 == du(CacheDir.USER_CODE).size @attr.s(frozen=True) @@ -194,29 +199,29 @@ def test_app_prune( # type: (...) -> None pex_size = os.path.getsize(ansicolors_zipapp_pex.path) - installed_wheels_size = DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size + installed_wheels_size = du(CacheDir.INSTALLED_WHEELS).size assert installed_wheels_size > 0 - assert 0 == DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size - assert 0 == DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size - assert 0 == DiskUsage.collect(CacheDir.USER_CODE.path()).size + assert 0 == du(CacheDir.UNZIPPED_PEXES).size + assert 0 == du(CacheDir.BOOTSTRAPS).size + assert 0 == du(CacheDir.USER_CODE).size execute_ansicolors_pex(ansicolors_zipapp_pex) - pre_prune_du = DiskUsage.collect(pex_root) + pre_prune_du = du(pex_root) assert ( - DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size > installed_wheels_size + du(CacheDir.INSTALLED_WHEELS).size > installed_wheels_size ), "Expected .pyc files to be compiled leading to more disk space usage" - assert DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size > 0 - assert DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size > 0 - assert DiskUsage.collect(CacheDir.USER_CODE.path()).size > 0 + assert du(CacheDir.UNZIPPED_PEXES).size > 0 + assert du(CacheDir.BOOTSTRAPS).size > 0 + assert du(CacheDir.USER_CODE).size > 0 assert ( pre_prune_du.size > pex_size ), "Expected the unzipped PEX to be larger than the zipped pex." run_pex3("cache", "prune", "--older-than", "0 seconds").assert_success() - assert 0 == DiskUsage.collect(CacheDir.UNZIPPED_PEXES.path()).size - assert 0 == DiskUsage.collect(CacheDir.INSTALLED_WHEELS.path()).size - assert 0 == DiskUsage.collect(CacheDir.BOOTSTRAPS.path()).size - assert 0 == DiskUsage.collect(CacheDir.USER_CODE.path()).size + assert 0 == du(CacheDir.UNZIPPED_PEXES).size + assert 0 == du(CacheDir.INSTALLED_WHEELS).size + assert 0 == du(CacheDir.BOOTSTRAPS).size + assert 0 == du(CacheDir.USER_CODE).size def set_last_access_ago( @@ -513,18 +518,31 @@ def pip2(applicable_non_vendored_pips): return applicable_non_vendored_pips[1] +@pytest.fixture +def pip3(applicable_non_vendored_pips): + # type: (Tuple[PipVersionValue, ...]) -> PipVersionValue + if len(applicable_non_vendored_pips) < 3: + pytest.skip( + "This test requires 3 non-vendored Pip `--version`s be applicable, but only the " + "following are: {pips}".format(pips=" ".join(map(str, applicable_non_vendored_pips))) + ) + return applicable_non_vendored_pips[2] + + def test_pip_prune( tmpdir, # type: Tempdir pip1, # type: PipVersionValue pip2, # type: PipVersionValue + pip3, # type: PipVersionValue ): # type: (...) -> None create_ansicolors_pex(tmpdir, "--pip-version", str(pip1)) - create_ansicolors_pex(tmpdir, "--pip-version", str(pip2), "--no-wheel") + create_ansicolors_pex(tmpdir, "--pip-version", str(pip2)) + create_ansicolors_pex(tmpdir, "--pip-version", str(pip3), "--no-wheel") pips_by_version = {pip_dir.version: pip_dir for pip_dir in PipPexDir.iter_all()} - assert {pip1, pip2}.issubset(pips_by_version) + assert {pip1, pip2, pip3}.issubset(pips_by_version) pip_venvs_by_version = {} # type: Dict[PipVersionValue, VenvDirs] venv_dirs_by_pex_hash = {venv_dirs.pex_hash: venv_dirs for venv_dirs in VenvDirs.iter_all()} @@ -540,15 +558,25 @@ def test_pip_prune( else: set_last_access_one_second_ago(venv_dirs.path) pex_dir_to_last_access = dict(access.iter_all_cached_pex_dirs()) - result = run_pex3("cache", "prune", "--older-than", "1 hour") - result.assert_success() + + pip2_du = du(pips_by_version[pip2].base_dir) + pip3_du = du(pips_by_version[pip3].base_dir) + run_pex3("cache", "prune", "--older-than", "1 hour").assert_success() assert not os.path.exists(pips_by_version[pip1].base_dir), "Expected a full prune of pip1" + assert pip2_du.size == du(pips_by_version[pip2].base_dir).size + post_prune_pip3_du = du(pips_by_version[pip3].base_dir) + assert ( + post_prune_pip3_du.size < pip3_du.size + ), "Expected pip3 to have a built ansicolors wheel in its cache pruned." + assert ( + post_prune_pip3_du.files == pip3_du.files - 1 + ), "Expected pip3 to have a built ansicolors wheel in its cache pruned." pip1_venv_dirs = pip_venvs_by_version.pop(pip1) pex_dir_to_last_access.pop(pip1_venv_dirs) assert pex_dir_to_last_access == dict(access.iter_all_cached_pex_dirs()), ( "Expected other Pips to have their last access reset after calling `pip cache ...` to " - "prune Pip wheels.\n" + result.error + "prune Pip wheels." ) assert set(pip_venvs_by_version) == { pip_dir.version for pip_dir in PipPexDir.iter_all()