Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retain and index downloads when locking. #1650

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 64 additions & 6 deletions pex/resolve/lockfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,30 @@

from __future__ import absolute_import

import os
import shutil

from pex import resolver
from pex.common import pluralize, safe_open
from pex.common import pluralize, safe_mkdtemp, safe_open
from pex.requirements import LocalProjectRequirement, VCSRequirement
from pex.resolve import resolvers
from pex.resolve.locked_resolve import LockConfiguration
from pex.resolve.locked_resolve import Artifact, LockConfiguration
from pex.resolve.lockfile.download_manager import DownloadedArtifact, DownloadManager
from pex.resolve.lockfile.lockfile import Lockfile as Lockfile # For re-export.
from pex.resolve.requirement_configuration import RequirementConfiguration
from pex.resolve.resolver_configuration import PipConfiguration
from pex.resolver import Downloaded
from pex.result import Error
from pex.targets import Targets
from pex.third_party.pkg_resources import Requirement
from pex.tracer import TRACER
from pex.typing import TYPE_CHECKING
from pex.util import CacheHelper
from pex.variables import ENV
from pex.version import __version__

if TYPE_CHECKING:
from typing import List, Text, Union
from typing import List, Mapping, Optional, Text, Union


class ParseError(Exception):
Expand Down Expand Up @@ -77,6 +84,51 @@ def store(
json.dump(json_codec.as_json_data(lockfile), fp, sort_keys=True)


class CreateLockDownloadManager(DownloadManager):
@classmethod
def create(
cls,
download_dir, # type: str
downloaded, # type: Downloaded
pex_root=None, # type: Optional[str]
):
# type: (...) -> CreateLockDownloadManager

artifacts_by_filename = {
artifact.filename: artifact
for locked_resolve in downloaded.locked_resolves
for locked_requirement in locked_resolve.locked_requirements
for artifact in locked_requirement.iter_artifacts()
}
path_by_artifact = {
artifacts_by_filename[f]: os.path.join(root, f)
jsirois marked this conversation as resolved.
Show resolved Hide resolved
for root, _, files in os.walk(download_dir)
for f in files
}
return cls(path_by_artifact=path_by_artifact, pex_root=pex_root)

def __init__(
self,
path_by_artifact, # type: Mapping[Artifact, str]
pex_root=None, # type: Optional[str]
):
super(CreateLockDownloadManager, self).__init__(pex_root=pex_root)
self._path_by_artifact = path_by_artifact

def store_all(self):
for artifact in self._path_by_artifact:
self.store(artifact)

def save(
self,
artifact, # type: Artifact
path, # type: str
):
# type: (...) -> str
shutil.move(self._path_by_artifact[artifact], path)
return CacheHelper.hash(path)


def create(
lock_configuration, # type: LockConfiguration
requirement_configuration, # type: RequirementConfiguration
Expand Down Expand Up @@ -120,6 +172,8 @@ def create(
for constraint in requirement_configuration.parse_constraints(network_configuration)
)

dest = safe_mkdtemp()

try:
downloaded = resolver.download(
targets=targets,
Expand All @@ -140,13 +194,17 @@ def create(
build_isolation=pip_configuration.build_isolation,
max_parallel_jobs=pip_configuration.max_jobs,
lock_configuration=lock_configuration,
# We're just out for the lock data and not the distribution files downloaded to produce
# that data.
dest=None,
dest=dest,
)
except resolvers.ResolveError as e:
return Error(str(e))

with TRACER.timed("Indexing downloads"):
create_lock_download_manager = CreateLockDownloadManager.create(
download_dir=dest, downloaded=downloaded
)
create_lock_download_manager.store_all()

return Lockfile.create(
pex_version=__version__,
style=lock_configuration.style,
Expand Down
56 changes: 56 additions & 0 deletions pex/resolve/lockfile/download_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright 2022 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from __future__ import absolute_import

import os

from pex.common import atomic_directory
from pex.resolve.locked_resolve import Artifact
from pex.typing import TYPE_CHECKING
from pex.variables import ENV

if TYPE_CHECKING:
from typing import Optional

import attr # vendor:skip
else:
from pex.third_party import attr


@attr.s(frozen=True)
class DownloadedArtifact(object):
path = attr.ib() # type: str

def fingerprint(self):
# type: () -> str
with open(os.path.join(os.path.dirname(self.path), "sha1"), "rb") as fp:
jsirois marked this conversation as resolved.
Show resolved Hide resolved
return str(fp.read().decode("ascii"))


class DownloadManager(object):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

N.B.: The forthcoming lock resolver will have its own implementation of this; thus factoring up the logic of where to store downloads for re-use / cache-hits centrally here.

def __init__(self, pex_root=None):
# type: (Optional[str]) -> None
self._download_dir = os.path.join(pex_root or ENV.PEX_ROOT, "downloads")

def store(self, artifact):
# type: (Artifact) -> DownloadedArtifact

download_dir = os.path.join(self._download_dir, artifact.fingerprint.hash)
with atomic_directory(download_dir, exclusive=True) as atomic_dir:
Copy link
Member Author

@jsirois jsirois Mar 7, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is actually no need for this to be exclusive in this PR; however the lock resolver will also be implementing this and fetching URLs directly from PyPI most of the time. The exclusive lock will prevent two independent processes from trying to download the same artifact from PyPI concurrently and relieve a little pressure from PyPI as a result.

if not atomic_dir.is_finalized():
dest = os.path.join(atomic_dir.work_dir, artifact.filename)
internal_fingerprint = self.save(artifact, dest)
with open(os.path.join(atomic_dir.work_dir, "sha1"), "wb") as fp:
fp.write(internal_fingerprint.encode("ascii"))

return DownloadedArtifact(path=os.path.join(download_dir, artifact.filename))

def save(
self,
artifact, # type: Artifact
path, # type: str
):
# type: (...) -> str
"""Save the given `artifact` at `path` and return its sha1 hex digest."""
raise NotImplementedError()
34 changes: 32 additions & 2 deletions tests/integration/cli/commands/test_lock.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

import hashlib
import os
import re
from textwrap import dedent
Expand All @@ -22,6 +23,7 @@
from pex.testing import IS_MAC, IS_PYPY, PY310, PY_VER, IntegResults, ensure_python_interpreter
from pex.third_party.pkg_resources import Requirement
from pex.typing import TYPE_CHECKING
from pex.util import CacheHelper
from pex.version import __version__

if TYPE_CHECKING:
Expand Down Expand Up @@ -73,21 +75,49 @@ def test_create(tmpdir):
def test_create_style(tmpdir):
# type: (Any) -> None

pex_root = os.path.join(str(tmpdir), "pex_root")

def create_lock(
style, # type: str
interpreter_constraint=None, # type: Optional[str]
):
# type: (...) -> LockedRequirement
lock_file = os.path.join(str(tmpdir), "{}.lock".format(style))
args = ["lock", "create", "psutil==5.9.0", "-o", lock_file, "--style", style]
args = [
"lock",
"create",
"psutil==5.9.0",
"-o",
lock_file,
"--style",
style,
"--pex-root",
pex_root,
]
if interpreter_constraint:
args.extend(["--interpreter-constraint", interpreter_constraint])
run_pex3(*args).assert_success()
lock = lockfile.load(lock_file)
assert 1 == len(lock.locked_resolves)
locked_resolve = lock.locked_resolves[0]
assert 1 == len(locked_resolve.locked_requirements)
return locked_resolve.locked_requirements[0]
locked_requirement = locked_resolve.locked_requirements[0]
download_dir = os.path.join(
pex_root, "downloads", locked_requirement.artifact.fingerprint.hash
)
downloaded_artifact = os.path.join(download_dir, locked_requirement.artifact.filename)
assert os.path.exists(downloaded_artifact), (
"Expected the primary artifact to be downloaded as a side-effect of executing the lock "
"resolve."
)
downloaded_artifact_internal_fingerprint = os.path.join(download_dir, "sha1")
assert os.path.exists(downloaded_artifact_internal_fingerprint), (
"Expected the primary artifact to have an internal fingerprint established to short "
"circuit builds and installs."
)
with open(downloaded_artifact_internal_fingerprint) as fp:
assert CacheHelper.hash(downloaded_artifact, digest=hashlib.sha1()) == fp.read()
return locked_requirement

# See: https://pypi.org/project/psutil/5.9.0/#files

Expand Down