Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix --venv cached Python interpreter info. #2579

Merged
merged 3 commits into from
Oct 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pex/cache/dirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def iter_transitive_dependents(self):

INTERPRETERS = Value(
"interpreters",
version=0,
version=1,
name="Interpreters",
description="Information about interpreters found on the system.",
)
Expand Down
103 changes: 83 additions & 20 deletions pex/interpreter.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,53 @@ class Platlib(SitePackagesDir):
pass


# Process-wide registry of active path translations, keyed by a path as it currently exists
# ("current path") with the path it should be recorded as ("final path") as the value. Entries are
# added and removed by the `path_mapping` / `path_mappings` context managers and consulted by
# `_adjust_to_final_path` / `_adjust_to_current_path`.
_PATH_MAPPINGS = {}


@contextmanager
def path_mapping(
    current_path,  # type: str
    final_path,  # type: str
):
    # type: (...) -> Iterator[None]
    """Register a single `current_path` -> `final_path` translation while the context is active.

    The entry is stored in the module-level `_PATH_MAPPINGS` registry for the body of the `with`
    block. On exit (normal or exceptional) the registry is put back the way it was found: the
    entry is removed if `current_path` was not registered before, otherwise the previously
    registered final path is restored. This keeps nested mappings of the same `current_path` from
    clobbering the outer mapping or raising `KeyError` during cleanup.

    :param current_path: The path prefix as it exists right now.
    :param final_path: The path prefix that `current_path` stands in for.
    """
    # A private sentinel distinguishes "no prior entry" from any real prior value.
    missing = object()
    previous = _PATH_MAPPINGS.get(current_path, missing)

    _PATH_MAPPINGS[current_path] = final_path
    try:
        yield
    finally:
        if previous is missing:
            _PATH_MAPPINGS.pop(current_path, None)
        else:
            _PATH_MAPPINGS[current_path] = previous


@contextmanager
def path_mappings(mappings):
    # type: (Mapping[str, str]) -> Iterator[None]
    """Register several current path -> final path translations while the context is active.

    All entries of `mappings` are merged into the module-level `_PATH_MAPPINGS` registry for the
    body of the `with` block. On exit (normal or exceptional) each affected key is returned to its
    prior state: removed if it was not registered before, otherwise reset to the final path it
    previously mapped to.

    :param mappings: Current path prefixes mapped to the final path prefixes they stand in for.
    """
    # A private sentinel distinguishes "no prior entry" from any real prior value.
    missing = object()

    # Snapshot the affected keys and their prior values up front so that cleanup does not depend
    # on `mappings` being left unmodified by the body, and so that pre-existing entries survive.
    previous = {
        current_path: _PATH_MAPPINGS.get(current_path, missing) for current_path in mappings
    }

    _PATH_MAPPINGS.update(mappings)
    try:
        yield
    finally:
        for current_path, prior in previous.items():
            if prior is missing:
                _PATH_MAPPINGS.pop(current_path, None)
            else:
                _PATH_MAPPINGS[current_path] = prior


def _adjust_to_final_path(path):
    # type: (str) -> str
    """Translate `path` from its current location to its final location, if a mapping applies.

    The registered mappings in `_PATH_MAPPINGS` are checked in iteration order and the first one
    whose current path is a string prefix of `path` wins.

    :param path: The path to translate.
    :return: `path` with its leading current-path prefix replaced by the corresponding final path,
        or `path` unchanged when no registered current path is a prefix of it.
    """
    for current_path, final_path in _PATH_MAPPINGS.items():
        if path.startswith(current_path):
            # Swap only the leading prefix. A regex substitution here would also rewrite any later
            # occurrence of the prefix text inside `path` and would interpret backslashes in
            # `final_path` (e.g.: Windows paths) as replacement-template escapes.
            return final_path + path[len(current_path) :]
    return path


def _adjust_to_current_path(path):
    # type: (str) -> str
    """Translate `path` from its final location back to its current location, if a mapping applies.

    The registered mappings in `_PATH_MAPPINGS` are checked in iteration order and the first one
    whose final path is a string prefix of `path` wins.

    :param path: The path to translate.
    :return: `path` with its leading final-path prefix replaced by the corresponding current path,
        or `path` unchanged when no registered final path is a prefix of it.
    """
    for current_path, final_path in _PATH_MAPPINGS.items():
        if path.startswith(final_path):
            # Swap only the leading prefix. A regex substitution here would also rewrite any later
            # occurrence of the prefix text inside `path` and would interpret backslashes in
            # `current_path` (e.g.: Windows paths) as replacement-template escapes.
            return current_path + path[len(final_path) :]
    return path


class PythonIdentity(object):
class Error(Exception):
pass
Expand Down Expand Up @@ -333,14 +380,24 @@ def iter_tags():
site_packages = [] # type: List[SitePackagesDir]
for path in site_packages_paths:
if path == purelib:
site_packages.append(Purelib(path))
site_packages.append(Purelib(_adjust_to_current_path(path)))
elif path == platlib:
site_packages.append(Platlib(path))
site_packages.append(Platlib(_adjust_to_current_path(path)))
else:
site_packages.append(SitePackagesDir(path))
site_packages.append(SitePackagesDir(_adjust_to_current_path(path)))

return cls(
binary=_adjust_to_current_path(values.pop("binary")),
prefix=_adjust_to_current_path(values.pop("prefix")),
base_prefix=_adjust_to_current_path(values.pop("base_prefix")),
sys_path=[_adjust_to_current_path(entry) for entry in values.pop("sys_path")],
site_packages=site_packages,
extras_paths=[
_adjust_to_current_path(extras_path) for extras_path in values.pop("extras_paths")
],
paths={
name: _adjust_to_current_path(path) for name, path in values.pop("paths").items()
},
version=cast("Tuple[int, int, int]", version),
pypy_version=cast("Optional[Tuple[int, int, int]]", pypy_version),
supported_tags=iter_tags(),
Expand Down Expand Up @@ -402,26 +459,27 @@ def encode(self):
purelib = None # type: Optional[str]
platlib = None # type: Optional[str]
for entry in self._site_packages:
site_packages.append(entry.path)
entry_path = _adjust_to_final_path(entry.path)
site_packages.append(entry_path)
if isinstance(entry, Purelib):
purelib = entry.path
purelib = entry_path
elif isinstance(entry, Platlib):
platlib = entry.path
platlib = entry_path

values = dict(
__format_version__=self._FORMAT_VERSION,
binary=self._binary,
prefix=self._prefix,
base_prefix=self._base_prefix,
sys_path=self._sys_path,
binary=_adjust_to_final_path(self._binary),
prefix=_adjust_to_final_path(self._prefix),
base_prefix=_adjust_to_final_path(self._base_prefix),
sys_path=[_adjust_to_final_path(entry) for entry in self._sys_path],
site_packages=site_packages,
# N.B.: We encode purelib and platlib site-packages entries on the side like this to
# ensure older Pex versions that did not know the distinction can still use the
# interpreter cache.
purelib=purelib,
platlib=platlib,
extras_paths=self._extras_paths,
paths=self._paths,
extras_paths=[_adjust_to_final_path(extras_path) for extras_path in self._extras_paths],
paths={name: _adjust_to_final_path(path) for name, path in self._paths.items()},
packaging_version=self._packaging_version,
python_tag=self._python_tag,
abi_tag=self._abi_tag,
Expand Down Expand Up @@ -1068,20 +1126,25 @@ def create_interpreter(
import os
import sys

from pex import interpreter
from pex.atomic_directory import atomic_directory
from pex.common import safe_open
from pex.interpreter import PythonIdentity


encoded_identity = PythonIdentity.get(binary={binary!r}).encode()
with atomic_directory({cache_dir!r}) as cache_dir:
if not cache_dir.is_finalized():
with safe_open(
os.path.join(cache_dir.work_dir, {info_file!r}), 'w'
) as fp:
fp.write(encoded_identity)
with interpreter.path_mappings({path_mappings!r}):
encoded_identity = PythonIdentity.get(binary={binary!r}).encode()
with atomic_directory({cache_dir!r}) as cache_dir:
if not cache_dir.is_finalized():
with safe_open(
os.path.join(cache_dir.work_dir, {info_file!r}), 'w'
) as fp:
fp.write(encoded_identity)
""".format(
binary=binary, cache_dir=cache_dir, info_file=cls.INTERP_INFO_FILE
path_mappings=_PATH_MAPPINGS,
binary=binary,
cache_dir=cache_dir,
info_file=cls.INTERP_INFO_FILE,
)
),
],
Expand Down
170 changes: 90 additions & 80 deletions pex/pex_bootstrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import os
import sys

from pex import pex_warnings
from pex import interpreter, pex_warnings
from pex.atomic_directory import atomic_directory
from pex.cache import access as cache_access
from pex.cache.dirs import CacheDir
Expand Down Expand Up @@ -527,92 +527,102 @@ def ensure_venv(
if not venv.is_finalized():
from pex.venv.virtualenv import Virtualenv

virtualenv = Virtualenv.create_atomic(
venv_dir=venv,
interpreter=pex.interpreter,
copies=pex_info.venv_copies,
system_site_packages=pex_info.venv_system_site_packages,
prompt=os.path.basename(ENV.PEX) if ENV.PEX else None,
)
with interpreter.path_mapping(venv.work_dir, venv_dir):
virtualenv = Virtualenv.create_atomic(
venv_dir=venv,
interpreter=pex.interpreter,
copies=pex_info.venv_copies,
system_site_packages=pex_info.venv_system_site_packages,
prompt=os.path.basename(ENV.PEX) if ENV.PEX else None,
)

pex_path = os.path.abspath(pex.path())

# A sha1 hash is 160 bits -> 20 bytes -> 40 hex characters. We start with 8 characters
# (32 bits) of entropy since that is short and _very_ unlikely to collide with another
# PEX venv on this machine. If we still collide after using the whole sha1 (for a total
# of 33 collisions), then the universe is broken and we raise. It's the least we can do.
venv_hash = hashlib.sha1(venv_dir.encode("utf-8")).hexdigest()
collisions = []
for chars in range(8, len(venv_hash) + 1):
entropy = venv_hash[:chars]
short_venv_dir = CacheDir.VENVS.path("s", entropy, pex_root=pex_info.pex_root)
with atomic_directory(short_venv_dir) as short_venv:
if short_venv.is_finalized():
collisions.append(short_venv_dir)
if entropy == venv_hash:
raise RuntimeError(
"The venv for {pex} at {venv} has hash collisions with {count} "
"other {venvs}!\n{collisions}".format(
pex=pex_path,
venv=venv_dir,
count=len(collisions),
venvs=pluralize(collisions, "venv"),
collisions="\n".join(
"{index}.) {venv_path}".format(
index=index, venv_path=os.path.realpath(path)
)
for index, path in enumerate(collisions, start=1)
),
pex_path = os.path.abspath(pex.path())

# A sha1 hash is 160 bits -> 20 bytes -> 40 hex characters. We start with 8
# characters (32 bits) of entropy since that is short and _very_ unlikely to collide
# with another PEX venv on this machine. If we still collide after using the whole
# sha1 (for a total of 33 collisions), then the universe is broken and we raise.
# It's the least we can do.
venv_hash = hashlib.sha1(venv_dir.encode("utf-8")).hexdigest()
collisions = []
for chars in range(8, len(venv_hash) + 1):
entropy = venv_hash[:chars]
short_venv_dir = CacheDir.VENVS.path("s", entropy, pex_root=pex_info.pex_root)
with atomic_directory(short_venv_dir) as short_venv:
if short_venv.is_finalized():
collisions.append(short_venv_dir)
if entropy == venv_hash:
raise RuntimeError(
"The venv for {pex} at {venv} has hash collisions with {count} "
"other {venvs}!\n{collisions}".format(
pex=pex_path,
venv=venv_dir,
count=len(collisions),
venvs=pluralize(collisions, "venv"),
collisions="\n".join(
"{index}.) {venv_path}".format(
index=index, venv_path=os.path.realpath(path)
)
for index, path in enumerate(collisions, start=1)
),
)
)
continue

with interpreter.path_mapping(short_venv.work_dir, short_venv_dir):
os.symlink(
os.path.relpath(venv_dir, short_venv_dir),
os.path.join(short_venv.work_dir, "venv"),
)
continue

os.symlink(venv_dir, os.path.join(short_venv.work_dir, "venv"))

# Loose PEXes don't need to unpack themselves to the PEX_ROOT before running;
# so we'll not have a stable base there to symlink from. As such, always copy
# for loose PEXes to ensure the PEX_ROOT venv is stable in the face of
# modification of the source loose PEX.
copy_mode = (
CopyMode.SYMLINK
if (pex.layout != Layout.LOOSE and not pex_info.venv_site_packages_copies)
else CopyMode.LINK
)

shebang = installer.populate_venv_from_pex(
virtualenv,
pex,
bin_path=pex_info.venv_bin_path,
python=os.path.join(
short_venv_dir,
"venv",
"bin",
os.path.basename(virtualenv.interpreter.binary),
),
collisions_ok=collisions_ok,
copy_mode=copy_mode,
hermetic_scripts=pex_info.venv_hermetic_scripts,
)
# Loose PEXes don't need to unpack themselves to the PEX_ROOT before
# running; so we'll not have a stable base there to symlink from. As
# such, always copy for loose PEXes to ensure the PEX_ROOT venv is
# stable in the face of modification of the source loose PEX.
copy_mode = (
CopyMode.SYMLINK
if (
pex.layout != Layout.LOOSE
and not pex_info.venv_site_packages_copies
)
else CopyMode.LINK
)

# There are popular Linux distributions with shebang length limits
# (BINPRM_BUF_SIZE in /usr/include/linux/binfmts.h) set at 128 characters, so
# we warn in the _very_ unlikely case that our shortened shebang is longer than
# this.
if len(shebang) > 128:
pex_warnings.warn(
"The venv for {pex} at {venv} has script shebangs of {shebang!r} with "
"{count} characters. On some systems this may be too long and cause "
"problems running the venv scripts. You may be able adjust PEX_ROOT "
"from {pex_root} to a shorter path as a work-around.".format(
pex=pex_path,
venv=venv_dir,
shebang=shebang,
count=len(shebang),
pex_root=pex_info.pex_root,
shebang = installer.populate_venv_from_pex(
virtualenv,
pex,
bin_path=pex_info.venv_bin_path,
python=os.path.join(
short_venv_dir,
"venv",
"bin",
os.path.basename(virtualenv.interpreter.binary),
),
collisions_ok=collisions_ok,
copy_mode=copy_mode,
hermetic_scripts=pex_info.venv_hermetic_scripts,
)
)

break
# There are popular Linux distributions with shebang length limits
# (BINPRM_BUF_SIZE in /usr/include/linux/binfmts.h) set at 128
# characters, so we warn in the _very_ unlikely case that our shortened
# shebang is longer than this.
if len(shebang) > 128:
pex_warnings.warn(
"The venv for {pex} at {venv} has script shebangs of "
"{shebang!r} with {count} characters. On some systems this may "
"be too long and cause problems running the venv scripts. You "
"may be able adjust PEX_ROOT from {pex_root} to a shorter path "
"as a work-around.".format(
pex=pex_path,
venv=venv_dir,
shebang=shebang,
count=len(shebang),
pex_root=pex_info.pex_root,
)
)

break

return VenvPex(venv_dir, hermetic_scripts=pex_info.venv_hermetic_scripts)

Expand Down
Loading