Skip to content

Commit

Permalink
Support for merging the cache image from shards. (#2675)
Browse files Browse the repository at this point in the history
Use this support to alter the CI job into a multi-sharded cache image
build job.

Fixes #2233.
  • Loading branch information
jsirois authored Feb 13, 2025
1 parent 7e2483d commit f129d20
Show file tree
Hide file tree
Showing 6 changed files with 262 additions and 34 deletions.
57 changes: 54 additions & 3 deletions .github/workflows/build-cache-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,64 @@ defaults:
env:
SCIENCE_AUTH_API_GITHUB_COM_BEARER: ${{ secrets.GITHUB_TOKEN }}
jobs:
# Shard job: builds one cache image per tox environment in the matrix and exports each image as
# a tarball under `export/`, which is then uploaded as a workflow artifact.
build-cache-images:
name: Build ./dtox.sh Cache for ${{ matrix.tox-env }}
runs-on: ubuntu-24.04
strategy:
matrix:
include:
# TODO(John Sirois): Automate syncing these with ci.yml or else automate a sync check.
- tox-env: py27-pip20
- tox-env: py313-pip25_0_1
- tox-env: pypy310-pip24_3_1
- tox-env: py27-pip20-integration
- tox-env: py38-pip22_3_1-integration
- tox-env: py313-pip25_0_1-integration
- tox-env: py314-pip25_0_1-integration
- tox-env: pypy310-pip24_3_1-integration
steps:
- name: Checkout Pex
uses: actions/checkout@v4
- name: Setup Python 3.11
uses: actions/setup-python@v5
with:
python-version: "3.11"
# Runs the build-cache-image tox env in `build` style for this shard's single tox env and
# exports the resulting image tarball into the `export` directory.
- name: Build & Export Cache Image for ${{ matrix.tox-env }}
run: |
python -mvenv .venv
source .venv/bin/activate
python -V
pip install -U pip
pip -V
pip install tox
tox --version
tox -e build-cache-image -- \
--color \
--build-style build \
--dist-dir export \
--post-action export \
--tox-env ${{ matrix.tox-env }}
# NOTE(review): `.gitignore` is presumably listed alongside the tarballs so that the artifact
# root is the repository root and the `export/` path prefix is preserved on download - confirm.
- uses: actions/upload-artifact@v4
with:
path: |
export/*.tar
.gitignore
build-cache-image:
name: Build ./dtox.sh Cache Image
name: Merge and push unified ./dtox.sh Cache Image
runs-on: ubuntu-24.04
needs: build-cache-images
steps:
- name: Checkout Pex
uses: actions/checkout@v4
- name: Setup Python 3.11
uses: actions/setup-python@v5
with:
python-version: "3.11"
- name: Build & Push Cache Image
- uses: actions/download-artifact@v4
- name: Merge & Push Cache Image
run: |
python -mvenv .venv
source .venv/bin/activate
Expand All @@ -35,4 +82,8 @@ jobs:
echo "${{ secrets.GITHUB_TOKEN }}" | \
docker login ghcr.io -u ${{ github.actor }} --password-stdin
tox -e build-cache-image -- --color --push
tox -e build-cache-image -- \
--color \
--build-style merge \
--dist-dir export \
--post-action push
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ jobs:
strategy:
matrix:
include:
# N.B.: When editing these, also edit build-cache-image.yml.

# Unit tests:
# -----------
- tox-env: py27-pip20
Expand Down
6 changes: 4 additions & 2 deletions docker/cache/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# A data image with the necessary binaries and libraries to develop pex.

# Populate the ~/.pex_dev cache.
FROM ghcr.io/pex-tool/pex/base:latest as cache
FROM ghcr.io/pex-tool/pex/base:latest AS cache

ARG FINGERPRINT=unset
ARG PEX_REPO=https://github.com/pex-tool/pex
ARG GIT_REF=HEAD

Expand All @@ -15,7 +16,8 @@ RUN git clone "${PEX_REPO}" /development/pex && \

WORKDIR /development/pex
COPY populate_cache.sh /root/
RUN /root/populate_cache.sh /development/pex_dev "${TOX_ENVS}"
RUN /root/populate_cache.sh /development/pex_dev "${TOX_ENVS}" && \
touch "/development/pex_dev/.fingerprint-${FINGERPRINT}"

# Grab just the ~/.pex_dev cache files for the final data-only image.
FROM scratch
Expand Down
228 changes: 200 additions & 28 deletions scripts/build-cache-image.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,101 @@

from __future__ import annotations

import atexit
import glob
import hashlib
import itertools
import json
import logging
import os
import shutil
import subprocess
import sys
import tarfile
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
from enum import Enum
from pathlib import Path, PurePath
from subprocess import CalledProcessError
from tempfile import mkdtemp
from typing import Any, Iterable, Iterator

import coloredlogs
import colors
import yaml


class BuildStyle(Enum):
    """The supported ways of producing the cache image."""

    BUILD = "build"
    MERGE = "merge"

    def __str__(self) -> str:
        # Render the member as its plain string value rather than `BuildStyle.<NAME>`.
        return str(self.value)


class PostBuildAction(Enum):
    """The optional actions that can follow building and tagging the cache image."""

    PUSH = "push"
    EXPORT = "export"

    def __str__(self) -> str:
        # Render the member as its plain string value rather than `PostBuildAction.<NAME>`.
        return str(self.value)


# Files and directories (relative to the working directory) whose contents are hashed by
# `fingerprint_cache_inputs`.
_CACHE_INPUTS = (
    Path("docker", "cache"),
    Path("testing", "__init__.py"),  # Sets up fixed set of pyenv interpreters for ITs.
    Path("testing", "devpi.py"),
    Path("testing", "devpi-server.lock"),
)


def fingerprint_cache_inputs(image_id: str | None = None) -> str:
    """Return a SHA-256 hex digest identifying the current cache inputs.

    Every file listed in, or found beneath a directory listed in, `_CACHE_INPUTS` is hashed
    individually. The per-file digests, keyed by path, are then combined with `image_id` in a
    key-sorted JSON document which is itself hashed to produce the final fingerprint.

    :param image_id: An optional identifier mixed into the fingerprint.
    :return: The hex-encoded SHA-256 fingerprint.
    """

    def expand(cache_input: Path) -> Iterator[Path]:
        # A plain file stands for itself; a directory stands for every file beneath it.
        if not cache_input.is_dir():
            yield cache_input
            return
        for dirpath, _, filenames in os.walk(cache_input):
            for filename in filenames:
                yield Path(dirpath) / filename

    input_files: list[Path] = []
    for cache_input in _CACHE_INPUTS:
        input_files.extend(expand(cache_input))

    digests = {}
    for input_file in sorted(input_files):
        digests[str(input_file)] = hashlib.sha256(input_file.read_bytes()).hexdigest()

    # Key-sorted JSON gives a stable serialization to hash.
    payload = json.dumps({"image_id": image_id, "hashes": digests}, sort_keys=True)
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()


def export_tarball_path(sub_image: str | None = None) -> Path:
    """Return a path for a cache image tarball inside a fresh temporary directory.

    The tarball itself is not created; only its containing temporary directory is. That
    directory, and anything still inside it, is removed at interpreter exit.

    :param sub_image: The name of the sub-image the tarball is for; when not given (or empty) the
        tarball is named for the full cache image (`cache-all.tar`).
    :return: The path the tarball should be written to.
    """
    tmp_dir = Path(mkdtemp())
    # Clean up the temporary directory rather than the tarball path: `shutil.rmtree` does not
    # remove regular files, so pointing it at the tarball (with errors ignored) silently left
    # both the directory and any tarball in it behind.
    atexit.register(shutil.rmtree, str(tmp_dir), ignore_errors=True)
    return tmp_dir / f"cache-{sub_image or 'all'}.tar"


def create_image_tag(tag: str, sub_image: str | None = None) -> str:
    """Return the fully qualified cache image reference for `tag`.

    :param tag: The tag to apply to the image.
    :param sub_image: An optional sub-image name appended as an extra path component of the image
        repository; when not given (or empty) the base cache image repository is used.
    :return: A reference of the form `<repository>[/<sub_image>]:<tag>`.
    """
    repository = "ghcr.io/pex-tool/pex/cache"
    qualified = f"{repository}/{sub_image}" if sub_image else repository
    return f"{qualified}:{tag}"


def build_cache_image(
tox_envs: list[str], tag: str, pex_repo: str, git_ref: str, push: bool = False
tox_envs: Iterable[str],
image_id: str | None,
image_tag: str,
pex_repo: str,
git_ref: str,
) -> None:
image_tag = f"ghcr.io/pex-tool/pex/cache:{tag}"
subprocess.run(
args=[
"docker",
"buildx",
"build",
"--progress",
"plain",
"--build-arg",
f"FINGERPRINT={fingerprint_cache_inputs(image_id=image_id)}",
"--build-arg",
f"PEX_REPO={pex_repo}",
"--build-arg",
Expand All @@ -33,11 +109,9 @@ def build_cache_image(
],
check=True,
)
if push:
subprocess.run(args=["docker", "push", image_tag], check=True)


def main() -> None:
def main() -> Any:
parser = ArgumentParser(
formatter_class=ArgumentDefaultsHelpFormatter,
description=(
Expand Down Expand Up @@ -73,12 +147,37 @@ def main() -> None:
help="The git ref to use within `--pex-repo`.",
)
parser.add_argument(
"--push",
default=False,
action="store_true",
help="Push the image to the registry after building and tagging it.",
"--build-style",
default=BuildStyle.BUILD,
choices=BuildStyle,
type=BuildStyle,
help="The method to use to build the cache image.",
)
parser.add_argument(
"--post-action",
dest="post_build_action",
default=None,
choices=PostBuildAction,
type=PostBuildAction,
help="An action to execute after building and tagging the cache image.",
)
parser.add_argument(
"--dist-dir",
default=Path("dist"),
type=Path,
help="The directory to import and export image tarballs.",
)
parser.add_argument(
"--tox-env",
dest="tox_envs",
action="append",
default=[],
help=(
"The tox environments to execute to build the cache image. By default, all Linux test "
"environments run in CI are selected. The option can either be repeated or environment "
"names can be joined by commas."
),
)

options = parser.parse_args()

coloredlogs.install(
Expand All @@ -89,27 +188,100 @@ def main() -> None:
logging.root.level, "Logging configured at level {level}.".format(level=options.log_level)
)

with (Path(".github") / "workflows" / "ci.yml").open() as fp:
data = yaml.full_load(fp)
tox_envs = sorted(
set(
sub_image: str | None = None
if options.build_style is BuildStyle.MERGE:
image_tag = create_image_tag(options.tag)
chroot = Path(mkdtemp())
atexit.register(shutil.rmtree, str(chroot), ignore_errors=True)

tarballs = glob.glob(str(options.dist_dir / "cache-*.tar"))
if len(tarballs) == 1:
merged_tarball = Path(tarballs[0])
else:
for index, tarball in enumerate(tarballs, start=1):
logger.info(f"Extracting {index} of {len(tarballs)} tarballs at {tarball}...")
with tarfile.open(tarball) as tf:
tf.extractall(chroot)

logger.info(f"Merging {len(tarballs)} extracted tarballs...")
merged_tarball = export_tarball_path()
with tarfile.open(merged_tarball, "w") as tf:
tf.add(chroot, arcname="/")

logger.info(f"Importing merged tarball to {image_tag}...")
subprocess.run(args=["docker", "import", merged_tarball, image_tag], check=True)
else:
with (Path(".github") / "workflows" / "ci.yml").open() as fp:
data = yaml.full_load(fp)
all_tox_envs = frozenset(
entry["tox-env"]
for entry in data["jobs"]["linux-tests"]["strategy"]["matrix"]["include"]
)
)
selected_tox_envs = (
frozenset(
itertools.chain.from_iterable(tox_envs.split(",") for tox_envs in options.tox_envs)
)
if options.tox_envs
else all_tox_envs
)
bad_tox_envs = selected_tox_envs - all_tox_envs
if bad_tox_envs:
return colors.red(
os.linesep.join(
(
"The following selected tox envs are not used in Linux CI test shards:",
*(f" {bad_tox_env}" for bad_tox_env in sorted(bad_tox_envs)),
"Valid tox envs are:",
*(f" {valid_tox_env}" for valid_tox_env in sorted(all_tox_envs)),
)
)
)
tox_envs = sorted(selected_tox_envs)

logger.info(f"Building caches for {len(tox_envs)} tox environments.")
for tox_env in tox_envs:
logger.debug(tox_env)
if options.tox_envs:
sub_image = (
tox_envs[0]
if len(tox_envs) == 1
else hashlib.sha256("|".join(tox_envs).encode("utf-8")).hexdigest()
)

build_cache_image(
tox_envs,
tag=options.tag,
pex_repo=options.pex_repo,
git_ref=options.git_ref,
push=options.push,
)
image_tag = create_image_tag(options.tag, sub_image=sub_image)
logger.info(f"Building caches for {len(tox_envs)} tox environments.")
for tox_env in tox_envs:
logger.debug(tox_env)

build_cache_image(
tox_envs,
image_id=sub_image,
image_tag=image_tag,
pex_repo=options.pex_repo,
git_ref=options.git_ref,
)

if options.post_build_action is PostBuildAction.EXPORT:
cache_tar = export_tarball_path(sub_image=sub_image)

container_name = cache_tar.stem
subprocess.run(args=["docker", "remove", "--force", container_name])
subprocess.run(
args=["docker", "create", "--name", container_name, image_tag, "true"], check=True
)

subprocess.run(args=["docker", "export", container_name, "--output", cache_tar], check=True)
subprocess.run(args=["docker", "remove", container_name])

options.dist_dir.mkdir(parents=True, exist_ok=True)
dst = options.dist_dir / cache_tar.name
shutil.move(cache_tar, dst)
os.chmod(dst, 0o644)
logger.info(f"Exported cache image to {dst}.")

if options.post_build_action is PostBuildAction.PUSH:
subprocess.run(args=["docker", "push", image_tag], check=True)


if __name__ == "__main__":
main()
try:
sys.exit(main())
except CalledProcessError as e:
sys.exit(colors.red(str(e)))
2 changes: 1 addition & 1 deletion testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,7 @@ def ensure_python_distribution(version):
# linking the wrong libpython, force `RPATH`, which is searched 1st by the linker,
# with `--disable-new-dtags`.
env["LDFLAGS"] = "-Wl,--disable-new-dtags"
subprocess.check_call([pyenv, "install", "--keep", version], env=env)
subprocess.check_call([pyenv, "install", version], env=env)
subprocess.check_call([pip, "install", "-U", "pip<22.1"])

major, minor = version.split(".")[:2]
Expand Down
Loading

0 comments on commit f129d20

Please sign in to comment.