Skip to content

Commit

Permalink
[ci][docker] Send a PR to bump the Docker images nightly
Browse files Browse the repository at this point in the history
See #11768 for details

This adds a GitHub Action to check for the latest images on Docker Hub via the Docker API and update the `Jenkinsfile` accordingly. It sends this in as a PR for a committer to review and merge.
  • Loading branch information
driazati committed Jun 22, 2022
1 parent 2708b6c commit eb87aea
Show file tree
Hide file tree
Showing 4 changed files with 340 additions and 3 deletions.
31 changes: 31 additions & 0 deletions .github/workflows/nightly_docker_update.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@

# Workflow that runs tests/scripts/open_docker_update_pr.py to open a PR
# bumping the CI Docker images.
name: Nightly Docker Update
on:
schedule:
# Daily trigger: minute 0, hour 0
- cron: "0 0 * * *"
# Also allow the workflow to be started manually
workflow_dispatch:

# All runs share one concurrency group; a newly started run cancels one that
# is still in progress.
concurrency:
group: nightly-docker-update
cancel-in-progress: true

jobs:
open_update_pr:
# NOTE(review): the script only appears to push a branch and open a PR, so
# `contents` and `pull-requests` may be the only scopes needed - confirm
# before narrowing.
permissions:
actions: write
checks: write
contents: write
id-token: write
issues: write
pull-requests: write
statuses: write
# NOTE(review): this restricts the job to the driazati/tvm repository -
# confirm whether the upstream repository was intended.
if: github.repository == 'driazati/tvm'
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- name: Open PR to update Docker images
env:
# The script reads GITHUB_TOKEN from the environment to call the GitHub API
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
set -eux
python tests/scripts/open_docker_update_pr.py
113 changes: 113 additions & 0 deletions tests/python/ci/test_ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@
from test_utils import REPO_ROOT


def parameterize_named(*values):
    """Build a ``pytest.mark.parametrize`` decorator from keyword-style cases.

    Each positional argument is a dict mapping parameter names to the values
    of one test case. The keys of the first dict define the parameter names
    and their order; every other dict is looked up by those keys, so the
    cases do not need to list their keys in the same order.

    Args:
        *values: One dict per test case, all sharing the same keys.

    Returns:
        The ``pytest.mark.parametrize`` mark to decorate a test function with.

    Raises:
        ValueError: If no cases are given.
        KeyError: If a case is missing one of the first case's keys.
    """
    if not values:
        raise ValueError("parameterize_named requires at least one case")

    keys = list(values[0].keys())
    if len(keys) == 1:
        # A single parameter takes bare values rather than 1-tuples
        return pytest.mark.parametrize(",".join(keys), [d[keys[0]] for d in values])

    # Index by key instead of relying on each dict's insertion order so that
    # values can never be bound to the wrong parameter name
    return pytest.mark.parametrize(
        ",".join(keys), [tuple(d[key] for key in keys) for d in values]
    )


class TempGit:
def __init__(self, cwd):
self.cwd = cwd
Expand Down Expand Up @@ -788,6 +796,111 @@ def run(type, data, check):
)


@parameterize_named(
    # Both registries report the same tag name: nothing to update
    dict(
        tlcpackstaging_body={
            "results": [
                {
                    "last_updated": "2022-06-01T00:00:00.123456Z",
                    "name": "abc-abc-123",
                },
            ]
        },
        tlcpack_body={
            "results": [
                {
                    "last_updated": "2022-06-01T00:00:00.123456Z",
                    "name": "abc-abc-123",
                },
            ]
        },
        expected="Tag names were the same, no update needed",
    ),
    # Tag names differ and tlcpack is not newer: the tlcpackstaging tag is used
    dict(
        tlcpackstaging_body={
            "results": [
                {
                    "last_updated": "2022-06-01T00:00:00.123456Z",
                    "name": "abc-abc-234",
                },
            ]
        },
        tlcpack_body={
            "results": [
                {
                    "last_updated": "2022-06-01T00:00:00.123456Z",
                    "name": "abc-abc-123",
                },
            ]
        },
        expected="Using tlcpackstaging tag on tlcpack",
    ),
    # tlcpack has a strictly newer tag: the tlcpack tag is used
    dict(
        tlcpackstaging_body={
            "results": [
                {
                    "last_updated": "2022-06-01T00:00:00.123456Z",
                    "name": "abc-abc-123",
                },
            ]
        },
        tlcpack_body={
            "results": [
                {
                    "last_updated": "2022-06-01T00:01:00.123456Z",
                    "name": "abc-abc-234",
                },
            ]
        },
        expected="Found newer image, using: tlcpack",
    ),
)
def test_open_docker_update_pr(tmpdir_factory, tlcpackstaging_body, tlcpack_body, expected):
    """Run open_docker_update_pr.py in dry-run mode against mocked Docker Hub data.

    The script is executed as a subprocess inside a throwaway git repository
    and its combined output must contain ``expected``.
    """
    tag_script = REPO_ROOT / "tests" / "scripts" / "open_docker_update_pr.py"

    git = TempGit(tmpdir_factory.mktemp("tmp_git_dir"))
    git.run("init")
    git.run("config", "user.name", "ci")
    # NOTE(review): this literal looks like an obfuscated e-mail placeholder -
    # confirm the intended address
    git.run("config", "user.email", "[email protected]")
    git.run("checkout", "-b", "main")
    git.run("remote", "add", "origin", "https://github.com/apache/tvm.git")
    images = [
        "ci_lint",
        "ci_gpu",
        "ci_cpu",
        "ci_wasm",
        "ci_i386",
        "ci_qemu",
        "ci_arm",
        "ci_hexagon",
    ]

    # Serve the same canned response for every image; the tlcpack repositories
    # use dashes where the tlcpackstaging ones use underscores
    docker_data = {}
    for image in images:
        docker_data[f"repositories/tlcpackstaging/{image}/tags"] = tlcpackstaging_body
        docker_data[f"repositories/tlcpack/{image.replace('_', '-')}/tags"] = tlcpack_body

    proc = subprocess.run(
        [
            str(tag_script),
            "--dry-run",
            "--testing-docker-data",
            json.dumps(docker_data),
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        encoding="utf-8",
        cwd=git.cwd,
        env={"GITHUB_TOKEN": "1234"},
        check=False,
    )

    if proc.returncode != 0:
        # stderr is merged into stdout above, so proc.stderr is always None;
        # report the combined stream instead of a misleading "stderr: None"
        raise RuntimeError(
            f"Process failed with exit code {proc.returncode}:\n"
            f"output (stdout + stderr):\n{proc.stdout}"
        )

    assert_in(expected, proc.stdout)


@pytest.mark.parametrize(
"changed_files,name,check,expected_code",
[
Expand Down
11 changes: 8 additions & 3 deletions tests/scripts/git_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import re
import base64
import logging
from urllib import request
from urllib import request, error
from typing import Dict, Tuple, Any, Optional, List


Expand Down Expand Up @@ -85,8 +85,13 @@ def _request(self, full_url: str, body: Dict[str, Any], method: str) -> Dict[str
data = data.encode("utf-8")
req.add_header("Content-Length", len(data))

with request.urlopen(req, data) as response:
response = json.loads(response.read())
try:
with request.urlopen(req, data) as response:
response = json.loads(response.read())
except error.HTTPError as e:
logging.info(f"Error response: {e.read().decode()}")
raise e

return response

def put(self, url: str, data: Dict[str, Any]) -> Dict[str, Any]:
Expand Down
188 changes: 188 additions & 0 deletions tests/scripts/open_docker_update_pr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import argparse
import re
import logging
import datetime
import os
import json
from urllib import error
from typing import List, Dict, Any, Optional, Callable
from git_utils import git, parse_remote, GitHubRepo
from cmd_utils import REPO_ROOT, init_log
from should_rebuild_docker import docker_api

# Jenkinsfile template whose `ci_* = '...'` image lines are read and rewritten
JENKINSFILE = REPO_ROOT / "jenkins" / "Jenkinsfile.j2"
# Token passed to GitHubRepo for API calls; read eagerly, so a missing
# environment variable raises KeyError as soon as this module is loaded
GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
# Branch that is force-pushed with the update and used as the PR head
BRANCH = "nightly-docker-update"


def _testing_docker_api(data: Dict[str, Any]) -> Callable[[str], Dict[str, Any]]:
    """Build a stand-in for ``docker_api`` that serves canned responses.

    Args:
        data: Mapping from request URL to the response to return for it.

    Returns:
        A callable taking a URL; it returns the matching entry of ``data`` or
        raises ``urllib.error.HTTPError`` with code 404 for unknown URLs.
    """

    def mock(url: str) -> Dict[str, Any]:
        # Guard clause: an unknown URL is reported as a 404
        if url not in data:
            raise error.HTTPError(url, 404, f"Not found: {url}", {}, None)
        return data[url]

    return mock


def parse_docker_date(d: str) -> datetime.datetime:
    """Turn a date string from the Docker API into a datetime object.

    Accepts ``YYYY-MM-DDTHH:MM:SS.ffffffZ`` and, as a fallback, the same
    layout without fractional seconds (``YYYY-MM-DDTHH:MM:SSZ``).

    Args:
        d: Timestamp string ending in a literal ``Z``.

    Returns:
        A naive ``datetime.datetime`` (the trailing ``Z`` is matched as a
        literal, no timezone is attached).

    Raises:
        ValueError: If ``d`` matches neither supported layout.
    """
    formats = ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ")
    for fmt in formats:
        try:
            return datetime.datetime.strptime(d, fmt)
        except ValueError:
            # Try the next layout; a timestamp may omit fractional seconds
            continue

    raise ValueError(f"Unrecognized Docker API date {d!r}, expected one of {formats}")


def latest_tag(user: str, repo: str) -> Dict[str, Any]:
    """
    Queries Docker Hub and finds the most recent tag for the specified image/repo pair

    Args:
        user: Docker Hub user or organization that owns the repository.
        repo: Repository (image) name.

    Returns:
        A copy of the tag entry with the greatest ``last_updated`` value, with
        ``last_updated`` converted to a ``datetime.datetime``. When several
        entries share the greatest value, the last one in the response wins.

    Raises:
        IndexError: If the repository has no tags.
    """
    r = docker_api(f"repositories/{user}/{repo}/tags")
    results = r["results"]
    if not results:
        raise IndexError(f"No tags found for {user}/{repo}")

    # Build new dicts rather than rewriting the response in place: parsing the
    # same response object twice would otherwise pass a datetime to strptime
    parsed = [
        {**result, "last_updated": parse_docker_date(result["last_updated"])}
        for result in results
    ]

    # sorted() is stable, so the last element is the newest entry
    return sorted(parsed, key=lambda d: d["last_updated"])[-1]


def latest_tlcpackstaging_image(source: str) -> Optional[str]:
    """
    Pick the image spec the Jenkinsfile should use for ``source``.

    ``source`` is a ``<user>/<repo>:<tag>`` string. The newest tag of the
    matching ``tlcpackstaging`` repository (dashes replaced by underscores) is
    compared with the newest tag of the ``tlcpack`` repository.

    Returns ``None`` when both registries report the same tag name, otherwise
    a ``tlcpack/<repo>:<tag>`` spec using whichever tag is newer (the
    tlcpackstaging tag when tlcpack is not strictly newer).
    """
    image_name, current_tag = source.split(":")
    user, repo = image_name.split("/")
    logging.info(
        f"Running with name: {image_name}, current_tag: {current_tag}, user: {user}, repo: {repo}"
    )

    staging = latest_tag(user="tlcpackstaging", repo=repo.replace("-", "_"))
    logging.info(f"Found latest tlcpackstaging tag:\n{staging}")
    if current_tag == staging["name"]:
        # Informational only; the comparison with tlcpack below still runs
        logging.info("tlcpackstaging tag is the same as the one in the Jenkinsfile")

    released = latest_tag(user="tlcpack", repo=repo)
    logging.info(f"Found latest tlcpack tag:\n{released}")

    if released["name"] == staging["name"]:
        logging.info("Tag names were the same, no update needed")
        return None

    if released["last_updated"] > staging["last_updated"]:
        chosen_tag = released["name"]
    else:
        # Even if the image doesn't exist in tlcpack, it will fall back to
        # tlcpackstaging, so the username stays hardcoded to tlcpack
        chosen_tag = staging["name"]
        logging.info("Using tlcpackstaging tag on tlcpack")

    new_spec = f"tlcpack/{repo}:{chosen_tag}"
    logging.info(f"Found newer image, using: {new_spec}")
    return new_spec


if __name__ == "__main__":
    # Script entry point: rewrite the image tags in the Jenkinsfile template,
    # push the result to BRANCH and open a pull request for it.
    init_log()
    description = (
        "Open a PR to update the Docker images to use the latest available in tlcpackstaging"
    )
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("--remote", default="origin", help="ssh remote to parse")
    parser.add_argument("--dry-run", action="store_true", help="don't send PR to GitHub")
    parser.add_argument("--testing-docker-data", help="JSON data to mock Docker Hub API response")
    args = parser.parse_args()

    # Install test mock if necessary. This rebinds the module-level name that
    # latest_tag() looks up, so it must stay at module scope.
    if args.testing_docker_data is not None:
        docker_api = _testing_docker_api(data=json.loads(args.testing_docker_data))

    remote = git(["config", "--get", f"remote.{args.remote}.url"])
    user, repo = parse_remote(remote)

    # Read the existing images from the Jenkinsfile
    logging.info(f"Reading {JENKINSFILE}")
    with open(JENKINSFILE) as f:
        content = f.readlines()

    # Build a new Jenkinsfile with the latest images from tlcpack or tlcpackstaging
    image_line = re.compile(r"^(ci_[a-zA-Z0-9]+) = \'(.*)\'")
    new_content = []
    for line in content:
        m = image_line.match(line.strip())
        if m is None:
            new_content.append(line)
            continue

        logging.info(f"Found match on line {line.strip()}")
        variable, current_image = m.groups()
        new_image = latest_tlcpackstaging_image(current_image)
        if new_image is None:
            logging.info("No new image found")
            new_content.append(line)
        else:
            logging.info(f"Using new image {new_image}")
            new_content.append(f"{variable} = '{new_image}'\n")

    # Without any change `git commit` has nothing to commit and exits
    # non-zero, which would fail the run, so stop here instead
    if new_content == content:
        logging.info("No image updates found, not opening a PR")
        raise SystemExit(0)

    # Write out the new content
    if args.dry_run:
        logging.info(f"Dry run, would have written new content to {JENKINSFILE}")
    else:
        logging.info(f"Writing new content to {JENKINSFILE}")
        with open(JENKINSFILE, "w") as f:
            f.write("".join(new_content))

    # Publish the PR
    title = "[ci][docker] Nightly Docker image update"
    body = "This bumps the Docker images to the latest versions from Docker Hub."
    message = f"{title}\n\n\n{body}"

    if args.dry_run:
        logging.info("Dry run, would have committed Jenkinsfile")
    else:
        logging.info("Creating git commit")
        git(["checkout", "-B", BRANCH])
        git(["add", str(JENKINSFILE.relative_to(REPO_ROOT))])
        git(["config", "user.name", "tvm-bot"])
        # NOTE(review): this literal looks like an obfuscated e-mail
        # placeholder - confirm the intended address
        git(["config", "user.email", "[email protected]"])
        git(["commit", "-m", message])
        # Force push so an existing update branch is replaced in place
        git(["push", "--set-upstream", args.remote, BRANCH, "--force"])

    logging.info("Sending PR to GitHub")
    github = GitHubRepo(user=user, repo=repo, token=GITHUB_TOKEN)
    data = {
        "title": title,
        "body": body,
        "head": BRANCH,
        "base": "main",
        "maintainer_can_modify": True,
    }
    url = "pulls"
    if args.dry_run:
        logging.info(f"Dry run, would have sent {data} to {url}")
    else:
        try:
            github.post(url, data=data)
        except error.HTTPError as e:
            # Ignore the exception if the PR already exists (which gives a 422). The
            # existing PR will have been updated in place
            if e.code == 422:
                logging.info("PR already exists, ignoring error")
                logging.exception(e)
            else:
                raise

0 comments on commit eb87aea

Please sign in to comment.