def parameterize_named(*values):
    """
    Build a ``pytest.mark.parametrize`` decorator from dicts of named arguments.

    Each positional argument is a dict mapping test-argument names to the value
    for one test case. The argument names (and their order) are taken from the
    first dict.

    Raises
    ------
    ValueError
        If no dicts are given, or if a dict's keys differ from the first one's.
    """
    if not values:
        raise ValueError("parameterize_named requires at least one dict of arguments")

    keys = list(values[0].keys())
    for d in values:
        if set(d.keys()) != set(keys):
            raise ValueError(
                f"All cases must use the same argument names, expected {keys} but got {list(d.keys())}"
            )

    if len(keys) == 1:
        # A single argument takes bare values rather than 1-tuples
        return pytest.mark.parametrize(",".join(keys), [d[keys[0]] for d in values])

    # Look every value up by name so that a dict written with its keys in a
    # different order than the first one still lines up with the argument names
    return pytest.mark.parametrize(",".join(keys), [tuple(d[k] for k in keys) for d in values])


@parameterize_named(
    dict(
        tlcpackstaging_body={
            "results": [
                {
                    "last_updated": "2022-06-01T00:00:00.123456Z",
                    "name": "abc-abc-123",
                },
            ]
        },
        tlcpack_body={
            "results": [
                {
                    "last_updated": "2022-06-01T00:00:00.123456Z",
                    "name": "abc-abc-123",
                },
            ]
        },
        expected="Tag names were the same, no update needed",
    ),
    dict(
        tlcpackstaging_body={
            "results": [
                {
                    "last_updated": "2022-06-01T00:00:00.123456Z",
                    "name": "abc-abc-234",
                },
            ]
        },
        tlcpack_body={
            "results": [
                {
                    "last_updated": "2022-06-01T00:00:00.123456Z",
                    "name": "abc-abc-123",
                },
            ]
        },
        expected="Using tlcpackstaging tag on tlcpack",
    ),
    dict(
        tlcpackstaging_body={
            "results": [
                {
                    "last_updated": "2022-06-01T00:00:00.123456Z",
                    "name": "abc-abc-123",
                },
            ]
        },
        tlcpack_body={
            "results": [
                {
                    "last_updated": "2022-06-01T00:01:00.123456Z",
                    "name": "abc-abc-234",
                },
            ]
        },
        expected="Found newer image, using: tlcpack",
    ),
)
def test_open_docker_update_pr(tmpdir_factory, tlcpackstaging_body, tlcpack_body, expected):
    """
    Run open_docker_update_pr.py in dry-run mode against mocked Docker Hub data
    and check that the expected message shows up in its output.
    """
    tag_script = REPO_ROOT / "tests" / "scripts" / "open_docker_update_pr.py"

    # The script reads the URL of the 'origin' remote, so give it a throwaway
    # repository that has one configured
    git = TempGit(tmpdir_factory.mktemp("tmp_git_dir"))
    git.run("init")
    git.run("config", "user.name", "ci")
    git.run("config", "user.email", "email@example.com")
    git.run("checkout", "-b", "main")
    git.run("remote", "add", "origin", "https://github.com/apache/tvm.git")
    images = [
        "ci_lint",
        "ci_gpu",
        "ci_cpu",
        "ci_wasm",
        "ci_i386",
        "ci_qemu",
        "ci_arm",
        "ci_hexagon",
    ]

    # Every image gets the same pair of responses; the tlcpackstaging entries are
    # keyed with underscores and the tlcpack entries with dashes
    docker_data = {}
    for image in images:
        docker_data[f"repositories/tlcpackstaging/{image}/tags"] = tlcpackstaging_body
        docker_data[f"repositories/tlcpack/{image.replace('_', '-')}/tags"] = tlcpack_body

    proc = subprocess.run(
        [
            str(tag_script),
            "--dry-run",
            "--testing-docker-data",
            json.dumps(docker_data),
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        encoding="utf-8",
        cwd=git.cwd,
        env={"GITHUB_TOKEN": "1234"},
        check=False,
    )

    if proc.returncode != 0:
        # stderr is redirected into stdout above, so proc.stderr is always None
        # and only the merged stream is worth reporting
        raise RuntimeError(f"Process failed:\noutput (stdout + stderr):\n{proc.stdout}")

    assert_in(expected, proc.stdout)
Dict, Tuple, Any, Optional, List @@ -85,8 +85,13 @@ def _request(self, full_url: str, body: Dict[str, Any], method: str) -> Dict[str data = data.encode("utf-8") req.add_header("Content-Length", len(data)) - with request.urlopen(req, data) as response: - response = json.loads(response.read()) + try: + with request.urlopen(req, data) as response: + response = json.loads(response.read()) + except error.HTTPError as e: + logging.info(f"Error response: {e.read().decode()}") + raise e + return response def put(self, url: str, data: Dict[str, Any]) -> Dict[str, Any]: diff --git a/tests/scripts/open_docker_update_pr.py b/tests/scripts/open_docker_update_pr.py new file mode 100755 index 000000000000..6c1bcfa5285a --- /dev/null +++ b/tests/scripts/open_docker_update_pr.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
import argparse
import re
import logging
import datetime
import os
import json
from urllib import error
from typing import List, Dict, Any, Optional, Callable
from git_utils import git, parse_remote, GitHubRepo
from cmd_utils import REPO_ROOT, init_log
from should_rebuild_docker import docker_api

JENKINSFILE = REPO_ROOT / "jenkins" / "Jenkinsfile.j2"
GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]
BRANCH = "nightly-docker-update"


def _testing_docker_api(data: Dict[str, Any]) -> Callable[[str], Dict[str, Any]]:
    """
    Returns a function that can be used in place of docker_api. URLs present in
    'data' return the stored value, anything else raises a 404 HTTPError.
    """

    def mock(url: str) -> Dict[str, Any]:
        if url in data:
            return data[url]
        else:
            raise error.HTTPError(url, 404, f"Not found: {url}", {}, None)

    return mock


def parse_docker_date(d: str) -> datetime.datetime:
    """Turn a date string from the Docker API into a (naive) datetime object"""
    return datetime.datetime.strptime(d, "%Y-%m-%dT%H:%M:%S.%fZ")


def latest_tag(user: str, repo: str) -> Dict[str, Any]:
    """
    Queries Docker Hub and finds the most recent tag for the specified image/repo pair

    Returns a copy of the tag entry with "last_updated" parsed into a datetime.
    Raises RuntimeError if the repository has no tags.
    """
    r = docker_api(f"repositories/{user}/{repo}/tags")
    results = r["results"]
    if not results:
        raise RuntimeError(f"No tags found for {user}/{repo}")

    # Work on copies so the API response is left untouched. Re-parsing an entry
    # that was already converted in place would fail if the same response
    # object is handed out twice (as the testing mock does)
    parsed = [
        {**result, "last_updated": parse_docker_date(result["last_updated"])}
        for result in results
    ]

    # max() keeps the first maximum it sees, so walk the list backwards to pick
    # the last entry on ties, the same one a stable sort followed by [-1] picks
    return max(reversed(parsed), key=lambda d: d["last_updated"])


def latest_tlcpackstaging_image(source: str) -> Optional[str]:
    """
    Finds the latest full tag to use in the Jenkinsfile or returns None if no
    update is needed

    'source' is the image currently in the Jenkinsfile, in the form
    '<user>/<repo>:<tag>'.
    """
    name, current_tag = source.split(":")
    user, repo = name.split("/")
    logging.info(
        f"Running with name: {name}, current_tag: {current_tag}, user: {user}, repo: {repo}"
    )

    # Same repo name, but with underscores in place of dashes for tlcpackstaging
    staging_repo = repo.replace("-", "_")
    latest_tlcpackstaging_tag = latest_tag(user="tlcpackstaging", repo=staging_repo)
    logging.info(f"Found latest tlcpackstaging tag:\n{latest_tlcpackstaging_tag}")

    if latest_tlcpackstaging_tag["name"] == current_tag:
        logging.info("tlcpackstaging tag is the same as the one in the Jenkinsfile")

    latest_tlcpack_tag = latest_tag(user="tlcpack", repo=repo)
    logging.info(f"Found latest tlcpack tag:\n{latest_tlcpack_tag}")

    if latest_tlcpack_tag["name"] == latest_tlcpackstaging_tag["name"]:
        logging.info("Tag names were the same, no update needed")
        return None

    if latest_tlcpack_tag["last_updated"] > latest_tlcpackstaging_tag["last_updated"]:
        new_spec = f"tlcpack/{repo}:{latest_tlcpack_tag['name']}"
    else:
        # Even if the image doesn't exist in tlcpack, it will fall back to tlcpackstaging
        # so hardcode the username here
        new_spec = f"tlcpack/{repo}:{latest_tlcpackstaging_tag['name']}"
        logging.info("Using tlcpackstaging tag on tlcpack")

    logging.info(f"Found newer image, using: {new_spec}")
    return new_spec


if __name__ == "__main__":
    init_log()
    description = "Open a PR to update the Docker images to use the latest available in tlcpackstaging"
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument("--remote", default="origin", help="ssh remote to parse")
    parser.add_argument("--dry-run", action="store_true", help="don't send PR to GitHub")
    parser.add_argument("--testing-docker-data", help="JSON data to mock Docker Hub API response")
    args = parser.parse_args()

    # Install test mock if necessary. This rebinds the module-level name that
    # latest_tag looks up, so it has to stay at module scope
    if args.testing_docker_data is not None:
        docker_api = _testing_docker_api(data=json.loads(args.testing_docker_data))

    remote = git(["config", "--get", f"remote.{args.remote}.url"])
    user, repo = parse_remote(remote)

    # Read the existing images from the Jenkinsfile
    logging.info(f"Reading {JENKINSFILE}")
    with open(JENKINSFILE) as f:
        content = f.readlines()

    # Build a new Jenkinsfile with the latest images from tlcpack or tlcpackstaging
    new_content = []
    for line in content:
        # Image lines look like: ci_gpu = 'tlcpack/ci-gpu:<tag>'
        m = re.match(r"^(ci_[a-zA-Z0-9]+) = \'(.*)\'", line.strip())
        if m is not None:
            logging.info(f"Found match on line {line.strip()}")
            groups = m.groups()
            new_image = latest_tlcpackstaging_image(groups[1])
            if new_image is None:
                logging.info("No new image found")
                new_content.append(line)
            else:
                logging.info(f"Using new image {new_image}")
                new_content.append(f"{groups[0]} = '{new_image}'\n")
        else:
            new_content.append(line)

    # 'git commit' exits with an error when there is nothing to commit, so stop
    # here instead of failing the nightly job when every image is up to date
    if new_content == content:
        logging.info("No Docker image updates found, nothing to commit")
        raise SystemExit(0)

    # Write out the new content
    if args.dry_run:
        logging.info(f"Dry run, would have written new content to {JENKINSFILE}")
    else:
        logging.info(f"Writing new content to {JENKINSFILE}")
        with open(JENKINSFILE, "w") as f:
            f.write("".join(new_content))

    # Publish the PR
    title = "[ci][docker] Nightly Docker image update"
    body = "This bumps the Docker images to the latest versions from Docker Hub."
    message = f"{title}\n\n\n{body}"

    if args.dry_run:
        logging.info("Dry run, would have committed Jenkinsfile")
    else:
        logging.info("Creating git commit")
        # -B resets the branch if it already exists, and the force push below
        # then replaces whatever a previous run left on the remote
        git(["checkout", "-B", BRANCH])
        git(["add", str(JENKINSFILE.relative_to(REPO_ROOT))])
        git(["config", "user.name", "tvm-bot"])
        git(["config", "user.email", "95660001+tvm-bot@users.noreply.github.com"])
        git(["commit", "-m", message])
        git(["push", "--set-upstream", args.remote, BRANCH, "--force"])

    logging.info("Sending PR to GitHub")
    github = GitHubRepo(user=user, repo=repo, token=GITHUB_TOKEN)
    data = {
        "title": title,
        "body": body,
        "head": BRANCH,
        "base": "main",
        "maintainer_can_modify": True,
    }
    url = "pulls"
    if args.dry_run:
        logging.info(f"Dry run, would have sent {data} to {url}")
    else:
        try:
            github.post(url, data=data)
        except error.HTTPError as e:
            # Ignore the exception if the PR already exists (which gives a 422). The
            # existing PR will have been updated in place
            if e.code == 422:
                logging.info("PR already exists, ignoring error")
                logging.exception(e)
            else:
                raise