Skip to content

Commit

Permalink
Add from-job and from-pr option to breeze ci-image load (apache#45287)
Browse files Browse the repository at this point in the history
* add from-job and from-pr option to ci-image load

* add from-job and from-pr option to ci-image load
  • Loading branch information
gopidesupavan authored and got686-yandex committed Jan 30, 2025
1 parent d52f7f8 commit 4beb041
Show file tree
Hide file tree
Showing 8 changed files with 195 additions and 12 deletions.
2 changes: 1 addition & 1 deletion dev/breeze/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,6 @@ PLEASE DO NOT MODIFY THE HASH BELOW! IT IS AUTOMATICALLY UPDATED BY PRE-COMMIT.

---------------------------------------------------------------------------------------------------------

Package config hash: 6624da65e725e16cee3203afa884da62b719903470063a9d859bb4fae71346c9c4ee58d182b7f2a53f53eb10a72d62b4ae38d4e3d8a03cc293ae97aa62ec180e
Package config hash: 79fadb6850f8cd60994498d51df4f29046aab45e4bb15944afe8bbeacf76770e379d0462dced117e4dc911426dff136fef9b2d6a930957f829413e0ae2261cc9

---------------------------------------------------------------------------------------------------------
12 changes: 12 additions & 0 deletions dev/breeze/doc/06_managing_docker_images.rst
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,18 @@ You will find the artifacts for each image in the summary of the CI run. The art
contain the ".tar" image that should be used with ``--image-file`` flag of the load method. Make sure to
use the same ``--python`` version as the image was built with.

To load the image from a specific PR, you can use the following command:

.. code-block:: bash

    breeze ci-image load --from-pr 12345 --python 3.9 --platform linux/amd64

To load the image from a specific job run (for example 12538475388), you can use the following command. You can find the run id in the GitHub Actions runs.

.. code-block:: bash

    breeze ci-image load --from-job 12538475388 --python 3.9 --platform linux/amd64

.. image:: ./images/image_artifacts.png
:target: https://raw.githubusercontent.com/apache/airflow/main/dev/breeze/images/output_ci-image_load.svg
:width: 100%
Expand Down
34 changes: 25 additions & 9 deletions dev/breeze/doc/images/output_ci-image_load.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion dev/breeze/doc/images/output_ci-image_load.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
7872a5c2fc3666d543aec5d4feaa5cc4
f0f9a36fe07d5cdaf1b704ac473bb155
1 change: 1 addition & 0 deletions dev/breeze/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ dependencies = [
"tabulate>=0.9.0",
"tomli>=2.0.1; python_version < '3.11'",
"twine>=4.0.2",
"tqdm>=4.67.1"
]

[project.scripts]
Expand Down
30 changes: 29 additions & 1 deletion dev/breeze/src/airflow_breeze/commands/ci_image_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@
prepare_docker_build_command,
warm_up_docker_builder,
)
from airflow_breeze.utils.github import download_artifact_from_pr, download_artifact_from_run_id
from airflow_breeze.utils.image import run_pull_image, run_pull_in_parallel
from airflow_breeze.utils.mark_image_as_refreshed import mark_image_as_refreshed
from airflow_breeze.utils.md5_build_check import md5sum_check_if_build_is_needed
Expand All @@ -116,6 +117,22 @@
if TYPE_CHECKING:
from airflow_breeze.params.shell_params import ShellParams

# ``--from-job`` click option: takes a GitHub Actions run id. It is optional,
# defaults to an empty string and can also be supplied via the FROM_JOB
# environment variable.
option_from_job = click.option(
    "--from-job",
    envvar="FROM_JOB",
    default="",
    required=False,
    help="Optional run id of the github action job to load the image from.",
)

# ``--from-pr`` click option: takes a pull request number. It is optional,
# defaults to an empty string and can also be supplied via the FROM_PR
# environment variable.
option_from_pr = click.option(
    "--from-pr",
    envvar="FROM_PR",
    required=False,
    default="",
    help="Optional pr number of the github action job to load the image from. loads the image from the latest job.",
)


@click.group(
cls=BreezeGroup, name="ci-image", help="Tools that developers can use to manually manage CI images"
Expand Down Expand Up @@ -633,13 +650,17 @@ def save(
@option_skip_image_file_deletion
@option_verbose
@option_ci_image_file_to_load
@option_from_job
@option_from_pr
@option_dry_run
def load(
python: str,
platform: str,
github_repository: str,
image_file: Path | None,
skip_image_file_deletion: bool,
from_job: str | None,
from_pr: str | None,
):
"""Load CI image from a file."""
perform_environment_checks()
Expand All @@ -648,8 +669,15 @@ def load(
github_repository=github_repository,
)
escaped_platform = platform.replace("/", "_")
path = f"/tmp/ci-image-save-{escaped_platform}-{python}.tar"

if from_job:
download_artifact_from_run_id(from_job, path)
elif from_pr:
download_artifact_from_pr(from_pr, path)

if not image_file:
image_file = Path(f"/tmp/ci-image-save-{escaped_platform}-{python}.tar")
image_file = Path(path)
if not image_file.exists():
get_console().print(f"[error]The image {image_file} does not exist.[/]")
sys.exit(1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,8 @@
"--image-file",
"--github-repository",
"--skip-image-file-deletion",
"--from-job",
"--from-pr",
],
},
],
Expand Down
124 changes: 124 additions & 0 deletions dev/breeze/src/airflow_breeze/utils/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@
# under the License.
from __future__ import annotations

import os
import re
import sys
import tempfile
import zipfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
Expand Down Expand Up @@ -177,3 +180,124 @@ def get_tag_date(tag: str) -> str | None:
tag_object.committed_date if hasattr(tag_object, "committed_date") else tag_object.tagged_date
)
return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def download_artifact_from_run_id(run_id: str, output_file: str):
    """
    Download an artifact of an apache/airflow GitHub Actions run and unpack it.

    The artifact is selected by name: the first artifact of the run whose name starts with the
    base name of ``output_file`` (without extension) is downloaded as a zip archive and extracted
    into the directory of ``output_file``.

    When the ``GITHUB_TOKEN`` environment variable is set, it is sent as a bearer token with
    every request. The process exits with status 1 when listing the artifacts fails, when no
    matching artifact exists, or when the download fails.

    :param run_id: run_id of the workflow
    :param output_file: Path where the file should be downloaded
    """
    import requests
    from tqdm import tqdm

    url = f"https://api.github.com/repos/apache/airflow/actions/runs/{run_id}/artifacts"
    headers = {"Accept": "application/vnd.github.v3+json"}

    github_token = os.getenv("GITHUB_TOKEN")
    if github_token:
        headers["Authorization"] = f"Bearer {github_token}"

    file_name = os.path.splitext(os.path.basename(output_file))[0]
    # Extract next to the requested output file instead of a hard-coded "/tmp/".
    target_dir = os.path.dirname(output_file) or "."

    with requests.Session() as session:
        # The API returns 30 artifacts per page by default; ask for the maximum page size so the
        # wanted artifact is less likely to be cut off on runs that produce many artifacts.
        artifact_response = session.get(url, headers=headers, params={"per_page": 100})

        if artifact_response.status_code != 200:
            # The console does not interpolate "%s" placeholders - format the message up front.
            get_console().print(
                f"[error]Describing artifacts failed with status code {artifact_response.status_code}, "
                "you might need to provide GITHUB_TOKEN, set it as environment variable"
            )
            sys.exit(1)

        download_url = next(
            (
                artifact["archive_download_url"]
                for artifact in artifact_response.json()["artifacts"]
                if artifact["name"].startswith(file_name)
            ),
            None,
        )

        if not download_url:
            get_console().print(f"[error]No artifact found for {file_name}")
            sys.exit(1)

        get_console().print(f"[info]Downloading artifact from {download_url} to {output_file}")

        # Using the response as a context manager releases the streamed connection on every path.
        with session.get(download_url, stream=True, headers=headers) as response:
            if response.status_code != 200:
                get_console().print(
                    f"[error] Downloading artifact failed with status code {response.status_code}, "
                    "you might need to provide GITHUB_TOKEN, set it as environment variable"
                )
                sys.exit(1)

            total_size = int(response.headers.get("content-length", 0))

            # A real temporary directory is removed automatically, including the downloaded zip.
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_file = os.path.join(temp_dir, "file.zip")

                with tqdm(
                    total=total_size, unit="B", unit_scale=True, desc=temp_file, ascii=True
                ) as progress_bar, open(temp_file, "wb") as f:
                    for chunk in response.iter_content(chunk_size=1 * 1024 * 1024):
                        if chunk:
                            f.write(chunk)
                            progress_bar.update(len(chunk))

                with zipfile.ZipFile(temp_file, "r") as zip_ref:
                    zip_ref.extractall(target_dir)


def download_artifact_from_pr(pr: str, output_file: str):
    """
    Download the artifact produced by the latest ``ci.yml`` workflow run of an apache/airflow PR.

    The PR is looked up to find its head branch, the ``pull_request`` workflow runs of that
    branch are listed, and the most recently created run whose workflow path ends with
    ``ci.yml`` is passed to ``download_artifact_from_run_id``.

    When the ``GITHUB_TOKEN`` environment variable is set, it is sent as a bearer token with
    every request. The process exits with status 1 when the PR or its workflow runs cannot be
    fetched, or when no ``ci.yml`` run is found.

    :param pr: PR number, optionally prefixed with ``#``
    :param output_file: Path where the file should be downloaded
    """
    import requests

    pr_number = pr.lstrip("#")
    pr_url = f"https://api.github.com/repos/apache/airflow/pulls/{pr_number}"
    workflow_run_url = "https://api.github.com/repos/apache/airflow/actions/runs"

    headers = {"Accept": "application/vnd.github.v3+json"}

    github_token = os.getenv("GITHUB_TOKEN")
    if github_token:
        headers["Authorization"] = f"Bearer {github_token}"

    with requests.Session() as session:
        pull_response = session.get(pr_url, headers=headers)

        if pull_response.status_code != 200:
            # The console does not interpolate "%s" placeholders - format the message up front.
            get_console().print(
                f"[error]Fetching PR failed with status code {pull_response.status_code}, "
                f"{pull_response.text}, "
                "you might need to provide GITHUB_TOKEN, set it as environment variable"
            )
            sys.exit(1)

        ref = pull_response.json()["head"]["ref"]

        workflow_runs = session.get(
            workflow_run_url, headers=headers, params={"event": "pull_request", "branch": ref}
        )

        if workflow_runs.status_code != 200:
            get_console().print(
                f"[error]Fetching workflow runs failed with status code {workflow_runs.status_code}, "
                f"{workflow_runs.text}, "
                "you might need to provide GITHUB_TOKEN, set it as environment variable"
            )
            sys.exit(1)

    # Filter only workflow with ci.yml, we may get multiple workflows for a PR
    # ex: codeql-analysis.yml, news-fragment.yml
    ci_runs = [
        run for run in workflow_runs.json()["workflow_runs"] if (run.get("path") or "").endswith("ci.yml")
    ]

    if not ci_runs:
        # Without this guard the download would be attempted for run id "None".
        get_console().print(f"[error]No ci.yml workflow run found for PR {pr}")
        sys.exit(1)

    def _created_at(run: dict) -> datetime:
        # GitHub timestamps end with "Z", which datetime.fromisoformat rejects before Python 3.11.
        return datetime.fromisoformat(run["created_at"].replace("Z", "+00:00"))

    run_id = max(ci_runs, key=_created_at)["id"]

    get_console().print(f"[info]Found run id {run_id} for PR {pr}")

    download_artifact_from_run_id(str(run_id), output_file)

0 comments on commit 4beb041

Please sign in to comment.