Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(CI): properly configure cancel duplicates #12625

Merged
merged 8 commits into from
Jan 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/actions/cancel-workflow-action
Submodule cancel-workflow-action deleted from ce1774
49 changes: 32 additions & 17 deletions .github/workflows/cancel_duplicates.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,40 @@
name: Cancel Duplicates
on:
workflow_run:
workflows: ["CI"]
types: ["requested"]
workflows:
- "Miscellaneous"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So once this is merged it will only work with the Miscellaneous workflow, and then we'll add all the other workflows here, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only this workflow will trigger the cancel job, but a scheduled cancel job will try to cancel duplicates of all workflows.

types:
- requested

jobs:
cancel-duplicate-workflow-runs:
name: "Cancel duplicate workflow runs"
runs-on: ubuntu-latest
cancel-duplicate-runs:
name: Cancel duplicate workflow runs
runs-on: ubuntu-20.04
steps:
- name: Check number of queued tasks
id: check_queued
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_REPO: ${{ github.repository }}
run: |
get_count() {
echo $(curl -s -H "Authorization: token $GITHUB_TOKEN" \
"https://api.github.com/repos/$GITHUB_REPO/actions/runs?status=$1" | \
jq ".total_count")
}
count=$(( `get_count queued` + `get_count in_progress` ))
echo "Found $count unfinished jobs."
echo "::set-output name=count::$count"
- name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
if: steps.check_queued.outputs.count >= 20
uses: actions/checkout@v2
with:
persist-credentials: false
submodules: recursive
- uses: ./.github/actions/cancel-workflow-runs/
name: "Cancel duplicate workflow runs"
with:
cancelMode: duplicates
cancelFutureDuplicates: true
token: ${{ secrets.GITHUB_TOKEN }}
sourceRunId: ${{ github.event.workflow_run.id }}
notifyPRCancel: true
skipEventTypes: '["push", "pull_request", "pull_request_target"]'

- name: Cancel duplicate workflow runs
if: steps.check_queued.outputs.count >= 20
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_REPOSITORY: ${{ github.repository }}
run: |
pip install click requests typing_extensions python-dateutil
python ./scripts/cancel_github_workflows.py
3 changes: 0 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,6 @@
[submodule ".github/actions/file-changes-action"]
path = .github/actions/file-changes-action
url = https://github.com/trilom/file-changes-action
[submodule ".github/actions/cancel-workflow-action"]
path = .github/actions/cancel-workflow-action
url = https://github.com/styfle/cancel-workflow-action
[submodule ".github/actions/cached-dependencies"]
path = .github/actions/cached-dependencies
url = https://github.com/apache-superset/cached-dependencies
Expand Down
80 changes: 50 additions & 30 deletions scripts/cancel_github_workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@
export GITHUB_TOKEN=394ba3b48494ab8f930fbc93
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just wondering: is this a real GitHub token here? :)

Copy link
Member Author

@ktmud ktmud Jan 28, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope. I just randomly typed one that looks real.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A real token has 40 characters, this one has 24.

export GITHUB_REPOSITORY=apache/superset
# cancel previous jobs for a PR
# cancel previous jobs for a PR, will even cancel the running ones
./cancel_github_workflows.py 1042
# cancel previous jobs for a branch
./cancel_github_workflows.py my-branch
# cancel all jobs
# cancel all jobs of a PR, including the latest runs
./cancel_github_workflows.py 1024 --include-last
"""
import os
Expand Down Expand Up @@ -58,7 +58,21 @@ def request(method: Literal["GET", "POST", "DELETE", "PUT"], endpoint: str, **kw


def list_runs(repo: str, params=None):
    """List all GitHub workflow runs of a repository.

    Issues paginated ``GET /repos/{repo}/actions/runs`` requests through
    ``request`` and yields every entry of each page's ``workflow_runs`` list.

    Args:
        repo: Repository in ``owner/name`` form, interpolated into the
            endpoint path.
        params: Optional mapping of extra query parameters (for example
            ``event`` or ``status``). ``per_page`` and ``page`` are always
            overridden by this function. ``None`` means no extra filters.

    Returns:
        An iterator that will iterate through all pages of matching runs.
    """
    per_page = 100
    # Copy so the caller's mapping is never mutated, and tolerate the
    # default ``params=None`` (unpacking ``None`` would raise TypeError).
    base_params = dict(params or {})
    page = 1
    while True:
        result = request(
            "GET",
            f"/repos/{repo}/actions/runs",
            params={**base_params, "per_page": per_page, "page": page},
        )
        runs = result["workflow_runs"]
        yield from runs
        # Stop once the pages fetched so far cover the reported total.
        # Checking *after* the fetch avoids dropping the final page, and the
        # empty-page guard prevents looping forever if ``total_count`` is
        # larger than what the API actually returns.
        if not runs or page * per_page >= result["total_count"]:
            break
        page += 1


def cancel_run(repo: str, run_id: Union[str, int]):
Expand All @@ -69,9 +83,9 @@ def get_pull_request(repo: str, pull_number: Union[str, int]):
return request("GET", f"/repos/{repo}/pulls/{pull_number}")


def get_runs_by_branch(
def get_runs(
repo: str,
branch: str,
branch: Optional[str] = None,
user: Optional[str] = None,
statuses: Iterable[str] = ("queued", "in_progress"),
events: Iterable[str] = ("pull_request", "push"),
Expand All @@ -81,15 +95,13 @@ def get_runs_by_branch(
item
for event in events
for status in statuses
for item in list_runs(
repo, {"event": event, "status": status, "per_page": 100}
)["workflow_runs"]
if item["head_branch"] == branch
for item in list_runs(repo, {"event": event, "status": status})
if (branch is None or (branch == item["head_branch"]))
and (user is None or (user == item["head_repository"]["owner"]["login"]))
]


def print_commit(commit):
def print_commit(commit, branch):
"""Print out commit message for verification"""
indented_message = " \n".join(commit["message"].split("\n"))
date_str = (
Expand All @@ -98,7 +110,7 @@ def print_commit(commit):
.strftime("%a, %d %b %Y %H:%M:%S")
)
print(
f"""HEAD {commit["id"]}
f"""HEAD {commit["id"]} ({branch})
Author: {commit["author"]["name"]} <{commit["author"]["email"]}>
Date: {date_str}
Expand Down Expand Up @@ -132,10 +144,10 @@ def print_commit(commit):
show_default=True,
help="Whether to also cancel running workflows.",
)
@click.argument("branch_or_pull")
@click.argument("branch_or_pull", required=False)
def cancel_github_workflows(
branch_or_pull: str,
repo,
branch_or_pull: Optional[str],
repo: str,
event: List[str],
include_last: bool,
include_running: bool,
Expand All @@ -145,24 +157,24 @@ def cancel_github_workflows(
raise ClickException("Please provide GITHUB_TOKEN as an env variable")

statuses = ("queued", "in_progress") if include_running else ("queued",)
events = event
pr = None

if branch_or_pull.isdigit():
if branch_or_pull is None:
title = "all jobs" if include_last else "all duplicate jobs"
elif branch_or_pull.isdigit():
pr = get_pull_request(repo, pull_number=branch_or_pull)
target_type = "pull request"
title = f"#{pr['number']} - {pr['title']}"
title = f"pull request #{pr['number']} - {pr['title']}"
else:
target_type = "branch"
title = branch_or_pull
title = f"branch [{branch_or_pull}]"

print(
f"\nCancel {'active' if include_running else 'previous'} "
f"workflow runs for {target_type}\n\n {title}\n"
f"workflow runs for {title}\n"
)

if pr:
# full branch name
runs = get_runs_by_branch(
runs = get_runs(
repo,
statuses=statuses,
events=event,
Expand All @@ -172,26 +184,33 @@ def cancel_github_workflows(
else:
user = None
branch = branch_or_pull
if ":" in branch:
if branch and ":" in branch:
[user, branch] = branch.split(":", 2)
runs = get_runs_by_branch(
repo, statuses=statuses, events=event, branch=branch_or_pull, user=user
runs = get_runs(
repo, branch=branch, user=user, statuses=statuses, events=events,
)

# sort old jobs to the front, so to cancel older jobs first
runs = sorted(runs, key=lambda x: x["created_at"])
if not runs:
if runs:
print(
f"Found {len(runs)} potential runs of\n"
f" status: {statuses}\n event: {events}\n"
)
else:
print(f"No {' or '.join(statuses)} workflow runs found.\n")
return

if not include_last:
# Only keep one item for each workflow
# Keep the latest run for each workflow and cancel all others
seen = set()
dups = []
for item in reversed(runs):
if item["workflow_id"] in seen:
key = f'{item["event"]}_{item["head_branch"]}_{item["workflow_id"]}'
if key in seen:
dups.append(item)
else:
seen.add(item["workflow_id"])
seen.add(key)
if not dups:
print(
"Only the latest runs are in queue. "
Expand All @@ -207,7 +226,8 @@ def cancel_github_workflows(
head_commit = entry["head_commit"]
if head_commit["id"] != last_sha:
last_sha = head_commit["id"]
print_commit(head_commit)
print("")
print_commit(head_commit, entry["head_branch"])
try:
print(f"[{entry['status']}] {entry['name']}", end="\r")
cancel_run(repo, entry["id"])
Expand Down