-
Notifications
You must be signed in to change notification settings - Fork 27
/
Copy pathlist-branch-pr
executable file
·454 lines (396 loc) · 17.8 KB
/
list-branch-pr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
#!/usr/bin/env python3
"""Get a list of pull requests that need building.
Requires a GITHUB_TOKEN in the environment.
This script's output is one commit to be built per line, with the following
fields separated by tab characters:
1. The type of build for this PR (i.e. whether it has been built before):
   untested, failed or succeeded.
2. The pull request number where progress should be reported.
3. The commit hash to be checked out and built.
4. The name of an *.env file, without the .env suffix, of the repository that
   this commit belongs to.
5. For untested PRs, the Unix timestamp since which the commit has been
   waiting to be built; empty otherwise.
"""
import functools
import glob
import hashlib
import os
import os.path
import random
import sys
from argparse import ArgumentParser, Namespace
from collections import defaultdict
from datetime import datetime, timezone
from gql import Client, gql
from gql.transport.requests import RequestsHTTPTransport
from alibot_helpers.utilities import parse_env_file
from alibot_helpers.github_utilities import \
github_token, GithubCachedClient, setGithubStatus
# File name of the defaults file looked up at each level of the definitions
# directory hierarchy. It is parsed before a build config's own .env file and
# is never treated as a build config itself.
DEFAULTENV_NAME = "DEFAULTS.env"
@functools.lru_cache(maxsize=None)
def query_repo_info(session, org, repo, base_branch, include_base_branch=False):
    """Run the "statuses" operation of QUERY and return its repository node.

    The result is memoised on the full argument tuple, so build configs that
    share a repository and base branch reuse a single response.
    """
    variables = {
        "repoOwner": org,
        "repoName": repo,
        "baseBranch": base_branch,
        "includeBaseBranch": include_base_branch,
    }
    response = session.execute(QUERY, variables, "statuses")
    return response["repository"]
@functools.lru_cache(maxsize=None)
def query_team_members(session, org, team_slug):
    """Return the logins of the given team's members as a frozenset.

    Runs the "team" operation of QUERY; the result is memoised on the
    argument tuple so each team is looked up at most once per session.
    """
    variables = {
        "repoOwner": org,
        "teamSlug": team_slug,
    }
    response = session.execute(QUERY, variables, "team")
    member_nodes = response["organization"]["team"]["members"]["nodes"]
    return frozenset(node["login"] for node in member_nodes)
class RepoConfig:
    """Encapsulates a build configuration of the CI worker.

    A build configuration is described by a chain of .env files (defaults
    first, the config's own file last). From these, the repository, base
    branch, check name and trust settings are read. The instance then decides
    which pull requests this particular worker should build.
    """

    def __init__(self, build_config, definitions_dir,
                 mesos_role, container_name, config_suffix,
                 worker_index, worker_pool_size):
        """Load the settings for build_config from its chain of .env files.

        Raises ValueError if neither the defaults nor the config's own .env
        file define both CHECK_NAME and PR_REPO.
        """
        self.build_config = build_config
        self.check_name = ""
        self.repo_name = ""
        self.branch_ref = "master"
        self.trusted_users = frozenset()
        self.trusted_team_slug = None
        self.trusted_author_associations = {"OWNER", "MEMBER", "COLLABORATOR"}
        self.worker_index = worker_index
        self.worker_pool_size = worker_pool_size

        # Parse .env files for this build config to initialise above variables.
        # Later files in the chain override earlier ones.
        mesos, docker = mesos_role, container_name + config_suffix
        env_file_path = build_config + ".env"
        self._parse_env_files(
            os.path.join(definitions_dir, DEFAULTENV_NAME),
            os.path.join(definitions_dir, mesos, DEFAULTENV_NAME),
            os.path.join(definitions_dir, mesos, docker, DEFAULTENV_NAME),
            os.path.join(definitions_dir, mesos, docker, env_file_path),
        )

        if not self.check_name or not self.repo_name:
            raise ValueError("CHECK_NAME and PR_REPO are required")

        print("repo", self.build_config,
              "CHECK_NAME=%s REPO_NAME=%s BRANCH_REF=%s" % (
                  self.check_name, self.repo_name, self.branch_ref),
              sep=": ", file=sys.stderr)

    def _parse_env_files(self, *env_file_paths):
        """Apply settings from env_file_paths to this class.

        Files are processed in the given order, so later files override
        earlier ones. Files that do not exist are skipped silently.
        """
        # This variable might be overridden, so only modify trusted_author_*
        # once we've loaded the whole chain of .env files.
        trust_collaborators = False

        # Some variables are used elsewhere in the CI builder, so they
        # are defined as shell variables. Handle these here (including
        # multiple assignments on one line). Note that non-assignments
        # containing "=" (e.g. command arguments), variables valid only
        # for one command, and the like confuse this simple approach.
        for env_file_path in env_file_paths:
            if not os.path.exists(env_file_path):
                continue
            for var, value in parse_env_file(env_file_path):
                if var == "PR_REPO":
                    self.repo_name = value
                elif var == "PR_BRANCH":
                    self.branch_ref = value
                elif var == "CHECK_NAME":
                    self.check_name = value
                elif var == "TRUST_COLLABORATORS":
                    # Shell-style boolean: value is True if non-empty.
                    trust_collaborators = bool(value)
                elif var == "TRUSTED_USERS":
                    # Tolerate whitespace around the commas and drop empty
                    # entries, so "a, b" and a trailing comma behave as
                    # expected.
                    self.trusted_users = frozenset(
                        user.strip() for user in value.split(",")
                        if user.strip()
                    )
                elif var == "TRUSTED_TEAM":
                    self.trusted_team_slug = value or None

        if trust_collaborators:
            self.trusted_author_associations.add("CONTRIBUTOR")

    def should_process(self, commit_sha):
        """Decide whether this worker should handle the given PR.

        This is determined by the commit hash of the PR's HEAD, combined with
        the build config name, so that every (config, commit) pair is mapped
        to exactly one worker index in the pool.
        """
        sha = hashlib.sha256()
        sha.update(self.build_config.encode("utf-8"))
        sha.update(commit_sha.encode("utf-8"))
        intended_worker = int(sha.hexdigest(), 16) % self.worker_pool_size
        return intended_worker == self.worker_index

    def trust_pr(self, cgh, session, pull):
        """Determine whether the PR is trustworthy and can be built.

        If we specified a list of trusted users, teams or if we trust
        contributors, we need to check if this is the case for the given PR.
        Some of these options (notably trusting a team) will actually consume
        API calls, so you need to be careful about what you enable.

        If the PR is not trusted, an appropriate status is set on its latest
        commit.

        Returns True if the PR may be built, False otherwise.
        """
        # The author may be missing (presumably for deleted accounts); such a
        # PR can then only be trusted through a review or its association.
        author = pull.get("author") or {}
        login = author.get("login")

        trusted = bool(
            # If this PR has any approving reviews, trust it.
            # We only include approving reviews in pull["reviews"].
            pull["reviews"]["isApproved"] or
            # Trust org members' and repo owners' PRs.
            # If requested, trust previous contributors' PRs.
            pull["authorAssociation"] in self.trusted_author_associations or
            # Trust trusted users.
            login in self.trusted_users or
            # If we trust a team and the user is a member, trust this PR.
            # This is checked last because it may cost an API call.
            (login is not None and self.trusted_team_slug and
             login in query_team_members(
                 session, self.repo_name.split("/")[0], self.trusted_team_slug
             ))
        )

        if not trusted:
            setGithubStatus(cgh, Namespace(
                commit="{repo}#{pr}@{commit}".format(
                    repo=self.repo_name,
                    pr=pull["number"],
                    commit=pull["commits"]["nodes"][0]["commit"]["oid"],
                ),
                status="{}/pending".format(self.check_name),
                message="Security: approval needed, not starting",
                url=None,
            ), debug_print=False)

        return trusted

    def process_single_pr(self, pull_number, last_commit, pr_created,
                          is_draft=False, is_trusted=False):
        """Decide whether to queue this PR (or branch) for testing.

        pull_number is the PR number, or the branch name when checking a base
        branch. pr_created may be None if no creation time is known.

        If it should be queued, return a (state, description) tuple, where
        state is "untested", "succeeded" or "failed"; else, return None.
        """
        commit_hash = last_commit["oid"]
        reviewed = tested = success = False
        if last_commit["status"] is not None:
            for ctx in last_commit["status"]["contexts"]:
                context, cstate = ctx["context"], ctx["state"]
                if context == "review" and cstate == "SUCCESS":
                    reviewed = True
                if context == self.check_name and \
                   cstate in ("SUCCESS", "ERROR", "FAILURE"):
                    tested = True
                    success = cstate == "SUCCESS"

        if self.should_process(commit_hash) and not is_draft and \
           (reviewed or is_trusted):
            state = "untested" if not tested else \
                ("succeeded" if success else "failed")
        else:
            state = "skip"

        # The PR has been waiting for checks either since it was created or
        # since its latest commit was pushed. commit["pushedDate"] was
        # unfortunately deprecated and removed by GitHub, so committedDate
        # will have to do. Eliminate a common source for errors by falling
        # back to the PR creation time, in case the commit was created well
        # before the PR. Ignore missing timestamps instead of failing on them.
        timestamps = [stamp for stamp in
                      (pr_created, last_commit["committedDate"]) if stamp]
        waiting_since = max(timestamps) if timestamps else None

        # pull_number is a branch name (str) for base branches, so it cannot
        # be formatted with an integer format code.
        print(f"pr: {pull_number!s:>6}@{commit_hash:.7s}: wait={waiting_since} "
              f"revd={reviewed:d} trust={is_trusted:d} tested={tested:d} "
              f"success={success:d} => state={state}", file=sys.stderr)

        return None if state == "skip" else (state, {
            "number": pull_number,
            "sha": commit_hash,
            "build_config": self.build_config,
            "waiting_since": waiting_since,
        })

    def process_pulls(self, cgh, session, repo_info, show_base_branch=False):
        """Return pull requests we can process, with relevant attributes.

        This is a generator of (state, description) tuples as returned by
        process_single_pr. If show_base_branch is true, the latest commit on
        the base branch is considered after all pull requests.
        """
        last_created = None
        for pull in repo_info["pullRequests"]["nodes"]:
            last_created = pull["createdAt"]
            item = self.process_single_pr(
                pull["number"], pull["commits"]["nodes"][0]["commit"],
                pull["createdAt"],
                is_draft=pull["isDraft"] or pull["title"].startswith("[WIP]"),
                is_trusted=self.trust_pr(cgh, session, pull),
            )
            if item is not None:
                yield item

        if show_base_branch:
            # The base branch commit is missing if the branch does not exist
            # or was not requested; there is nothing to build in that case.
            base_commit = repo_info.get("object")
            if base_commit is not None:
                # As before, the creation time of the last listed PR is used
                # as the reference time. With no open PRs, only the commit
                # date is available.
                item = self.process_single_pr(self.branch_ref, base_commit,
                                              last_created)
                if item is not None:
                    yield item

    def __repr__(self):
        return f"RepoConfig({self.build_config})"
def main(args):
    """Collect buildable commits for all build configs and print a selection.

    Every *.env file of this worker's role/container directory (except the
    defaults file) is turned into a RepoConfig, its repository is queried,
    and the resulting PRs are grouped by build state. Untested PRs are all
    printed; otherwise a single failed or succeeded PR is chosen at random.
    """
    grouped = defaultdict(list)

    # Find .env files for this worker, parse them and find PRs to process for
    # each build config.
    env_files = glob.glob(os.path.join(
        args.definitions_dir, args.mesos_role,
        args.container_name + args.config_suffix, "*.env",
    ))

    transport = RequestsHTTPTransport(url="https://api.github.com/graphql",
                                      auth=("bearer", github_token()))

    with GithubCachedClient() as cgh, Client(transport=transport) as session:
        for env_file in env_files:
            env_file_name = os.path.basename(env_file)
            if env_file_name == DEFAULTENV_NAME:
                continue

            try:
                repo = RepoConfig(env_file_name[:-4],
                                  args.definitions_dir, args.mesos_role,
                                  args.container_name, args.config_suffix,
                                  args.worker_index, args.worker_pool_size)
            except ValueError as err:
                # An incomplete config is reported and skipped.
                print(env_file_name, err, sep=": ", file=sys.stderr)
                continue

            # Extend PR groups with PRs from this repo.
            org, _, repo_name = repo.repo_name.partition("/")
            repo_info = query_repo_info(session, org, repo_name,
                                        repo.branch_ref,
                                        args.show_base_branch)
            pulls = repo.process_pulls(cgh, session, repo_info,
                                       args.show_base_branch)
            for state, item in pulls:
                grouped[state].append(item)

    def print_prs(group, number=None):
        """Print N randomly chosen PRs from group on stdout."""
        candidates = grouped[group]
        if number is not None and number <= len(candidates):
            candidates = random.sample(candidates, number)

        # Build a sorted copy rather than sorting in place, as the grouped
        # list might also be used elsewhere. Entries are ordered by waiting
        # time so that the longest-waiting ones come first; a missing time
        # sorts as the empty string.
        ordered = sorted(candidates, key=lambda pr: pr["waiting_since"] or "")
        for pull in ordered:
            # If this PR has been built before, the commit time is a bit
            # meaningless -- the PR hasn't actually been waiting since
            # then for this check -- so leave the field empty.
            waiting_since = ""
            if group == "untested":
                commit_timestr = pull["waiting_since"]
                if commit_timestr:
                    commit_time = datetime.fromisoformat(
                        commit_timestr.replace("Z", "+00:00"))
                else:
                    commit_time = datetime.now(timezone.utc)
                waiting_since = str(int(commit_time.timestamp()))
            print(group, pull["number"], pull["sha"], pull["build_config"],
                  waiting_since, sep="\t")

    # If there are untested PRs waiting, build all of them first.
    if grouped["untested"]:
        print_prs("untested")
        return

    # Rebuild a failed PR, but sometimes fall through and rebuild a
    # successful one instead (if there are any). This is so a single red PR
    # can't stop all green PRs from being rebuilt occasionally.
    if grouped["failed"] and (not grouped["succeeded"] or
                              random.random() < 0.7):
        print_prs("failed", 1)
        return

    if grouped["succeeded"]:
        print_prs("succeeded", 1)
        return

    print("nothing to test:", grouped, file=sys.stderr)
def parse_args():
    """Parse command-line arguments.

    Several options fall back to an environment variable. Such an option is
    only required on the command line if its variable is unset, empty or
    cannot be converted to the expected type.

    Returns the argparse namespace of parsed options.
    """
    parser = ArgumentParser(description=__doc__, epilog="""\
Some options are required only when the corresponding environment variable
is not set. In case both are given, the command-line option overrides the
environment variable.""")

    def add_env_arg(short_name, long_name, env_var, vtype=str, **kwargs):
        """Add an argument that falls back to an environment variable."""
        env_value = os.environ.get(env_var, "")
        if "help" in kwargs:
            # argparse %-formats help strings itself, so a literal "%" in the
            # environment value must be escaped or --help would fail.
            kwargs["help"] += (
                " (required if %(var)s is empty; default %(var)s=%(value)s)"
                % {"var": env_var, "value": env_value.replace("%", "%%")})
        # Ignore empty values from the environment!
        if env_value:
            try:
                typed_env_value = vtype(env_value)
            except ValueError:
                # Fall through if the value is of the incorrect type. If we
                # raised an error here, command-line options couldn't override
                # invalid values from the environment. Instead, falling through
                # makes the cmd-line option required, which is what we want.
                pass
            else:
                parser.add_argument(
                    short_name, long_name, required=False, type=vtype,
                    default=typed_env_value, **kwargs)
                return
        parser.add_argument(
            short_name, long_name, required=True, type=vtype, **kwargs)

    parser.add_argument(
        "--definitions-dir", metavar="DIR",
        default=os.path.join("ali-bot", "ci", "repo-config"),
        help=("directory where .env files are located in a hierarchy; expects "
              "a directory structure of the form DIR/ROLE/CONTAINER/*.env "
              "(default %(default)s)"))

    parser.add_argument(
        "-b", "--show-base-branch", action="store_true",
        help=("Also consider checks on the latest commit of each repo's base "
              "branch."))

    add_env_arg("-i", "--worker-index", "WORKER_INDEX", vtype=int,
                help="Index for the current worker")

    add_env_arg("-n", "--worker-pool-size", "WORKERS_POOL_SIZE", vtype=int,
                help="Total number of workers")

    add_env_arg("-r", "--mesos-role", "MESOS_ROLE",
                help="Mesos role of the current worker")

    add_env_arg("-c", "--container-name", "CUR_CONTAINER",
                help="Short name of the container we're running in, e.g. slc8")

    parser.add_argument(
        "-s", "--config-suffix", metavar="SUFFIX",
        default=os.environ.get("ALIBOT_CONFIG_SUFFIX", ""),
        help=("Suffix to disambiguate which .env files should be chosen for the"
              " current Mesos role and container (default ALIBOT_CONFIG_SUFFIX="
              "%(default)s or empty if undefined). If %(metavar)s starts with a"
              " dash, use -s=%(metavar)s instead of -s %(metavar)s."))

    return parser.parse_args()
# GraphQL document shared by all API requests of this script. It defines two
# operations, selected by name when the document is executed:
#   * "statuses" (see query_repo_info): open pull requests against a base
#     branch, with their latest commit, its status contexts and the number of
#     approving reviews; optionally also the latest commit of the base branch.
#   * "team" (see query_team_members): the member logins of a team.
# The "commitInfo" fragment is shared between pull request commits and the
# base branch commit.
# NOTE(review): the "team" operation selects `members` without a first/last
# argument and without pagination -- confirm GitHub accepts this and that
# large teams are returned completely.
# NOTE(review): `last: 75` combined with a DESC ordering selects the tail of
# that ordering -- confirm this picks the intended pull requests.
QUERY = gql("""\
query statuses(
$repoOwner: String!
$repoName: String!
$baseBranch: String!
$includeBaseBranch: Boolean!
) {
repository(owner: $repoOwner, name: $repoName) {
# Fetch status for the latest commit on the base branch too, if requested.
object(expression: $baseBranch) @include(if: $includeBaseBranch) {
...commitInfo
}
pullRequests(
last: 75
baseRefName: $baseBranch
states: OPEN
orderBy: { field: UPDATED_AT, direction: DESC }
) {
nodes {
number
title
isDraft
createdAt
authorAssociation
author {
login
}
# APPROVED reviews do not include dismissed approvals, so no need to
# check the commit ID matches.
# We only need to know whether this PR has been approved at all (the
# only values that matter for totalCount are 0 and >0), so limit our
# query to the latest APPROVED review, if any.
reviews(last: 1, states: APPROVED) {
isApproved: totalCount
}
commits(last: 1) {
nodes {
commit {
...commitInfo
}
}
}
}
}
}
}
fragment commitInfo on Commit {
oid
committedDate
status {
contexts {
context
state
}
}
}
query team($repoOwner: String!, $teamSlug: String!) {
organization(login: $repoOwner) {
team(slug: $teamSlug) {
members {
nodes {
login
}
}
}
}
}
""")
# Only run when executed as a script, not when imported as a module.
if __name__ == "__main__":
    cli_args = parse_args()
    main(cli_args)