Skip to content

Commit

Permalink
[Release] Cache known author translations locally
Browse files Browse the repository at this point in the history
This avoids unnecessary calls to the GitHub and JIRA APIs.
Additionally, having a local cache allows us to remember names
that we had to manually discover ourselves.
  • Loading branch information
Andrew Or committed Dec 17, 2014
1 parent 6f80b74 commit b85044e
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 29 deletions.
18 changes: 9 additions & 9 deletions dev/create-release/generate-contributors.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,16 @@

# You must set the following before use!
JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
RELEASE_TAG = os.environ.get("START_COMMIT", "v1.2.0-rc2")
PREVIOUS_RELEASE_TAG = os.environ.get("END_COMMIT", "v1.1.0")
RELEASE_TAG = os.environ.get("RELEASE_TAG", "v1.2.0-rc2")
PREVIOUS_RELEASE_TAG = os.environ.get("PREVIOUS_RELEASE_TAG", "v1.1.0")

# If the release tags are not provided, prompt the user to provide them
while not tag_exists(RELEASE_TAG):
RELEASE_TAG = raw_input("Please provide a valid release tag: ")
while not tag_exists(PREVIOUS_RELEASE_TAG):
print "Please specify the previous release tag."
PREVIOUS_RELEASE_TAG = raw_input(\
"For instance, if you are releasing v1.2.0, you should specify v1.1.0: ")
"For instance, if you are releasing v1.2.0, you should specify v1.1.0: ")

# Gather commits found in the new tag but not in the old tag.
# This filters commits based on both the git hash and the PR number.
Expand Down Expand Up @@ -84,9 +84,9 @@ def print_indented(_list):
filtered_commits = []
def is_release(commit_title):
return re.findall("\[release\]", commit_title.lower()) or\
"preparing spark release" in commit_title.lower() or\
"preparing development version" in commit_title.lower() or\
"CHANGES.txt" in commit_title
"preparing spark release" in commit_title.lower() or\
"preparing development version" in commit_title.lower() or\
"CHANGES.txt" in commit_title
def is_maintenance(commit_title):
return "maintenance" in commit_title.lower() or\
"manually close" in commit_title.lower()
Expand All @@ -96,7 +96,7 @@ def is_revert(commit_title):
return "revert" in commit_title.lower()
def is_docs(commit_title):
return re.findall("docs*", commit_title.lower()) or\
"programming guide" in commit_title.lower()
"programming guide" in commit_title.lower()
for c in new_commits:
t = c.get_title()
if not t: continue
Expand Down Expand Up @@ -182,7 +182,7 @@ def populate(issue_type, components):
jira_type = jira_issue.fields.issuetype.name
jira_type = translate_issue_type(jira_type, issue, warnings)
jira_components = [translate_component(c.name, _hash, warnings)\
for c in jira_issue.fields.components]
for c in jira_issue.fields.components]
all_components = set(jira_components + commit_components)
populate(jira_type, all_components)
# For docs without an associated JIRA, manually add it ourselves
Expand Down Expand Up @@ -213,7 +213,7 @@ def populate(issue_type, components):
# e.g. Bug fixes in MLlib, Core, and Streaming; documentation in YARN
else:
contributions = ["%s in %s" % (issue_type, nice_join(comps)) \
for issue_type, comps in author_info[author].items()]
for issue_type, comps in author_info[author].items()]
contribution = "; ".join(contributions)
# Do not use python's capitalize() on the whole string to preserve case
assert contribution
Expand Down
59 changes: 59 additions & 0 deletions dev/create-release/known_translations
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# This is a mapping of names to be translated through translate-contributors.py

This comment has been minimized.

Copy link
@nchammas

nchammas Dec 17, 2014

Contributor

@andrewor14 I believe you need to add a RAT exception for this new file. The build is currently broken. cc @pwendell

This comment has been minimized.

Copy link
@nchammas

nchammas Dec 17, 2014

Contributor
# The format expected on each line should be: <old name> - <new name>
CodingCat - Nan Zhu
CrazyJvm - Chao Chen
EugenCepoi - Eugen Cepoi
GraceH - Jie Huang
JerryLead - Lijie Xu
Leolh - Liu Hao
Lewuathe - Kai Sasaki
RongGu - Rong Gu
Shiti - Shiti Saxena
Victsm - Min Shen
WangTaoTheTonic - Wang Tao
XuTingjun - Tingjun Xu
YanTangZhai - Yantang Zhai
alexdebrie - Alex DeBrie
alokito - Alok Saldanha
anantasty - Anant Asthana
andrewor14 - Andrew Or
aniketbhatnagar - Aniket Bhatnagar
arahuja - Arun Ahuja
brkyvz - Burak Yavuz
chesterxgchen - Chester Chen
chiragaggarwal - Chirag Aggarwal
chouqin - Qiping Li
cocoatomo - Tomohiko K.
coderfi - Fairiz Azizi
coderxiang - Shuo Xiang
davies - Davies Liu
epahomov - Egor Pahomov
falaki - Hossein Falaki
freeman-lab - Jeremy Freeman
industrial-sloth - Jascha Swisher
jackylk - Jacky Li
jayunit100 - Jay Vyas
jerryshao - Saisai Shao
jkbradley - Joseph Bradley
lianhuiwang - Lianhui Wang
lirui-intel - Rui Li
luluorta - Lu Lu
luogankun - Gankun Luo
maji2014 - Derek Ma
mccheah - Matthew Cheah
mengxr - Xiangrui Meng
nartz - Nathan Artz
odedz - Oded Zimerman
ravipesala - Ravindra Pesala
roxchkplusony - Victor Tso
scwf - Wang Fei
shimingfei - Shiming Fei
surq - Surong Quan
suyanNone - Su Yan
tedyu - Ted Yu
tigerquoll - Dale Richardson
wangxiaojing - Xiaojing Wang
watermen - Yadong Qi
witgo - Guoqiang Li
xinyunh - Xinyun Huang
zsxwing - Shixiong Zhu
4 changes: 2 additions & 2 deletions dev/create-release/releaseutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ def yesOrNoPrompt(msg):

# Utility functions to run git commands (written with Git 1.8.5)
def run_cmd(cmd):
    """Run the given command (an argument list) and return what it wrote to stdout."""
    process = Popen(cmd, stdout=PIPE)
    stdout, _ = process.communicate()
    return stdout
def run_cmd_error(cmd): return Popen(cmd, stderr=PIPE).communicate()[1]
def run_cmd_error(cmd): return Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()[1]
def get_date(commit_hash):
    """Return the output of `git show` for the given commit, formatted as its committer date (%cd)."""
    show_date_cmd = ["git", "show", "--quiet", "--pretty=format:%cd", commit_hash]
    return run_cmd(show_date_cmd)
def tag_exists(tag):
stderr = run_cmd_error(["git", "checkout", tag])
stderr = run_cmd_error(["git", "show", tag])
return "error" not in stderr

# A type-safe representation of a commit
Expand Down
64 changes: 46 additions & 18 deletions dev/create-release/translate-contributors.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,19 @@
jira_client = JIRA(options = jira_options, basic_auth = (JIRA_USERNAME, JIRA_PASSWORD))
github_client = Github(GITHUB_API_TOKEN)

# Load known author translations that are cached locally.
# Each non-comment line is expected to have the form: <old name> - <new name>
known_translations = {}
known_translations_file_name = "known_translations"
with open(known_translations_file_name, "r") as known_translations_file:
    for line in known_translations_file:
        # Drop the trailing newline so it does not leak into the translated name;
        # otherwise substituting the name later would split the contributor line in two
        line = line.strip()
        # Skip comments and blank lines
        if not line or line.startswith("#"):
            continue
        # Split on the first separator only, in case the new name itself contains " - "
        [old_name, new_name] = line.split(" - ", 1)
        known_translations[old_name] = new_name

# Open again in case the user adds new mappings
known_translations_file = open(known_translations_file_name, "a")

# Generate candidates for the given author. This should only be called if the given author
# name does not represent a full name as this operation is somewhat expensive. Under the
# hood, it makes several calls to the Github and JIRA API servers to find the candidates.
Expand All @@ -83,17 +96,17 @@
def generate_candidates(author, issues):
candidates = []
# First check for full name of Github user
github_name = get_github_name(new_author, github_client)
github_name = get_github_name(author, github_client)
if github_name:
candidates.append((github_name, "Full name of Github user %s" % new_author))
candidates.append((github_name, "Full name of Github user %s" % author))
else:
candidates.append((NOT_FOUND, "No full name found for Github user %s" % new_author))
candidates.append((NOT_FOUND, "No full name found for Github user %s" % author))
# Then do the same for JIRA user
jira_name = get_jira_name(new_author, jira_client)
jira_name = get_jira_name(author, jira_client)
if jira_name:
candidates.append((jira_name, "Full name of JIRA user %s" % new_author))
candidates.append((jira_name, "Full name of JIRA user %s" % author))
else:
candidates.append((NOT_FOUND, "No full name found for JIRA user %s" % new_author))
candidates.append((NOT_FOUND, "No full name found for JIRA user %s" % author))
# Then do the same for the assignee of each of the associated JIRAs
# Note that a given issue may not have an assignee, or the assignee may not have a full name
for issue in issues:
Expand Down Expand Up @@ -135,15 +148,24 @@ def generate_candidates(author, issues):
print "\n========================== Translating contributor list =========================="
lines = contributors_file.readlines()
for i, line in enumerate(lines):
author = line.split(" - ")[0]
print "Processing author %s (%d/%d)" % (author, i + 1, len(lines))
if not author:
print " ERROR: Expected the following format <author> - <contributions>"
print " ERROR: Actual = %s" % line
if not is_valid_author(author):
new_author = author.split("/")[0]
issues = author.split("/")[1:]
candidates = generate_candidates(new_author, issues)
temp_author = line.split(" - ")[0]
print "Processing author %s (%d/%d)" % (temp_author, i + 1, len(lines))
if not temp_author:
error_msg = " ERROR: Expected the following format <author> - <contributions>\n"
error_msg += " ERROR: Actual = %s" % line
print error_msg
warnings.append(error_msg)
new_contributors_file.write(line)
new_contributors_file.flush()
continue
author = temp_author.split("/")[0]
# Use the local copy of known translations where possible
if author in known_translations:
line = line.replace(temp_author, known_translations[author])
elif not is_valid_author(author):
new_author = author
issues = temp_author.split("/")[1:]
candidates = generate_candidates(author, issues)
# Print out potential replacement candidates along with the sources, e.g.
# [X] No full name found for Github user andrewor14
# [X] No assignee found for SPARK-1763
Expand All @@ -169,7 +191,7 @@ def generate_candidates(author, issues):
for p in good_prompts: print p
# In interactive mode, additionally provide "custom" option and await user response
if INTERACTIVE_MODE:
print " [%d] %s - Raw Github username" % (raw_index, new_author)
print " [%d] %s - Raw Github username" % (raw_index, author)
print " [%d] Custom" % custom_index
response = raw_input(" Your choice: ")
last_index = custom_index
Expand All @@ -191,9 +213,15 @@ def generate_candidates(author, issues):
if is_valid_author(new_author):
new_author = capitalize_author(new_author)
else:
warnings.append("Unable to find a valid name %s for author %s" % (new_author, author))
warnings.append("Unable to find a valid name %s for author %s" % (author, temp_author))
print " * Replacing %s with %s" % (author, new_author)
line = line.replace(author, new_author)
# If we are in interactive mode, ask the user whether to remember this new mapping
if INTERACTIVE_MODE and\
author not in known_translations and\
yesOrNoPrompt(" Add mapping %s -> %s to known translations file?" % (author, new_author)):
known_translations_file.write("%s - %s\n" % (author, new_author))
known_translations_file.flush()
line = line.replace(temp_author, author)
new_contributors_file.write(line)
new_contributors_file.flush()
print "==================================================================================\n"
Expand Down

0 comments on commit b85044e

Please sign in to comment.