Skip to content

Commit a0761ec

Browse files
texasmichelle authored and pwendell committed
[SPARK-1684] [PROJECT INFRA] Merge script should standardize SPARK-XXX prefix
Cleans up the pull request title in the merge script to follow conventions outlined in the wiki under Contributing Code.
https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-ContributingCode

[MODULE] SPARK-XXXX: Description

Author: texasmichelle <[email protected]>

Closes #5149 from texasmichelle/master and squashes the following commits:

9b6b0a7 [texasmichelle] resolved variable scope issue
7d5fa20 [texasmichelle] only prompt if title has been modified
8c195bb [texasmichelle] removed erroneous line
4f1ed46 [texasmichelle] Deque removal, logic simplifications, & prompt user to pick a title (orig or modified)
df73f6a [texasmichelle] reworked regex's to enforce brackets around JIRA ref
43b5aed [texasmichelle] Merge remote-tracking branch 'apache/master'
25229c6 [texasmichelle] Merge remote-tracking branch 'apache/master'
aa20a6e [texasmichelle] Move code into main() and add doctest for new text parsing method
48520ba [texasmichelle] SPARK-1684: Corrected import statement
042099d [texasmichelle] SPARK-1684 Merge script should standardize SPARK-XXX prefix
8f4a7d1 [texasmichelle] SPARK-1684 Merge script should standardize SPARK-XXX prefix
1 parent 41ef78a commit a0761ec

File tree

1 file changed

+140
-59
lines changed

1 file changed

+140
-59
lines changed

dev/merge_spark_pr.py

Lines changed: 140 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -55,8 +55,6 @@
5555
# Prefix added to temporary branches
5656
BRANCH_PREFIX = "PR_TOOL"
5757

58-
os.chdir(SPARK_HOME)
59-
6058

6159
def get_json(url):
6260
try:
@@ -85,10 +83,6 @@ def continue_maybe(prompt):
8583
if result.lower() != "y":
8684
fail("Okay, exiting")
8785

88-
89-
original_head = run_cmd("git rev-parse HEAD")[:8]
90-
91-
9286
def clean_up():
9387
print "Restoring head pointer to %s" % original_head
9488
run_cmd("git checkout %s" % original_head)
@@ -101,7 +95,7 @@ def clean_up():
10195

10296

10397
# merge the requested PR and return the merge hash
104-
def merge_pr(pr_num, target_ref):
98+
def merge_pr(pr_num, target_ref, title, body, pr_repo_desc):
10599
pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num)
106100
target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX, pr_num, target_ref.upper())
107101
run_cmd("git fetch %s pull/%s/head:%s" % (PR_REMOTE_NAME, pr_num, pr_branch_name))
@@ -274,7 +268,7 @@ def get_version_json(version_str):
274268
asf_jira.transition_issue(
275269
jira_id, resolve["id"], fixVersions=jira_fix_versions, comment=comment)
276270

277-
print "Succesfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions)
271+
print "Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions)
278272

279273

280274
def resolve_jira_issues(title, merge_branches, comment):
@@ -286,68 +280,155 @@ def resolve_jira_issues(title, merge_branches, comment):
286280
resolve_jira_issue(merge_branches, comment, jira_id)
287281

288282

289-
branches = get_json("%s/branches" % GITHUB_API_BASE)
290-
branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches])
291-
# Assumes branch names can be sorted lexicographically
292-
latest_branch = sorted(branch_names, reverse=True)[0]
293-
294-
pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): ")
295-
pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
296-
pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))
283+
def standardize_jira_ref(text):
    """
    Standardize the [SPARK-XXXXX] [MODULE] prefix of a pull request title.

    Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or
    "SPARK XXX [MLLIB]: Issue" to "[SPARK-XXX] [MLLIB] Issue".

    JIRA references are recognized case-insensitively with any mix of dashes and
    whitespace (or nothing at all) between "SPARK" and the 3-6 digit issue number,
    and are always emitted in the canonical "[SPARK-<number>]" form. Bracketed
    components are upper-cased. The result is ordered as JIRA ref(s), component(s),
    then the remaining description.

    :param text: the original pull request title
    :return: the standardized title; titles that already comply, or that contain
             neither a JIRA ref nor a component, come back unchanged

    >>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful")
    '[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful'
    >>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests")
    '[SPARK-4123] [PROJECT INFRA] [WIP] Show new dependencies added in pull requests'
    >>> standardize_jira_ref("[MLlib] Spark 5954: Top by key")
    '[SPARK-5954] [MLLIB] Top by key'
    >>> standardize_jira_ref("[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl")
    '[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl'
    >>> standardize_jira_ref("SPARK-1094 Support MiMa for reporting binary compatibility accross versions.")
    '[SPARK-1094] Support MiMa for reporting binary compatibility accross versions.'
    >>> standardize_jira_ref("[WIP] [SPARK-1146] Vagrant support for Spark")
    '[SPARK-1146] [WIP] Vagrant support for Spark'
    >>> standardize_jira_ref("SPARK-1032. If Yarn app fails before registering, app master stays aroun...")
    '[SPARK-1032] If Yarn app fails before registering, app master stays aroun...'
    >>> standardize_jira_ref("[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.")
    '[SPARK-6250] [SPARK-6146] [SPARK-5911] [SQL] Types are now reserved words in DDL parser.'
    >>> standardize_jira_ref("SPARK1094 Support MiMa")
    '[SPARK-1094] Support MiMa'
    >>> standardize_jira_ref("Additional information for users building from source code")
    'Additional information for users building from source code'
    """
    # If the string is compliant, no need to process any further
    if re.search(r'^\[SPARK-[0-9]{3,6}\] (\[[A-Z0-9_\s,]+\] )+\S+', text):
        return text

    # Extract JIRA ref(s): capture only the issue number so the reference can be
    # rebuilt in canonical form no matter which separator (if any) the author used
    jira_pattern = re.compile(r'SPARK[-\s]*([0-9]{3,6})', re.IGNORECASE)
    jira_refs = ['[SPARK-%s]' % number for number in jira_pattern.findall(text)]
    text = jira_pattern.sub('', text)

    # Extract spark component(s):
    # Look for alphanumeric chars, spaces, dashes, periods, and/or commas.
    # The dash is escaped and placed last so it can never be read as a range.
    component_pattern = re.compile(r'(\[[\w\s,.\-]+\])', re.IGNORECASE)
    components = []
    for component in component_pattern.findall(text):
        components.append(component.upper())
        text = text.replace(component, '')

    # Cleanup any remaining leading symbols (empty brackets, colons, periods, ...)
    leading_symbols = re.search(r'^\W+(.*)', text)
    if leading_symbols is not None:
        text = leading_symbols.group(1)

    # Assemble full text (JIRA ref(s), module(s), remaining text)
    clean_text = ' '.join([' '.join(jira_refs), ' '.join(components), text.strip()])

    # Replace multiple spaces with a single space, e.g. if no jira refs and/or
    # components were included
    return re.sub(r'\s+', ' ', clean_text.strip())
340+
341+
def main():
    """
    Interactively merge a GitHub pull request and optionally update its JIRA.

    Asks which pull request to merge, offers a standardized rewrite of its title,
    and then either cherry-picks the existing merge commit (when asfgit has already
    closed the PR) or merges it into its target branch. After a fresh merge the
    commit can be picked into further branches, and the title is handed to
    resolve_jira_issues() when the JIRA library and credentials are available.
    """
    # clean_up() reads this module-level name when restoring the starting commit
    global original_head

    os.chdir(SPARK_HOME)
    original_head = run_cmd("git rev-parse HEAD")[:8]

    branches = get_json("%s/branches" % GITHUB_API_BASE)
    all_names = [branch['name'] for branch in branches]
    release_branches = [name for name in all_names if name.startswith("branch-")]
    # Assumes branch names can be sorted lexicographically
    latest_branch = sorted(release_branches, reverse=True)[0]

    pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): ")
    pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
    pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))

    url = pr["url"]

    # Decide whether to use the modified title or not; the user is only asked
    # when standardization actually changed something
    original_title = pr["title"]
    modified_title = standardize_jira_ref(original_title)
    title = original_title
    if modified_title != original_title:
        print("I've re-written the title as follows to match the standard format:")
        print("Original: %s" % original_title)
        print("Modified: %s" % modified_title)
        answer = raw_input("Would you like to use the modified title? (y/n): ")
        if answer.lower() == "y":
            title = modified_title
            print("Using modified title:")
        else:
            print("Using original title:")
        print(title)

    body = pr["body"]
    target_ref = pr["base"]["ref"]
    user_login = pr["user"]["login"]
    base_ref = pr["head"]["ref"]
    pr_repo_desc = "%s/%s" % (user_login, base_ref)

    # Merged pull requests don't appear as merged in the GitHub API;
    # Instead, they're closed by asfgit.
    asfgit_closures = [
        event for event in pr_events
        if event["actor"]["login"] == "asfgit" and event["event"] == "closed"]

    if asfgit_closures:
        # Already merged: treat this run as a backport of the existing commit
        merge_hash = asfgit_closures[0]["commit_id"]
        commit_json = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))
        message = commit_json["commit"]["message"]

        print("Pull request %s has already been merged, assuming you want to backport" % pr_num)
        verify_cmd = ['git', 'rev-parse', '--quiet', '--verify', "%s^{commit}" % merge_hash]
        commit_is_downloaded = run_cmd(verify_cmd).strip() != ""
        if not commit_is_downloaded:
            fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num)

        print("Found commit %s:\n%s" % (merge_hash, message))
        cherry_pick(pr_num, merge_hash, latest_branch)
        sys.exit(0)

    if not pr["mergeable"]:
        msg = ("Pull request %s is not mergeable in its current form.\n" % pr_num) + \
            "Continue? (experts only!)"
        continue_maybe(msg)

    print("\n=== Pull Request #%s ===" % pr_num)
    print("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % (
        title, pr_repo_desc, target_ref, url))
    continue_maybe("Proceed with merging pull request #%s?" % pr_num)

    merged_refs = [target_ref]

    merge_hash = merge_pr(pr_num, target_ref, title, body, pr_repo_desc)

    # Keep offering cherry-picks until the user declines
    pick_prompt = "Would you like to pick %s into another branch?" % merge_hash
    while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y":
        merged_refs.append(cherry_pick(pr_num, merge_hash, latest_branch))

    if not JIRA_IMPORTED:
        print("Could not find jira-python library. Run 'sudo pip install jira-python' to install.")
        print("Exiting without trying to close the associated JIRA.")
    elif JIRA_USERNAME and JIRA_PASSWORD:
        continue_maybe("Would you like to update an associated JIRA?")
        jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num)
        resolve_jira_issues(title, merged_refs, jira_comment)
    else:
        print("JIRA_USERNAME and JIRA_PASSWORD not set")
        print("Exiting without trying to close the associated JIRA.")
351-
else:
352-
print "Could not find jira-python library. Run 'sudo pip install jira-python' to install."
353-
print "Exiting without trying to close the associated JIRA."
429+
430+
if __name__ == "__main__":
    import doctest
    import sys

    # Run the docstring examples first and refuse to start a merge if any of them
    # fail; testmod() reports (number of failures, number of examples attempted).
    (failure_count, test_count) = doctest.testmod()
    if failure_count:
        sys.exit(-1)

    main()

0 commit comments

Comments
 (0)