Skip to content

Commit d3c6b56

Browse files
authored
Merge pull request #8431 from jsquyres/pr/cherry-pick-checker
First cut at Git commit checks as Github Actions
2 parents 418081e + f54b614 commit d3c6b56

File tree

4 files changed

+379
-0
lines changed

4 files changed

+379
-0
lines changed

.github/workflows/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Be aware that changes to the contents of these files will affect the
2+
Pull Request in which you make the changes!
3+
4+
For example, if you create a PR that changes one of the Github Actions
5+
in this directory, it will be used in the CI *for that PR*.
6+
7+
You have been warned. :smile:
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"cherry pick required" : 0
3+
}
Lines changed: 336 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,336 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
5+
Sanity tests on git commits in a Github Pull Request.
6+
7+
This script is designed to run as a Github Action. It assumes environment
8+
variables that are available in the Github Action environment. Specifically:
9+
10+
* GITHUB_WORKSPACE: directory where the git clone is located
11+
* GITHUB_SHA: the git commit SHA of the artificial Github PR test merge commit
12+
* GITHUB_BASE_REF: the git ref for the base branch
13+
* GITHUB_TOKEN: token authorizing Github API usage
14+
* GITHUB_REPOSITORY: "org/repo" name of the Github repository of this PR
15+
* GITHUB_REF: string that includes this Github PR number
16+
17+
This script tests each git commit between (and not including) GITHUB_SHA and
18+
GITHUB_BASE_REF multiple ways:
19+
20+
1. Ensure that the committer and author do not match any bad patterns (e.g.,
21+
"root@", "localhost", etc.).
22+
23+
2. Ensure that a proper "Signed-off-by" line exists in the commit message.
24+
- Merge commits and reverts are exempted from this check.
25+
26+
3. If required (by the git-commit-checks.json config file), ensure that a
27+
"(cherry picked from commit ...)" line exists in the commit message.
28+
- Commits that are solely comprised of submodule updates are exempted from
29+
this check.
30+
- This check can also be disabled by adding "bot:notacherrypick" in the
31+
Pull Request description.
32+
33+
4. If a "(cherry picked from commit ...)" message exists, ensure that the commit
34+
hash it mentions exists in the git repository.
35+
36+
If all checks pass, the script exits with status 0. Otherwise, it exits with
37+
status 1.
38+
39+
"""
40+
41+
import os
42+
import re
43+
import git
44+
import json
45+
import copy
46+
import argparse
47+
48+
from github import Github
49+
50+
GOOD = "good"
51+
BAD = "bad"
52+
53+
GITHUB_WORKSPACE = os.environ.get('GITHUB_WORKSPACE')
54+
GITHUB_SHA = os.environ.get('GITHUB_SHA')
55+
GITHUB_BASE_REF = os.environ.get('GITHUB_BASE_REF')
56+
GITHUB_TOKEN = os.environ.get('GITHUB_TOKEN')
57+
GITHUB_REPOSITORY = os.environ.get('GITHUB_REPOSITORY')
58+
GITHUB_REF = os.environ.get('GITHUB_REF')
59+
60+
# Sanity check
61+
if (GITHUB_WORKSPACE is None or
62+
GITHUB_SHA is None or
63+
GITHUB_BASE_REF is None or
64+
GITHUB_TOKEN is None or
65+
GITHUB_REPOSITORY is None or
66+
GITHUB_REF is None):
67+
print("Error: this script is designed to run as a Github Action")
68+
exit(1)
69+
70+
#----------------------------------------------------------------------------
71+
72+
"""
73+
Simple helper to make a 1-line git commit message summary.
74+
"""
75+
def make_commit_message(repo, hash):
76+
commit = repo.commit(hash)
77+
lines = commit.message.split('\n')
78+
message = lines[0][:50]
79+
if len(lines[0]) > 50:
80+
message += "..."
81+
82+
return message
83+
84+
#----------------------------------------------------------------------------
85+
86+
"""
87+
The results dictionary is in the following format:
88+
89+
results[GOOD or BAD][commit hash][check name] = message
90+
91+
If the message is None, there's nothing to print.
92+
93+
A git commit hash will be in either the GOOD or the BAD results -- not both.
94+
"""
95+
def print_results(results, repo, hashes):
96+
def _print_list(entries, prefix=""):
97+
for hash, entry in entries.items():
98+
print(f"{prefix}* {hash[:8]}: {make_commit_message(repo, hash)}")
99+
for check_name, message in entry.items():
100+
if message is not None:
101+
print(f"{prefix} * {check_name}: {message}")
102+
103+
# First, print all the commits that have only-good results
104+
if len(results[GOOD]) > 0:
105+
print("\nThe following commits passed all tests:\n")
106+
_print_list(results[GOOD])
107+
108+
# Now print all the results that are bad
109+
if len(results[BAD]) > 0:
110+
# The "::error ::" token will cause Github to highlight these
111+
# lines as errors
112+
print(f"\n::error ::The following commits caused this test to fail\n")
113+
_print_list(results[BAD], "::error ::")
114+
115+
#----------------------------------------------------------------------------
116+
117+
"""
118+
Global regexp, because we use it every time we call
119+
check_signed_off() (i.e., for each commit in this PR)
120+
"""
121+
prog_sob = re.compile(r'Signed-off-by: (.+) <(.+)>')
122+
123+
def check_signed_off(config, repo, commit):
124+
# If the message starts with "Revert" or if the commit is a
125+
# merge, don't require a signed-off-by
126+
if commit.message.startswith("Revert "):
127+
return GOOD, "skipped (revert)"
128+
elif len(commit.parents) == 2:
129+
return GOOD, "skipped (merge)"
130+
131+
matches = prog_sob.search(commit.message)
132+
if not matches:
133+
return BAD, "does not contain a valid Signed-off-by line"
134+
135+
return GOOD, None
136+
137+
#----------------------------------------------------------------------------
138+
139+
def check_email(config, repo, commit):
140+
emails = {
141+
"author" : commit.author.email.lower(),
142+
"committer" : commit.committer.email.lower(),
143+
}
144+
145+
for id, email in emails.items():
146+
for pattern in config['bad emails']:
147+
match = re.search(pattern, email)
148+
if match:
149+
return BAD, f"{id} email address ({email}) contains '{pattern}'"
150+
151+
return GOOD, None
152+
153+
#----------------------------------------------------------------------------
154+
155+
"""
156+
Global regexp, because we use it every time we call check_cherry_pick()
157+
(i.e., for each commit in this PR)
158+
"""
159+
prog_cp = re.compile(r'\(cherry picked from commit ([a-z0-9]+)\)')
160+
161+
def check_cherry_pick(config, repo, commit):
162+
def _is_entriely_submodule_updates(repo, commit):
163+
# If it's a merge commit, that doesn't fit our definition of
164+
# "entirely submodule updates"
165+
if len(commit.parents) == 2:
166+
return False
167+
168+
# Check the diffs of this commit compared to the prior commit,
169+
# and see if all the changes are updates to submodules.
170+
submodule_paths = [ x.path for x in repo.submodules ]
171+
diffs = repo.commit(f"{commit}~1").tree.diff(commit)
172+
for diff in diffs:
173+
if diff.a_path not in submodule_paths:
174+
# If we get here, we found a diff that was not exclusively
175+
# a submodule update.
176+
return False
177+
178+
# If we get here, then all the diffs were submodule updates.
179+
return True
180+
181+
# If this commit is solely comprised of submodule updates, don't
182+
# require a cherry pick message.
183+
if len(repo.submodules) > 0 and _is_entirely_submodule_updates(repo, commit):
184+
return GOOD, "skipped (submodules updates)"
185+
186+
non_existent = dict()
187+
found_cherry_pick_line = False
188+
for match in prog_cp.findall(commit.message):
189+
found_cherry_pick_line = True
190+
try:
191+
c = repo.commit(match)
192+
except ValueError as e:
193+
# These errors mean that the git library recognized the
194+
# hash as a valid commit, but the GitHub Action didn't
195+
# fetch the entire repo, so we don't have all the meta
196+
# data about this commit. Bottom line: it's a good hash.
197+
# So -- no error here.
198+
pass
199+
except git.BadName as e:
200+
# Use a dictionary to track the non-existent hashes, just
201+
# on the off chance that the same non-existent hash exists
202+
# more than once in a single commit message (i.e., the
203+
# dictionary will effectively give us de-duplication for
204+
# free).
205+
non_existent[match] = True
206+
207+
# Process the results for this commit
208+
if found_cherry_pick_line:
209+
if len(non_existent) == 0:
210+
return GOOD, None
211+
else:
212+
str = f"contains a cherry pick message that refers to non-existent commit"
213+
if len(non_existent) > 1:
214+
str += "s"
215+
str += ": "
216+
str += ", ".join(non_existent)
217+
return BAD, str
218+
219+
else:
220+
if config['cherry pick required']:
221+
return BAD, "does not include a cherry pick message"
222+
else:
223+
return GOOD, None
224+
225+
#----------------------------------------------------------------------------
226+
227+
def check_all_commits(config, repo):
228+
# Get a list of commits that we'll be examining. Use the progromatic form
229+
# of "git log GITHUB_BASE_REF..GITHUB_SHA" (i.e., "git log ^GITHUB_BASE_REF
230+
# GITHUB_SHA") to do the heavy lifting to find that set of commits.
231+
git_cli = git.cmd.Git(GITHUB_WORKSPACE)
232+
hashes = git_cli.log(f"--pretty=format:%h", f"origin/{GITHUB_BASE_REF}..{GITHUB_SHA}").splitlines()
233+
234+
# The first entry in the list will be the artificial Github merge commit for
235+
# this PR. We don't want to examine this commit.
236+
del hashes[0]
237+
238+
#------------------------------------------------------------------------
239+
240+
# Make an empty set of nested dictionaries to fill in, below. We initially
241+
# create a "full" template dictionary (with all the hashes for both GOOD and
242+
# BAD results), but will trim some of them later.
243+
template = { hash : dict() for hash in hashes }
244+
results = {
245+
GOOD : copy.deepcopy(template),
246+
BAD : copy.deepcopy(template),
247+
}
248+
249+
for hash in hashes:
250+
overall = GOOD
251+
252+
# Do the checks on this commit
253+
commit = repo.commit(hash)
254+
for check_fn in [check_signed_off, check_email, check_cherry_pick]:
255+
result, message = check_fn(config, repo, commit)
256+
overall = BAD if result == BAD else overall
257+
258+
results[result][hash][check_fn.__name__] = message
259+
260+
# Trim the results dictionary so that a hash only appears in GOOD *or*
261+
# BAD -- not both. Specifically:
262+
#
263+
# 1. If a hash has BAD results, delete all of its results from GOOD.
264+
# 2. If a hash has only GOOD results, delete its empty entry from BAD.
265+
if overall == BAD:
266+
del results[GOOD][hash]
267+
else:
268+
del results[BAD][hash]
269+
270+
return results, hashes
271+
272+
#----------------------------------------------------------------------------
273+
274+
"""
275+
If "bot:notacherrypick" is in the PR description, then disable the
276+
cherry-pick message requirement.
277+
"""
278+
def check_github_pr_description(config):
279+
g = Github(GITHUB_TOKEN)
280+
repo = g.get_repo(GITHUB_REPOSITORY)
281+
282+
# Extract the PR number from GITHUB_REF
283+
match = re.search("/(\d+)/", GITHUB_REF)
284+
pr_num = int(match.group(1))
285+
pr = repo.get_pull(pr_num)
286+
287+
if "bot:notacherrypick" in pr.body:
288+
config['cherry pick required'] = False
289+
290+
#----------------------------------------------------------------------------
291+
292+
def load_config():
293+
# Defaults
294+
config = {
295+
'cherry pick required' : False,
296+
'permit empty' : False,
297+
'bad emails' : [
298+
'^root@',
299+
'localhost',
300+
'localdomain',
301+
],
302+
}
303+
304+
# If the config file exists, read it in and replace default values
305+
# with the values from the file.
306+
filename = os.path.join(GITHUB_WORKSPACE, '.github',
307+
'workflows', 'git-commit-checks.json')
308+
if os.path.exists(filename):
309+
with open(filename) as fp:
310+
new_config = json.load(fp)
311+
for key in new_config:
312+
config[key] = new_config[key]
313+
314+
return config
315+
316+
#----------------------------------------------------------------------------
317+
318+
def main():
319+
config = load_config()
320+
check_github_pr_description(config)
321+
322+
repo = git.Repo(GITHUB_WORKSPACE)
323+
results, hashes = check_all_commits(config, repo)
324+
print_results(results, repo, hashes)
325+
326+
if len(results[BAD]) == 0:
327+
print("\nTest passed: everything was good!")
328+
exit(0)
329+
else:
330+
print("\nTest failed: sad panda")
331+
exit(1)
332+
333+
#----------------------------------------------------------------------------
334+
335+
if __name__ == "__main__":
336+
main()
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
name: Git commit checker
2+
3+
on:
4+
pull_request:
5+
# We don't need this to be run on all types of PR behavior
6+
# See https://docs.github.com/en/actions/reference/events-that-trigger-workflows#pull_request
7+
types:
8+
- opened
9+
- synchronize
10+
- edited
11+
12+
jobs:
13+
ci:
14+
runs-on: ubuntu-latest
15+
steps:
16+
- name: Check out the code
17+
uses: actions/checkout@v2
18+
with:
19+
# Get all branches and history
20+
fetch-depth: 0
21+
22+
- name: Setup Python
23+
uses: actions/setup-python@v2
24+
with:
25+
python-version: '3.x'
26+
27+
- name: Get the GitPython and PyGithub modules
28+
run: pip install gitpython PyGithub
29+
30+
- name: Check all git commits
31+
run: $GITHUB_WORKSPACE/.github/workflows/git-commit-checks.py
32+
env:
33+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

0 commit comments

Comments
 (0)