blob: 21fafd79cc09650f21a54e712879ba23ff4e41ca [file] [log] [blame]
#!/usr/bin/env python
"""
A script that runs clang-format on changes detected via git. It will
report if running clang-format generated any changes.
In CI, the script considers it a failure if running clang-format makes a change.
In the pre-commit hook, the user is prompted to apply any clang-format changes.
Running tools/clang_format.py manually with no arguments should replicate the pre-commit hook behavior.
Only files that are in CLANG_FORMAT_WHITELIST are checked.
"""
import subprocess
import os
import argparse
import difflib
import re
# Whitelist of directories to check. All files in those directories
# (recursively) will be checked.
CLANG_FORMAT_WHITELIST = ["torch/csrc/jit/", "test/cpp/jit/"]

# Matches C/C++ source and header file names by their extension.
# (Used with `.match`, which anchors at the start, so no leading `^` is needed.)
CPP_FILE_REGEX = re.compile(r".*\.(h|cpp|cc|c|hpp)$")

# Hunk header of a unified diff:
# @@ -start,count +start,count @@
# Group 1 captures the new-file start line; group 2 captures the new-file
# line count, which is absent when the hunk header omits it.
CHUNK_PATTERN = r"^@@\s+-\d+(?:,\d+)?\s+\+(\d+)(?:,(\d+))?\s+@@"
def parse_args():
    """
    Build the command-line parser and parse the process arguments.
    Returns the argparse namespace with `diff`, `accept_changes`,
    `check_all` and `verbose` attributes.
    """
    accept_changes_help = (
        "If true, apply whatever changes clang-format creates. "
        "Otherwise, just print the changes and exit"
    )
    # (flags, keyword options) for every supported argument, in the order
    # they are registered with the parser.
    argument_table = (
        (
            ("-d", "--diff"),
            {
                "default": "HEAD",
                "help": "Git revision to diff against to get changes",
            },
        ),
        (
            ("--accept-changes",),
            {
                "action": "store_true",
                "default": False,
                "help": accept_changes_help,
            },
        ),
        (
            ("--check-all",),
            {
                "action": "store_true",
                "default": False,
                "help": "If true, check all whitelisted files instead of just working copy changes",
            },
        ),
        (
            ("--verbose", "-v"),
            {"action": "store_true", "default": False},
        ),
    )

    cli = argparse.ArgumentParser(
        description="Execute clang-format on your working copy changes."
    )
    for flags, options in argument_table:
        cli.add_argument(*flags, **options)
    return cli.parse_args()
def get_whitelisted_files():
    """
    Walk every directory listed in CLANG_FORMAT_WHITELIST (recursively) and
    collect the files whose name matches CPP_FILE_REGEX.
    Returns the set of matching paths, each joined onto the directory it was
    found in.
    """
    return {
        os.path.join(current_dir, file_name)
        for top_dir in CLANG_FORMAT_WHITELIST
        for current_dir, _subdirs, file_names in os.walk(top_dir)
        for file_name in file_names
        if CPP_FILE_REGEX.match(file_name)
    }
def get_changed_files(rev):
    """
    Get all changed files between the working tree and `rev`.
    Runs `git diff-index --diff-filter=AMU --name-only <rev>` and returns the
    set of paths it prints, one per output line.
    Raises subprocess.CalledProcessError if git exits with a non-zero status.
    """
    output = subprocess.check_output(
        ["git", "diff-index", "--diff-filter=AMU", "--name-only", rev]
    ).decode()
    # The output is newline-terminated, so a plain split leaves an empty
    # trailing entry; drop blank lines so "" is never reported as a file.
    return {path for path in output.split("\n") if path}
def get_changed_lines(filename, revision):
    """
    Given a filename and revision diff, return all the changed lines noted in the diff
    Returns a list of (start_line, end_line) tuples. Both ends are 1-based and
    inclusive, so each tuple can be passed to clang-format as
    `-lines=start:end` without touching lines outside the hunk.
    Raises subprocess.CalledProcessError if git exits with a non-zero status.
    """
    command = ["git", "diff-index", "--unified=0", revision, filename]
    output = subprocess.check_output(command).decode()
    changed_lines = []
    for chunk in re.finditer(CHUNK_PATTERN, output, re.MULTILINE):
        start = int(chunk.group(1))
        # A hunk header without an explicit count covers exactly one line.
        count_text = chunk.group(2)
        count = 1 if count_text is None else int(count_text)
        if count == 0:
            # Pure deletion: nothing was added on the new side, so there is
            # nothing to format (and `start` may be 0, which is not a valid
            # line number).
            continue
        # The hunk covers `count` lines beginning at `start`, so the last
        # changed line is start + count - 1.
        changed_lines.append((start, start + count - 1))
    return changed_lines
def run_clang_format(filename, lines, in_place):
    """
    Invoke clang-format on `filename`.
    `lines` is an iterable of (start, end) pairs; each one is passed as a
    `-lines=start:end` argument. When `in_place` is true, `-i` is appended.
    Returns the decoded standard output of the clang-format process.
    Raises subprocess.CalledProcessError if clang-format exits non-zero.
    """
    command = ["clang-format", filename]
    for span in lines:
        command.append("-lines={}:{}".format(span[0], span[1]))
    if in_place:
        command.append("-i")
    raw_output = subprocess.check_output(command)
    return raw_output.decode()
def get_clang_format_diff(filename, lines):
    """
    Return a diff of the changes that running clang-format would make (or None).
    `lines` is the list of (start, end) ranges handed to run_clang_format.
    The diff is an iterator of unified-diff lines without trailing newlines,
    labelled "original" and "formatted"; None is returned when the formatted
    text equals the current file contents.
    """
    formatted_text = run_clang_format(filename, lines, in_place=False)
    # Read the raw bytes and decode them the same way the clang-format output
    # is decoded, so that newline translation or a differing locale encoding
    # cannot make identical contents compare unequal.
    with open(filename, "rb") as orig:
        orig_text = orig.read().decode()
    if formatted_text == orig_text:
        return None
    # The inputs are split on "\n" and therefore carry no line endings;
    # lineterm="" keeps the generated header/hunk lines newline-free as well.
    return difflib.unified_diff(
        orig_text.split("\n"),
        formatted_text.split("\n"),
        "original",
        "formatted",
        lineterm="",
    )
def main():
    """
    Entry point: decide which files to check, then either apply clang-format
    to them (--accept-changes) or print the diff clang-format would produce.
    Returns None in every case; the process exit status is not set here.
    """
    options = parse_args()

    whitelisted_files = get_whitelisted_files()
    if options.check_all:
        files_to_check = whitelisted_files
    else:
        files_to_check = get_changed_files(options.diff) & whitelisted_files

    if options.verbose:
        print("Running clang-format on whitelisted files: ")
        for path in files_to_check:
            print(path)

    # Only the lines that changed relative to the diff revision are formatted.
    # NOTE(review): this also applies with --check-all, so files without any
    # changed lines are still skipped in that mode - confirm this is intended.
    name_to_lines = {}
    for path in files_to_check:
        changed_lines = get_changed_lines(path, options.diff)
        if changed_lines:
            name_to_lines[path] = changed_lines

    if not name_to_lines:
        return

    if options.accept_changes:
        # run clang-format on the necessary files
        for name, lines in name_to_lines.items():
            run_clang_format(name, lines, in_place=True)
        # add the changes so they will be committed
        git_add_command = ["git", "add"]
        git_add_command.extend(name_to_lines)
        subprocess.check_output(git_add_command)
        return

    # Report-only mode: the diffs are needed only here, so they are not
    # computed when changes are being applied in place.
    name_to_diff = {}
    for filename, lines in name_to_lines.items():
        diff = get_clang_format_diff(filename, lines)
        if diff is not None:
            name_to_diff[filename] = diff

    if not name_to_diff:
        return

    print("ERROR: Running clang-format created changes: ")
    for name, diff in name_to_diff.items():
        print("In " + name)
        for diff_line in diff:
            print(diff_line)
        print("\n")
# Run the formatter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()