| #!/usr/bin/env python |
| """ |
| A script that runs clang-format on changes detected via git. It will |
| report if running clang-format generated any changes. |
| |
| In CI, the script considers it a failure if running clang-format makes a change. |
| In the pre-commit hook, the user is prompted to apply any clang-format changes. |
| Running tools/clang_format.py manually with no arguments should replicate the pre-commit hook behavior. |
| |
| Only files that are in CLANG_FORMAT_WHITELIST are checked. |
| """ |
| import subprocess |
| import os |
| import argparse |
| import difflib |
| import re |
| |
| |
| # Whitelist of directories to check. All files that in that directory |
| # (recursively) will be checked. |
| CLANG_FORMAT_WHITELIST = ["torch/csrc/jit/", "test/cpp/jit/"] |
| |
| CPP_FILE_REGEX = re.compile("^.*\\.(h|cpp|cc|c|hpp)$") |
| CPP_FILE_REGEX = re.compile(".*\\.(h|cpp|cc|c|hpp)$") |
| # @@ -start,count +start,count @@ |
| CHUNK_PATTERN = r"^@@\s+-\d+(?:,\d+)?\s+\+(\d+)(?:,(\d+))?\s+@@" |
| |
| |
| def parse_args(): |
| parser = argparse.ArgumentParser( |
| description="Execute clang-format on your working copy changes." |
| ) |
| parser.add_argument( |
| "-d", |
| "--diff", |
| default="HEAD", |
| help="Git revision to diff against to get changes", |
| ) |
| parser.add_argument( |
| "--accept-changes", |
| action="store_true", |
| default=False, |
| help=( |
| "If true, apply whatever changes clang-format creates. " |
| "Otherwise, just print the changes and exit" |
| ), |
| ) |
| parser.add_argument( |
| "--check-all", |
| action="store_true", |
| default=False, |
| help="If true, check all whitelisted files instead of just working copy changes", |
| ) |
| parser.add_argument("--verbose", "-v", action="store_true", default=False) |
| return parser.parse_args() |
| |
| |
| def get_whitelisted_files(): |
| """ |
| Parse CLANG_FORMAT_WHITELIST and resolve all directories. |
| Returns the set of whitelist cpp source files. |
| """ |
| matches = [] |
| for dir in CLANG_FORMAT_WHITELIST: |
| for root, dirnames, filenames in os.walk(dir): |
| for filename in filenames: |
| if CPP_FILE_REGEX.match(filename): |
| matches.append(os.path.join(root, filename)) |
| return set(matches) |
| |
| |
| def get_changed_files(rev): |
| """ |
| Get all changed files between the working tree and `rev` |
| """ |
| changed_files = ( |
| subprocess.check_output( |
| ["git", "diff-index", "--diff-filter=AMU", "--name-only", rev] |
| ) |
| .decode() |
| .split("\n") |
| ) |
| return set(changed_files) |
| |
| |
| def get_changed_lines(filename, revision): |
| """ |
| Given a filename and revision diff, return all the changed lines noted in the diff |
| Returns a list of (start_line, end_line) tuples. |
| """ |
| command = ["git", "diff-index", "--unified=0", revision, filename] |
| output = subprocess.check_output(command).decode() |
| changed_lines = [] |
| for chunk in re.finditer(CHUNK_PATTERN, output, re.MULTILINE): |
| start = int(chunk.group(1)) |
| count = int(chunk.group(2) or 1) |
| changed_lines.append((start, start + count)) |
| |
| return changed_lines |
| |
| |
| def run_clang_format(filename, lines, in_place): |
| args = ["clang-format", filename] |
| line_args = ["-lines={}:{}".format(i[0], i[1]) for i in lines] |
| args.extend(line_args) |
| if in_place: |
| args.append("-i") |
| |
| return subprocess.check_output(args).decode() |
| |
| |
| def get_clang_format_diff(filename, lines): |
| """ |
| Return a diff of the changes that running clang-format would make (or None). |
| """ |
| formatted_text = run_clang_format(filename, lines, in_place=False) |
| with open(filename) as orig: |
| orig_text = orig.read() |
| if formatted_text != orig_text: |
| orig_lines = orig_text.split("\n") |
| formatted_lines = formatted_text.split("\n") |
| return difflib.unified_diff( |
| orig_lines, formatted_lines, "original", "formatted" |
| ) |
| |
| |
| def main(): |
| args = parse_args() |
| |
| whitelisted_files = get_whitelisted_files() |
| |
| if args.check_all: |
| files_to_check = whitelisted_files |
| else: |
| changed_files = get_changed_files(args.diff) |
| files_to_check = changed_files & whitelisted_files |
| |
| if args.verbose: |
| print("Running clang-format on whitelisted files: ") |
| for f in files_to_check: |
| print(f) |
| |
| name_to_lines = {} |
| for f in files_to_check: |
| changed_lines = get_changed_lines(f, args.diff) |
| if len(changed_lines) != 0: |
| name_to_lines[f] = changed_lines |
| |
| if len(name_to_lines) == 0: |
| return |
| |
| name_to_diff = {} |
| for filename, lines in name_to_lines.items(): |
| diff = get_clang_format_diff(filename, lines) |
| if diff is not None: |
| name_to_diff[filename] = diff |
| |
| if args.accept_changes: |
| # run clang-format on the necessary files |
| for name, lines in name_to_lines.items(): |
| run_clang_format(name, lines, in_place=True) |
| |
| # add the changes so they will be committed |
| args = ["git", "add"] |
| args.extend(name_to_lines.keys()) |
| subprocess.check_output(args) |
| else: |
| if len(name_to_diff) == 0: |
| return |
| |
| print("ERROR: Running clang-format created changes: ") |
| for name, diff in name_to_diff.items(): |
| print("In " + name) |
| for l in diff: |
| print(l) |
| print("\n") |
| |
| |
| if __name__ == "__main__": |
| main() |