| #!/usr/bin/env python |
| # |
| # ===- git-clang-format - ClangFormat Git Integration ---------*- python -*--===# |
| # |
| # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| # See https://llvm.org/LICENSE.txt for license information. |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| # |
| # ===------------------------------------------------------------------------===# |
| |
| r""" |
| clang-format git integration |
| ============================ |
| |
| This file provides a clang-format integration for git. Put it somewhere in your |
| path and ensure that it is executable. Then, "git clang-format" will invoke |
| clang-format on the changes in current files or a specific commit. |
| |
| For further details, run: |
| git clang-format -h |
| |
| Requires Python 2.7 or Python 3 |
| """ |
| |
| from __future__ import absolute_import, division, print_function |
| import argparse |
| import collections |
| import contextlib |
| import errno |
| import os |
| import re |
| import subprocess |
| import sys |
| from clang_format_utils import get_and_check_clang_format, CLANG_FORMAT_PATH |
| |
# One-line synopsis shown at the top of `git clang-format -h`.
usage = "git clang-format [OPTIONS] [<commit>] [<commit>] [--] [<file>...]"

# Long description shown by `git clang-format -h`.  The git-config keys listed
# at the end mirror the `config.get("clangformat.<key>", ...)` lookups made in
# main(); --binary is deliberately absent because it is not read from
# git-config (it defaults to the fetched clang-format binary instead).
desc = """
If zero or one commits are given, run clang-format on all lines that differ
between the working directory and <commit>, which defaults to HEAD. Changes are
only applied to the working directory.

If two commits are given (requires --diff), run clang-format on all lines in the
second <commit> that differ from the first <commit>.

If --binary is unspecified, we will try to fetch the correct clang-format
binary for PyTorch

The following git-config settings set the default of the corresponding option:
  clangFormat.commit
  clangFormat.extensions
  clangFormat.style
"""

# Name of the temporary index file in which to save the output of clang-format.
# This file is created within the .git directory.
temp_index_basename = "clang-format-index"


# A run of changed lines: `start` is the first line number and `count` is the
# number of lines in the run.
Range = collections.namedtuple("Range", "start, count")
| |
| |
def main():
    """Parse the command line and run clang-format on the changed lines.

    The steps are: load defaults from git-config, parse options, make sure a
    clang-format binary is available, work out which commits and files were
    requested, collect the changed line ranges, format them into a new git
    tree, and finally either print the diff (--diff) or apply the result to
    the working directory.

    Returns a truthy value if `get_and_check_clang_format` reports failure and
    None otherwise.  Usage errors and failing git commands terminate the
    process with status 2 through die() / run().
    """
    config = load_git_config()

    # In order to keep '--' yet allow options after positionals, we need to
    # check for '--' ourselves.  (Setting nargs='*' throws away the '--', while
    # nargs=argparse.REMAINDER disallows options after positionals.)
    argv = sys.argv[1:]
    try:
        idx = argv.index("--")
    except ValueError:
        dash_dash = []
    else:
        dash_dash = argv[idx:]
        argv = argv[:idx]

    default_extensions = ",".join(
        [
            # From clang/lib/Frontend/FrontendOptions.cpp, all lower case
            "c",
            "h",  # C
            "m",  # ObjC
            "mm",  # ObjC++
            "cc",
            "cp",
            "cpp",
            "c++",
            "cxx",
            "hh",
            "hpp",
            "hxx",  # C++
            "cu",  # CUDA
            # Other languages that clang-format supports
            "proto",
            "protodevel",  # Protocol Buffers
            "java",  # Java
            "js",  # JavaScript
            "ts",  # TypeScript
            "cs",  # C Sharp
        ]
    )

    p = argparse.ArgumentParser(
        usage=usage,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=desc,
    )
    p.add_argument("--binary", default=None, help="path to clang-format")
    p.add_argument(
        "--commit",
        default=config.get("clangformat.commit", "HEAD"),
        help="default commit to use if none is specified",
    )
    p.add_argument(
        "--diff",
        action="store_true",
        help="print a diff instead of applying the changes",
    )
    p.add_argument(
        "--extensions",
        default=config.get("clangformat.extensions", default_extensions),
        help=(
            "comma-separated list of file extensions to format, "
            "excluding the period and case-insensitive"
        ),
    )
    p.add_argument(
        "-f", "--force", action="store_true", help="allow changes to unstaged files"
    )
    p.add_argument(
        "-p", "--patch", action="store_true", help="select hunks interactively"
    )
    p.add_argument(
        "-q", "--quiet", action="count", default=0, help="print less information"
    )
    p.add_argument(
        "--style",
        default=config.get("clangformat.style", None),
        help="passed to clang-format",
    )
    p.add_argument(
        "-v", "--verbose", action="count", default=0, help="print extra information"
    )
    # We gather all the remaining positional arguments into 'args' since we need
    # to use some heuristics to determine whether or not <commit> was present.
    # However, to print pretty messages, we make use of metavar and help.
    p.add_argument(
        "args",
        nargs="*",
        metavar="<commit>",
        help="revision from which to compute the diff",
    )
    p.add_argument(
        "ignored",
        nargs="*",
        metavar="<file>...",
        help="if specified, only consider differences in these files",
    )
    opts = p.parse_args(argv)

    # Fold -q into -v so that a single signed verbosity level remains.
    opts.verbose -= opts.quiet
    del opts.quiet

    # NOTE(review): get_and_check_clang_format comes from clang_format_utils;
    # presumably it fetches/verifies the binary at CLANG_FORMAT_PATH -- confirm.
    ok = get_and_check_clang_format(opts.verbose)
    if not ok:
        # We have to invert because False -> 0, which is the code to be
        # returned if everything is okay.
        return not ok

    if opts.binary is None:
        opts.binary = CLANG_FORMAT_PATH

    commits, files = interpret_args(opts.args, dash_dash, opts.commit)
    # Check the upper bound first: it used to live in the `else` branch of the
    # `len(commits) > 1` test, where it could never trigger.
    if len(commits) > 2:
        die("at most two commits allowed; %d given" % len(commits))
    if len(commits) > 1 and not opts.diff:
        die("--diff is required when two commits are given")

    changed_lines = compute_diff_and_extract_lines(commits, files)
    if opts.verbose >= 1:
        # Remember every file in the diff so the filtered-out ones can be
        # reported below.
        ignored_files = set(changed_lines)
    filter_by_extension(changed_lines, opts.extensions.lower().split(","))
    if opts.verbose >= 1:
        ignored_files.difference_update(changed_lines)
        if ignored_files:
            print("Ignoring changes in the following files (wrong extension):")
            for filename in ignored_files:
                print(" %s" % filename)
        if changed_lines:
            print("Running clang-format on the following files:")
            for filename in changed_lines:
                print(" %s" % filename)
    if not changed_lines:
        print("no modified files to format")
        return

    # The paths in the computed diff are relative to the top of the
    # repository, so we must cd there before accessing those files.
    cd_to_toplevel()
    if len(commits) > 1:
        old_tree = commits[1]
        new_tree = run_clang_format_and_save_to_tree(
            changed_lines, revision=commits[1], binary=opts.binary, style=opts.style
        )
    else:
        old_tree = create_tree_from_workdir(changed_lines)
        new_tree = run_clang_format_and_save_to_tree(
            changed_lines, binary=opts.binary, style=opts.style
        )
    if opts.verbose >= 1:
        print("old tree: %s" % old_tree)
        print("new tree: %s" % new_tree)

    if old_tree == new_tree:
        if opts.verbose >= 0:
            print("clang-format did not modify any files")
    elif opts.diff:
        print_diff(old_tree, new_tree)
    else:
        changed_files = apply_changes(
            old_tree, new_tree, force=opts.force, patch_mode=opts.patch
        )
        if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1:
            print("changed files:")
            for filename in changed_files:
                print(" %s" % filename)
| |
| |
def load_git_config(non_string_options=None):
    """Return the git configuration as a dictionary.

    All options are assumed to be strings unless listed in
    `non_string_options`, which is a dictionary mapping option name (in lower
    case) to either "--bool" or "--int".  Such options are re-read through
    `git config <type-flag> <name>` so that git canonicalizes the value.

    Keys of the returned dictionary are the names exactly as printed by
    `git config --list`; values are strings.  A key that is set without a
    value is reported as "true"."""
    if non_string_options is None:
        non_string_options = {}
    out = {}
    for entry in run("git", "config", "--list", "--null").split("\0"):
        if not entry:
            continue
        # With --null every entry is "<name>\n<value>" terminated by NUL.
        name, newline, value = entry.partition("\n")
        if not newline:
            # A key written without "= value" is printed with no newline at
            # all; git treats such a setting as boolean true.
            value = "true"
        if name in non_string_options:
            value = run("git", "config", non_string_options[name], name)
        out[name] = value
    return out
| |
| |
def interpret_args(args, dash_dash, default_commit):
    """Split `args` of the form "[commits] [--] [files]" into (commits, files).

    The caller must already have moved "--" and everything after it out of
    `args` and into `dash_dash`.

    When `dash_dash` is non-empty, everything in `args` is taken to be a commit
    (and validated as such); the files are whatever follows the "--".  Without
    "--", arguments are examined left to right: leading revisions are popped
    off `args` in place and become the commits, and whatever remains in `args`
    is the file list.  If no commit is found, `[default_commit]` is used."""
    # Nothing on the command line at all.
    if not dash_dash and not args:
        return [default_commit], []

    # No "--": peel revisions off the front until the first non-revision.
    if not dash_dash:
        leading = []
        while args and disambiguate_revision(args[0]):
            leading.append(args.pop(0))
        return (leading or [default_commit]), args

    # Explicit "--": everything to its left must be a commit.
    commits = args if len(args) != 0 else [default_commit]
    for candidate in commits:
        kind = get_object_type(candidate)
        if kind in ("commit", "tag"):
            continue
        if kind is None:
            die("'%s' is not a commit" % candidate)
        else:
            die("'%s' is a %s, but a commit was expected" % (candidate, kind))
    return commits, dash_dash[1:]
| |
| |
def disambiguate_revision(value):
    """Tell whether `value` names a revision (True) or a file (False).

    Dies if `value` is a git object that is neither a commit nor a tag."""
    # `git rev-parse` itself exits with an error message when `value` is
    # ambiguous (neither a commit nor a file); run() then terminates us.
    run("git", "rev-parse", value, verbose=False)
    kind = get_object_type(value)
    if kind is None:
        # Not a git object at all, so it must be a path.
        return False
    names_a_revision = kind in ("commit", "tag")
    if names_a_revision:
        return True
    die("`%s` is a %s, but a commit or filename was expected" % (value, kind))
| |
| |
def get_object_type(value):
    """Return the git object type of `value` as a string.

    The type is whatever `git cat-file -t` prints; None is returned when that
    command fails, i.e. `value` is not a valid git object."""
    cat_file = subprocess.Popen(
        ["git", "cat-file", "-t", value],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # stderr is captured only to keep git's complaint off the terminal.
    out, _unused_err = cat_file.communicate()
    if cat_file.returncode == 0:
        return convert_string(out.strip())
    return None
| |
| |
def compute_diff_and_extract_lines(commits, files):
    """Run compute_diff() and feed its output through extract_lines().

    Returns the dictionary built by extract_lines().  Exits with status 2 if
    the diff command fails."""
    differ = compute_diff(commits, files)
    ranges_by_file = extract_lines(differ.stdout)
    differ.stdout.close()
    if differ.wait() != 0:
        # Assume error was already printed to stderr.
        sys.exit(2)
    return ranges_by_file
| |
| |
def compute_diff(commits, files):
    """Start the git process that produces the diff for `commits`.

    The returned subprocess object's `stdout` yields a patch: the differences
    between the working directory and the commit when a single commit is
    given (`git diff-index`), or between the two commits when more than one
    is given (`git diff-tree`).  The patch is restricted to `files` if that
    list is non-empty and uses zero lines of context (-U0)."""
    comparing_commits = len(commits) > 1
    git_tool = "diff-tree" if comparing_commits else "diff-index"
    cmd = ["git", git_tool, "-p", "-U0"] + commits + ["--"] + list(files)
    differ = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # The child gets a stdin pipe that is closed right away.
    differ.stdin.close()
    return differ
| |
| |
def extract_lines(patch_file):
    """Extract the changed lines in `patch_file`.

    `patch_file` is an iterable of patch lines (bytes or str).  The input must
    have been produced with ``-U0``, meaning unidiff format with zero lines of
    context.

    The return value is a dict mapping filename to a list of `Range`s
    (start_line, line_count), one per hunk that adds or changes lines.  Hunks
    that only delete lines (count 0 on the "+" side) are skipped."""
    matches = {}
    for line in patch_file:
        line = convert_string(line)
        # "+++ b/<path>": drop the first path component (the "b/" prefix).
        match = re.search(r"^\+\+\+\ [^/]+/(.*)", line)
        if match:
            # Besides the line ending, strip the TAB that git appends after
            # the name when it contains a space; otherwise such files would be
            # recorded under a name that does not exist.
            filename = match.group(1).rstrip("\r\n\t")
        # "@@ -a[,b] +start[,count] @@": only the "+" side matters here.
        match = re.search(r"^@@ -[0-9,]+ \+(\d+)(,(\d+))?", line)
        if match:
            start_line = int(match.group(1))
            # An omitted count means exactly one line.
            line_count = 1
            if match.group(3):
                line_count = int(match.group(3))
            if line_count > 0:
                matches.setdefault(filename, []).append(Range(start_line, line_count))
    return matches
| |
| |
def filter_by_extension(dictionary, allowed_extensions):
    """Remove from `dictionary` every key whose extension is not allowed.

    `allowed_extensions` must be a collection of lowercase file extensions,
    excluding the period.  The extension of a key is the text after its last
    period, compared case-insensitively; a key without any period is kept only
    if the empty string is among the allowed extensions.  The dictionary is
    modified in place and nothing is returned."""
    permitted = frozenset(allowed_extensions)
    rejected = []
    for path in dictionary:
        _stem, dot, suffix = path.rpartition(".")
        if dot:
            keep = suffix.lower() in permitted
        else:
            # No period anywhere in the name.
            keep = "" in permitted
        if not keep:
            rejected.append(path)
    # Delete after the scan so the dictionary is not resized while iterating.
    for path in rejected:
        del dictionary[path]
| |
| |
def cd_to_toplevel():
    """Make the top-level directory of the git repository the current directory."""
    os.chdir(run("git", "rev-parse", "--show-toplevel"))
| |
| |
def create_tree_from_workdir(filenames):
    """Build a git tree holding the working-directory versions of `filenames`.

    Returns the object ID (SHA-1) of the created tree."""
    tree_id = create_tree(filenames, mode="--stdin")
    return tree_id
| |
| |
def run_clang_format_and_save_to_tree(
    changed_lines, revision=None, binary="clang-format", style=None
):
    """Format every file in `changed_lines` and store the results in a git tree.

    `changed_lines` maps filename to the line ranges to format.  Files are read
    from `revision` when it is given and from the working directory otherwise.

    Returns the object ID (SHA-1) of the created tree."""

    def file_mode(path):
        # File mode as an octal string with a plain leading "0".
        if revision:
            ls_tree_cmd = [
                "git",
                "ls-tree",
                "%s:%s" % (revision, os.path.dirname(path)),
                os.path.basename(path),
            ]
            ls_tree = subprocess.Popen(
                ls_tree_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE
            )
            listing = ls_tree.communicate()[0]
            # The first whitespace-separated field of the listing is the mode.
            octal = oct(int(listing.split()[0], 8))
        else:
            octal = oct(os.stat(path).st_mode)
        # Adjust python3 octal format so that it matches what git expects
        if octal.startswith("0o"):
            octal = "0" + octal[2:]
        return octal

    def index_entries():
        # Yields one "<mode> <blob>\t<filename>" record per formatted file.
        try:
            pairs = changed_lines.iteritems()  # Python 2
        except AttributeError:
            pairs = changed_lines.items()  # Python 3
        for filename, line_ranges in pairs:
            mode = file_mode(filename)
            blob_id = clang_format_to_blob(
                filename, line_ranges, revision=revision, binary=binary, style=style
            )
            yield "%s %s\t%s" % (mode, blob_id, filename)

    return create_tree(index_entries(), "--index-info")
| |
| |
def create_tree(input_lines, mode):
    """Create a tree object from the given input and return its object ID.

    If mode is '--stdin', `input_lines` must be an iterable of filenames.  If
    mode is '--index-info', it must be an iterable of values suitable for
    "git update-index --index-info", such as
    "<mode> <SP> <sha1> <TAB> <filename>".  Any other mode is invalid."""
    assert mode in ("--stdin", "--index-info")
    update_index_cmd = ["git", "update-index", "--add", "-z", mode]
    # Everything below runs against a scratch index.
    with temporary_index_file():
        updater = subprocess.Popen(update_index_cmd, stdin=subprocess.PIPE)
        for record in input_lines:
            # -z: records are NUL-terminated.
            updater.stdin.write(to_bytes("%s\0" % record))
        updater.stdin.close()
        exit_status = updater.wait()
        if exit_status != 0:
            die("`%s` failed" % " ".join(update_index_cmd))
        return run("git", "write-tree")
| |
| |
def clang_format_to_blob(
    filename, line_ranges, revision=None, binary="clang-format", style=None
):
    """Run clang-format on the given file and save the result to a git blob.

    Runs on the file in `revision` if not None, or on the file in the working
    directory if `revision` is None.

    `line_ranges` is an iterable of (start_line, line_count) pairs restricting
    which lines are formatted.  `binary` is the clang-format executable to run
    and `style`, when truthy, is passed to it as -style=<style>.

    Dies if the executable cannot be found or if any process in the pipeline
    exits with a non-zero status.

    Returns the object ID (SHA-1) of the created blob."""
    clang_format_cmd = [binary]
    if style:
        clang_format_cmd.extend(["-style=" + style])
    # Each range becomes -lines=<first>:<last>; the last line is computed
    # inclusively from the (start, count) pair.
    clang_format_cmd.extend(
        [
            "-lines=%s:%s" % (start_line, start_line + line_count - 1)
            for start_line, line_count in line_ranges
        ]
    )
    if revision:
        # The content is piped in from `git cat-file`, so clang-format only
        # learns the file's name through -assume-filename.
        clang_format_cmd.extend(["-assume-filename=" + filename])
        git_show_cmd = ["git", "cat-file", "blob", "%s:%s" % (revision, filename)]
        git_show = subprocess.Popen(
            git_show_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE
        )
        git_show.stdin.close()
        clang_format_stdin = git_show.stdout
    else:
        # clang-format is given the working-directory file by name; its stdin
        # pipe is created and closed below without anything being written.
        clang_format_cmd.extend([filename])
        git_show = None
        clang_format_stdin = subprocess.PIPE
    try:
        clang_format = subprocess.Popen(
            clang_format_cmd, stdin=clang_format_stdin, stdout=subprocess.PIPE
        )
        if clang_format_stdin == subprocess.PIPE:
            # Swap the PIPE constant for the real pipe object so that the
            # close() below works in both branches.
            clang_format_stdin = clang_format.stdin
    except OSError as e:
        if e.errno == errno.ENOENT:
            die('cannot find executable "%s"' % binary)
        else:
            raise
    # Close this process's handle on clang-format's input (git_show.stdout or
    # clang-format's own stdin pipe).
    clang_format_stdin.close()
    # The formatted output is piped straight into `git hash-object -w`, which
    # writes it to the object database and prints the blob ID.
    hash_object_cmd = ["git", "hash-object", "-w", "--path=" + filename, "--stdin"]
    hash_object = subprocess.Popen(
        hash_object_cmd, stdin=clang_format.stdout, stdout=subprocess.PIPE
    )
    clang_format.stdout.close()
    stdout = hash_object.communicate()[0]
    # Check the processes from the end of the pipeline backwards.
    if hash_object.returncode != 0:
        die("`%s` failed" % " ".join(hash_object_cmd))
    if clang_format.wait() != 0:
        die("`%s` failed" % " ".join(clang_format_cmd))
    if git_show and git_show.wait() != 0:
        die("`%s` failed" % " ".join(git_show_cmd))
    return convert_string(stdout).rstrip("\r\n")
| |
| |
@contextlib.contextmanager
def temporary_index_file(tree=None):
    """Context manager that points GIT_INDEX_FILE at a temporary index.

    The temporary index is created from `tree` (see create_temporary_index).
    On exit the previous value of GIT_INDEX_FILE is restored, or the variable
    is removed if it was not set before, and the temporary file is deleted."""
    index_path = create_temporary_index(tree)
    saved_value = os.environ.get("GIT_INDEX_FILE")
    os.environ["GIT_INDEX_FILE"] = index_path
    try:
        yield
    finally:
        if saved_value is not None:
            os.environ["GIT_INDEX_FILE"] = saved_value
        else:
            del os.environ["GIT_INDEX_FILE"]
        os.remove(index_path)
| |
| |
def create_temporary_index(tree=None):
    """Write a temporary index file inside the git directory and return its path.

    The index is populated from `tree` when one is given; with `tree` set to
    None an empty index is created."""
    git_dir = run("git", "rev-parse", "--git-dir")
    index_path = os.path.join(git_dir, temp_index_basename)
    source = "--empty" if tree is None else tree
    run("git", "read-tree", "--index-output=" + index_path, source)
    return index_path
| |
| |
def print_diff(old_tree, new_tree):
    """Show the differences between `old_tree` and `new_tree` on stdout."""
    # The porcelain 'diff' is used rather than the plumbing 'diff-tree'
    # because a person is going to read the output, and only the porcelain
    # command provides niceties such as color and pagination.
    #
    # Only modified files are shown: `new_tree` holds nothing but the files
    # that were formatted, so without the filter every other file would be
    # reported as deleted.
    diff_cmd = ["git", "diff", "--diff-filter=M", old_tree, new_tree, "--"]
    subprocess.check_call(diff_cmd)
| |
| |
def apply_changes(old_tree, new_tree, force=False, patch_mode=False):
    """Apply the changes in `new_tree` to the working directory.

    Bails out (exit status 2) if there are unstaged changes in the affected
    files and `force` is false.  If `patch_mode`, runs `git checkout --patch`
    to select hunks interactively; otherwise the files are checked out from
    `new_tree` unconditionally.

    Returns the list of files that differ between `old_tree` and `new_tree`."""
    diff_output = run(
        "git",
        "diff-tree",
        "--diff-filter=M",
        "-r",
        "-z",
        "--name-only",
        old_tree,
        new_tree,
    )
    # -z separates the names with NUL.  Dropping empty pieces makes an empty
    # diff yield [] rather than [""], which would otherwise be handed to git
    # as an empty pathspec.
    changed_files = [name for name in diff_output.split("\0") if name]
    if changed_files and not force:
        # "--" keeps a filename that starts with "-" from being parsed as an
        # option.
        unstaged_files = run(
            "git", "diff-files", "--name-status", "--", *changed_files
        )
        if unstaged_files:
            print(
                "The following files would be modified but have unstaged changes:",
                file=sys.stderr,
            )
            print(unstaged_files, file=sys.stderr)
            print("Please commit, stage, or stash them first.", file=sys.stderr)
            sys.exit(2)
    if patch_mode:
        # In patch mode, we could just as well create an index from the new tree
        # and checkout from that, but then the user will be presented with a
        # message saying "Discard ... from worktree".  Instead, we use the old
        # tree as the index and checkout from new_tree, which gives the slightly
        # better message, "Apply ... to index and worktree".  This is not quite
        # right, since it won't be applied to the user's index, but oh well.
        with temporary_index_file(old_tree):
            subprocess.check_call(["git", "checkout", "--patch", new_tree])
    else:
        with temporary_index_file(new_tree):
            run("git", "checkout-index", "-a", "-f")
    return changed_files
| |
| |
def run(*args, **kwargs):
    """Run the command given by `args` and return its standard output.

    Keyword arguments:
      stdin:   data to send to the command's standard input (default: none).
               Text is encoded as UTF-8 because the pipe is binary.
      verbose: if true (the default), report anything the command wrote to
               stderr on success, and report the exit status on failure.
      strip:   if true (the default), strip trailing line endings from the
               returned output.

    Returns the command's stdout as a string when it exits with status 0.
    Otherwise the captured stderr is echoed and the process exits with
    status 2.  Raises TypeError for any other keyword argument."""
    stdin = kwargs.pop("stdin", "")
    verbose = kwargs.pop("verbose", True)
    strip = kwargs.pop("strip", True)
    for name in kwargs:
        raise TypeError("run() got an unexpected keyword argument '%s'" % name)
    if stdin is not None:
        # communicate() needs bytes for a binary pipe; a str would fail on
        # Python 3.
        stdin = to_bytes(stdin)
    p = subprocess.Popen(
        args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE
    )
    stdout, stderr = p.communicate(input=stdin)

    stdout = convert_string(stdout)
    stderr = convert_string(stderr)

    if p.returncode == 0:
        if stderr:
            if verbose:
                print("`%s` printed to stderr:" % " ".join(args), file=sys.stderr)
                print(stderr.rstrip(), file=sys.stderr)
        if strip:
            stdout = stdout.rstrip("\r\n")
        return stdout
    if verbose:
        print("`%s` returned %s" % (" ".join(args), p.returncode), file=sys.stderr)
    # The command's own error output is shown even when not verbose.
    if stderr:
        print(stderr.rstrip(), file=sys.stderr)
    sys.exit(2)
| |
| |
def die(message):
    """Write "error: <message>" to stderr and exit with status 2."""
    sys.stderr.write("error: %s\n" % (message,))
    raise SystemExit(2)
| |
| |
def to_bytes(str_input):
    """Return `str_input` as binary data.

    Bytes are passed through untouched; anything else is encoded as UTF-8."""
    already_binary = isinstance(str_input, bytes)
    return str_input if already_binary else str_input.encode("utf-8")
| |
| |
def to_string(bytes_input):
    """Return `bytes_input` as a native `str`.

    A `str` is returned unchanged.  On Python 3, `bytes` are decoded from
    UTF-8 (previously this raised AttributeError because bytes have no
    `encode` method).  Any other text type, i.e. Python 2 `unicode`, is
    encoded as UTF-8, which yields a Python 2 `str`."""
    if isinstance(bytes_input, str):
        return bytes_input
    if isinstance(bytes_input, bytes):
        # Only reachable on Python 3, where bytes and str are distinct types.
        return bytes_input.decode("utf-8")
    return bytes_input.encode("utf-8")
| |
| |
| def convert_string(bytes_input): |
| try: |
| return to_string(bytes_input.decode("utf-8")) |
| except AttributeError: # 'str' object has no attribute 'decode'. |
| return str(bytes_input) |
| except UnicodeError: |
| return str(bytes_input) |
| |
| |
if __name__ == "__main__":
    # Use main()'s return value as the exit status: None (success) becomes 0
    # and the truthy value returned when clang-format could not be set up
    # becomes 1.  Previously the value was discarded and that failure exited 0.
    sys.exit(main())