| #!/usr/bin/env python3 | 
 | """ | 
 | A script that runs clang-format on all C/C++ files in CLANG_FORMAT_ALLOWLIST. There is | 
 | also a diff mode which simply checks if clang-format would make any changes, which is useful for | 
 | CI purposes. | 
 |  | 
If clang-format is not available, the script also downloads a platform-appropriate binary from
an S3 bucket and verifies it against a precommitted set of blessed binary hashes.
 | """ | 
 | import argparse | 
 | import asyncio | 
 | import re | 
 | import os | 
 | import sys | 
 | from clang_format_utils import get_and_check_clang_format, CLANG_FORMAT_PATH | 
 |  | 
# Allowlist of directories to check. All files in these directories
# (recursively) will be checked.
 | # If you edit this, please edit the allowlist in clang_format_ci.sh as well. | 
 | CLANG_FORMAT_ALLOWLIST = ["torch/csrc/jit/", "test/cpp/jit/", "test/cpp/tensorexpr/"] | 
 |  | 
 | # Only files with names matching this regex will be formatted. | 
 | CPP_FILE_REGEX = re.compile(".*\\.(h|cpp|cc|c|hpp)$") | 
 |  | 
 |  | 
 | def get_allowlisted_files(): | 
 |     """ | 
 |     Parse CLANG_FORMAT_ALLOWLIST and resolve all directories. | 
 |     Returns the set of allowlist cpp source files. | 
 |     """ | 
 |     matches = [] | 
 |     for dir in CLANG_FORMAT_ALLOWLIST: | 
 |         for root, dirnames, filenames in os.walk(dir): | 
 |             for filename in filenames: | 
 |                 if CPP_FILE_REGEX.match(filename): | 
 |                     matches.append(os.path.join(root, filename)) | 
 |     return set(matches) | 
 |  | 
 |  | 
 | async def run_clang_format_on_file(filename, semaphore, verbose=False): | 
 |     """ | 
 |     Run clang-format on the provided file. | 
 |     """ | 
 |     # -style=file picks up the closest .clang-format, -i formats the files inplace. | 
 |     cmd = "{} -style=file -i {}".format(CLANG_FORMAT_PATH, filename) | 
 |     async with semaphore: | 
 |         proc = await asyncio.create_subprocess_shell(cmd) | 
 |         _ = await proc.wait() | 
 |     if verbose: | 
 |         print("Formatted {}".format(filename)) | 
 |  | 
 |  | 
 | async def file_clang_formatted_correctly(filename, semaphore, verbose=False): | 
 |     """ | 
 |     Checks if a file is formatted correctly and returns True if so. | 
 |     """ | 
 |     ok = True | 
 |     # -style=file picks up the closest .clang-format | 
 |     cmd = "{} -style=file {}".format(CLANG_FORMAT_PATH, filename) | 
 |  | 
 |     async with semaphore: | 
 |         proc = await asyncio.create_subprocess_shell(cmd, stdout=asyncio.subprocess.PIPE) | 
 |         # Read back the formatted file. | 
 |         stdout, _ = await proc.communicate() | 
 |  | 
 |     formatted_contents = stdout.decode() | 
 |     # Compare the formatted file to the original file. | 
 |     with open(filename) as orig: | 
 |         orig_contents = orig.read() | 
 |         if formatted_contents != orig_contents: | 
 |             ok = False | 
 |             if verbose: | 
 |                 print("{} is not formatted correctly".format(filename)) | 
 |  | 
 |     return ok | 
 |  | 
 |  | 
 | async def run_clang_format(max_processes, diff=False, verbose=False): | 
 |     """ | 
 |     Run clang-format to all files in CLANG_FORMAT_ALLOWLIST that match CPP_FILE_REGEX. | 
 |     """ | 
 |     # Check to make sure the clang-format binary exists. | 
 |     if not os.path.exists(CLANG_FORMAT_PATH): | 
 |         print("clang-format binary not found") | 
 |         return False | 
 |  | 
 |     # Gather command-line options for clang-format. | 
 |     args = [CLANG_FORMAT_PATH, "-style=file"] | 
 |  | 
 |     if not diff: | 
 |         args.append("-i") | 
 |  | 
 |     ok = True | 
 |  | 
 |     # Semaphore to bound the number of subprocesses that can be created at once to format files. | 
 |     semaphore = asyncio.Semaphore(max_processes) | 
 |  | 
 |     # Format files in parallel. | 
 |     if diff: | 
 |         for f in asyncio.as_completed([file_clang_formatted_correctly(f, semaphore, verbose) for f in get_allowlisted_files()]): | 
 |             ok &= await f | 
 |  | 
 |         if ok: | 
 |             print("All files formatted correctly") | 
 |         else: | 
 |             print("Some files not formatted correctly") | 
 |     else: | 
 |         await asyncio.gather(*[run_clang_format_on_file(f, semaphore, verbose) for f in get_allowlisted_files()]) | 
 |  | 
 |     return ok | 
 |  | 
 | def parse_args(args): | 
 |     """ | 
 |     Parse and return command-line arguments. | 
 |     """ | 
 |     parser = argparse.ArgumentParser( | 
 |         description="Execute clang-format on your working copy changes." | 
 |     ) | 
 |     parser.add_argument( | 
 |         "-d", | 
 |         "--diff", | 
 |         action="store_true", | 
 |         default=False, | 
 |         help="Determine whether running clang-format would produce changes", | 
 |     ) | 
 |     parser.add_argument("--verbose", "-v", action="store_true", default=False) | 
 |     parser.add_argument("--max-processes", type=int, default=50, | 
 |                         help="Maximum number of subprocesses to create to format files in parallel") | 
 |     return parser.parse_args(args) | 
 |  | 
 |  | 
 | def main(args): | 
 |     # Parse arguments. | 
 |     options = parse_args(args) | 
 |     # Get clang-format and make sure it is the right binary and it is in the right place. | 
 |     ok = get_and_check_clang_format(options.verbose) | 
 |     # Invoke clang-format on all files in the directories in the allowlist. | 
 |     if ok: | 
 |         loop = asyncio.get_event_loop() | 
 |         ok = loop.run_until_complete(run_clang_format(options.max_processes, options.diff, options.verbose)) | 
 |  | 
 |     # We have to invert because False -> 0, which is the code to be returned if everything is okay. | 
 |     return not ok | 
 |  | 
 |  | 
 | if __name__ == "__main__": | 
 |     sys.exit(main(sys.argv[1:])) |