[lint] Add the rest of the grep linters (#67932)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/67932

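Adds the SPACES, TABS, INCLUDE, and PYPIDEP grep-based linters to
.lintrunner.toml.

Also makes several improvements to grep_linter.py:
- adds an optional `--replace-pattern` flag; the pattern is passed to
  `sed -r` and the result is reported as the proposed replacement text.
- renames the command-line flags from snake_case to kebab-case
  (`--linter_name` to `--linter-name`, `--error_name` to `--error-name`,
  `--error_description` to `--error-description`) and updates the existing
  callers in .lintrunner.toml to match.
- moves per-match message construction into a new `lint_file` helper.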

Test Plan: Imported from OSS

Reviewed By: H-Huang

Differential Revision: D32250603

Pulled By: suo

fbshipit-source-id: e07eb182e9473a268e2b805a68a859b91228bfbb
diff --git a/.lintrunner.toml b/.lintrunner.toml
index 173f5fe..e7b7f2f 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -152,9 +152,9 @@
     'python3',
     'tools/linter/adapters/grep_linter.py',
     '--pattern=# type:\s*ignore(?!\[)',
-    '--linter_name=TYPEIGNORE',
-    '--error_name=unqualified type: ignore',
-    """--error_description=\
+    '--linter-name=TYPEIGNORE',
+    '--error-name=unqualified type: ignore',
+    """--error-description=\
         This line has an unqualified `type: ignore`; \
         please convert it to `type: ignore[xxxx]`\
     """,
@@ -170,9 +170,9 @@
     'python3',
     'tools/linter/adapters/grep_linter.py',
     '--pattern=# type:\s*ignore(?!\[)',
-    '--linter_name=TYPEIGNORE',
-    '--error_name=unqualified noqa',
-    """--error_description=\
+    '--linter-name=TYPEIGNORE',
+    '--error-name=unqualified noqa',
+    """--error-description=\
         This line has an unqualified `noqa`; \
         please convert it to `noqa: XXXX`\
     """,
@@ -224,3 +224,100 @@
     '--',
     '@{{PATHSFILE}}',
 ]
+
+[[linter]]
+name = 'SPACES'
+include_patterns = ['**']
+exclude_patterns = [
+    '**/contrib/**',
+    '**/*.diff',
+    'third_party/**',
+]
+args = [
+    'python3',
+    'tools/linter/adapters/grep_linter.py',
+    '--pattern=[[:blank:]]$',
+    '--linter-name=SPACES',
+    '--error-name=trailing spaces',
+    '--replace-pattern=s/[[:blank:]]+$//',
+    """--error-description=\
+        This line has trailing spaces; please remove them.\
+    """,
+    '--',
+    '@{{PATHSFILE}}'
+]
+
+[[linter]]
+name = 'TABS'
+include_patterns = ['**']
+exclude_patterns = [
+    '**/*.svg',
+    '**/*Makefile',
+    '**/contrib/**',
+    'third_party/**',
+    '**/.gitattributes',
+    '**/.gitmodules',
+]
+args = [
+    'python3',
+    'tools/linter/adapters/grep_linter.py',
+    '--pattern=\t',
+    '--linter-name=TABS',
+    '--error-name=saw some tabs',
+    '--replace-pattern=s/\t/    /',
+    """--error-description=\
+        This line has tabs; please replace them with spaces.\
+    """,
+    '--',
+    '@{{PATHSFILE}}'
+]
+
+[[linter]]
+name = 'INCLUDE'
+include_patterns = [
+    'c10/**',
+    'aten/**',
+    'torch/csrc/**',
+]
+exclude_patterns = [
+    'aten/src/ATen/native/quantized/cpu/qnnpack/**',
+]
+args = [
+    'python3',
+    'tools/linter/adapters/grep_linter.py',
+    '--pattern=#include "',
+    '--linter-name=INCLUDE',
+    '--error-name=quoted include',
+    '--replace-pattern=s/#include "(.*)"$/#include <\1>/',
+    """--error-description=\
+        This #include uses quotes; please convert it to #include <xxxx>\
+    """,
+    '--',
+    '@{{PATHSFILE}}'
+]
+
+[[linter]]
+name = 'PYPIDEP'
+include_patterns = ['.github/**']
+exclude_patterns = [
+    '**/*.rst',
+    '**/*.py',
+    '**/*.md',
+    '**/*.diff',
+]
+args = [
+    'python3',
+    'tools/linter/adapters/grep_linter.py',
+    """--pattern=\
+    (pip|pip3|python -m pip|python3 -m pip|python3 -mpip|python -mpip) \
+    install ([a-z][\\.a-z-0-9]*+(?!(=|.*\\.whl))([[:blank:]]|))+\
+    """,
+    '--linter-name=PYPIDEP',
+    '--error-name=unpinned PyPI install',
+    """--error-description=\
+        This line has unpinned PyPi installs; \
+        please pin them to a specific version: e.g. 'thepackage==1.2'\
+    """,
+    '--',
+    '@{{PATHSFILE}}'
+]
diff --git a/tools/linter/adapters/grep_linter.py b/tools/linter/adapters/grep_linter.py
index a6338f6..000d4be4 100644
--- a/tools/linter/adapters/grep_linter.py
+++ b/tools/linter/adapters/grep_linter.py
@@ -1,3 +1,7 @@
+"""
+Generic linter that greps for a pattern and optionally suggests replacements.
+"""
+
 import argparse
 import json
 import logging
@@ -40,56 +44,111 @@
     return name.replace("\\", "/") if IS_WINDOWS else name
 
 
-def run_command(
-    args: List[str],
-) -> "subprocess.CompletedProcess[bytes]":
+def run_command(args: List[str],) -> "subprocess.CompletedProcess[bytes]":
     logging.debug("$ %s", " ".join(args))
     start_time = time.monotonic()
     try:
-        return subprocess.run(
-            args,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-        )
+        return subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE,)
     finally:
         end_time = time.monotonic()
         logging.debug("took %dms", (end_time - start_time) * 1000)
 
 
+def lint_file(
+    matching_line: str,
+    replace_pattern: str,
+    linter_name: str,
+    error_name: str,
+    error_description: str,
+) -> LintMessage:
+    # matching_line looks like:
+    #   tools/linter/clangtidy_linter.py:13:import foo.bar.baz
+    split = matching_line.split(":")
+    filename = split[0]
+
+    original = None
+    replacement = None
+    if replace_pattern:
+        with open(filename, "r") as f:
+            original = f.read()
+
+        try:
+            proc = run_command(["sed", "-r", replace_pattern, filename])
+            replacement = proc.stdout.decode("utf-8")
+        except Exception as err:
+            return LintMessage(
+                path=None,
+                line=None,
+                char=None,
+                code=linter_name,
+                severity=LintSeverity.ERROR,
+                name="command-failed",
+                original=None,
+                replacement=None,
+                description=(
+                    f"Failed due to {err.__class__.__name__}:\n{err}"
+                    if not isinstance(err, subprocess.CalledProcessError)
+                    else (
+                        "COMMAND (exit code {returncode})\n"
+                        "{command}\n\n"
+                        "STDERR\n{stderr}\n\n"
+                        "STDOUT\n{stdout}"
+                    ).format(
+                        returncode=err.returncode,
+                        command=" ".join(as_posix(x) for x in err.cmd),
+                        stderr=err.stderr.decode("utf-8").strip() or "(empty)",
+                        stdout=err.stdout.decode("utf-8").strip() or "(empty)",
+                    )
+                ),
+                bypassChangedLineFiltering=None,
+            )
+
+    return LintMessage(
+        path=split[0],
+        line=int(split[1]),
+        char=None,
+        code=linter_name,
+        severity=LintSeverity.ERROR,
+        name=error_name,
+        original=original,
+        replacement=replacement,
+        description=error_description,
+        bypassChangedLineFiltering=None,
+    )
+
+
 def main() -> None:
     parser = argparse.ArgumentParser(
-        description="grep wrapper linter.",
-        fromfile_prefix_chars="@",
+        description="grep wrapper linter.", fromfile_prefix_chars="@",
     )
     parser.add_argument(
-        "--pattern",
-        required=True,
-        help="pattern to grep for",
+        "--pattern", required=True, help="pattern to grep for",
     )
     parser.add_argument(
-        "--linter_name",
-        required=True,
-        help="name of the linter",
+        "--linter-name", required=True, help="name of the linter",
     )
     parser.add_argument(
-        "--error_name",
+        "--error-name",
         required=True,
         help="human-readable description of what the error is",
     )
     parser.add_argument(
-        "--error_description",
+        "--error-description",
         required=True,
         help="message to display when the pattern is found",
     )
     parser.add_argument(
-        "--verbose",
-        action="store_true",
-        help="verbose logging",
+        "--replace-pattern",
+        help=(
+            "the form of a pattern passed to `sed -r`. "
+            "If specified, this will become proposed replacement text."
+        ),
     )
     parser.add_argument(
-        "filenames",
-        nargs="+",
-        help="paths to lint",
+        "--verbose", action="store_true", help="verbose logging",
+    )
+    parser.add_argument(
+        "filenames", nargs="+", help="paths to lint",
     )
     args = parser.parse_args()
 
@@ -105,7 +164,7 @@
 
     try:
         proc = run_command(["grep", "-nPH", args.pattern, *args.filenames])
-    except OSError as err:
+    except Exception as err:
         err_msg = LintMessage(
             path=None,
             line=None,
@@ -117,6 +176,18 @@
             replacement=None,
             description=(
                 f"Failed due to {err.__class__.__name__}:\n{err}"
+                if not isinstance(err, subprocess.CalledProcessError)
+                else (
+                    "COMMAND (exit code {returncode})\n"
+                    "{command}\n\n"
+                    "STDERR\n{stderr}\n\n"
+                    "STDOUT\n{stdout}"
+                ).format(
+                    returncode=err.returncode,
+                    command=" ".join(as_posix(x) for x in err.cmd),
+                    stderr=err.stderr.decode("utf-8").strip() or "(empty)",
+                    stdout=err.stdout.decode("utf-8").strip() or "(empty)",
+                )
             ),
             bypassChangedLineFiltering=None,
         )
@@ -125,21 +196,15 @@
 
     lines = proc.stdout.decode().splitlines()
     for line in lines:
-        # tools/linter/clangtidy_linter.py:13:import foo.bar.baz
-        split = line.split(":")
-        msg = LintMessage(
-            path=split[0],
-            line=int(split[1]),
-            char=None,
-            code=args.linter_name,
-            severity=LintSeverity.ERROR,
-            name=args.error_name,
-            original=None,
-            replacement=None,
-            description=args.error_description,
-            bypassChangedLineFiltering=None,
+        lint_message = lint_file(
+            line,
+            args.replace_pattern,
+            args.linter_name,
+            args.error_name,
+            args.error_description,
         )
-        print(json.dumps(msg._asdict()), flush=True)
+        print(json.dumps(lint_message._asdict()), flush=True)
+
 
 if __name__ == "__main__":
     main()