Refactor local lint (#58798) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/58798 In #58623 there was a bug in `make quicklint` where ShellCheck would run on the entire repo when there were no files. This PR fixes that by refactoring out common stuff (like skipping quicklint when there are no files, let checks do their own file filtering) and pushes the logic into a runner class. Test Plan: Imported from OSS Reviewed By: samestep Differential Revision: D28649889 Pulled By: driazati fbshipit-source-id: b19f32cdb63396c806cb689b2f6daf97e1724d44

commit: a679bb5ecf14702b0ba75edf799e7fd2e893250c [log] [tgz]
author: driazati <driazati@users.noreply.github.com> Mon May 24 13:49:47 2021 -0700
committer: Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com> Mon May 24 13:52:53 2021 -0700
tree: 4248ce8f5e4f94e26ecbf3d6c49b0e8c4fa8cb07
parent: a7f4f80903b331ecb9485ccf534df1dfc5748cff [diff]
diff --git a/Makefile b/Makefile
index f328d07..8d61fd0 100644
--- a/Makefile
+++ b/Makefile

@@ -46,7 +46,6 @@
 		--job 'shellcheck' \
 		--step 'Extract scripts from GitHub Actions workflows'
 	@$(PYTHON) tools/actions_local_runner.py \
-		--file-filter '.sh' \
 		$(CHANGED_ONLY) \
 		--job 'shellcheck'
 
@@ -90,13 +89,11 @@
 
 flake8:
 	@$(PYTHON) tools/actions_local_runner.py \
-		--file-filter '.py' \
 		$(CHANGED_ONLY) \
 		--job 'flake8-py3'
 
 mypy:
 	@$(PYTHON) tools/actions_local_runner.py \
-		--file-filter '.py' \
 		$(CHANGED_ONLY) \
 		--job 'mypy'
 

diff --git a/tools/actions_local_runner.py b/tools/actions_local_runner.py
index 4082abd..369f713 100755
--- a/tools/actions_local_runner.py
+++ b/tools/actions_local_runner.py

@@ -13,7 +13,7 @@
 import shlex
 import configparser
 
-from typing import List, Dict, Any, Optional, Tuple, Union
+from typing import List, Dict, Any, Optional, Union, NamedTuple, Set
 
 REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 
@@ -87,20 +87,28 @@
 
 
 def print_results(job_name: str, passed: bool, streams: List[str]) -> None:
-    header(job_name, passed)
+    icon = color(col.GREEN, "✓") if passed else color(col.RED, "x")
+    print(f"{icon} {color(col.BLUE, job_name)}")
+
     for stream in streams:
         stream = stream.strip()
         if stream != "":
             print(stream)
 
 
+class CommandResult(NamedTuple):
+    passed: bool
+    stdout: str
+    stderr: str
+
+
 async def shell_cmd(
     cmd: Union[str, List[str]],
     env: Optional[Dict[str, Any]] = None,
     redirect: bool = True,
-) -> Tuple[bool, str, str]:
+) -> CommandResult:
     if isinstance(cmd, list):
-        cmd_str = ' '.join(shlex.quote(arg) for arg in cmd)
+        cmd_str = " ".join(shlex.quote(arg) for arg in cmd)
     else:
         cmd_str = cmd
 
@@ -117,176 +125,191 @@
 
     passed = proc.returncode == 0
     if not redirect:
-        return passed, "", ""
+        return CommandResult(passed, "", "")
 
-    return passed, stdout.decode().strip(), stderr.decode().strip()
+    return CommandResult(passed, stdout.decode().strip(), stderr.decode().strip())
 
 
-def header(name: str, passed: bool) -> None:
-    PASS = color(col.GREEN, "✓")
-    FAIL = color(col.RED, "x")
-    icon = PASS if passed else FAIL
-    print(f"{icon} {color(col.BLUE, name)}")
+class Check:
+    name: str
 
+    def __init__(self, files: Optional[List[str]], quiet: bool):
+        self.quiet = quiet
+        self.files = files
 
-def get_flake_excludes() -> List[str]:
-    config = configparser.ConfigParser()
-    config.read(os.path.join(REPO_ROOT, ".flake8"))
-
-    excludes = re.split(r',\s*', config["flake8"]["exclude"].strip())
-    excludes = [e.strip() for e in excludes if e.strip() != ""]
-    return excludes
-
-
-async def run_flake8(files: Optional[List[str]], quiet: bool) -> bool:
-    cmd = ["flake8"]
-
-    excludes = get_flake_excludes()
-
-    def should_include(name: str) -> bool:
-        for exclude in excludes:
-            if fnmatch.fnmatch(name, pat=exclude):
-                return False
-            if name.startswith(exclude) or ("./" + name).startswith(exclude):
-                return False
-        return True
-
-    if files is not None:
-        files = [f for f in files if should_include(f)]
-
-        if len(files) == 0:
-            print_results("flake8", True, [])
+    async def run(self) -> bool:
+        result = await self.run_helper()
+        if result is None:
             return True
 
-        # Running quicklint, pass in an explicit list of files (unlike mypy,
-        # flake8 will still use .flake8 to filter this list by the 'exclude's
-        # in the config
-        cmd += files
+        streams = []
+        if not result.passed:
+            streams = [
+                result.stderr,
+                result.stdout,
+            ]
+        print_results(self.name, result.passed, streams)
+        return result.passed
 
-    passed, stdout, stderr = await shell_cmd(cmd)
-    print_results("flake8", passed, [stdout, stderr])
-    return passed
+    async def run_helper(self) -> Optional[CommandResult]:
+        if self.files is not None:
+            relevant_files = self.filter_files(self.files)
+            if len(relevant_files) == 0:
+                # No files, do nothing
+                return CommandResult(passed=True, stdout="", stderr="")
+
+            return await self.quick(relevant_files)
+
+        return await self.full()
+
+    def filter_ext(self, files: List[str], extensions: Set[str]) -> List[str]:
+        def passes(filename: str) -> bool:
+            return os.path.splitext(filename)[1] in extensions
+
+        return [f for f in files if passes(f)]
+
+    def filter_files(self, files: List[str]) -> List[str]:
+        return files
+
+    async def quick(self, files: List[str]) -> CommandResult:
+        raise NotImplementedError
+
+    async def full(self) -> Optional[CommandResult]:
+        raise NotImplementedError
 
 
-async def run_mypy(files: Optional[List[str]], quiet: bool) -> bool:
-    env = os.environ.copy()
-    if should_color():
-        # Secret env variable: https://github.com/python/mypy/issues/7771
-        env["MYPY_FORCE_COLOR"] = "1"
+class Flake8(Check):
+    name = "flake8"
 
-    if files is not None:
-        # Running quick lint, use mypy-wrapper instead so it checks that the files
-        # actually should be linted
+    def filter_files(self, files: List[str]) -> List[str]:
+        config = configparser.ConfigParser()
+        config.read(os.path.join(REPO_ROOT, ".flake8"))
 
-        passed, stdout, stderr = await shell_cmd(
-            [sys.executable, "tools/mypy_wrapper.py"] + [
-                os.path.join(REPO_ROOT, f) for f in files
+        excludes = re.split(r",\s*", config["flake8"]["exclude"].strip())
+        excludes = [e.strip() for e in excludes if e.strip() != ""]
+
+        def should_include(name: str) -> bool:
+            for exclude in excludes:
+                if fnmatch.fnmatch(name, pat=exclude):
+                    return False
+                if name.startswith(exclude) or f"./{name}".startswith(exclude):
+                    return False
+            return True
+
+        files = self.filter_ext(files, {".py"})
+        return [f for f in files if should_include(f)]
+
+    async def quick(self, files: List[str]) -> CommandResult:
+        return await shell_cmd(["flake8"] + files)
+
+    async def full(self) -> CommandResult:
+        return await shell_cmd(["flake8"])
+
+
+class Mypy(Check):
+    name = "mypy (skipped typestub generation)"
+
+    def filter_files(self, files: List[str]) -> List[str]:
+        return self.filter_ext(files, {".py", ".pyi"})
+
+    def env(self) -> Dict[str, Any]:
+        env = os.environ.copy()
+        if should_color():
+            # Secret env variable: https://github.com/python/mypy/issues/7771
+            env["MYPY_FORCE_COLOR"] = "1"
+        return env
+
+    async def quick(self, files: List[str]) -> CommandResult:
+        return await shell_cmd(
+            [sys.executable, "tools/mypy_wrapper.py"]
+            + [os.path.join(REPO_ROOT, f) for f in files],
+            env=self.env(),
+        )
+
+    async def full(self) -> None:
+        env = self.env()
+        # hackily change the name
+        self.name = "mypy"
+
+        await shell_cmd(
+            [
+                sys.executable,
+                "tools/actions_local_runner.py",
+                "--job",
+                "mypy",
+                "--file",
+                ".github/workflows/lint.yml",
+                "--step",
+                "Run autogen",
             ],
+            redirect=False,
             env=env,
         )
 
-        print_results("mypy (skipped typestub generation)", passed, [
-            stdout + "\n",
-            stderr + "\n",
-        ])
-        return passed
-
-    # Not running quicklint, so use lint.yml
-    _, _, _ = await shell_cmd(
-        [
-            sys.executable,
-            "tools/actions_local_runner.py",
-            "--job",
-            "mypy",
-            "--file",
-            ".github/workflows/lint.yml",
-            "--step",
-            "Run autogen",
-        ],
-        redirect=False,
-        env=env,
-    )
-    passed, _, _ = await shell_cmd(
-        [
-            sys.executable,
-            "tools/actions_local_runner.py",
-            "--job",
-            "mypy",
-            "--file",
-            ".github/workflows/lint.yml",
-            "--step",
-            "Run mypy",
-        ],
-        redirect=False,
-        env=env,
-    )
-    return passed
-
-
-
-async def run_shellcheck(files: Optional[List[str]], quiet: bool) -> bool:
-    if files is not None:
-        # The files list should already be filtered by '--file-filter ".sh"' when
-        # calling this script
-        passed, stdout, stderr = await shell_cmd(
-            ["tools/run_shellcheck.sh"] + [
-                os.path.join(REPO_ROOT, f) for f in files
+        await shell_cmd(
+            [
+                sys.executable,
+                "tools/actions_local_runner.py",
+                "--job",
+                "mypy",
+                "--file",
+                ".github/workflows/lint.yml",
+                "--step",
+                "Run mypy",
             ],
+            redirect=False,
+            env=env,
         )
-        print_results("shellcheck: Run ShellCheck", passed, [
-            stdout + "\n",
-            stderr + "\n",
-        ])
-        return passed
-
-    # Not running quicklint, so use lint.yml
-    passed, _, _ = await shell_cmd(
-        [
-            sys.executable,
-            "tools/actions_local_runner.py",
-            "--job",
-            "shellcheck",
-            "--file",
-            ".github/workflows/lint.yml",
-            "--step",
-            "Run ShellCheck",
-        ],
-        redirect=False,
-    )
-    return passed
 
 
-async def run_step(
-    step: Dict[str, Any], job_name: str, files: Optional[List[str]], quiet: bool
-) -> bool:
-    env = os.environ.copy()
-    env["GITHUB_WORKSPACE"] = "/tmp"
-    script = step["run"]
+class ShellCheck(Check):
+    name = "shellcheck: Run ShellCheck"
 
-    if quiet:
-        # TODO: Either lint that GHA scripts only use 'set -eux' or make this more
-        # resilient
-        script = script.replace("set -eux", "set -eu")
-        script = re.sub(r"^time ", "", script, flags=re.MULTILINE)
-    name = f'{job_name}: {step["name"]}'
+    def filter_files(self, files: List[str]) -> List[str]:
+        return self.filter_ext(files, {".sh"})
 
-    passed, stderr, stdout = await shell_cmd(script, env=env)
-    if not passed:
-        print_results(name, passed, [stdout, stderr])
-    else:
-        print_results(name, passed, [])
+    async def quick(self, files: List[str]) -> CommandResult:
+        return await shell_cmd(
+            ["tools/run_shellcheck.sh"] + [os.path.join(REPO_ROOT, f) for f in files],
+        )
 
-    return passed
+    async def full(self) -> None:
+        await shell_cmd(
+            [
+                sys.executable,
+                "tools/actions_local_runner.py",
+                "--job",
+                "shellcheck",
+                "--file",
+                ".github/workflows/lint.yml",
+                "--step",
+                "Run ShellCheck",
+            ],
+            redirect=False,
+        )
 
 
-async def run_steps(
-    steps: List[Dict[str, Any]], job_name: str, files: Optional[List[str]], quiet: bool
-) -> bool:
-    coros = [run_step(step, job_name, files, quiet) for step in steps]
-    return all(await asyncio.gather(*coros))
+class YamlStep(Check):
+    def __init__(self, step: Dict[str, Any], job_name: str, quiet: bool):
+        super().__init__(files=None, quiet=quiet)
+        self.step = step
+        self.name = f'{job_name}: {self.step["name"]}'
+
+    async def full(self) -> CommandResult:
+        env = os.environ.copy()
+        env["GITHUB_WORKSPACE"] = "/tmp"
+        script = self.step["run"]
+
+        if self.quiet:
+            # TODO: Either lint that GHA scripts only use 'set -eux' or make this more
+            # resilient
+            script = script.replace("set -eux", "set -eu")
+            script = re.sub(r"^time ", "", script, flags=re.MULTILINE)
+
+        return await shell_cmd(script, env=env)
 
 
-def relevant_changed_files(file_filters: Optional[List[str]]) -> Optional[List[str]]:
+def changed_files() -> Optional[List[str]]:
     changed_files: Optional[List[str]] = None
     try:
         changed_files = sorted(find_changed_files())
@@ -298,16 +321,7 @@
         )
         return None
 
-    if file_filters is None:
-        return changed_files
-    else:
-        relevant_files = []
-        for f in changed_files:
-            for file_filter in file_filters:
-                if f.endswith(file_filter):
-                    relevant_files.append(f)
-                    break
-        return relevant_files
+    return changed_files
 
 
 def grab_specific_steps(
@@ -332,11 +346,6 @@
     )
     parser.add_argument("--file", help="YAML file with actions")
     parser.add_argument(
-        "--file-filter",
-        help="only pass through files with this extension",
-        nargs="*",
-    )
-    parser.add_argument(
         "--changed-only",
         help="only run on changed files",
         action="store_true",
@@ -349,12 +358,8 @@
     parser.add_argument("--step", action="append", help="steps to run (in order)")
     args = parser.parse_args()
 
-    relevant_files = None
     quiet = not args.no_quiet
 
-    if args.changed_only:
-        relevant_files = relevant_changed_files(args.file_filter)
-
     if args.file is None:
         # If there is no .yml file provided, fall back to the list of known
         # jobs. We use this for flake8 and mypy since they run different
@@ -363,7 +368,12 @@
             raise RuntimeError(
                 f"Job {args.job} not found and no .yml file was provided"
             )
-        future = ad_hoc_steps[args.job](relevant_files, quiet)
+
+        files = None
+        if args.changed_only:
+            files = changed_files()
+
+        checks = [ad_hoc_steps[args.job](files, quiet)]
     else:
         if args.step is None:
             raise RuntimeError("1+ --steps must be provided")
@@ -380,18 +390,21 @@
 
         # Pull the relevant sections out of the provided .yml file and run them
         relevant_steps = grab_specific_steps(args.step, job)
-        future = run_steps(relevant_steps, args.job, relevant_files, quiet)
+        checks = [
+            YamlStep(step=step, job_name=args.job, quiet=quiet)
+            for step in relevant_steps
+        ]
 
     loop = asyncio.get_event_loop()
-    loop.run_until_complete(future)
+    loop.run_until_complete(asyncio.gather(*[check.run() for check in checks]))
 
 
 # These are run differently locally in order to enable quicklint, so dispatch
 # out to special handlers instead of using lint.yml
 ad_hoc_steps = {
-    "mypy": run_mypy,
-    "flake8-py3": run_flake8,
-    "shellcheck": run_shellcheck,
+    "mypy": Mypy,
+    "flake8-py3": Flake8,
+    "shellcheck": ShellCheck,
 }
 
 if __name__ == "__main__":

diff --git a/tools/test/test_actions_local_runner.py b/tools/test/test_actions_local_runner.py
index 2b2c26d..ba4e6fd 100644
--- a/tools/test/test_actions_local_runner.py
+++ b/tools/test/test_actions_local_runner.py

@@ -24,38 +24,23 @@
         def test_step_extraction(self) -> None:
             fake_job = {
                 "steps": [
-                    {
-                        "name": "test1",
-                        "run": "echo hi"
-                    },
-                    {
-                        "name": "test2",
-                        "run": "echo hi"
-                    },
-                    {
-                        "name": "test3",
-                        "run": "echo hi"
-                    },
+                    {"name": "test1", "run": "echo hi"},
+                    {"name": "test2", "run": "echo hi"},
+                    {"name": "test3", "run": "echo hi"},
                 ]
             }
 
             actual = actions_local_runner.grab_specific_steps(["test2"], fake_job)
             expected = [
-                {
-                    "name": "test2",
-                    "run": "echo hi"
-                },
+                {"name": "test2", "run": "echo hi"},
             ]
             self.assertEqual(actual, expected)
 
         async def test_runner(self) -> None:
-            fake_step = {
-                "name": "say hello",
-                "run": "echo hi"
-            }
+            fake_step = {"name": "say hello", "run": "echo hi"}
             f = io.StringIO()
             with contextlib.redirect_stdout(f):
-                await actions_local_runner.run_steps([fake_step], "test", None, True)
+                await actions_local_runner.YamlStep(fake_step, "test", True).run()
 
             result = f.getvalue()
             self.assertIn("say hello", result)
@@ -80,7 +65,9 @@
 
         def test_lint(self):
             cmd = ["make", "lint", "-j", str(multiprocessing.cpu_count())]
-            proc = subprocess.run(cmd, cwd=actions_local_runner.REPO_ROOT, stdout=subprocess.PIPE)
+            proc = subprocess.run(
+                cmd, cwd=actions_local_runner.REPO_ROOT, stdout=subprocess.PIPE
+            )
             stdout = proc.stdout.decode()
 
             for line in self.expected:
@@ -90,7 +77,9 @@
 
         def test_quicklint(self):
             cmd = ["make", "quicklint", "-j", str(multiprocessing.cpu_count())]
-            proc = subprocess.run(cmd, cwd=actions_local_runner.REPO_ROOT, stdout=subprocess.PIPE)
+            proc = subprocess.run(
+                cmd, cwd=actions_local_runner.REPO_ROOT, stdout=subprocess.PIPE
+            )
             stdout = proc.stdout.decode()
 
             for line in self.expected:
@@ -99,7 +88,6 @@
             # TODO: See https://github.com/pytorch/pytorch/issues/57967
             self.assertIn("mypy (skipped typestub generation)", stdout)
 
-
     class TestQuicklint(unittest.IsolatedAsyncioTestCase):
         test_files = [
             os.path.join("caffe2", "some_cool_file.py"),
@@ -108,17 +96,21 @@
             os.path.join("torch", "some_stubs.pyi"),
             os.path.join("test.sh"),
         ]
-        test_py_files = [f for f in test_files if f.endswith(".py") or f.endswith(".pyi")]
+        test_py_files = [
+            f for f in test_files if f.endswith(".py") or f.endswith(".pyi")
+        ]
         test_sh_files = [f for f in test_files if f.endswith(".sh")]
         maxDiff = None
 
         def setUp(self, *args, **kwargs):
             for name in self.test_files:
-                bad_code = textwrap.dedent("""
+                bad_code = textwrap.dedent(
+                    """
                     some_variable = '2'
                     some_variable = None
                     some_variable = 11.2
-                """).rstrip("\n")
+                """
+                ).rstrip("\n")
 
                 with open(name, "w") as f:
                     f.write(bad_code)
@@ -135,20 +127,22 @@
         async def test_flake8(self):
             f = io.StringIO()
             with contextlib.redirect_stdout(f):
-                await actions_local_runner.run_flake8(self.test_py_files, True)
+                await actions_local_runner.Flake8(self.test_py_files, True).run()
 
             # Should exclude the caffe2/ file
-            expected = textwrap.dedent("""
+            expected = textwrap.dedent(
+                """
                 x flake8
                 torch/some_cool_file.py:4:21: W292 no newline at end of file
                 aten/some_cool_file.py:4:21: W292 no newline at end of file
-            """).lstrip("\n")
+            """
+            ).lstrip("\n")
             self.assertEqual(expected, f.getvalue())
 
         async def test_shellcheck(self):
             f = io.StringIO()
             with contextlib.redirect_stdout(f):
-                await actions_local_runner.run_shellcheck(self.test_sh_files, True)
+                await actions_local_runner.ShellCheck(self.test_sh_files, True).run()
 
             self.assertIn("SC2148: Tips depend on target shell", f.getvalue())
             self.assertIn("SC2283: Remove spaces around = to assign", f.getvalue())
@@ -173,12 +167,12 @@
                     redirect=True,
                 )
 
-                await actions_local_runner.run_mypy(self.test_py_files, True)
-
+                await actions_local_runner.Mypy(self.test_py_files, True).run()
 
             # Should exclude the aten/ file; also, apparently mypy
             # typechecks files in reverse order
-            expected = textwrap.dedent("""
+            expected = textwrap.dedent(
+                """
                 x mypy (skipped typestub generation)
                 torch/some_stubs.pyi:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str")  [assignment]
                 torch/some_stubs.pyi:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
@@ -186,9 +180,12 @@
                 torch/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
                 caffe2/some_cool_file.py:3:17: error: Incompatible types in assignment (expression has type "None", variable has type "str")  [assignment]
                 caffe2/some_cool_file.py:4:17: error: Incompatible types in assignment (expression has type "float", variable has type "str")  [assignment]
-            """).lstrip("\n")  # noqa: B950
+            """  # noqa: B950
+            ).lstrip(
+                "\n"
+            )
             self.assertEqual(expected, f.getvalue())
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     unittest.main()
commit	a679bb5ecf14702b0ba75edf799e7fd2e893250c	[log] [tgz]
author	driazati <driazati@users.noreply.github.com>	Mon May 24 13:49:47 2021 -0700
committer	Facebook GitHub Bot <facebook-github-bot@users.noreply.github.com>	Mon May 24 13:52:53 2021 -0700
tree	4248ce8f5e4f94e26ecbf3d6c49b0e8c4fa8cb07
parent	a7f4f80903b331ecb9485ccf534df1dfc5748cff [diff]