pw_presubmit: Add ignore-prefix to keep-sorted

Add an option to ignore prefixes in keep-sorted blocks, so, for example,
"'CMakeLists.txt'," is placed next to "'*.cmake'," (with
"ignore-prefix='*.,'").

Bug: b/250875082
Change-Id: I2179585708ce950750cdd4cfc475075e181fd2b9
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/115231
Pigweed-Auto-Submit: Rob Mohr <mohrr@google.com>
Reviewed-by: Wyatt Hepler <hepler@google.com>
Commit-Queue: Auto-Submit <auto-submit@pigweed.google.com.iam.gserviceaccount.com>
diff --git a/pw_presubmit/docs.rst b/pw_presubmit/docs.rst
index 817b230..abb9e4e 100644
--- a/pw_presubmit/docs.rst
+++ b/pw_presubmit/docs.rst
@@ -171,7 +171,17 @@
 are preserved, even with ``ignore-case``. To allow duplicates, add
 ``allow-dupes`` to the start line.
 
-These will suggest fixes using ``pw keep-sorted --fix``.
+Prefixes can be ignored by adding ``ignore-prefix=`` followed by a
+comma-separated list of prefixes. Neither commas nor whitespace are supported
+in prefixes. The list below will be kept in this order::
+
+  # keep-sorted: start ignore-prefix=',"
+  'bar',
+  "baz",
+  'foo',
+  # keep-sorted: end
+
+The presubmit check will suggest fixes using ``pw keep-sorted --fix``.
 
 Future versions may support multiline list items.
 
diff --git a/pw_presubmit/py/keep_sorted_test.py b/pw_presubmit/py/keep_sorted_test.py
index 616543b..84550ec 100644
--- a/pw_presubmit/py/keep_sorted_test.py
+++ b/pw_presubmit/py/keep_sorted_test.py
@@ -135,6 +135,24 @@
         self.assertEqual(self.contents,
                          f'{START} ignore-case\nA\na\nB\n{END}\n')
 
+    def test_ignored_prefixes(self) -> None:
+        self._run(f'{START} ignore-prefix=foo,bar\na\nb\nfoob\nbarc\n{END}\n')
+        self.ctx.fail.assert_not_called()
+
+    def test_ignored_longest_prefixes(self) -> None:
+        self._run(f'{START} ignore-prefix=1,123\na\n123b\nb\n1c\n{END}\n')
+        self.ctx.fail.assert_not_called()
+
+    def test_ignored_prefixes_whitespace(self) -> None:
+        self._run(f'{START} ignore-prefix=foo,bar\n'
+                  f' a\n b\n foob\n barc\n{END}\n')
+        self.ctx.fail.assert_not_called()
+
+    def test_ignored_prefixes_insensitive(self) -> None:
+        self._run(f'{START} ignore-prefix=foo,bar ignore-case\n'
+                  f'a\nB\nfooB\nbarc\n{END}\n')
+        self.ctx.fail.assert_not_called()
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/pw_presubmit/py/pw_presubmit/keep_sorted.py b/pw_presubmit/py/pw_presubmit/keep_sorted.py
index 47234f9..3b32d1b 100644
--- a/pw_presubmit/py/pw_presubmit/keep_sorted.py
+++ b/pw_presubmit/py/pw_presubmit/keep_sorted.py
@@ -21,7 +21,8 @@
 from pathlib import Path
 import re
 import sys
-from typing import Collection, List, Optional, Pattern, Sequence, Union
+from typing import (Callable, Collection, List, Optional, Pattern, Sequence,
+                    Tuple, Union)
 
 import pw_cli
 from . import cli, git_repo, presubmit, tools
@@ -34,6 +35,7 @@
 _END = re.compile(r'keep-sorted: (stop|end)', re.IGNORECASE)
 _IGNORE_CASE = re.compile(r'ignore-case', re.IGNORECASE)
 _ALLOW_DUPES = re.compile(r'allow-dupes', re.IGNORECASE)
+_IGNORE_PREFIX = re.compile(r'ignore-prefix=(\S+)', re.IGNORECASE)
 
 # Only include these literals here so keep_sorted doesn't try to reorder later
 # test lines.
@@ -83,17 +85,41 @@
         self.changed: bool = False
 
     def _process_block(self, start_line: str, lines: List[str], end_line: str,
-                       i: int, ignore_case: bool,
-                       allow_dupes: bool) -> Sequence[str]:
+                       i: int, ignore_case: bool, allow_dupes: bool,
+                       ignored_prefixes: Sequence[str]) -> Sequence[str]:
         lines_after_dupes: List[str] = []
         if allow_dupes:
             lines_after_dupes = lines
         else:
             lines_after_dupes = list({x: None for x in lines})
 
-        sort_key = lambda x: x
+        sort_key_funcs: List[Callable[[Tuple[str, ...]], Tuple[str, ...]]] = []
+
+        if ignored_prefixes:
+
+            def strip_ignored_prefixes(val):
+                """Strip one ignored prefix from val[0], preserving indent."""
+                wo_white = val[0].lstrip()
+                white = val[0][:len(val[0]) - len(wo_white)]  # avoids [:-0]
+                for prefix in ignored_prefixes:
+                    if wo_white.startswith(prefix):
+                        return (f'{white}{wo_white[len(prefix):]}', val[1])
+                return (val[0], val[1])
+
+            sort_key_funcs.append(strip_ignored_prefixes)
+
         if ignore_case:
-            sort_key = lambda x: (x.lower(), x)
+            sort_key_funcs.append(lambda val: (val[0].lower(), val[1]))
+
+        def sort_key(val):
+            vals = (val, val)
+            for sort_key_func in sort_key_funcs:
+                vals = sort_key_func(vals)
+            return vals
+
+        for val in lines_after_dupes:
+            _LOG.debug('For sorting: %r => %r', val, sort_key(val))
+
         sorted_lines = sorted(lines_after_dupes, key=sort_key)
 
         if lines != sorted_lines:
@@ -118,6 +144,7 @@
         in_block: bool = False
         ignore_case: bool = False
         allow_dupes: bool = False
+        ignored_prefixes: Sequence[str] = []
         start_line: Optional[str] = None
         end_line: Optional[str] = None
         lines: List[str] = []
@@ -135,8 +162,13 @@
                     in_block = False
                     assert start_line  # Implicitly cast from Optional.
                     self.all_lines.extend(
-                        self._process_block(start_line, lines, end_line, i,
-                                            ignore_case, allow_dupes))
+                        self._process_block(start_line=start_line,
+                                            lines=lines,
+                                            end_line=end_line,
+                                            i=i,
+                                            ignore_case=ignore_case,
+                                            allow_dupes=allow_dupes,
+                                            ignored_prefixes=ignored_prefixes))
                     start_line = end_line = None
                     self.all_lines.append(line)
                     lines = []
@@ -147,10 +179,23 @@
 
             elif start_match := _START.search(line):
                 _LOG.debug('Found start line %d %r', i, line)
+
                 ignore_case = bool(_IGNORE_CASE.search(line))
                 _LOG.debug('ignore_case: %s', ignore_case)
+
                 allow_dupes = bool(_ALLOW_DUPES.search(line))
                 _LOG.debug('allow_dupes: %s', allow_dupes)
+
+                ignored_prefixes = []
+                match = _IGNORE_PREFIX.search(line)
+                if match:
+                    ignored_prefixes = match.group(1).split(',')
+
+                    # We want to check the longest prefixes first, in case one
+                    # prefix is a prefix of another prefix.
+                    ignored_prefixes.sort(key=lambda x: (-len(x), x))
+                _LOG.debug('ignored_prefixes: %r', ignored_prefixes)
+
                 start_line = line
                 in_block = True
                 self.all_lines.append(line)
@@ -158,6 +203,7 @@
                 remaining = line[start_match.end():].strip()
                 remaining = _IGNORE_CASE.sub('', remaining, count=1).strip()
                 remaining = _ALLOW_DUPES.sub('', remaining, count=1).strip()
+                remaining = _IGNORE_PREFIX.sub('', remaining, count=1).strip()
                 if remaining.strip():
                     raise KeepSortedParsingError(
                         f'unrecognized directive on keep-sorted line: '
diff --git a/pw_watch/py/pw_watch/watch.py b/pw_watch/py/pw_watch/watch.py
index b8e8db3..07c3047 100755
--- a/pw_watch/py/pw_watch/watch.py
+++ b/pw_watch/py/pw_watch/watch.py
@@ -547,12 +547,13 @@
 
 _WATCH_PATTERN_DELIMITER = ','
 _WATCH_PATTERNS = (
-    # keep-sorted: start ignore-case
+    # keep-sorted: start ignore-case ignore-prefix=','*.
     '*.bloaty',
     '*.c',
     '*.cc',
     '*.cfg',
     '*.cmake',
+    'CMakeLists.txt',
     '*.cpp',
     '*.css',
     '*.dts',
@@ -572,7 +573,6 @@
     '*.S',
     '*.s',
     '*.toml',
-    'CMakeLists.txt',
     # keep-sorted: end
 )