python aprotoc instead of jq and printproto

  Use `bazel run` and import protoc-generated Python classes
  instead of relying on printproto, a Google tool for parsing
  protobuf files.

Bug: TODO
Test: run ./incremental_build.sh -c 'no change' -b soong
Change-Id: Ie25e94d18f2b25e4cf49ac45cf2546a6b6cca596
diff --git a/scripts/incremental_build/BUILD.bazel b/scripts/incremental_build/BUILD.bazel
index 46f0f95..fec95c0 100644
--- a/scripts/incremental_build/BUILD.bazel
+++ b/scripts/incremental_build/BUILD.bazel
@@ -17,13 +17,12 @@
     srcs = [
         "cuj_catalog.py",
         "incremental_build.py",
-        "perf_metrics.py",
         "pretty.py",
         "ui.py",
     ],
     main = "incremental_build.py",
     python_version = "PY3",
-    deps = [":util"],
+    deps = [":perf_metrics"],
 )
 
 py_library(
@@ -37,11 +36,13 @@
     deps = [":util"],
 )
 
-py_binary(
+py_library(
     name = "perf_metrics",
     srcs = ["perf_metrics.py"],
-    python_version = "PY3",
-    deps = [":util"],
+    deps = [
+        ":util",
+        "//build/soong/ui/metrics:metrics-py-proto",
+    ],
 )
 
 py_test(
diff --git a/scripts/incremental_build/README.md b/scripts/incremental_build/README.md
index 9027059..ebca0bc 100644
--- a/scripts/incremental_build/README.md
+++ b/scripts/incremental_build/README.md
@@ -1,39 +1,16 @@
 # How to Use
 
-For automated use (e.g. in CI), use `main.py`. See its help
-with `main.py --help`. Note that metrics collection relies on `printproto`
-and `jq` tools being on $PATH.
-
-The most basic invocation, e.g. `./incremental_build.py libc`, is logically
+The most basic invocation, e.g. `incremental_build.sh -- libc`, is logically
 equivalent to
 
 1. running `m --skip-soong-tests libc` and then
 2. parsing `$OUTDIR/soong_metrics` and `$OUTDIR/bp2build_metrics.pb` files
 3. Adding timing-related metrics from those files
    into `out/timing_logs/metrics.csv`
+4. repeating steps 1-3 for each CUJ
 
-There are a number of CUJs set up in `cuj_catalog.py` and they are run
-sequentially, such that each row in `metrics.csv` are the timings of various "
-events" during an incremental build.
+CUJs are defined in `cuj_catalog.py`.
+Each row in `metrics.csv` has the timings of various "phases" of a build.
 
-You may also add rows to `metrics.csv` after a manual run,
-using `perf_metrics.py`
-script. This is particularly useful when you don't want to
-modify `cuj_catalog.py`
-for one-off tests.
-
-Currently:
-
-1. run a build (conceptually, m droid)
-2. printproto to parse metrics related pb files
-3. use jq to filter data
-4. collate data into a csv file
-5. goto 1 until various CUJs are exhausted
-
-For CI, we should:
-
-1. run a build with some identifiable tag (not sure what mechanisms are
-   available)
-2. goto 1 until various CUJs are exhausted
-3. rely on plx to collate data from all builds and provide a filtering mechanism
-   based on that tag from step 1
+Try `incremental_build.sh --help` and `canonical_perf.sh --help` for help on
+usage.
\ No newline at end of file
diff --git a/scripts/incremental_build/incremental_build.py b/scripts/incremental_build/incremental_build.py
old mode 100755
new mode 100644
diff --git a/scripts/incremental_build/incremental_build.sh b/scripts/incremental_build/incremental_build.sh
new file mode 100755
index 0000000..305348f
--- /dev/null
+++ b/scripts/incremental_build/incremental_build.sh
@@ -0,0 +1,12 @@
+#!/bin/bash -eux
+readonly TOP="$(realpath "$(dirname "$0")/../../../..")"
+"$TOP/build/soong/soong_ui.bash" \
+  --build-mode \
+  --all-modules \
+  --dir="$(pwd)" \
+  --skip-soong-tests \
+  bp2build
+
+ANDROID_BUILD_TOP=$TOP "$TOP/build/bazel/bin/bazel" run --config=bp2build --verbose_failures //build/bazel/scripts/incremental_build -- "$@"
+
+# Alternatively, we could use python_zip_file, https://github.com/bazelbuild/bazel/pull/9453
diff --git a/scripts/incremental_build/perf_metrics.py b/scripts/incremental_build/perf_metrics.py
old mode 100755
new mode 100644
index 024ac11..da5ff59
--- a/scripts/incremental_build/perf_metrics.py
+++ b/scripts/incremental_build/perf_metrics.py
@@ -13,7 +13,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import argparse
 import dataclasses
 import datetime
 import glob
@@ -24,10 +23,14 @@
 import subprocess
 import textwrap
 from pathlib import Path
-from typing import Optional
+from typing import Iterable
+
+from bp2build_metrics_proto.bp2build_metrics_pb2 import Bp2BuildMetrics
+from metrics_proto.metrics_pb2 import MetricsBase
+from metrics_proto.metrics_pb2 import PerfInfo
+from metrics_proto.metrics_pb2 import SoongBuildMetrics
 
 import util
-import pretty
 
 
 @dataclasses.dataclass
@@ -54,16 +57,6 @@
 SOONG_BUILD_PB = 'soong_build_metrics.pb'
 BP2BUILD_PB = 'bp2build_metrics.pb'
 
-SOONG_PROTO = 'build/soong/ui/metrics/' \
-              'metrics_proto/metrics.proto'
-SOONG_BUILD_PROTO = SOONG_PROTO
-BP2BUILD_PROTO = 'build/soong/ui/metrics/' \
-                 'bp2build_metrics_proto/bp2build_metrics.proto'
-
-SOONG_MSG = 'soong_build_metrics.MetricsBase'
-SOONG_BUILD_MSG = 'soong_build_metrics.SoongBuildMetrics'
-BP2BUILD_MSG = 'soong_build_bp2build_metrics.Bp2BuildMetrics'
-
 
 def _move_pbs_to(d: Path):
   soong_pb = util.get_out_dir().joinpath(SOONG_PB)
@@ -90,22 +83,46 @@
   Soong_build event names may contain "mixed_build" event. To normalize the
   event names between mixed builds and soong-only build, convert
     `soong_build/soong_build.xyz` and `soong_build/soong_build.mixed_build.xyz`
-  both to simply `soong_build/_.xyz`
+  both to simply `soong_build/*.xyz`
   """
   soong_pb = d.joinpath(SOONG_PB)
   soong_build_pb = d.joinpath(SOONG_BUILD_PB)
   bp2build_pb = d.joinpath(BP2BUILD_PB)
-  soong_proto = util.get_top_dir().joinpath(SOONG_PROTO)
-  soong_build_proto = soong_proto
-  bp2build_proto = util.get_top_dir().joinpath(BP2BUILD_PROTO)
 
   events: list[PerfInfoOrEvent] = []
+
+  def extract_perf_info(root_obj):
+    for field_name in dir(root_obj):
+      if field_name.startswith('__'):
+        continue
+      field_value = getattr(root_obj, field_name)
+      if isinstance(field_value, Iterable):
+        for item in field_value:
+          if not isinstance(item, PerfInfo):
+            break
+          events.append(
+            PerfInfoOrEvent(item.name, item.real_time, item.start_time,
+                            item.description))
+
   if soong_pb.exists():
-    events.extend(_read_pb(soong_pb, soong_proto, SOONG_MSG))
+    metrics_base = MetricsBase()
+    with open(soong_pb, "rb") as f:
+      metrics_base.ParseFromString(f.read())
+    extract_perf_info(metrics_base)
+
   if soong_build_pb.exists():
-    events.extend(_read_pb(soong_build_pb, soong_build_proto, SOONG_BUILD_MSG))
+    soong_build_metrics = SoongBuildMetrics()
+    with open(soong_build_pb, "rb") as f:
+      soong_build_metrics.ParseFromString(f.read())
+    extract_perf_info(soong_build_metrics)
+
   if bp2build_pb.exists():
-    events.extend(_read_pb(bp2build_pb, bp2build_proto, BP2BUILD_MSG))
+    bp2build_metrics = Bp2BuildMetrics()
+    with open(bp2build_pb, "rb") as f:
+      bp2build_metrics.ParseFromString(f.read())
+    for event in bp2build_metrics.events:
+      events.append(
+        PerfInfoOrEvent(event.name, event.real_time, event.start_time, ''))
 
   events.sort(key=lambda e: e.start_time)
 
@@ -116,38 +133,6 @@
           in events}
 
 
-def _read_pb(
-    pb_file: Path,
-    proto_file: Path,
-    proto_message: str
-) -> list[PerfInfoOrEvent]:
-  """
-  Loads PerfInfo or Event from the file sorted chronologically
-  Note we are not using protoc-generated classes for simplicity (e.g. dependency
-  on `google.protobuf`)
-  Note dict keeps insertion order in python 3.7+
-  """
-  cmd = (f'''printproto --proto2  --raw_protocol_buffer \
-  --message={proto_message} \
-  --proto="{proto_file}" \
-  --multiline \
-  --json --json_accuracy_loss_reaction=ignore \
-  "{pb_file}" \
-  | jq ".. | objects | select(.real_time) | select(.name)" \
-  | jq -s ". | sort_by(.start_time)"''')
-  result = subprocess.check_output(cmd, shell=True, cwd=util.get_top_dir(),
-                                   text=True)
-
-  fields: set[str] = {f.name for f in dataclasses.fields(PerfInfoOrEvent)}
-
-  def parse(d: dict) -> Optional[PerfInfoOrEvent]:
-    filtered = {k: v for (k, v) in d.items() if k in fields}
-    return PerfInfoOrEvent(**filtered)
-
-  events: list[PerfInfoOrEvent] = [parse(d) for d in json.loads(result)]
-  return events
-
-
 Row = dict[str, any]
 
 
@@ -261,37 +246,3 @@
   2 To view column headers:
     %s
     '''), output, cmd_str, util.get_csv_columns_cmd(log_dir))
-
-
-def main():
-  p = argparse.ArgumentParser(
-      formatter_class=argparse.RawTextHelpFormatter,
-      description='read archived perf metrics from [LOG_DIR] and '
-                  f'summarize them into {util.METRICS_TABLE}')
-  default_log_dir = util.get_default_log_dir()
-  p.add_argument('-l', '--log-dir', type=Path, default=default_log_dir,
-                 help=textwrap.dedent('''
-                 Directory for timing logs. Defaults to %(default)s
-                 TIPS: Specify a directory outside of the source tree
-                 ''').strip())
-  p.add_argument('-m', '--add-manual-build',
-                 help='If you want to add the metrics from the last manual '
-                      f'build to {util.METRICS_TABLE}, provide a description')
-  options = p.parse_args()
-
-  if options.add_manual_build:
-    build_info = {'build_type': 'MANUAL',
-                  'description': options.add_manual_build}
-    run_dir = next(util.next_path(options.log_dir.joinpath('run')))
-    run_dir.mkdir(parents=True, exist_ok=False)
-    archive_run(run_dir, build_info)
-
-  tabulate_metrics_csv(options.log_dir)
-  display_tabulated_metrics(options.log_dir)
-  pretty.summarize_metrics(options.log_dir)
-  pretty.display_summarized_metrics(options.log_dir)
-
-
-if __name__ == '__main__':
-  logging.root.setLevel(logging.INFO)
-  main()
diff --git a/scripts/incremental_build/perf_metrics_test.py b/scripts/incremental_build/perf_metrics_test.py
old mode 100755
new mode 100644
diff --git a/scripts/incremental_build/ui.py b/scripts/incremental_build/ui.py
index e85adc0..316e9c9 100644
--- a/scripts/incremental_build/ui.py
+++ b/scripts/incremental_build/ui.py
@@ -18,14 +18,13 @@
 import logging
 import os
 import re
+import sys
 import textwrap
 from datetime import date
 from enum import Enum
 from pathlib import Path
 from typing import Optional
 
-from future.moves import sys
-
 import cuj_catalog
 import util
 
diff --git a/scripts/incremental_build/util.py b/scripts/incremental_build/util.py
index 2bafa25..66707ee 100644
--- a/scripts/incremental_build/util.py
+++ b/scripts/incremental_build/util.py
@@ -75,13 +75,17 @@
 @functools.cache
 def get_top_dir(d: Path = Path('.').absolute()) -> Path:
   """Get the path to the root of the Android source tree"""
+  top_dir = os.environ.get('ANDROID_BUILD_TOP')
+  if top_dir:
+    logging.info('ANDROID BUILD TOP = %s', top_dir)
+    return Path(top_dir)
   logging.debug('Checking if Android source tree root is %s', d)
   if d.parent == d:
     sys.exit('Unable to find ROOT source directory, specifically,'
              f'{INDICATOR_FILE} not found anywhere. '
              'Try `m nothing` and `repo sync`')
   if d.joinpath(INDICATOR_FILE).is_file():
-    logging.info('Android source tree root = %s', d)
+    logging.info('ANDROID BUILD TOP assumed to be %s', d)
     return d
   return get_top_dir(d.parent)