| #!/usr/bin/env python |
| |
| import argparse |
| import bz2 |
| import datetime |
| import json |
| import math |
| import os |
| import re |
| import statistics |
| import subprocess |
| import sys |
| import time |
| from collections import defaultdict |
| from glob import glob |
| from pathlib import Path |
| from typing import (Any, DefaultDict, Dict, Iterable, Iterator, List, Optional, |
| Set, Tuple, cast) |
| from xml.dom import minidom # type: ignore[import] |
| |
| import requests |
| from typing_extensions import TypedDict |
| from tools.stats_utils.s3_stat_parser import (newify_case, get_S3_object_from_bucket, get_S3_bucket_readonly, |
| Report, Status, Commit, HAVE_BOTO3, Version2Case, VersionedReport, |
| Version1Report, Version2Report, ReportMetaMeta) |
| |
| |
| SimplerSuite = Dict[str, Version2Case] |
| SimplerFile = Dict[str, SimplerSuite] |
| SimplerReport = Dict[str, SimplerFile] |
| |
| |
| class Stat(TypedDict): |
| center: float |
| spread: Optional[float] |
| |
| |
| class CaseDiff(TypedDict): |
| margin: str |
| name: str |
| was: Optional[Tuple[Stat, Status]] |
| now: Optional[Version2Case] |
| |
| |
| class SuiteDiff(TypedDict): |
| margin: str |
| name: str |
| was: Optional[Stat] |
| now: Optional[float] |
| cases: List[CaseDiff] |
| |
| |
| # TODO: consolidate this with the get_cases function from |
| # tools/test_history.py |
| |
| # Here we translate to a three-layer format (file -> suite -> case) |
| # rather than a two-layer format (suite -> case) because as mentioned in |
| # a comment in the body of this function, if we consolidate suites that |
| # share a name, there will be test case name collisions, and once we |
| # have those, there's no clean way to deal with it in the diffing logic. |
| # It's not great to have to add a dummy empty string for the filename |
| # for version 1 reports, but it's better than either losing cases that |
| # share a name (for version 2 reports) or using a list of cases rather |
| # than a dict. |
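| # |
| # For illustration, a SimplerReport has roughly this shape (the file name, |
| # suite, case and timing values here are hypothetical, not from a real run): |
| # |
| #     { |
| #         'test_nn': {                                   # test filename |
| #             'TestNN': {                                # suite name |
| #                 'test_relu': {'seconds': 0.02, 'status': None},        # passed |
| #                 'test_conv2d': {'seconds': 1.5, 'status': 'skipped'}, |
| #             }, |
| #         }, |
| #     } |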
| def simplify(report: Report) -> SimplerReport: |
| if 'format_version' not in report: # version 1 implicitly |
| v1report = cast(Version1Report, report) |
| return { |
| # version 1 reports don't include test filename information, so we |
| # use a single fake filename: the empty string |
| '': { |
| suite_name: { |
| # This clobbers some cases that have duplicate names |
| # because in version 1, we would merge together all |
| # the suites with a given name (even if they came |
| # from different files), so there were actually |
| # situations in which two cases in the same suite |
| # shared a name (because they actually originally |
| # came from two suites that were then merged). It |
| # would probably be better to warn about the cases |
| # that we're silently discarding here, but since |
| # we're only uploading in the new format (where |
| # everything is also keyed by filename) going |
| # forward, it shouldn't matter too much. |
| case['name']: newify_case(case) |
| for case in suite['cases'] |
| } |
| for suite_name, suite in v1report['suites'].items() |
| } |
| } |
| else: |
| v_report = cast(VersionedReport, report) |
| version = v_report['format_version'] |
| if version == 2: |
| v2report = cast(Version2Report, v_report) |
| return { |
| filename: { |
| suite_name: suite['cases'] |
| for suite_name, suite in file_data['suites'].items() |
| } |
| for filename, file_data in v2report['files'].items() |
| } |
| else: |
| raise RuntimeError(f'Unknown format version: {version}') |
| |
| |
| def plural(n: int) -> str: |
| return '' if n == 1 else 's' |
| |
| |
| def display_stat( |
| x: Stat, |
| format: Tuple[Tuple[int, int], Tuple[int, int]], |
| ) -> str: |
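| # format gives, for the center and the spread respectively, a pair of |
| # (digits before the decimal point, digits after); each value is printed as a |
| # fixed-point field whose total width is that sum plus one for the '.' |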
| spread_len = format[1][0] + 1 + format[1][1] |
| spread = x['spread'] |
| if spread is not None: |
| spread_str = f' ± {spread:{spread_len}.{format[1][1]}f}s' |
| else: |
| spread_str = ' ' * (3 + spread_len + 1) |
| mean_len = format[0][0] + 1 + format[0][1] |
| return f'{x["center"]:{mean_len}.{format[0][1]}f}s{spread_str}' |
| |
| |
| def list_stat(l: List[float]) -> Stat: |
| return { |
| 'center': statistics.mean(l), |
| 'spread': statistics.stdev(l) if len(l) > 1 else None |
| } |
| |
| |
| def zero_stat() -> Stat: |
| return {'center': 0, 'spread': None} |
| |
| |
| def recenter(was: Stat, now: float) -> Stat: |
| return {'center': now - was['center'], 'spread': was['spread']} |
| |
| |
| def sum_normals(stats: Iterable[Stat]) -> Stat: |
| """ |
| Returns a stat corresponding to the sum of the given stats. |
| |
| Assumes that the center and spread for each of the given stats are |
| mean and stdev, respectively. |
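| |
| Since variances of independent normal variables add, the spread of the |
| sum is the square root of the sum of squared spreads. For example |
| (hypothetical values, assuming the individual stats are independent): |
| |
| >>> sum_normals([{'center': 1.0, 'spread': 3.0}, |
| ... {'center': 2.0, 'spread': 4.0}]) |
| {'center': 3.0, 'spread': 5.0} |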
| """ |
| l = list(stats) |
| spread: Optional[float] |
| if any(stat['spread'] is not None for stat in l): |
| spread = math.sqrt(sum((stat['spread'] or 0)**2 for stat in l)) |
| else: |
| spread = None |
| return { |
| 'center': sum(stat['center'] for stat in l), |
| 'spread': spread, |
| } |
| |
| |
| def format_seconds(seconds: List[float]) -> str: |
| if len(seconds) > 0: |
| x = list_stat(seconds) |
| return f'total time {display_stat(x, ((5, 2), (4, 2)))}'.strip() |
| return '' |
| |
| |
| def show_ancestors(num_commits: int) -> str: |
| return f' | : ({num_commits} commit{plural(num_commits)})' |
| |
| |
| def unlines(lines: List[str]) -> str: |
| return ''.join(f'{line}\n' for line in lines) |
| |
| |
| def matching_test_times( |
| *, |
| base_reports: Dict[Commit, List[SimplerReport]], |
| filename: str, |
| suite_name: str, |
| case_name: str, |
| status: Status, |
| ) -> List[float]: |
| times: List[float] = [] |
| for reports in base_reports.values(): |
| for report in reports: |
| file_data = report.get(filename) |
| if file_data: |
| suite = file_data.get(suite_name) |
| if suite: |
| case = suite.get(case_name) |
| if case: |
| t = case['seconds'] |
| s = case['status'] |
| if s == status: |
| times.append(t) |
| return times |
| |
| |
| def analyze( |
| *, |
| head_report: SimplerReport, |
| base_reports: Dict[Commit, List[SimplerReport]], |
| ) -> List[SuiteDiff]: |
| nonempty_shas = [sha for sha, reports in base_reports.items() if reports] |
| # most recent master ancestor with at least one S3 report, |
| # or empty list if there are none (will show all tests as added) |
| base_report = base_reports[nonempty_shas[0]] if nonempty_shas else [] |
| |
| # find all relevant suites (those in either base or head or both) |
| all_reports = [head_report] + base_report |
| all_suites: Set[Tuple[str, str]] = { |
| (filename, suite_name) |
| for r in all_reports |
| for filename, file_data in r.items() |
| for suite_name in file_data.keys() |
| } |
| |
| removed_suites: List[SuiteDiff] = [] |
| modified_suites: List[SuiteDiff] = [] |
| added_suites: List[SuiteDiff] = [] |
| |
| for filename, suite_name in sorted(all_suites): |
| case_diffs: List[CaseDiff] = [] |
| head_suite = head_report.get(filename, {}).get(suite_name) |
| base_cases: Dict[str, Status] = dict(sorted(set.intersection(*[ |
| { |
| (n, case['status']) |
| for n, case |
| in report.get(filename, {}).get(suite_name, {}).items() |
| } |
| for report in base_report |
| ] or [set()]))) |
| case_stats: Dict[str, Stat] = {} |
| if head_suite: |
| now = sum(case['seconds'] for case in head_suite.values()) |
| if any( |
| filename in report and suite_name in report[filename] |
| for report in base_report |
| ): |
| removed_cases: List[CaseDiff] = [] |
| for case_name, case_status in base_cases.items(): |
| case_stats[case_name] = list_stat(matching_test_times( |
| base_reports=base_reports, |
| filename=filename, |
| suite_name=suite_name, |
| case_name=case_name, |
| status=case_status, |
| )) |
| if case_name not in head_suite: |
| removed_cases.append({ |
| 'margin': '-', |
| 'name': case_name, |
| 'was': (case_stats[case_name], case_status), |
| 'now': None, |
| }) |
| modified_cases: List[CaseDiff] = [] |
| added_cases: List[CaseDiff] = [] |
| for head_case_name in sorted(head_suite): |
| head_case = head_suite[head_case_name] |
| if head_case_name in base_cases: |
| stat = case_stats[head_case_name] |
| base_status = base_cases[head_case_name] |
| if head_case['status'] != base_status: |
| modified_cases.append({ |
| 'margin': '!', |
| 'name': head_case_name, |
| 'was': (stat, base_status), |
| 'now': head_case, |
| }) |
| else: |
| added_cases.append({ |
| 'margin': '+', |
| 'name': head_case_name, |
| 'was': None, |
| 'now': head_case, |
| }) |
| # there might be a bug calculating this stdev, not sure |
| was = sum_normals(case_stats.values()) |
| case_diffs = removed_cases + modified_cases + added_cases |
| if case_diffs: |
| modified_suites.append({ |
| 'margin': ' ', |
| 'name': suite_name, |
| 'was': was, |
| 'now': now, |
| 'cases': case_diffs, |
| }) |
| else: |
| for head_case_name in sorted(head_suite): |
| head_case = head_suite[head_case_name] |
| case_diffs.append({ |
| 'margin': ' ', |
| 'name': head_case_name, |
| 'was': None, |
| 'now': head_case, |
| }) |
| added_suites.append({ |
| 'margin': '+', |
| 'name': suite_name, |
| 'was': None, |
| 'now': now, |
| 'cases': case_diffs, |
| }) |
| else: |
| for case_name, case_status in base_cases.items(): |
| case_stats[case_name] = list_stat(matching_test_times( |
| base_reports=base_reports, |
| filename=filename, |
| suite_name=suite_name, |
| case_name=case_name, |
| status=case_status, |
| )) |
| case_diffs.append({ |
| 'margin': ' ', |
| 'name': case_name, |
| 'was': (case_stats[case_name], case_status), |
| 'now': None, |
| }) |
| removed_suites.append({ |
| 'margin': '-', |
| 'name': suite_name, |
| # there might be a bug calculating this stdev, not sure |
| 'was': sum_normals(case_stats.values()), |
| 'now': None, |
| 'cases': case_diffs, |
| }) |
| |
| return removed_suites + modified_suites + added_suites |
| |
| |
| def case_diff_lines(diff: CaseDiff) -> List[str]: |
| lines = [f'def {diff["name"]}: ...'] |
| |
| case_fmt = ((3, 3), (2, 3)) |
| |
| was = diff['was'] |
| if was: |
| was_line = f' # was {display_stat(was[0], case_fmt)}' |
| was_status = was[1] |
| if was_status: |
| was_line += f' ({was_status})' |
| lines.append(was_line) |
| |
| now = diff['now'] |
| if now: |
| now_stat: Stat = {'center': now['seconds'], 'spread': None} |
| now_line = f' # now {display_stat(now_stat, case_fmt)}' |
| now_status = now['status'] |
| if now_status: |
| now_line += f' ({now_status})' |
| lines.append(now_line) |
| |
| return [''] + [f'{diff["margin"]} {l}' for l in lines] |
| |
| |
| def display_suite_diff(diff: SuiteDiff) -> str: |
| lines = [f'class {diff["name"]}:'] |
| |
| suite_fmt = ((4, 2), (3, 2)) |
| |
| was = diff['was'] |
| if was: |
| lines.append(f' # was {display_stat(was, suite_fmt)}') |
| |
| now = diff['now'] |
| if now is not None: |
| now_stat: Stat = {'center': now, 'spread': None} |
| lines.append(f' # now {display_stat(now_stat, suite_fmt)}') |
| |
| for case_diff in diff['cases']: |
| lines.extend([f' {l}' for l in case_diff_lines(case_diff)]) |
| |
| return unlines([''] + [f'{diff["margin"]} {l}'.rstrip() for l in lines] + ['']) |
| |
| |
| def anomalies(diffs: List[SuiteDiff]) -> str: |
| return ''.join(map(display_suite_diff, diffs)) |
| |
| |
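| # For illustration (hypothetical SHAs and timings; spacing shown only |
| # approximately), a call like |
| # |
| #   graph(head_sha='0123abcd89...', head_seconds=123.45, |
| #         base_seconds={'abcd567890...': [120.0], 'ef01234567...': []}, |
| #         on_master=False) |
| # |
| # renders something like: |
| # |
| #   Commit graph (base is most recent master ancestor with at least one S3 report): |
| # |
| #       : (master) |
| #       | |
| #       | * 0123abcd89 (HEAD)            total time  123.45s |
| #       |/ |
| #       * abcd567890 (base)   1 report,  total time  120.00s |
| #       * ef01234567          0 reports |
| #       | |
| #       : |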
| def graph( |
| *, |
| head_sha: Commit, |
| head_seconds: float, |
| base_seconds: Dict[Commit, List[float]], |
| on_master: bool, |
| ancestry_path: int = 0, |
| other_ancestors: int = 0, |
| ) -> str: |
| lines = [ |
| 'Commit graph (base is most recent master ancestor with at least one S3 report):', |
| '', |
| ' : (master)', |
| ' |', |
| ] |
| |
| head_time_str = f' {format_seconds([head_seconds])}' |
| if on_master: |
| lines.append(f' * {head_sha[:10]} (HEAD) {head_time_str}') |
| else: |
| lines.append(f' | * {head_sha[:10]} (HEAD) {head_time_str}') |
| |
| if ancestry_path > 0: |
| lines += [ |
| ' | |', |
| show_ancestors(ancestry_path), |
| ] |
| |
| if other_ancestors > 0: |
| lines += [ |
| ' |/|', |
| show_ancestors(other_ancestors), |
| ' |', |
| ] |
| else: |
| lines.append(' |/') |
| |
| is_first = True |
| for sha, seconds in base_seconds.items(): |
| num_runs = len(seconds) |
| prefix = str(num_runs).rjust(3) |
| base = '(base)' if is_first and num_runs > 0 else ' ' |
| if num_runs > 0: |
| is_first = False |
| t = format_seconds(seconds) |
| p = plural(num_runs) |
| if t: |
| p = f'{p}, '.ljust(3) |
| lines.append(f' * {sha[:10]} {base} {prefix} report{p}{t}') |
| |
| lines.extend([' |', ' :']) |
| |
| return unlines(lines) |
| |
| |
| def case_delta(case: CaseDiff) -> Stat: |
| was = case['was'] |
| now = case['now'] |
| return recenter( |
| was[0] if was else zero_stat(), |
| now['seconds'] if now else 0, |
| ) |
| |
| |
| def display_final_stat(stat: Stat) -> str: |
| center = stat['center'] |
| spread = stat['spread'] |
| displayed = display_stat( |
| {'center': abs(center), 'spread': spread}, |
| ((4, 2), (3, 2)), |
| ) |
| if center < 0: |
| sign = '-' |
| elif center > 0: |
| sign = '+' |
| else: |
| sign = ' ' |
| return f'{sign}{displayed}'.rstrip() |
| |
| |
| def summary_line(message: str, d: DefaultDict[str, List[CaseDiff]]) -> str: |
| all_cases = [c for cs in d.values() for c in cs] |
| tests = len(all_cases) |
| suites = len(d) |
| sp = f'{plural(suites)})'.ljust(2) |
| tp = f'{plural(tests)},'.ljust(2) |
| # there might be a bug calculating this stdev, not sure |
| stat = sum_normals(case_delta(c) for c in all_cases) |
| return ''.join([ |
| f'{message} (across {suites:>4} suite{sp}', |
| f'{tests:>6} test{tp}', |
| f' totaling {display_final_stat(stat)}', |
| ]) |
| |
| |
| def summary(analysis: List[SuiteDiff]) -> str: |
| removed_tests: DefaultDict[str, List[CaseDiff]] = defaultdict(list) |
| modified_tests: DefaultDict[str, List[CaseDiff]] = defaultdict(list) |
| added_tests: DefaultDict[str, List[CaseDiff]] = defaultdict(list) |
| |
| for diff in analysis: |
| # the use of 'margin' here is not the most elegant |
| name = diff['name'] |
| margin = diff['margin'] |
| cases = diff['cases'] |
| if margin == '-': |
| removed_tests[name] += cases |
| elif margin == '+': |
| added_tests[name] += cases |
| else: |
| removed = list(filter(lambda c: c['margin'] == '-', cases)) |
| added = list(filter(lambda c: c['margin'] == '+', cases)) |
| modified = list(filter(lambda c: c['margin'] == '!', cases)) |
| if removed: |
| removed_tests[name] += removed |
| if added: |
| added_tests[name] += added |
| if modified: |
| modified_tests[name] += modified |
| |
| return unlines([ |
| summary_line('Removed ', removed_tests), |
| summary_line('Modified', modified_tests), |
| summary_line('Added ', added_tests), |
| ]) |
| |
| |
| def regression_info( |
| *, |
| head_sha: Commit, |
| head_report: Report, |
| base_reports: Dict[Commit, List[Report]], |
| job_name: str, |
| on_master: bool, |
| ancestry_path: int, |
| other_ancestors: int, |
| ) -> str: |
| """ |
| Return a human-readable report describing any test time regressions. |
| |
| The head_sha and head_report args give info about the current commit |
| and its test times. Since Python dicts maintain insertion order |
| (guaranteed as part of the language spec since 3.7), the |
| base_reports argument must list the head's several most recent |
| master commits, from newest to oldest (so the merge-base is |
| list(base_reports)[0]). |
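| |
| For example (hypothetical SHAs), if the merge-base of HEAD and master is |
| commit 'aaaa...' and its parent on master is 'bbbb...', then: |
| |
| base_reports = { |
| 'aaaa...': [report_1, report_2],  # merge-base (newest), two S3 reports |
| 'bbbb...': [],                    # older ancestor with no reports yet |
| } |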
| """ |
| simpler_head = simplify(head_report) |
| simpler_base: Dict[Commit, List[SimplerReport]] = {} |
| for commit, reports in base_reports.items(): |
| simpler_base[commit] = [simplify(r) for r in reports] |
| analysis = analyze( |
| head_report=simpler_head, |
| base_reports=simpler_base, |
| ) |
| |
| return '\n'.join([ |
| unlines([ |
| '----- Historic stats comparison result ------', |
| '', |
| f' job: {job_name}', |
| f' commit: {head_sha}', |
| ]), |
| |
| # don't print anomalies, because due to sharding the output from this |
| # can be very long and obscure the more useful signal |
| |
| # anomalies(analysis), |
| |
| graph( |
| head_sha=head_sha, |
| head_seconds=head_report['total_seconds'], |
| base_seconds={ |
| c: [r['total_seconds'] for r in rs] |
| for c, rs in base_reports.items() |
| }, |
| on_master=on_master, |
| ancestry_path=ancestry_path, |
| other_ancestors=other_ancestors, |
| ), |
| summary(analysis), |
| ]) |
| |
| |
| class TestCase: |
| def __init__(self, dom: Any) -> None: |
| self.class_name = str(dom.attributes['classname'].value) |
| self.name = str(dom.attributes['name'].value) |
| self.time = float(dom.attributes['time'].value) |
| self.errored = len(dom.getElementsByTagName('error')) > 0 |
| self.failed = len(dom.getElementsByTagName('failure')) > 0 |
| self.skipped = len(dom.getElementsByTagName('skipped')) > 0 |
| |
| |
| class TestSuite: |
| def __init__(self, name: str) -> None: |
| self.name = name |
| self.test_cases: Dict[str, TestCase] = dict() |
| self.failed_count = 0 |
| self.skipped_count = 0 |
| self.errored_count = 0 |
| self.total_time = 0.0 |
| |
| def __repr__(self) -> str: |
| rc = f'{self.name} run_time: {self.total_time:.2f} tests: {len(self.test_cases)}' |
| if self.skipped_count > 0: |
| rc += f' skipped: {self.skipped_count}' |
| return f'TestSuite({rc})' |
| |
| def append(self, test_case: TestCase) -> None: |
| self.test_cases[test_case.name] = test_case |
| self.total_time += test_case.time |
| self.failed_count += 1 if test_case.failed else 0 |
| self.skipped_count += 1 if test_case.skipped else 0 |
| self.errored_count += 1 if test_case.errored else 0 |
| |
| def replace(self, test_case: TestCase) -> float: |
| name = test_case.name |
| assert name in self.test_cases, f'Error: attempting to replace nonexistent test case {name}' |
| old_time = self.test_cases[name].time |
| # We don't replace anything if the old test case was not shorter. |
| if old_time >= test_case.time: |
| return 0.0 |
| self.total_time = self.total_time + test_case.time - old_time |
| self.test_cases[name] = test_case |
| return test_case.time - old_time |
| |
| def print_report(self, num_longest: int = 3) -> None: |
| sorted_tests = sorted(self.test_cases.values(), key=lambda x: x.time) |
| test_count = len(sorted_tests) |
| print(f"class {self.name}:") |
| print(f" tests: {test_count} failed: {self.failed_count} skipped: {self.skipped_count} errored: {self.errored_count}") |
| print(f" run_time: {self.total_time:.2f} seconds") |
| print(f" avg_time: {self.total_time/test_count:.2f} seconds") |
| if test_count >= 2: |
| print(f" median_time: {statistics.median(x.time for x in sorted_tests):.2f} seconds") |
| sorted_tests = sorted_tests[-num_longest:] |
| print(f" {len(sorted_tests)} longest tests:") |
| for test in reversed(sorted_tests): |
| print(f" {test.name} time: {test.time:.2f} seconds") |
| print("") |
| |
| |
| class TestFile: |
| def __init__(self, name: str) -> None: |
| self.name = name |
| self.total_time = 0.0 |
| self.test_suites: Dict[str, TestSuite] = dict() |
| |
| def append(self, test_case: TestCase) -> None: |
| suite_name = test_case.class_name |
| if suite_name not in self.test_suites: |
| self.test_suites[suite_name] = TestSuite(suite_name) |
| if test_case.name in self.test_suites[suite_name].test_cases: |
| # We expect duplicate tests for test_cpp_extensions_aot, distributed/test_distributed_fork, |
| # and distributed/test_distributed_spawn. In those jobs the duplicate tests run in |
| # parallel, so we keep the run that took the longest (see the illustrative |
| # example after this class). Any other duplicate is unexpected and raises a RuntimeWarning. |
| if self.name == 'test_cpp_extensions_aot' or \ |
| self.name == 'distributed/test_distributed_fork' or \ |
| self.name == 'distributed/test_distributed_spawn' or \ |
| self.name == 'cpp': # The caffe2 cpp tests spawn duplicate test cases as well. |
| time_difference = self.test_suites[suite_name].replace(test_case) |
| self.total_time += time_difference |
| else: |
| raise RuntimeWarning(f'Duplicate test case {test_case.name} in suite {suite_name} called from {self.name}') |
| else: |
| self.test_suites[suite_name].append(test_case) |
| self.total_time += test_case.time |
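| |
| # Illustrative sketch (hypothetical XML) of the keep-the-longest rule used by |
| # TestFile.append for whitelisted files: appending a duplicate case keeps the |
| # slower run and adjusts the totals accordingly. |
| # |
| #   tf = TestFile('test_cpp_extensions_aot') |
| #   for xml in ('<testcase classname="C" name="t" time="1.0"/>', |
| #               '<testcase classname="C" name="t" time="2.5"/>'): |
| #       tf.append(TestCase(minidom.parseString(xml).documentElement)) |
| #   assert tf.test_suites['C'].test_cases['t'].time == 2.5 |
| #   assert tf.total_time == 2.5 |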
| |
| |
| def parse_report(path: str) -> Iterator[TestCase]: |
| dom = minidom.parse(path) |
| for test_case in dom.getElementsByTagName('testcase'): |
| yield TestCase(test_case) |
| |
| |
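| # parse_reports walks a folder of JUnit-style XML reports. A hypothetical |
| # layout (paths are illustrative, not from a real run): |
| # |
| #   <folder>/test_nn/TEST-TestNN-0001.xml |
| #   <folder>/distributed.test_distributed_fork/TEST-TestDistBackend-0001.xml |
| # |
| # The parent directory name, with '.' replaced by '/', becomes the test |
| # filename key ('test_nn', 'distributed/test_distributed_fork'), and each |
| #   <testcase classname="TestNN" name="test_relu" time="0.02"> ... </testcase> |
| # element becomes a TestCase; <failure>, <error> and <skipped> children set |
| # its corresponding flags. |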
| def parse_reports(folder: str) -> Dict[str, TestFile]: |
| reports = glob(os.path.join(folder, '**', '*.xml'), recursive=True) |
| tests_by_file: Dict[str, TestFile] = {} |
| for report in reports: |
| test_filename = re.sub(r'\.', '/', os.path.basename(os.path.dirname(report))) |
| if test_filename not in tests_by_file: |
| tests_by_file[test_filename] = TestFile(test_filename) |
| for test_case in parse_report(report): |
| tests_by_file[test_filename].append(test_case) |
| return tests_by_file |
| |
| |
| def build_info() -> ReportMetaMeta: |
| return { |
| "build_pr": os.environ.get("CIRCLE_PR_NUMBER", ""), |
| "build_tag": os.environ.get("CIRCLE_TAG", ""), |
| "build_sha1": os.environ.get("CIRCLE_SHA1", ""), |
| "build_branch": os.environ.get("CIRCLE_BRANCH", ""), |
| "build_job": os.environ.get("CIRCLE_JOB", ""), |
| "build_workflow_id": os.environ.get("CIRCLE_WORKFLOW_ID", ""), |
| } |
| |
| |
| def build_message(test_case: TestCase) -> Dict[str, Dict[str, Any]]: |
| return { |
| "normal": { |
| **build_info(), |
| "test_suite_name": test_case.class_name, |
| "test_case_name": test_case.name, |
| }, |
| "int": { |
| "time": int(time.time()), |
| "test_total_count": 1, |
| "test_total_time": int(test_case.time * 1000), |
| "test_failed_count": 1 if test_case.failed > 0 else 0, |
| "test_skipped_count": 1 if test_case.skipped > 0 else 0, |
| "test_errored_count": 1 if test_case.errored > 0 else 0, |
| }, |
| } |
| |
| |
| def send_report_to_scribe(reports: Dict[str, TestFile]) -> None: |
| access_token = os.environ.get("SCRIBE_GRAPHQL_ACCESS_TOKEN") |
| |
| if not access_token: |
| print("No scribe access token provided, skip sending report!") |
| return |
| print("Scribe access token provided, sending report...") |
| url = "https://graph.facebook.com/scribe_logs" |
| r = requests.post( |
| url, |
| data={ |
| "access_token": access_token, |
| "logs": json.dumps( |
| [ |
| { |
| "category": "perfpipe_pytorch_test_times", |
| "message": json.dumps(build_message(test_case)), |
| "line_escape": False, |
| } |
| for test_file in reports.values() |
| for test_suite in test_file.test_suites.values() |
| for test_case in test_suite.test_cases.values() |
| ] |
| ), |
| }, |
| ) |
| r.raise_for_status() |
| |
| |
| def assemble_s3_object( |
| reports: Dict[str, TestFile], |
| *, |
| total_seconds: float, |
| ) -> Version2Report: |
| return { |
| **build_info(), # type: ignore[misc] |
| 'total_seconds': total_seconds, |
| 'format_version': 2, |
| 'files' : { |
| name: { |
| 'total_seconds': test_file.total_time, |
| 'suites': { |
| name: { |
| 'total_seconds': suite.total_time, |
| 'cases': { |
| name: { |
| 'seconds': case.time, |
| 'status': 'skipped' if case.skipped else |
| 'errored' if case.errored else |
| 'failed' if case.failed else None |
| } |
| for name, case in suite.test_cases.items() |
| }, |
| } |
| for name, suite in test_file.test_suites.items() |
| } |
| } |
| for name, test_file in reports.items() |
| } |
| } |
| |
| |
| def send_report_to_s3(head_report: Version2Report) -> None: |
| job = os.environ.get('CIRCLE_JOB') |
| sha1 = os.environ.get('CIRCLE_SHA1') |
| branch = os.environ.get('CIRCLE_BRANCH', '') |
| if branch not in ['master', 'nightly'] and not branch.startswith("release/"): |
| print("S3 upload only enabled on master, nightly and release branches.") |
| print(f"skipping test report on branch: {branch}") |
| return |
| now = datetime.datetime.utcnow().isoformat() |
| key = f'test_time/{sha1}/{job}/{now}Z.json.bz2' # Z meaning UTC |
| obj = get_S3_object_from_bucket('ossci-metrics', key) |
| # use bz2 because the results are smaller than gzip, and the |
| # compression time penalty we pay is only about half a second for |
| # input files of a few megabytes in size like these JSON files, and |
| # because for some reason zlib doesn't seem to play nice with the |
| # gunzip command whereas Python's bz2 does work with bzip2 |
| obj.put(Body=bz2.compress(json.dumps(head_report).encode())) |
| |
| |
| def print_regressions(head_report: Report, *, num_prev_commits: int) -> None: |
| sha1 = os.environ.get("CIRCLE_SHA1", "HEAD") |
| |
| base = subprocess.check_output( |
| ["git", "merge-base", sha1, "origin/master"], |
| encoding="ascii", |
| ).strip() |
| |
| count_spec = f"{base}..{sha1}" |
| intermediate_commits = int(subprocess.check_output( |
| ["git", "rev-list", "--count", count_spec], |
| encoding="ascii" |
| )) |
| ancestry_path = int(subprocess.check_output( |
| ["git", "rev-list", "--ancestry-path", "--count", count_spec], |
| encoding="ascii", |
| )) |
| |
| # if the current commit is already on master, exclude it from this history |
| # (keeping its num_prev_commits most recent ancestors); otherwise keep the |
| # merge-base plus its num_prev_commits - 1 most recent ancestors |
| commits = subprocess.check_output( |
| ["git", "rev-list", f"--max-count={num_prev_commits+1}", base], |
| encoding="ascii", |
| ).splitlines() |
| on_master = False |
| if base == sha1: |
| on_master = True |
| commits = commits[1:] |
| else: |
| commits = commits[:-1] |
| |
| job = os.environ.get("CIRCLE_JOB", "") |
| bucket = get_S3_bucket_readonly('ossci-metrics') |
| index: Dict[Commit, Any] = {} |
| for commit in commits: |
| summaries = bucket.objects.filter(Prefix=f"test_time/{commit}/{job}/") |
| index[commit] = list(summaries) |
| |
| objects: Dict[Commit, List[Report]] = {} |
| # should we do these in parallel? |
| for commit, summaries in index.items(): |
| objects[commit] = [] |
| for summary in summaries: |
| binary = summary.get()["Body"].read() |
| string = bz2.decompress(binary).decode("utf-8") |
| objects[commit].append(json.loads(string)) |
| |
| print() |
| print(regression_info( |
| head_sha=sha1, |
| head_report=head_report, |
| base_reports=objects, |
| job_name=job, |
| on_master=on_master, |
| ancestry_path=ancestry_path - 1, |
| other_ancestors=intermediate_commits - ancestry_path, |
| ), end="") |
| |
| |
| def positive_integer(value: str) -> int: |
| parsed = int(value) |
| if parsed < 1: |
| raise argparse.ArgumentTypeError(f"{value} is not a natural number") |
| return parsed |
| |
| |
| def positive_float(value: str) -> float: |
| parsed = float(value) |
| if parsed <= 0.0: |
| raise argparse.ArgumentTypeError(f"{value} is not a positive rational number") |
| return parsed |
| |
| |
| def reports_has_no_tests(reports: Dict[str, TestFile]) -> bool: |
| for test_file in reports.values(): |
| for test_suite in test_file.test_suites.values(): |
| if len(test_suite.test_cases) > 0: |
| return False |
| return True |
| |
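| |
| # Typical invocations (the module path and report folder are assumptions for |
| # illustration; adjust to wherever this script actually lives): |
| # |
| #   python -m tools.print_test_stats test/test-reports |
| #   python -m tools.print_test_stats --compare-with-s3 --num-prev-commits 5 test/test-reports |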
| if __name__ == '__main__': |
| parser = argparse.ArgumentParser( |
| description="Print statistics from test XML output.", |
| formatter_class=argparse.ArgumentDefaultsHelpFormatter, |
| ) |
| parser.add_argument( |
| "--longest-of-class", |
| type=positive_integer, |
| default=3, |
| metavar="N", |
| help="how many longest tests to show for each class", |
| ) |
| parser.add_argument( |
| "--class-print-threshold", |
| type=positive_float, |
| default=1.0, |
| metavar="N", |
| help="Minimal total time to warrant class report", |
| ) |
| parser.add_argument( |
| "--longest-of-run", |
| type=positive_integer, |
| default=10, |
| metavar="N", |
| help="how many longest tests to show from the entire run", |
| ) |
| if HAVE_BOTO3: |
| parser.add_argument( |
| "--upload-to-s3", |
| action="store_true", |
| help="upload test time to S3 bucket", |
| ) |
| parser.add_argument( |
| "--compare-with-s3", |
| action="store_true", |
| help="download test times for base commits and compare", |
| ) |
| parser.add_argument( |
| "--num-prev-commits", |
| type=positive_integer, |
| default=10, |
| metavar="N", |
| help="how many previous commits to compare test times with", |
| ) |
| parser.add_argument( |
| "--use-json", |
| metavar="FILE.json", |
| help="compare S3 with JSON file, instead of the test report folder", |
| ) |
| parser.add_argument( |
| "folder", |
| help="test report folder", |
| ) |
| args = parser.parse_args() |
| |
| reports_by_file = parse_reports(args.folder) |
| if reports_has_no_tests(reports_by_file): |
| print(f"No tests found in reports under {args.folder}") |
| sys.exit(0) |
| |
| try: |
| send_report_to_scribe(reports_by_file) |
| except Exception as e: |
| print(f"error encountered when uploading to scribe: {e}") |
| |
| # longest_tests can contain duplicates as the same tests can be spawned from different files |
| longest_tests: List[TestCase] = [] |
| total_time = 0.0 |
| for filename, test_file in reports_by_file.items(): |
| for suite_name, test_suite in test_file.test_suites.items(): |
| if test_suite.total_time >= args.class_print_threshold: |
| test_suite.print_report(args.longest_of_class) |
| total_time += test_suite.total_time |
| longest_tests.extend(test_suite.test_cases.values()) |
| longest_tests = sorted(longest_tests, key=lambda x: x.time)[-args.longest_of_run:] |
| |
| obj = assemble_s3_object(reports_by_file, total_seconds=total_time) |
| |
| if HAVE_BOTO3 and args.upload_to_s3: |
| try: |
| send_report_to_s3(obj) |
| except Exception as e: |
| print(f"error encountered when uploading to s3: {e}") |
| |
| print(f"Total runtime is {datetime.timedelta(seconds=int(total_time))}") |
| print(f"{len(longest_tests)} longest tests of entire run:") |
| for test_case in reversed(longest_tests): |
| print(f" {test_case.class_name}.{test_case.name} time: {test_case.time:.2f} seconds") |
| |
| if HAVE_BOTO3 and args.compare_with_s3: |
| head_json = obj |
| if args.use_json: |
| head_json = json.loads(Path(args.use_json).read_text()) |
| print_regressions(head_json, num_prev_commits=args.num_prev_commits) |