| #!/usr/bin/env python2 |
| # |
| # Copyright 2020 - The Android Open Source Project |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| """Tool to generate data for smoke tests.""" |
| |
| from __future__ import print_function |
| |
| import collections |
| import datetime |
| import gzip |
| import operator |
| import os |
| import re |
| import tempfile |
| |
| import jinja2 |
| import util |
| |
| ANDROID_REPOSITORY_ROOT = util.android_repository_root() |
| LIBCORE_DIR = os.path.join(ANDROID_REPOSITORY_ROOT, 'libcore') |
| LOGS_DIR = os.path.join(LIBCORE_DIR, 'smoketest') |
| REQUIRED_RUNS = 3 |
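# Matches logcat lines such as (illustrative, not from a real log):
# '04-15 12:34:56.789  1234  5678 I CtsTestRunListener: Now executing : org.example.FooTest'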
| CTS_LOG_LINE_PATTERN = (r'(\d+)-(\d+) +(\d+):(\d+):(\d+)\.(\d+) +\d+ +\d+ +' |
| r'[^ ]+ (CtsTestRunListener|TestRunner) *: *(.+)') |
| THIS_YEAR = datetime.datetime.now().year |
| START_MATCHER = ('CtsTestRunListener', r'Now executing\s*:\s*(.+)') |
| TESTS_RAN_MATCHER = ('TestRunner', r'started\s*:.*') |
| END_MATCHER = ('CtsTestRunListener', r'Total memory\s*:.+') |
| HARD_END_MATCHER = ('TestRunner', r'run finished\s*:.+') |
| JAVA_CLASS_PATTERN = r'(?:([a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*)\.)?[a-zA-Z_][a-zA-Z0-9_]*' |
| MAX_RUN_TIME = datetime.timedelta(minutes=10) |
| OUT_DIR = tempfile.mkdtemp() |
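# When True, per-class times are rolled up by package and test inclusion
# decisions are made at package granularity (see main()).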
| ROLL_UP_TEST_CLASSES_TO_PACKAGE = False |
| CTS_MODULE_NAME = 'CtsLibcoreTestCases' |
| |
| TEST_MAPPING_TEMPLATE = jinja2.Template(""" |
| { |
| "presubmit": [ |
| { |
| "name": "{{module}}", |
| "options": [{% for test in exclusions %} |
| { |
| "exclude-filter": "{{test}}" |
| }{% if not loop.last %},{% endif %}{% endfor %} |
| ] |
| } |
| ] |
| } |
| """.strip()) |
| |
| |
| def find_all_log_files(): |
| """Returns a list of the log files to read.""" |
| if not os.path.isdir(LOGS_DIR): |
| raise Exception('Could not find logs directory ' + LOGS_DIR) |
| filenames = os.listdir(LOGS_DIR) |
| if not filenames: |
| raise Exception('Found empty logs directory ' + LOGS_DIR) |
| if len(filenames) != REQUIRED_RUNS: |
| raise Exception('Expected to find exactly %d files in %s, found %d' % |
| (REQUIRED_RUNS, LOGS_DIR, len(filenames))) |
  return [os.path.join(LOGS_DIR, f) for f in filenames]
| |
| |
| def read_cts_logs(filename): |
| """Read CTS entries from a log file. |
| |
| Args: |
| filename: The name of the file to read. |
| |
| Yields: |
| Tuples of timestamps (as datetimes), log sources, and log messages. |
| """ |
| print('Reading ' + util.printable_path(filename)) |
| with gzip.open(filename, mode='rt') as log_file: |
| for line in log_file: |
| cts_match = re.match(CTS_LOG_LINE_PATTERN, line) |
| if cts_match: |
| assert len(cts_match.groups()) == 8 |
| timestamp = datetime.datetime( |
| year=THIS_YEAR, |
| month=int(cts_match.group(1)), |
| day=int(cts_match.group(2)), |
| hour=int(cts_match.group(3)), |
| minute=int(cts_match.group(4)), |
| second=int(cts_match.group(5)), |
| microsecond=1000 * int(cts_match.group(6))) |
| source = cts_match.group(7) |
| message = cts_match.group(8) |
| yield (timestamp, source, message) |
| |
| |
| def match(matcher, got_source, message): |
| """Check whether a log line matches requirements. |
| |
| Args: |
| matcher: A pair of the required log source and a pattern to match messages. |
| got_source: The actual log source. |
| message: The actual message. |
| |
| Returns: |
| A MatchObject from the pattern match against the message, or None. |
| """ |
| (want_source, pattern) = matcher |
| if got_source == want_source: |
| return re.match(pattern, message) |
| else: |
| return None |
| |
| |
| def parse_running_times(filename): |
| """Parse the running times from a log file. |
| |
| Args: |
| filename: The name of the file to read. |
| |
| Yields: |
    Pairs of test names and running times (as timedeltas). Finally yields a
    pair with the pseudo-test name 'OVERALL' whose value is the elapsed time
    from the start of the first test to the end of the last test.
| |
| Raises: |
| Exception: The log file entries didn't look like we expected. |
| """ |
| executing = None |
| start_timestamp = None |
| ran_tests = False |
| overall_start_timestamp = None |
| overall_end_timestamp = None |
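  # A small state machine over the log stream: a start line opens a test run,
  # a "started" line marks that tests actually ran, and an end line (or hard
  # end) closes it; the elapsed time is yielded only if tests actually ran.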
| for (timestamp, source, message) in read_cts_logs(filename): |
| start_match = match(START_MATCHER, source, message) |
| tests_ran_match = match(TESTS_RAN_MATCHER, source, message) |
| end_match = match(END_MATCHER, source, message) |
| hard_end_match = match(HARD_END_MATCHER, source, message) |
| if not executing: |
| if start_match: |
| assert len(start_match.groups()) == 1 |
| executing = start_match.group(1) |
| start_timestamp = timestamp |
| if not overall_start_timestamp: |
| overall_start_timestamp = timestamp |
| else: |
| if start_match: |
| raise Exception( |
| 'Found start for %s while waiting for end for %s at %s in %s' % |
| (start_match.group(1), executing, str(timestamp), filename)) |
| if tests_ran_match: |
| ran_tests = True |
| if end_match or hard_end_match: |
| running_time = timestamp - start_timestamp |
| # We see two types of execution in the logs. One appears to be some kind |
| # of dry run which doesn't actually run any tests (and completes |
| # significantly faster). We want the one that actually runs tests. |
| if ran_tests: |
| yield (executing, running_time) |
| executing = None |
| start_timestamp = None |
| ran_tests = False |
| overall_end_timestamp = timestamp |
| if executing: |
| raise Exception('Reached EOF while waiting for end for %s in %s' % |
| (executing, filename)) |
| yield ('OVERALL', overall_end_timestamp - overall_start_timestamp) |
| |
| |
| def collect_running_times(filenames): |
| """Collect running times from some log file. |
| |
| Args: |
| filenames: The names of the files to read. |
| |
| Returns: |
    A tuple containing (1) a dictionary mapping test names to lists of running
    times (as timedeltas), and (2) a list of overall running times (i.e.
    elapsed times from the start of the first test to the end of the last
    test).
| """ |
| times_by_test = collections.defaultdict(list) |
| output_path = os.path.join(OUT_DIR, 'test_times.txt') |
| overall_times = [] |
| with open(output_path, 'w') as output: |
| for filename in filenames: |
| for (test_name, time) in parse_running_times(filename): |
| output.write('%s: %g ms\n' % (test_name, time.total_seconds() * 1000)) |
| if test_name == 'OVERALL': |
| overall_times.append(time) |
| else: |
| times_by_test[test_name].append(time) |
| print('Wrote test times to ' + util.printable_path(output_path)) |
| return (times_by_test, overall_times) |
| |
| |
| def process_running_times(times_by_test): |
| """Processes the collected running times. |
| |
| Args: |
    times_by_test: A dictionary mapping test names to lists of running times.
| |
| Returns: |
| A dictionary mapping test names to fastest running times. |
| """ |
| for (test, times) in times_by_test.iteritems(): |
| if len(times) != REQUIRED_RUNS: |
| print('Warning: Only found %d runs for %s' % (len(times), test)) |
| return {test: min(times) for (test, times) in times_by_test.iteritems()} |
| |
| |
| def calculate_overhead_ratio(overall_times, fastest_time_by_test): |
| """Calculates a ratio for the actual overall time to the sum of test times. |
| |
| The actual overall times are the elapsed times from the start of the first |
| test to the end of the last test. The average of these is used. The ratio is |
| taken with the sume of the fastest time for each test (which is what will be |
| used for scoring). |
| |
| Args: |
| overall_times: A list of overall running times. |
| fastest_time_by_test: A dictionary mapping test names to fastest running |
| times. |
| |
| Returns: |
| The ratio. |
| """ |
| average_overall_time = sum(overall_times, |
| datetime.timedelta(0)) / len(overall_times) |
| total_time_by_test = sum(fastest_time_by_test.values(), datetime.timedelta(0)) |
| ratio = ( |
| average_overall_time.total_seconds() / total_time_by_test.total_seconds()) |
| print( |
| 'Average time for run is %g seconds, sum of fastest test times is %g, ratio is %g' |
| % (average_overall_time.total_seconds(), |
| total_time_by_test.total_seconds(), ratio)) |
| ................................................................................ |
  # N.B. Possible future enhancement: Currently, because we take the fastest of
| # three runs, a real run will always be slightly slower than we predict. We |
| # could multiply in another overhead factor to account for this, e.g. by |
| # looking at the ratio of the mean of the three to the fastest of the three. |
| # (This factor should be applied globally rather than individually to each |
| # test so as not to penalize tests which happened to have a slow run or two.) |
| # This is not a high priority since for now we can just set MAX_RUN_TIME a bit |
| # low to allow for this. |
| return ratio |
| |
| |
| def get_parent(name): |
| """Returns the parent of a Java class or package name.""" |
| class_match = re.match(JAVA_CLASS_PATTERN, name) |
| if not class_match: |
| raise Exception('Could not parse Java class name ' + name) |
| assert len(class_match.groups()) == 1 |
| return class_match.group(1) |
| |
| |
| def group_times_by_package(times_by_test): |
| """Groups the test classes by package name, summing the times. |
| |
| Args: |
| times_by_test: A dictionary mapping test names to fastest running times. |
| |
| Returns: |
| A dictionary mapping package names to total fastest running times. |
| """ |
| time_by_package = collections.defaultdict(datetime.timedelta) |
| for (clazz, time) in times_by_test.iteritems(): |
| package = get_parent(clazz) |
| if package: # A few tests have no package. They're weird, let's skip them. |
| time_by_package[package] = time_by_package[package] + time |
| output_path = os.path.join(OUT_DIR, 'package_times.txt') |
| with open(output_path, 'w') as output: |
| for (package, time) in time_by_package.iteritems(): |
      output.write('%s: %g ms\n' % (package, time.total_seconds() * 1000.0))
| print('Wrote package times to ' + util.printable_path(output_path)) |
| return time_by_package |
| |
| |
| def find_tests_to_run(time_by_test, overhead_ratio): |
| """Finds the tests to actually run. |
| |
| The tests chosen will be the fastest set such that their total time, when |
| multiplied by the overhead ratio, is less than the maximum. |
| |
| Args: |
| time_by_test: A dictionary mapping test names to total fastest running |
| times. The names can be packages, classes, or a mixture of the two. |
| overhead_ratio: A ratio for the actual overall time to the sum of test |
| times. |
| |
| Returns: |
| A list of test names whose running times are below the threshold. |
| |
| Raises: |
    Exception: The total running time of all the tests is below the threshold.
| """ |
| test_inclusions = {test: False for test in time_by_test.keys()} |
| included = 0 |
| total_time = datetime.timedelta() |
| output_path = os.path.join(OUT_DIR, 'included_tests.txt') |
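  # Including tests while their raw total stays below MAX_RUN_TIME / ratio
  # keeps the predicted wall-clock time (raw total * ratio) below MAX_RUN_TIME.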
| adjusted_max_run_time_seconds = MAX_RUN_TIME.total_seconds() / overhead_ratio |
| with open(output_path, 'w') as output: |
| for (test, time) in sorted( |
| time_by_test.iteritems(), key=operator.itemgetter(1)): |
| if (total_time + time).total_seconds() <= adjusted_max_run_time_seconds: |
| test_inclusions[test] = True |
| included += 1 |
| total_time += time |
| output.write('%s: %g ms -> %g ms\n' % |
| (test, time.total_seconds() * 1000.0, |
| total_time.total_seconds() * 1000.0)) |
| else: |
| print('Can run fastest %d of %d tests in %g * %g = %g seconds' % |
| (included, len(time_by_test), total_time.total_seconds(), |
| overhead_ratio, total_time.total_seconds() * overhead_ratio)) |
| print('Wrote tests to include to ' + util.printable_path(output_path)) |
| return test_inclusions |
| raise Exception('Apparently we can run all the tests? This smells wrong.') |
| |
| |
| def build_test_tree(tests): |
| """Builds a tree of tests. |
| |
| Args: |
| tests: The list of tests to build into a tree. These can be packages, |
| classes, or a mixture of the two. |
| |
| Returns: |
    A dictionary mapping each ancestor name to the set of its children. The
    roots of the hierarchy appear as children of None.
| """ |
| tree = collections.defaultdict(set) |
| for test in tests: |
| while True: |
| parent = get_parent(test) |
| tree[parent].add(test) |
| if parent: |
| test = parent |
| else: |
| break |
| return tree |
| |
| |
| def build_exclusion_list(test_inclusions): |
| """Builds a list of tests to exclude. |
| |
| Args: |
| test_inclusions: A dictionary mapping test names to whether or not they |
| should be included in the smoke tests. The names can be packages, classes, |
| or a mixture of the two. |
| |
| Returns: |
    A list of the exclusions. These can be individual test classes, or packages
    (or higher parts of the package hierarchy) that are entirely excluded.
| """ |
| tree = build_test_tree(test_inclusions.keys()) |
| exclusions = [] |
| |
| # We do a DFS of the tree, rolling up exclusions as far as possible, i.e. if |
| # an entire branch is excluded, we exclude that branch rather than all of its |
| # leaves. |
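  # For example, in the self-test data below, 'c.x' and 'c.y' are both
  # excluded, so the single entry 'c' is emitted instead of the two leaves.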
| |
| def visit(test): |
| """Visitor for a DFS of the tree. |
| |
| Args: |
| test: The test to visit (either a class or package name). |
| |
| Returns: |
| Whether or not the parent should include this node in the tree. |
| |
| Raises: |
| Exception: The tree had an unexpected structure. |
| """ |
| if test in test_inclusions: |
| # We only expect to have inclusion status for the leaves of the tree. |
| # Check that this is the case. |
| if test in tree: |
| raise Exception('The name %s is used for a leaf and a branch!' % test) |
| # Return to the parent node whether this leaf is included. |
| return test_inclusions[test] |
| else: |
| # We expect to have inclusion status for all leaves of the tree. Check |
| # that this is the case. |
| if test not in tree: |
| raise Exception('Leaf %s has no inclusion status!' % test) |
| # Look at all the children of this node. |
| any_child_included = False |
| child_exclusions = [] |
| for child in tree[test]: |
| child_included = visit(child) |
| if child_included: |
| any_child_included = True |
| else: |
| child_exclusions.append(child) |
| # If any children are included, we will count this node as included, so we |
| # have to add any excluded children to the exclusion list. |
| if any_child_included: |
| for child in child_exclusions: |
| exclusions.append(child) |
| # Now return whether this node should be counted as included, which means |
| # whether any children are included. |
| return any_child_included |
| |
| # Start the DFS at each root of the tree. |
| for root in tree[None]: |
| root_included = visit(root) |
| # If any of the roots are counted as excluded, add them to the list. |
| if not root_included: |
| exclusions.append(root) |
| |
| # Finally, sort and return the list of exclusions. |
| return sorted(exclusions) |
| |
| |
| def self_test(): |
| """Self-test for the build_include_exclude_list logic.""" |
| test_inclusions = { |
| 'a.x': True, |
| 'a.y': True, |
| 'b.x': True, |
| 'b.y': False, |
| 'c.x': False, |
| 'c.y': False, |
| 'd.x.m': True, |
| 'd.x.n': True, |
| 'd.y.m': True, |
| 'd.y.n': False, |
| 'd.z.m': False, |
| 'd.z.n': False, |
| } |
| expected_exclusions = [ |
| 'b.y', |
| 'c', |
| 'd.y.n', |
| 'd.z', |
| ] |
| actual_exclusions = build_exclusion_list(test_inclusions) |
| if actual_exclusions != expected_exclusions: |
| raise Exception('Self test failed! Expected %s but got %s' % |
| (expected_exclusions, actual_exclusions)) |
| |
| |
| def main(): |
| """The main method.""" |
| self_test() |
| filenames = find_all_log_files() |
| (times_by_test, overall_times) = collect_running_times(filenames) |
| fastest_time_by_test = process_running_times(times_by_test) |
| overhead_ratio = calculate_overhead_ratio(overall_times, fastest_time_by_test) |
| if ROLL_UP_TEST_CLASSES_TO_PACKAGE: |
| time_by_package = group_times_by_package(fastest_time_by_test) |
| test_inclusions = find_tests_to_run(time_by_package, overhead_ratio) |
| else: |
| test_inclusions = find_tests_to_run(fastest_time_by_test, overhead_ratio) |
| exclusions = build_exclusion_list(test_inclusions) |
| output_path = os.path.join(LIBCORE_DIR, 'TEST_MAPPING') |
| with open(output_path, 'w') as output: |
| output.write( |
| TEST_MAPPING_TEMPLATE.render( |
| module=CTS_MODULE_NAME, exclusions=exclusions)) |
| print('Wrote test mapping to ' + util.printable_path(output_path)) |
| |
| |
if __name__ == '__main__':
  main()