blob: 7bff81eb17e181c66c2eb402f6668add3030b4f3 [file] [log] [blame]
#!/usr/bin/python3
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse, collections, os, re, sys
# Directory containing this script; the exemption config files are located relative to it
dir_of_this_script = os.path.dirname(os.path.realpath(__file__))
# Command-line interface of this script; the description below is its usage text
parser = argparse.ArgumentParser(
description="""USAGE:
Simplifies a build.log from hundreds of megabytes to <100 lines. Prints output to terminal.
Pass this script a filepath to parse. You should be able to type "python3 build_log_simplifier.py"
And then drag-and-drop a log file onto the terminal window to get its path.
Sample usage: python3 development/build_log_simplifier.py Users/owengray/Desktop/build.log
""")
# Mode flags; all default to False and are combined in main()
parser.add_argument("--validate", action="store_true", help="Validate that no unrecognized messages exist in the given log")
parser.add_argument("--update", action="store_true", help="Update our list of recognized messages to include all messages from the given log")
parser.add_argument("--gc", action="store_true", help="When generating a new exemptions file, exclude any exemptions that were not found in the given log. Only relevant with --update or --validate")
# One or more log files; nargs="+" makes arguments.log_path a list
parser.add_argument("log_path", help="Filepath of log(s) to process", nargs="+")
# A regexes_matcher quickly identifies which of a set of regexes fully match a given text.
# It tests one composite regex (the alternation of all of its regexes) and, when it needs to
# know exactly which regexes matched, lazily splits its regexes among child matchers.
class regexes_matcher(object):
    def __init__(self, regexes):
        # source texts of the regexes handled by this matcher, in the order given
        self.regex_texts = regexes
        # child matchers; created on first use by ensure_split()
        self.children = None
        # compiled composite regex; created on first use by matches()
        self.matcher = None

    # Returns the list of regexes that fully match the given text.
    # When expect_match is true and we hold several regexes, the composite check is skipped
    # and the children are asked directly.
    def get_matching_regexes(self, text, expect_match=True):
        has_several = len(self.regex_texts) > 1
        if has_several and expect_match:
            return self.query_children_for_matching_regexes(text)
        # Matching one composite regex is cheaper than matching lots of small ones,
        # so the composite is used to rule out this whole group at once
        if not self.matches(text):
            return []
        if has_several:
            # At least one of our regexes matches; the children determine which ones
            return self.query_children_for_matching_regexes(text)
        return self.regex_texts

    # Asks each child for its matching regexes and concatenates the answers in child order
    def query_children_for_matching_regexes(self, text):
        self.ensure_split()
        found = []
        for child in self.children:
            found.extend(child.get_matching_regexes(text, False))
        return found

    # Returns the index (within regex_texts) of the first regex matching the text,
    # or None if no regex matches
    def index_first_matching_regex(self, text):
        total = len(self.regex_texts)
        if total == 0:
            return None
        if not self.matches(text):
            return None
        if total == 1:
            return 0
        self.ensure_split()
        # number of regexes held by the children that were already searched
        offset = 0
        for child in self.children:
            position = child.index_first_matching_regex(text)
            if position is not None:
                return offset + position
            offset += len(child.regex_texts)
        return None

    # Creates the child matchers if they do not exist yet
    def ensure_split(self):
        if self.children is not None:
            return
        total = len(self.regex_texts)
        # Longer regexes take longer to compile, but lots of small regexes take longer to test.
        # In practice, this number of children seems to result in fast execution
        num_children = min(total, 32)
        boundaries = [int(total * (i + 1) / num_children) for i in range(num_children)]
        self.children = []
        start = 0
        for end in boundaries:
            self.children.append(regexes_matcher(self.regex_texts[start:end]))
            start = end

    # Returns a match object if the entire text matches any of our regexes, otherwise None
    def matches(self, text):
        if self.matcher is None:
            composite = "(?:{})".format(")|(?:".join(self.regex_texts))
            self.matcher = re.compile(composite)
        return self.matcher.fullmatch(text)
# Prints the names of the tasks that the log reports as failed.
# lines: the lines of the log. A failure line looks like:
#   Execution failed for task ':a:b'.
def print_failing_task_names(lines):
    failure_prefix = "Execution failed for task"
    quoted_name = re.compile(r"task '([^']*)'")
    tasks_of_interest = []
    for line in lines:
        if not line.startswith(failure_prefix):
            continue
        found = quoted_name.search(line)
        if found is not None:
            # Taking the quoted text works regardless of how the line ends (no newline, "\r\n")
            tasks_of_interest.append(found.group(1))
        else:
            # Unexpected format: report the remainder of the line rather than crashing
            tasks_of_interest.append(line[len(failure_prefix):].strip())
    print("Detected these failing tasks: " + str(tasks_of_interest))
# Collapses each run of consecutive non-androidx stack frames into its first frame,
# marking that frame with "..." to show that frames were omitted.
# All other lines are returned unchanged.
def shorten_uninteresting_stack_frames(lines):
    shortened = []
    in_boring_run = False
    for line in lines:
        is_boring = line.startswith("\tat ") and not line.startswith("\tat androidx")
        if not is_boring:
            shortened.append(line)
        elif not in_boring_run:
            # first frame of a boring run: keep it, with an ellipsis before the newline
            shortened.append(line.replace("\n", "...\n"))
        in_boring_run = is_boring
    return shortened
# Path of the config file listing exemptions for deterministic/consistent output.
# Entries in this file can be garbage collected via the `--gc` argument
def get_deterministic_exemptions_path():
    filename = "messages.ignore"
    return os.path.join(dir_of_this_script, filename)
# Path of the config file listing exemptions for nondeterministic/flaky output.
# Entries in this file are not garbage collected via the `--gc` argument
def get_flake_exemptions_path():
    filename = "message-flakes.ignore"
    return os.path.join(dir_of_this_script, filename)
# Builds a regexes_matcher from the exemptions described by the given config lines.
# Comment lines and blank lines are skipped and the regexes are sorted, so the ordering
# of the config file is not preserved.
# Raises an Exception if an exemption contains ANSI control sequences.
def build_exemptions_matcher(config_lines):
    regexes = []
    for raw_line in config_lines:
        text = raw_line.replace("\n", "").strip()
        if text == "" or text.startswith("#"):
            # not an exemption
            continue
        if remove_control_characters(text) != text:
            raise Exception("Unexpected control characters found in configuration line:\n\n " +
                "'" + text + "'\n\n. This line is unexpected to match anything. Is this a copying mistake?")
        regexes.append(text)
    return regexes_matcher(sorted(regexes))
# Builds a regexes_matcher over the text of the config file itself, for editing that file.
# Unlike build_exemptions_matcher, comment lines are kept (so they can be matched too)
# and the ordering of the config is respected. Blank lines are dropped.
def build_exemptions_code_matcher(config_lines):
    stripped_lines = (line.strip() for line in config_lines)
    return regexes_matcher([line for line in stripped_lines if line != ""])
# Returns the lines that are not matched by any exemption in config_lines.
# Each line is stripped before matching, but kept lines are returned unmodified.
# If validate_no_duplicates is true and several exemptions match the same line, this
# prints a diagnostic and terminates the process with exit status 1.
def remove_by_regexes(lines, config_lines, validate_no_duplicates):
    fast_matcher = build_exemptions_matcher(config_lines)
    result = []
    for line in lines:
        stripped = line.strip()
        matching_exemptions = fast_matcher.get_matching_regexes(stripped, expect_match=True)
        if validate_no_duplicates and len(matching_exemptions) > 1:
            print("")
            print("build_log_simplifier.py: Invalid configuration: multiple message exemptions match the same message. Are some exemptions too broad?")
            print("")
            print("Line: '" + stripped + "'")
            print("")
            print(str(len(matching_exemptions)) + " Matching exemptions:")
            for exemption_text in matching_exemptions:
                print("'" + exemption_text + "'")
            # sys.exit rather than the site-provided exit(), which is intended for interactive use
            sys.exit(1)
        if not matching_exemptions:
            result.append(line)
    return result
# Reduces every run of blank (whitespace-only) lines to its first line.
# Blank lines at the very start of the input are dropped entirely.
def collapse_consecutive_blank_lines(lines):
    kept = []
    # starts as True so that leading blank lines are treated as part of a run
    previous_was_blank = True
    for line in lines:
        is_blank = line.strip() == ""
        if not (is_blank and previous_was_blank):
            kept.append(line)
        previous_was_blank = is_blank
    return kept
# Deletes blank (whitespace-only) lines from the end of the given list.
# The list is modified in place and also returned.
def remove_trailing_blank_lines(lines):
    while lines and lines[-1].strip() == "":
        lines.pop()
    return lines
# Returns the task name from a line of the form "> Task NAME",
# or None if the line does not start with "> Task "
def extract_task_name(line):
    marker = "> Task "
    if not line.startswith(marker):
        return None
    remainder = line[len(marker):]
    return remainder.strip()
# Tells whether the given line announces a task, i.e. whether a task name can be extracted from it
def is_task_line(line):
    task_name = extract_task_name(line)
    return task_name is not None
# Returns the distinct task names mentioned in the given lines, in order of first appearance
def extract_task_names(lines):
    names = []
    # names already emitted; a set keeps the duplicate check constant-time per line
    seen = set()
    for line in lines:
        name = extract_task_name(line)
        if name is not None and name not in seen:
            seen.add(name)
            names.append(name)
    return names
# Removes section headers that are followed by no output (or only by blank lines).
# A section header is a task line, a "> Configure project " line, or the
# "FAILURE: Build failed with an exception." line.
# For example, turns this:
# > Task :a
# > Task :b
# some message
#
# into this:
#
# > Task :b
# some message
def collapse_tasks_having_no_output(lines):
    section_prefixes = ("> Configure project ", "FAILURE: Build failed with an exception.")
    emitted = []
    # Most recent section header that has not produced any output yet, or None
    held_header = None
    # Blank lines seen since held_header; only emitted if real output follows
    held_blanks = []
    for line in lines:
        if is_task_line(line) or line.startswith(section_prefixes):
            # A new section begins; whatever was being held had no output and is discarded
            held_header = line
            held_blanks = []
            continue
        if line.strip() == "":
            if held_header is None:
                emitted.append(line)
            else:
                held_blanks.append(line)
            continue
        # Real output: the section it belongs to is worth showing
        if held_header is not None:
            emitted.append(held_header)
            emitted.extend(held_blanks)
            held_header = None
            held_blanks = []
        emitted.append(line)
    return emitted
# Matches ANSI escape sequences, such as color codes and other terminal control sequences
control_character_regex = re.compile(r"""
\x1B # Escape
(?: # 7-bit C1 Fe (except CSI)
[@-Z\\-_]
| # or [ for CSI, followed by a control sequence
\[
[0-?]* # Parameters
[ -/]* # Intermediate bytes
[@-~] # End
)
""", re.X)


# Returns the given line with every ANSI escape sequence deleted
def remove_control_characters(line):
    return re.sub(control_character_regex, "", line)
# Text that is deleted from the input wherever it occurs, even in the middle of a line.
# Compared to the .ignore files, this list is less convenient:
# it gets no autosuggested additions,
# it is not automatically garbage collected,
# and users looking for the exemption history probably won't think to look here.
# Its advantage is that part of a line can be removed while the rest is still validated.
# If this list eventually gets long we might want to make it easier to update
inline_ignores_regex = re.compile(
    # b/300072778
    "Sharing is only supported for boot loader classes because bootstrap classpath has been appended"
)


# Returns the given line with every inline-ignored text deleted
def remove_inline_ignores(line):
    return inline_ignores_regex.sub("", line)
# Normalizes some filepaths to more easily simplify/skip some messages.
# The log is expected to declare the relevant directories on lines such as "OUT_DIR=/some/path".
# Every mention of a declared directory is replaced with a fixed placeholder ($OUT_DIR,
# $DIST_DIR, $GRADLE_USER_HOME, $SUPPORT, $CHECKOUT). Returns the new list of lines.
def normalize_paths(lines):
    out_marker = "OUT_DIR="
    dist_marker = "DIST_DIR="
    checkout_marker = "CHECKOUT="
    gradle_user_home_marker = "GRADLE_USER_HOME="
    markers = (out_marker, dist_marker, checkout_marker, gradle_user_home_marker)
    # We read these paths from the log itself in case the build was run in a different
    # location, such as on a build server
    declared = {}
    for line in lines:
        marker = next((m for m in markers if line.startswith(m)), None)
        if marker is not None:
            declared[marker] = line[len(marker):].strip()
            continue
        if len(declared) == len(markers):
            # everything has been found; no need to scan the rest of the log
            break
    out_dir = declared.get(out_marker)
    dist_dir = declared.get(dist_marker)
    checkout_dir = declared.get(checkout_marker)
    gradle_user_home = declared.get(gradle_user_home_marker)
    # Remove any mentions of these paths, and replace them with consistent values.
    # More-specific paths must come first so that they are matched first.
    # Empty values are skipped: replacing "" would insert the placeholder between
    # every pair of characters of every line.
    remove_paths = collections.OrderedDict()
    if gradle_user_home:
        remove_paths[gradle_user_home] = "$GRADLE_USER_HOME"
    if dist_dir:
        remove_paths[dist_dir] = "$DIST_DIR"
    if out_dir:
        remove_paths[out_dir] = "$OUT_DIR"
    if checkout_dir:
        remove_paths[checkout_dir + "/frameworks/support"] = "$SUPPORT"
        remove_paths[checkout_dir] = "$CHECKOUT"
    result = []
    for line in lines:
        for path, replacement in remove_paths.items():
            if path in line:
                line = line.replace(path + "/", replacement + "/")
                line = line.replace(path, replacement)
        result.append(line)
    return result
# Given a regex containing hashes, like ".gradle/caches/transforms-2/files-2.1/73f631f487bd87cfd8cb2aabafbac6a8",
# returns a more general regex in which each run of 32 lowercase hex characters is replaced
# by a pattern matching any such run, like ".gradle/caches/transforms-2/files-2.1/[0-9a-f]{32}"
def generalize_hashes(message):
    hash_pattern = "[0-9a-f]{32}"
    return re.compile(hash_pattern).sub(hash_pattern, message)
# Given a regex containing numbers, like ".gradle/caches/transforms-2/files-2.1/",
# returns a more general regex in which each run of digits is replaced by a pattern matching
# any run of digits, like ".gradle/caches/transforms-[0-9]+/files-[0-9]+.[0-9]+/"
def generalize_numbers(message):
    digits = "[0-9]+"
    widened = re.compile(digits).sub(lambda _match: digits, message)
    # The digits inside an existing "[0-9a-f]{32}" were rewritten too; restore that pattern
    corrupted_hash_pattern = "[[0-9]+-[0-9]+a-f]{[0-9]+}"
    return widened.replace(corrupted_hash_pattern, "[0-9a-f]{32}")
# Generates a new list of exemption lines from a list of output messages and the existing
# exemption lines. New exemptions are added for messages that are not exempted yet and,
# if remove_unmatched_lines is true, exemptions matching none of the messages are removed.
def generate_suggested_exemptions(messages, config_lines, remove_unmatched_lines):
    suggested = suggest_missing_exemptions(messages, config_lines)
    if not remove_unmatched_lines:
        return suggested
    return remove_unmatched_exemptions(messages, suggested)
# Given a list of output messages and a list of existing exemption lines,
# generates an augmented list of exemptions containing any necessary new exemptions.
# Existing lines are kept in order (blank lines are dropped). Each new exemption is placed
# either right after the most recently matched existing line, or, if it was printed by a
# section (task or project configuration) that has no comment in the config yet, at the
# end of the result under a new "# ..." comment naming that section.
def suggest_missing_exemptions(messages, config_lines):
    # given a message, finds the index of the existing exemption for that message, if any
    existing_matcher = build_exemptions_code_matcher(config_lines)
    # the index of the previously matched exemption
    previous_found_index = -1
    # map from line index to list of lines to insert there
    insertions_by_position = collections.defaultdict(list)
    # map from section comment line to list of lines to insert under it
    insertions_by_task_name = collections.OrderedDict()
    # comment line of the current section generating any subsequent output, if that section
    # is not mentioned in the config yet
    pending_task_line = None
    # new, suggested exemptions
    new_suggestions = set()
    # generate new suggestions
    for line in messages:
        line = line.strip()
        if line == "":
            continue
        is_section = False
        if is_task_line(line) or line.startswith("> Configure project "):
            # Sections appear in the config as comments, so look them up in that form
            line = "# " + line
            pending_task_line = line
            is_section = True
        current_found_index = existing_matcher.index_first_matching_regex(line)
        if current_found_index is not None:
            # We already have a mention of this line
            # We don't need to exempt it again, but this informs where to insert our next exemption
            previous_found_index = current_found_index
            pending_task_line = None
            continue
        # section names are only emitted later, and only if the section produced new output
        if is_section:
            continue
        # turn the message into a regex that matches it
        escaped = re.escape(line)
        escaped = escaped.replace("\\ ", " ")  # spaces don't need to be escaped
        escaped = generalize_hashes(escaped)
        escaped = generalize_numbers(escaped)
        # confirm that we haven't already inserted this message
        if escaped in new_suggestions:
            continue
        if pending_task_line is not None:
            # We know which section this line came from, and it's a section that
            # didn't previously make output
            insertions_by_task_name.setdefault(pending_task_line, []).append(escaped)
        else:
            # This line of output didn't come from a new section,
            # so we append it after the previous line that we found
            insertions_by_position[previous_found_index].append(escaped)
        new_suggestions.add(escaped)
    # for each regex for which we chose a position in the file, insert it there
    exemption_lines = []
    for i, existing_line in enumerate(existing_matcher.regex_texts):
        exemption_lines.append(existing_line)
        # membership test first, so that the defaultdict does not grow needlessly
        if i in insertions_by_position:
            exemption_lines += insertions_by_position[i]
    # next come the regexes that were found before any existing line was matched
    if -1 in insertions_by_position:
        exemption_lines += insertions_by_position[-1]
    # finally come the regexes assigned to new sections, grouped by section
    for task_name, task_exemptions in insertions_by_task_name.items():
        exemption_lines.append(task_name)
        exemption_lines += task_exemptions
    return exemption_lines
# Finds the config lines in config_lines that match no line in messages.
# Creates and returns a new list of config lines which excludes those unmatched lines
# and any comments that presumably described them.
def remove_unmatched_exemptions(messages, config_lines):
    existing_matcher = build_exemptions_matcher(config_lines)
    # collect every regex that matches at least one message
    used_regexes = set()
    for message in messages:
        text = message.strip()
        if not text.startswith("#"):
            used_regexes.update(existing_matcher.get_matching_regexes(text))
    # Generate the new list of config lines:
    #   used config lines are kept, together with the comments held before them
    #   comments whose following config lines were all removed are dropped
    kept = []
    # comments that we haven't yet decided to keep or not
    held_comments = []
    # whether an unused config line was seen more recently than any comment
    dropped_since_comment = False
    for config_line in config_lines:
        if config_line.startswith("#"):
            if dropped_since_comment:
                # The held comments were followed only by lines that we are removing,
                # so presumably they described those lines; forget them
                held_comments = []
            held_comments.append(config_line)
            dropped_since_comment = False
        elif config_line in used_regexes:
            # this config line is in use, so its comments are kept too
            kept.extend(held_comments)
            held_comments = []
            kept.append(config_line)
        else:
            dropped_since_comment = True
    # comments at the bottom of the file are kept unless an unused line followed them
    if not dropped_since_comment:
        kept.extend(held_comments)
    return kept
# Opens the file at the given path and returns its lines (line endings included).
# The file is closed even if reading fails.
def readlines(path):
    with open(path) as infile:
        return infile.readlines()
# Writes the given lines to the file at the given path, replacing its previous content.
# The lines are joined with "\n"; no newline is written after the last line.
# The file is closed even if writing fails.
def writelines(path, lines):
    with open(path, 'w') as destfile:
        destfile.write("\n".join(lines))
# Entry point: reads the given logs, removes the exempted messages and then, depending on
# the arguments, either updates the exemptions file (--update / --gc), fails with exit
# status 1 if unrecognized messages remain (--validate), or prints the simplified log.
def main():
    arguments = parser.parse_args()
    # read each file
    log_paths = arguments.log_path
    all_lines = []
    for log_path in log_paths:
        lines = readlines(log_path)
        lines = [remove_control_characters(line) for line in lines]
        lines = [remove_inline_ignores(line) for line in lines]
        lines = normalize_paths(lines)
        all_lines += lines
    # load configuration
    flake_exemption_regexes = readlines(get_flake_exemptions_path())
    deterministic_exemption_regexes = readlines(get_deterministic_exemptions_path())
    exemption_regexes = flake_exemption_regexes + deterministic_exemption_regexes
    # --gc implies updating, and updating implies validating
    update = arguments.update or arguments.gc
    validate = update or arguments.validate
    # remove lines we're not interested in
    interesting_lines = all_lines
    if not validate:
        print_failing_task_names(interesting_lines)
    interesting_lines = remove_by_regexes(interesting_lines, exemption_regexes, validate)
    interesting_lines = collapse_tasks_having_no_output(interesting_lines)
    interesting_lines = collapse_consecutive_blank_lines(interesting_lines)
    interesting_lines = remove_trailing_blank_lines(interesting_lines)
    # process results
    if update:
        if arguments.gc or interesting_lines:
            update_path = get_deterministic_exemptions_path()
            # filter out any inconsistently observed messages so we don't try to exempt them twice
            all_lines = remove_by_regexes(all_lines, flake_exemption_regexes, validate)
            # update the deterministic exemptions file based on the result
            suggested = generate_suggested_exemptions(all_lines, deterministic_exemption_regexes, arguments.gc)
            writelines(update_path, suggested)
            print("build_log_simplifier.py updated exemptions " + update_path)
    elif validate:
        if interesting_lines:
            print("")
            print("=" * 80)
            print("build_log_simplifier.py: Error: Found " + str(len(interesting_lines)) + " new lines of warning output!")
            print("")
            print("The new output:")
            print(" " + " ".join(interesting_lines))
            print("")
            print("To reproduce this failure:")
            print(" Try $ ./gradlew -Pandroidx.validateNoUnrecognizedMessages --rerun-tasks " + " ".join(extract_task_names(interesting_lines)))
            print("")
            print("Instructions:")
            print(" If you can fix these messages, do so.")
            print(" If you cannot fix these messages, you may suppress them.")
            print(" To automatically suppress new output from build server builds, run development/build_log_simplifier/update.sh")
            print(" See also https://android.googlesource.com/platform/frameworks/support/+/androidx-main/development/build_log_simplifier/VALIDATION_FAILURE.md")
            print("")
            # the suggested baseline is written next to the first log
            new_exemptions_path = log_paths[0] + ".ignore"
            # filter out any inconsistently observed messages so we don't try to exempt them twice
            all_lines = remove_by_regexes(all_lines, flake_exemption_regexes, validate)
            # generate a new deterministic exemptions file based on the result
            suggested = generate_suggested_exemptions(all_lines, deterministic_exemption_regexes, arguments.gc)
            writelines(new_exemptions_path, suggested)
            print("Files:")
            print(" Full Log : " + ",".join(log_paths))
            print(" Baseline : " + get_deterministic_exemptions_path())
            print(" Autogenerated new baseline : " + new_exemptions_path)
            # sys.exit rather than the site-provided exit(), which is intended for interactive use
            sys.exit(1)
    else:
        interesting_lines = shorten_uninteresting_stack_frames(interesting_lines)
        print("".join(interesting_lines))
# Run main() only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    main()