blob: 054f375fd832c0c1334f4fac5badc6993c44ef39 [file] [log] [blame]
#!/usr/bin/env python
# Copyright 2016 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import argparse
import bisect
import collections
import gzip
import json
import os
import re
import subprocess
import sys
# elf_symbolizer is imported from the 'symbols' package, which is looked up
# via ../third_party/symbols (relative to the real location of this script,
# i.e. with symlinks resolved). That directory has to be on sys.path before
# the import statement below runs.
_SYMBOLS_PATH = os.path.abspath(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '..', 'third_party', 'symbols'))
sys.path.append(_SYMBOLS_PATH)

# pylint: disable=import-error
import symbols.elf_symbolizer as elf_symbolizer
# Relevant trace event phases from Chromium's
# src/base/trace_event/common/trace_event_common.h.
# Metadata events are where this script reads 'process_name' and
# 'stackFrames' from; memory dump events are where it reads 'process_mmaps'.
TRACE_EVENT_PHASE_METADATA = 'M'
TRACE_EVENT_PHASE_MEMORY_DUMP = 'v'

# Matches Android library paths, supports both K (/data/app-lib/<>/lib.so)
# as well as L+ (/data/app/<>/lib/<>/lib.so). Library name is available
# via 'name' group. The pattern is anchored at the start of the path only, and
# the 'name' group is everything after the matched directory prefix up to the
# last '.so'.
ANDROID_PATH_MATCHER = re.compile(
    r'^/data/(?:app/[^/]+/lib/[^/]+/|app-lib/[^/]+/)(?P<name>.*\.so)')

# Subpath of output path where unstripped libraries are stored.
ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'
def FindInSystemPath(binary_name):
  """Returns the path of the first executable called |binary_name| in PATH.

  Args:
    binary_name: File name of the program to look for (e.g. 'addr2line').

  Returns:
    |binary_name| joined onto the first PATH entry that contains a regular,
    executable file with that name, or None if no entry does.
  """
  # Use the platform's default search path rather than raising KeyError when
  # PATH is missing from the environment.
  search_path = os.environ.get('PATH', os.defpath)
  for directory in search_path.split(os.pathsep):
    candidate = os.path.join(directory, binary_name)
    # The caller wants a program it can run, so a same-named file without
    # execute permission must not shadow a usable one later in PATH.
    if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
      return candidate
  return None
def IsSymbolizableFile(file_path):
  """Returns True if file(1) describes |file_path| as 32/64-bit ELF or Mach-O.

  Args:
    file_path: Path of the file to inspect.

  Returns:
    True if the description printed by the 'file' utility starts with
    ': ELF 32-bit', ': ELF 64-bit', ': Mach-O 32-bit' or ': Mach-O 64-bit';
    False otherwise.

  Raises:
    OSError: If the 'file' utility cannot be launched.
    subprocess.CalledProcessError: If 'file' exits with a non-zero status.
  """
  # '-0' makes 'file' print a NUL right after the file name, so the name can
  # be cut off reliably no matter what characters it contains.
  output = subprocess.check_output(['file', '-0', file_path])
  # check_output() returns bytes on Python 3 (and str, which is bytes, on
  # Python 2), so the needle and the pattern are bytes as well. If no NUL is
  # found, find() returns -1 and the whole output is examined.
  type_string = output[output.find(b'\0') + 1:]
  return bool(re.match(br'\: (ELF|Mach-O) (32|64)-bit\b', type_string))
class ProcessMemoryMaps(object):
  """Represents 'process_mmaps' trace file entry.

  Keeps the entry's regions sorted by start address, which allows the region
  containing an address to be located with a binary search (see FindRegion).
  """

  class Region(object):
    """An address range [start_address, end_address) mapped from a file.

    Regions are ordered, compared for equality and hashed by start address
    only. They can be compared both with other Regions and with plain
    integers; the latter is what lets bisect search a list of Regions for an
    address. Comparing with anything else raises TypeError.
    """

    # (int, long) on Python 2 and (int, int) on Python 3, spelled without
    # naming 'long' so that the same source runs on both.
    _INTEGER_TYPES = (int, type(1 << 64))

    def __init__(self, start_address, size, file_path):
      self._start_address = start_address
      self._size = size
      self._file_path = file_path

    @property
    def start_address(self):
      return self._start_address

    @property
    def end_address(self):
      """First address past the end of the region."""
      return self._start_address + self._size

    @property
    def size(self):
      return self._size

    @property
    def file_path(self):
      return self._file_path

    def _StartAddressOf(self, other):
      """Returns the address that |other| stands for in a comparison."""
      if isinstance(other, type(self)):
        return other._start_address
      if isinstance(other, self._INTEGER_TYPES):
        return other
      raise TypeError('Cannot compare with %s' % type(other))

    # Rich comparisons replace the Python-2-only __cmp__ hook.
    def __eq__(self, other):
      return self._start_address == self._StartAddressOf(other)

    def __ne__(self, other):
      return self._start_address != self._StartAddressOf(other)

    def __lt__(self, other):
      return self._start_address < self._StartAddressOf(other)

    def __le__(self, other):
      return self._start_address <= self._StartAddressOf(other)

    def __gt__(self, other):
      return self._start_address > self._StartAddressOf(other)

    def __ge__(self, other):
      return self._start_address >= self._StartAddressOf(other)

    def __hash__(self):
      # Defined together with __eq__ (Python 3 would otherwise make the class
      # unhashable) and kept consistent with it.
      return hash(self._start_address)

    def __repr__(self):
      return 'Region(0x{:X} - 0x{:X}, {})'.format(
          self.start_address, self.end_address, self.file_path)

  def __init__(self, process_mmaps):
    """Parses 'process_mmaps' dictionary.

    Args:
      process_mmaps: Dictionary with a 'vm_regions' list. Every item provides
          'sa' (start address) and 'sz' (size), both as hexadecimal strings,
          and 'mf' (stored as the region's file path).

    Raises:
      AssertionError: If two regions with different start addresses overlap.
    """
    regions = sorted(
        self.Region(int(region_value['sa'], 16),
                    int(region_value['sz'], 16),
                    region_value['mf'])
        for region_value in process_mmaps['vm_regions'])

    # Copy regions without duplicates and check for overlaps. Note that a
    # region counts as a duplicate as soon as its start address equals the
    # previously kept one; size and file path are not looked at.
    self._regions = []
    previous_region = None
    for region in regions:
      if previous_region is not None:
        if region == previous_region:
          continue
        assert region.start_address >= previous_region.end_address, \
            'Regions {} and {} overlap.'.format(previous_region, region)
      previous_region = region
      self._regions.append(region)

  @property
  def regions(self):
    """Regions sorted by start address, without duplicates."""
    return self._regions

  def FindRegion(self, address):
    """Finds region containing |address|. Returns None if none found."""
    # bisect_right() yields the index just past the last region that starts
    # at or before |address|, so the only candidate is the one before it.
    region_index = bisect.bisect_right(self._regions, address) - 1
    if region_index >= 0:
      region = self._regions[region_index]
      if region.start_address <= address < region.end_address:
        return region
    return None
class StackFrames(object):
  """Represents 'stackFrames' trace file entry.

  Only frames whose name has the form 'pc:<hex address>' are collected; they
  are exposed as PCFrame objects that write name changes back into the
  original frame dictionaries.
  """

  class PCFrame(object):
    """A frame whose name is a program counter that awaits symbolization.

    Assigning |name| updates the wrapped frame dictionary and marks the frame
    as modified.
    """

    def __init__(self, pc, frame):
      self._modified = False
      self._pc = pc
      self._frame = frame

    @property
    def modified(self):
      return self._modified

    @property
    def pc(self):
      return self._pc

    @property
    def name(self):
      return self._frame['name']

    @name.setter
    def name(self, value):
      self._modified = True
      self._frame['name'] = value

  # Prefix of frame names that hold a raw program counter.
  _PC_TAG = 'pc:'

  def __init__(self, stack_frames):
    """Constructs object using 'stackFrames' dictionary.

    Args:
      stack_frames: Dictionary whose values are frame dictionaries, each of
          which has a 'name' entry.
    """
    self._pc_frames = []
    # values() instead of the Python-2-only itervalues().
    for frame in stack_frames.values():
      pc_frame = self._ParsePCFrame(frame)
      if pc_frame:
        self._pc_frames.append(pc_frame)

  @property
  def pc_frames(self):
    return self._pc_frames

  @property
  def modified(self):
    """True if the name of at least one PC frame has been assigned."""
    return any(f.modified for f in self._pc_frames)

  @classmethod
  def _ParsePCFrame(cls, frame):
    """Returns a PCFrame for |frame|, or None if its name is not 'pc:...'."""
    name = frame['name']
    if not name.startswith(cls._PC_TAG):
      return None
    # The text after the tag is the address in hexadecimal.
    pc = int(name[len(cls._PC_TAG):], 16)
    return cls.PCFrame(pc, frame)
class Process(object):
  """Record of what is known about one process of a trace file.

  Attributes:
    pid: The process id given to the constructor.
    name: Process name, None until assigned.
    mmaps: Memory maps of the process, None until assigned.
    stack_frames: Stack frames of the process, None until assigned.
  """

  def __init__(self, pid):
    self.pid = pid
    # Nothing but the pid is known up front; the rest is filled in later by
    # whoever scans the trace.
    self.name = self.mmaps = self.stack_frames = None
def CollectProcesses(trace):
  """Parses trace dictionary and returns the list of all processes suitable
  for symbolization (which have both mmaps and stack_frames).

  Args:
    trace: The parsed trace: either a dictionary with a 'traceEvents' list or
        the list of events itself.

  Returns:
    A list of Process objects, in no particular order. For each of them,
    |mmaps| is built from the last 'process_mmaps' dump of that process in
    the trace and |stack_frames| from its last 'stackFrames' metadata event.
  """
  process_map = {}
  # pid -> raw 'process_mmaps' dictionary of the latest memory dump. Only the
  # last dump of a process is used, so parsing is postponed until all events
  # have been seen instead of parsing every dump and discarding all but one.
  last_mmaps_by_pid = {}

  # Android traces produced via 'chrome://inspect/?tracing#devices' are
  # just list of events.
  events = trace if isinstance(trace, list) else trace['traceEvents']
  for event in events:
    name = event.get('name')
    if not name:
      continue

    pid = event['pid']
    process = process_map.get(pid)
    if process is None:
      process = process_map[pid] = Process(pid)

    phase = event['ph']
    if phase == TRACE_EVENT_PHASE_METADATA:
      if name == 'process_name':
        process.name = event['args']['name']
      elif name == 'stackFrames':
        process.stack_frames = StackFrames(event['args']['stackFrames'])
    elif phase == TRACE_EVENT_PHASE_MEMORY_DUMP:
      process_mmaps = event['args']['dumps'].get('process_mmaps')
      if process_mmaps:
        last_mmaps_by_pid[pid] = process_mmaps

  processes = []
  for pid, process_mmaps in last_mmaps_by_pid.items():
    process = process_map[pid]
    # Without stack frames there is nothing to symbolize, so don't bother
    # parsing the memory maps of such a process.
    if process.stack_frames is None:
      continue
    process.mmaps = ProcessMemoryMaps(process_mmaps)
    processes.append(process)
  return processes
class SymbolizableFile(object):
  """A file to symbolize together with the stack frames awaiting the result.

  Instances connect ELFSymbolizer with a trace file: the keys of
  |frames_by_address| say which addresses to symbolize, and the values say
  which frames to update once an address has been symbolized.

  Attributes:
    path: Path of the file.
    frames_by_address: Maps an address to the list of frames at that address.
        Looking up an address that is not present yet yields a new empty list.
  """

  def __init__(self, file_path):
    self.frames_by_address = collections.defaultdict(list)
    self.path = file_path
def ResolveSymbolizableFiles(processes):
  """Groups the PC frames of |processes| by the file they were mapped from.

  The PC of every stack frame is looked up in the memory maps of its process.
  A frame whose PC is not inside any region gets the name '<unresolved>';
  every other frame is filed under the region's file path, keyed by the PC's
  offset from the start of the region.

  Returns:
    The SymbolizableFiles that were created, one per distinct file path.
  """
  files = {}
  for proc in processes:
    for pc_frame in proc.stack_frames.pc_frames:
      mapping = proc.mmaps.FindRegion(pc_frame.pc)
      if mapping is None:
        pc_frame.name = '<unresolved>'
        continue

      file_path = mapping.file_path
      if file_path not in files:
        files[file_path] = SymbolizableFile(file_path)

      offset = pc_frame.pc - mapping.start_address
      files[file_path].frames_by_address[offset].append(pc_frame)

  return files.values()
def SymbolizeFiles(symfiles, addr2line_path):
  """Symbolizes each file in the given list of SymbolizableFiles
  and updates stack frames with symbolization results.

  Files that can't be symbolized (path is not absolute, file is missing or is
  not of a symbolizable type) are reported, and their frames are renamed to
  '<path>' ('<unnamed>' if the path is empty).

  Args:
    symfiles: SymbolizableFiles to process.
    addr2line_path: Path of the addr2line binary handed to ELFSymbolizer.

  Returns:
    True if at least one file was run through the symbolizer, False otherwise.
  """
  # print(...) with a single argument produces the same output on Python 2
  # and 3, unlike the print statement which is a syntax error on Python 3.
  print('Symbolizing...')

  def _SubPrintf(message, *args):
    print((' ' + message).format(*args))

  symbolized = False
  for symfile in symfiles:
    unsymbolized_name = '<{}>'.format(
        symfile.path if symfile.path else 'unnamed')

    problem = None
    if not os.path.isabs(symfile.path):
      problem = 'not a file'
    elif not os.path.isfile(symfile.path):
      problem = "file doesn't exist"
    elif not IsSymbolizableFile(symfile.path):
      problem = 'file is not symbolizable'
    if problem:
      _SubPrintf("Won't symbolize {} PCs for '{}': {}.",
                 len(symfile.frames_by_address),
                 symfile.path,
                 problem)
      for frames in symfile.frames_by_address.values():
        for frame in frames:
          frame.name = unsymbolized_name
      continue

    # Reads |unsymbolized_name| of the current iteration; that is safe
    # because Join() below is called before the loop moves on.
    def _SymbolizerCallback(sym_info, frames):
      # Unwind inline chain to the top.
      while sym_info.inlined_by:
        sym_info = sym_info.inlined_by

      symbolized_name = sym_info.name if sym_info.name else unsymbolized_name
      for frame in frames:
        frame.name = symbolized_name

    symbolizer = elf_symbolizer.ELFSymbolizer(symfile.path,
                                              addr2line_path,
                                              _SymbolizerCallback,
                                              inlines=True)

    _SubPrintf('Symbolizing {} PCs from {}...',
               len(symfile.frames_by_address),
               symfile.path)

    for address, frames in symfile.frames_by_address.items():
      # SymbolizeAsync() asserts that the type of address is int. We operate
      # on longs (since they are raw pointers possibly from 64-bit processes).
      # It's OK to cast here because we're passing relative PC, which should
      # always fit into int.
      symbolizer.SymbolizeAsync(int(address), frames)

    symbolizer.Join()
    symbolized = True

  return symbolized
def HaveFilesFromAndroid(symfiles):
  """Returns True if the path of any of |symfiles| is an Android library path
  (as recognized by ANDROID_PATH_MATCHER), False otherwise."""
  for symfile in symfiles:
    if ANDROID_PATH_MATCHER.match(symfile.path):
      return True
  return False
def RemapAndroidFiles(symfiles, output_path):
  """Points Android library paths at the unstripped libraries in |output_path|.

  The path of every file in |symfiles| that matches ANDROID_PATH_MATCHER is
  replaced with <output_path>/<ANDROID_UNSTRIPPED_SUBPATH>/<library name>.
  Other files are left untouched.
  """
  for symfile in symfiles:
    android_match = ANDROID_PATH_MATCHER.match(symfile.path)
    if not android_match:
      continue
    symfile.path = os.path.join(
        output_path, ANDROID_UNSTRIPPED_SUBPATH, android_match.group('name'))
# Suffix used for backup files: it is appended to the trace file's path to
# form the path of the backup.
BACKUP_FILE_TAG = '.BACKUP'
def main():
  """Symbolizes the trace file given on the command line, in place.

  Reads the trace, resolves the PCs of its stack frames to files, symbolizes
  them with addr2line and, if anything was symbolized, writes the updated
  trace back to the same path. Unless --no-backup is given, the original file
  is renamed to <path>.BACKUP first.

  Exits with an error if addr2line is not found in PATH, or if the trace
  contains Android library paths and --output-directory is not specified.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('file',
                      help='Trace file to symbolize (.json or .json.gz)')
  # The default used to be the string 'true', which only worked because any
  # non-empty string is truthy; it is a real boolean now.
  parser.add_argument('--no-backup',
                      dest='backup', default=True, action='store_false',
                      help="Don't create {} files".format(BACKUP_FILE_TAG))
  parser.add_argument('--output-directory',
                      help='The path to the build output directory, such ' +
                           'as out/Debug. Only needed for Android.')
  options = parser.parse_args()

  trace_file_path = options.file

  def _OpenTraceFile(mode):
    """Opens the trace file with |mode| ('r' or 'w'), gzipped or not."""
    is_gzipped = trace_file_path.endswith('.gz')
    if sys.version_info[0] >= 3:
      # The json module produces and consumes str on Python 3, so the gzip
      # stream has to be opened in text mode as well.
      opener = gzip.open if is_gzipped else open
      return opener(trace_file_path, mode + 't', encoding='utf-8')
    # gzip.open() of Python 2 has no text mode.
    if is_gzipped:
      return gzip.open(trace_file_path, mode + 'b')
    return open(trace_file_path, mode + 't')

  addr2line_path = FindInSystemPath('addr2line')
  if addr2line_path is None:
    sys.exit("Can't symbolize - no addr2line in PATH.")

  print('Reading trace file...')
  with _OpenTraceFile('r') as trace_file:
    trace = json.load(trace_file)

  processes = CollectProcesses(trace)
  symfiles = ResolveSymbolizableFiles(processes)

  # Android trace files don't have any indication they are from Android.
  # So we're checking for Android-specific paths.
  if HaveFilesFromAndroid(symfiles):
    if not options.output_directory:
      parser.error('The trace file appears to be from Android. Please '
                   "specify output directory (e.g. 'out/Debug') to properly "
                   'symbolize it.')
    RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory))

  if SymbolizeFiles(symfiles, addr2line_path):
    if options.backup:
      backup_file_path = trace_file_path + BACKUP_FILE_TAG
      print('Backing up trace file to {}...'.format(backup_file_path))
      os.rename(trace_file_path, backup_file_path)

    print('Updating trace file...')
    with _OpenTraceFile('w') as trace_file:
      json.dump(trace, trace_file)
  else:
    print('No PCs symbolized - not updating trace file.')
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
  main()