| #!/usr/bin/env python |
| # Copyright 2016 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import argparse |
| import bisect |
| import collections |
| import gzip |
| import json |
| import os |
| import re |
| import subprocess |
| import sys |
| |
| _SYMBOLS_PATH = os.path.abspath(os.path.join( |
| os.path.dirname(os.path.realpath(__file__)), |
| '..', |
| 'third_party', |
| 'symbols')) |
| sys.path.append(_SYMBOLS_PATH) |
| # pylint: disable=import-error |
| import symbols.elf_symbolizer as elf_symbolizer |
| |
| |
# Trace event phase codes used by this script. The values come from
# Chromium's src/base/trace_event/common/trace_event_common.h.
TRACE_EVENT_PHASE_MEMORY_DUMP = 'v'
TRACE_EVENT_PHASE_METADATA = 'M'
| |
| |
# Subpath of the output path where unstripped libraries are stored.
ANDROID_UNSTRIPPED_SUBPATH = 'lib.unstripped'

# Recognizes on-device Android library paths in both layouts:
#   K:  /data/app-lib/<>/lib.so
#   L+: /data/app/<>/lib/<>/lib.so
# The library name is captured in the 'name' group.
ANDROID_PATH_MATCHER = re.compile(
    r'^/data/(?:app/[^/]+/lib/[^/]+/|app-lib/[^/]+/)(?P<name>.*\.so)')
| |
| |
def FindInSystemPath(binary_name):
  """Looks up an executable by name in the directories listed in PATH.

  Args:
    binary_name: File name of the binary to look for (e.g. 'addr2line').

  Returns:
    The first PATH entry joined with |binary_name| that is an executable
    regular file, or None if there is no such entry or PATH is not set.
  """
  search_path = os.environ.get('PATH')
  if search_path is None:
    # No PATH at all means there is nothing to search; report "not found"
    # instead of failing with KeyError.
    return None

  for directory in search_path.split(os.pathsep):
    candidate = os.path.join(directory, binary_name)
    # Require execute permission: a same-named file that cannot be run
    # must not shadow a usable binary later in PATH.
    if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
      return candidate
  return None
| |
| |
def IsSymbolizableFile(file_path):
  """Tells whether the `file` utility reports |file_path| as ELF or Mach-O.

  Args:
    file_path: Path of the file to inspect.

  Returns:
    True if the type description printed by `file` starts with ': ELF' or
    ': Mach-O' followed by '32-bit' or '64-bit'; False otherwise.

  Raises:
    OSError: the `file` utility could not be started.
    subprocess.CalledProcessError: `file` exited with a non-zero status.
  """
  # With -0, `file` emits a NUL right after the file name, so the type
  # description is whatever follows the first NUL.
  output = subprocess.check_output(['file', '-0', file_path])
  # check_output() returns bytes on Python 3 and str (the same type as
  # bytes) on Python 2, so bytes literals are type-correct on both.
  description = output[output.find(b'\0') + 1:]
  return bool(re.match(br'\: (ELF|Mach-O) (32|64)-bit\b', description))
| |
| |
class ProcessMemoryMaps(object):
  """Represents 'process_mmaps' trace file entry.

  Stores the entry's regions sorted by start address, without duplicates,
  and finds the region that contains a given address.
  """

  class Region(object):
    """A memory region: start address, size and backing file path.

    Regions compare (and hash) by start address only, and may be compared
    directly against a plain integer address.
    """

    # Integer types accepted in comparisons. type(1 << 64) is `long` on
    # Python 2 and plain `int` on Python 3, so no version check is needed.
    _INTEGER_TYPES = (int, type(1 << 64))

    def __init__(self, start_address, size, file_path):
      self._start_address = start_address
      self._size = size
      self._file_path = file_path

    @property
    def start_address(self):
      return self._start_address

    @property
    def end_address(self):
      """Address one past the last byte of the region."""
      return self._start_address + self._size

    @property
    def size(self):
      return self._size

    @property
    def file_path(self):
      return self._file_path

    def __cmp__(self, other):
      """Three-way comparison by start address; returns -1, 0 or 1.

      Raises:
        Exception: |other| is neither a Region nor an integer.
      """
      if isinstance(other, type(self)):
        other_start = other._start_address
      elif isinstance(other, self._INTEGER_TYPES):
        other_start = other
      else:
        raise Exception('Cannot compare with %s' % type(other))
      return ((self._start_address > other_start) -
              (self._start_address < other_start))

    # Python 3 ignores __cmp__, so sorting, '==' and bisect need the rich
    # comparison methods. Python 2 prefers them over __cmp__ as well.
    def __eq__(self, other):
      return self.__cmp__(other) == 0

    def __ne__(self, other):
      return self.__cmp__(other) != 0

    def __lt__(self, other):
      return self.__cmp__(other) < 0

    def __le__(self, other):
      return self.__cmp__(other) <= 0

    def __gt__(self, other):
      return self.__cmp__(other) > 0

    def __ge__(self, other):
      return self.__cmp__(other) >= 0

    def __hash__(self):
      # Defining __eq__ drops the default hash on Python 3; keep regions
      # hashable, consistently with equality by start address.
      return hash(self._start_address)

    def __repr__(self):
      return 'Region(0x{:X} - 0x{:X}, {})'.format(
          self.start_address, self.end_address, self.file_path)

  def __init__(self, process_mmaps):
    """Parses 'process_mmaps' dictionary.

    Args:
      process_mmaps: Dictionary with a 'vm_regions' list. Each item supplies
        'sa' (start address) and 'sz' (size) as hexadecimal strings, and
        'mf', which is used as the region's file path.

    Raises:
      AssertionError: two regions with different start addresses overlap.
    """
    # int() yields an arbitrary-precision value on both Python 2 and 3, so
    # 64-bit addresses are parsed without an explicit long().
    regions = [
        self.Region(int(value['sa'], 16), int(value['sz'], 16), value['mf'])
        for value in process_mmaps['vm_regions']]
    regions.sort()

    # Copy regions without duplicates and check for overlaps. Regions are
    # duplicates when their start addresses are equal.
    self._regions = []
    previous_region = None
    for region in regions:
      if previous_region is not None:
        if region == previous_region:
          continue
        assert region.start_address >= previous_region.end_address, \
            'Regions {} and {} overlap.'.format(previous_region, region)
      previous_region = region
      self._regions.append(region)

  @property
  def regions(self):
    """Regions sorted by start address, duplicates removed."""
    return self._regions

  def FindRegion(self, address):
    """Finds region containing |address|. Returns None if none found."""

    # bisect_right() gives the insertion point after any region starting at
    # |address|, so the candidate is the region just before that point.
    region_index = bisect.bisect_right(self._regions, address) - 1
    if region_index >= 0:
      region = self._regions[region_index]
      if region.start_address <= address < region.end_address:
        return region
    return None
| |
| |
class StackFrames(object):
  """Represents 'stackFrames' trace file entry.

  Only frames whose name is a raw program counter ('pc:<hex>') are tracked.
  Renaming a tracked frame writes through to the underlying dictionary.
  """

  class PCFrame(object):
    """A frame dictionary whose name was 'pc:<hex>' when it was parsed."""

    def __init__(self, pc, frame):
      self._modified = False
      self._pc = pc
      self._frame = frame

    @property
    def modified(self):
      """True once the name has been assigned through this object."""
      return self._modified

    @property
    def pc(self):
      return self._pc

    @property
    def name(self):
      return self._frame['name']

    @name.setter
    def name(self, value):
      self._modified = True
      self._frame['name'] = value

  # Prefix that marks a frame name as an unsymbolized program counter.
  _PC_TAG = 'pc:'

  def __init__(self, stack_frames):
    """Constructs object using 'stackFrames' dictionary.

    Args:
      stack_frames: Dictionary whose values are frame dictionaries, each
        with at least a 'name' key.
    """
    # .values() rather than .itervalues(): available on Python 2 and 3.
    parsed = (self._ParsePCFrame(frame) for frame in stack_frames.values())
    self._pc_frames = [pc_frame for pc_frame in parsed if pc_frame]

  @property
  def pc_frames(self):
    """List of PCFrame objects, one per 'pc:<hex>' frame."""
    return self._pc_frames

  @property
  def modified(self):
    """True if any tracked frame has been renamed."""
    return any(f.modified for f in self._pc_frames)

  @classmethod
  def _ParsePCFrame(cls, frame):
    """Wraps |frame| in a PCFrame if its name is 'pc:<hex>', else None."""
    name = frame['name']
    if not name.startswith(cls._PC_TAG):
      return None
    # int() is arbitrary-precision on both Python 2 and 3, so 64-bit PCs
    # parse without an explicit long().
    pc = int(name[len(cls._PC_TAG):], 16)
    return cls.PCFrame(pc, frame)
| |
| |
class Process(object):
  """Plain record of what is known about one process in a trace file.

  Only the pid is known at construction time; name, mmaps and stack_frames
  start out as None.
  """

  def __init__(self, pid):
    self.name = self.mmaps = self.stack_frames = None
    self.pid = pid
| |
| |
def CollectProcesses(trace):
  """Collects the processes of a trace that are suitable for symbolization.

  Args:
    trace: Parsed trace JSON: either a dictionary with a 'traceEvents' list
      or a bare list of events.

  Returns:
    List of Process objects that have both mmaps and stack_frames.
  """

  # Android traces produced via 'chrome://inspect/?tracing#devices' are
  # just list of events.
  events = trace if isinstance(trace, list) else trace['traceEvents']

  process_map = {}
  # pid -> most recent raw 'process_mmaps' dictionary. Only the last dump of
  # a process is used, so parsing is deferred until all events are seen.
  latest_mmaps_by_pid = {}

  for event in events:
    name = event.get('name')
    if not name:
      continue

    pid = event['pid']
    process = process_map.get(pid)
    if process is None:
      process = process_map[pid] = Process(pid)

    phase = event['ph']
    if phase == TRACE_EVENT_PHASE_METADATA:
      if name == 'process_name':
        process.name = event['args']['name']
      elif name == 'stackFrames':
        process.stack_frames = StackFrames(event['args']['stackFrames'])
    elif phase == TRACE_EVENT_PHASE_MEMORY_DUMP:
      process_mmaps = event['args']['dumps'].get('process_mmaps')
      if process_mmaps:
        latest_mmaps_by_pid[pid] = process_mmaps

  symbolizable = []
  # .items() rather than .iteritems(): available on Python 2 and 3.
  for pid, process in process_map.items():
    raw_mmaps = latest_mmaps_by_pid.get(pid)
    if raw_mmaps is None or not process.stack_frames:
      continue
    process.mmaps = ProcessMemoryMaps(raw_mmaps)
    symbolizable.append(process)
  return symbolizable
| |
| |
class SymbolizableFile(object):
  """Pairs a file path with the addresses to symbolize in that file.

  This is the glue between ELFSymbolizer and a trace file. The keys of
  |frames_by_address| say what to symbolize, and each value is the list of
  stack frames to update with the result for that address.
  """
  def __init__(self, file_path):
    # Looking up an unseen address creates its (empty) frame list.
    self.frames_by_address = collections.defaultdict(list)
    self.path = file_path
| |
| |
def ResolveSymbolizableFiles(processes):
  """Groups the PCs of all processes into SymbolizableFiles, one per path.

  Each stack frame's PC is looked up in the mmap regions of its process. A
  frame whose PC is inside a region is filed under that region's file
  path, keyed by the PC's offset from the region start. A frame whose PC
  is in no region is renamed to '<unresolved>'.

  Returns the SymbolizableFiles created, as the values of a path -> file
  dictionary.
  """
  files = {}
  for proc in processes:
    for pc_frame in proc.stack_frames.pc_frames:
      region = proc.mmaps.FindRegion(pc_frame.pc)
      if region is None:
        pc_frame.name = '<unresolved>'
      else:
        path = region.file_path
        if path not in files:
          files[path] = SymbolizableFile(path)
        offset = pc_frame.pc - region.start_address
        files[path].frames_by_address[offset].append(pc_frame)
  return files.values()
| |
| |
def SymbolizeFiles(symfiles, addr2line_path):
  """Symbolizes each file in the given list of SymbolizableFiles
  and updates stack frames with symbolization results.

  A file is skipped when its path is not absolute, does not exist, or is
  rejected by IsSymbolizableFile(). The frames of a skipped file are
  renamed to '<path>' ('<unnamed>' for an empty path).

  Args:
    symfiles: Iterable of SymbolizableFile objects.
    addr2line_path: Path of the addr2line binary passed to ELFSymbolizer.

  Returns:
    True if at least one file was run through the symbolizer, else False.
  """
  # Single-argument print(...) behaves the same on Python 2 and 3.
  print('Symbolizing...')

  def _SubPrintf(message, *args):
    # Format first, then print. The unparenthesized `print (...).format()`
    # form would call .format() on print()'s None result on Python 3.
    print((' ' + message).format(*args))

  symbolized = False
  for symfile in symfiles:
    unsymbolized_name = '<{}>'.format(
        symfile.path if symfile.path else 'unnamed')

    problem = None
    if not os.path.isabs(symfile.path):
      problem = 'not a file'
    elif not os.path.isfile(symfile.path):
      problem = "file doesn't exist"
    elif not IsSymbolizableFile(symfile.path):
      problem = 'file is not symbolizable'
    if problem:
      _SubPrintf("Won't symbolize {} PCs for '{}': {}.",
                 len(symfile.frames_by_address),
                 symfile.path,
                 problem)
      # .values() rather than .itervalues(): available on Python 2 and 3.
      for frames in symfile.frames_by_address.values():
        for frame in frames:
          frame.name = unsymbolized_name
      continue

    # The callback reads |unsymbolized_name| from the enclosing iteration.
    # Join() below is called before the loop rebinds that name.
    def _SymbolizerCallback(sym_info, frames):
      # Unwind inline chain to the top.
      while sym_info.inlined_by:
        sym_info = sym_info.inlined_by

      symbolized_name = sym_info.name if sym_info.name else unsymbolized_name
      for frame in frames:
        frame.name = symbolized_name

    symbolizer = elf_symbolizer.ELFSymbolizer(symfile.path,
                                              addr2line_path,
                                              _SymbolizerCallback,
                                              inlines=True)

    _SubPrintf('Symbolizing {} PCs from {}...',
               len(symfile.frames_by_address),
               symfile.path)

    for address, frames in symfile.frames_by_address.items():
      # SymbolizeAsync() asserts that the type of address is int. We operate
      # on longs (since they are raw pointers possibly from 64-bit processes).
      # It's OK to cast here because we're passing relative PC, which should
      # always fit into int.
      symbolizer.SymbolizeAsync(int(address), frames)

    symbolizer.Join()
    symbolized = True

  return symbolized
| |
| |
def HaveFilesFromAndroid(symfiles):
  """Returns True if any file's path matches ANDROID_PATH_MATCHER."""
  for symfile in symfiles:
    if ANDROID_PATH_MATCHER.match(symfile.path):
      return True
  return False
| |
| |
def RemapAndroidFiles(symfiles, output_path):
  """Points files with Android library paths at their unstripped copies.

  A path matching ANDROID_PATH_MATCHER is replaced in place by
  <output_path>/<ANDROID_UNSTRIPPED_SUBPATH>/<library name>. Other paths
  are left as they are.
  """
  for symfile in symfiles:
    android_match = ANDROID_PATH_MATCHER.match(symfile.path)
    if not android_match:
      continue
    symfile.path = os.path.join(
        output_path, ANDROID_UNSTRIPPED_SUBPATH, android_match.group('name'))
| |
| |
# Suffix used for backup files.
BACKUP_FILE_TAG = '.BACKUP'


def main():
  """Symbolizes the PCs in the trace file named on the command line.

  Reads the trace (.json or .json.gz), resolves and symbolizes its stack
  frame PCs, and rewrites the trace file if anything was symbolized. Unless
  --no-backup is given, the original file is first renamed with
  BACKUP_FILE_TAG appended.

  Exits with an error if addr2line is not in PATH, or if the trace has
  Android library paths and --output-directory was not given.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('file',
                      help='Trace file to symbolize (.json or .json.gz)')
  # The default is the boolean True, the counterpart of the False that
  # 'store_false' stores. The string 'true' only worked by being truthy.
  parser.add_argument('--no-backup',
                      dest='backup', default=True, action='store_false',
                      help="Don't create {} files".format(BACKUP_FILE_TAG))
  parser.add_argument('--output-directory',
                      help='The path to the build output directory, such ' +
                           'as out/Debug. Only needed for Android.')
  options = parser.parse_args()

  trace_file_path = options.file

  def _OpenTraceFile(mode):
    # |mode| is 'r' or 'w'. Gzipped traces are opened through gzip in
    # binary mode, plain ones as text.
    if trace_file_path.endswith('.gz'):
      return gzip.open(trace_file_path, mode + 'b')
    else:
      return open(trace_file_path, mode + 't')

  addr2line_path = FindInSystemPath('addr2line')
  if addr2line_path is None:
    sys.exit("Can't symbolize - no addr2line in PATH.")

  # Single-argument print(...) behaves the same on Python 2 and 3.
  print('Reading trace file...')
  with _OpenTraceFile('r') as trace_file:
    trace = json.load(trace_file)

  processes = CollectProcesses(trace)
  symfiles = ResolveSymbolizableFiles(processes)

  # Android trace files don't have any indication they are from Android.
  # So we're checking for Android-specific paths.
  if HaveFilesFromAndroid(symfiles):
    if not options.output_directory:
      parser.error('The trace file appears to be from Android. Please '
                   "specify output directory (e.g. 'out/Debug') to properly "
                   'symbolize it.')
    RemapAndroidFiles(symfiles, os.path.abspath(options.output_directory))

  if SymbolizeFiles(symfiles, addr2line_path):
    if options.backup:
      backup_file_path = trace_file_path + BACKUP_FILE_TAG
      print('Backing up trace file to {}...'.format(backup_file_path))
      # The original is moved aside, so the write below creates a new file
      # under the original name.
      os.rename(trace_file_path, backup_file_path)

    print('Updating trace file...')
    with _OpenTraceFile('w') as trace_file:
      json.dump(trace, trace_file)
  else:
    print('No PCs symbolized - not updating trace file.')


if __name__ == '__main__':
  main()