| #!/usr/bin/env python |
| #===- lib/asan/scripts/asan_symbolize.py -----------------------------------===# |
| # |
| # The LLVM Compiler Infrastructure |
| # |
| # This file is distributed under the University of Illinois Open Source |
| # License. See LICENSE.TXT for details. |
| # |
| #===------------------------------------------------------------------------===# |
| import bisect |
| import os |
| import re |
| import sys |
| import subprocess |
| |
# When True, the symbolizers echo the helper command lines they run and the
# input lines they match to stdout.
DEBUG = False

# Cache of symbolizer objects, keyed by the binary path found in a frame line.
symbolizers = {}

# NOTE(review): the two dictionaries below are not referenced by any code
# visible in this file -- confirm before removing.
filetypes = {}
vmaddrs = {}
| |
| |
def fix_filename(file_name):
    """Shorten a "file:line" location string for display.

    Each command-line argument (sys.argv[1:]) is used as a regular expression;
    the text up to and including a match of that expression is removed from
    the location.  A location inside an asan_*.cc source file is then
    collapsed to "_asan_rtl_", and a "crtstuff.c:0" location is rewritten to
    "???:0".

    Args:
      file_name: location string, e.g. "/path/to/foo.cc:12".

    Returns:
      The rewritten location string.
    """
    for path_to_cut in sys.argv[1:]:
        # The argument is used as a pattern exactly as given (not escaped).
        file_name = re.sub(".*" + path_to_cut, "", file_name)
    # The dots of the file extensions are escaped so that they match only a
    # literal '.', not an arbitrary character.
    file_name = re.sub(r".*asan_[a-z_]*\.cc:[0-9]*", "_asan_rtl_", file_name)
    file_name = re.sub(r".*crtstuff\.c:0", "???:0", file_name)
    return file_name
| |
| |
class Symbolizer(object):
    """Common base class of the symbolizers in this file.

    Subclasses override symbolize(); the base implementation only states the
    interface.
    """

    def __init__(self):
        pass

    def symbolize(self, prefix, addr, offset):
        """Return the symbolized text for one stack frame.

        Args:
          prefix: leading part of the frame line (indentation and "#N ").
          addr: the absolute address as a "0x..." string.
          offset: the offset inside the binary as a "0x..." string.

        Returns:
          The replacement line, or None if the frame cannot be symbolized.

        Raises:
          NotImplementedError: always; subclasses must override this method.
        """
        raise NotImplementedError(
            "%s does not implement symbolize()" % type(self).__name__)
| |
| |
class LinuxSymbolizer(Symbolizer):
    """Symbolizes offsets of one binary through an addr2line child process.

    The addr2line process is started once in the constructor and is then
    queried for every address passed to symbolize().
    """

    def __init__(self, binary):
        super(LinuxSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()

    def open_addr2line(self):
        """Start "addr2line -f -e <binary>" with piped stdin and stdout.

        Returns:
          The subprocess.Popen object of the started process.
        """
        cmd = ["addr2line", "-f", "-e", self.binary]
        if DEBUG:
            print(' '.join(cmd))
        # Unbuffered text-mode pipes, so the request/response exchange in
        # symbolize() does not depend on the interpreter's default buffering
        # or on a bytes/str distinction.
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                bufsize=0,
                                universal_newlines=True)

    def symbolize(self, prefix, addr, offset):
        """Return "<prefix><addr> in <function> <file:line>" for an offset.

        Args:
          prefix: leading part of the frame line.
          addr: the absolute address as a "0x..." string (only echoed).
          offset: the offset inside the binary, sent to addr2line.

        Returns:
          The symbolized line.  If talking to addr2line fails, function and
          file are left empty.
        """
        try:
            self.pipe.stdin.write("%s\n" % offset)
            # Make sure the request has left our side before waiting for
            # the answer.
            self.pipe.stdin.flush()
            # With -f, the function name line is read first, then the
            # location line.
            function_name = self.pipe.stdout.readline().rstrip()
            file_name = self.pipe.stdout.readline().rstrip()
        except Exception:
            # Best effort: any failure of the exchange yields empty names
            # instead of aborting the whole run.
            function_name = ""
            file_name = ""
        file_name = fix_filename(file_name)
        return "%s%s in %s %s" % (prefix, addr, function_name, file_name)
| |
| |
class DarwinSymbolizer(Symbolizer):
    """Symbolizes offsets of one binary with the atos and otool tools.

    A fresh atos process is started for every address; the __TEXT vmaddr of
    the binary is read from "otool -l" once and cached.
    """

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        # Guess which arch we're running. 10 = len("0x") + 8 hex digits.
        if len(addr) > 10:
            self.arch = "x86_64"
        else:
            self.arch = "i386"
        self.vmaddr = None
        self.pipe = None

    def get_binary_vmaddr(self):
        """
        Get the slide value to be added to the address.
        We're looking for the following piece in otool -l output:
          Load command 0
          cmd LC_SEGMENT
          cmdsize 736
          segname __TEXT
          vmaddr 0x00000000

        Returns:
          The vmaddr of the __TEXT segment as an int, or 0 if none is found.
        """
        # Compare against None: a cached vmaddr of 0 is a valid value and
        # must not trigger another otool run.
        if self.vmaddr is not None:
            return self.vmaddr
        cmdline = ["otool", "-l", self.binary]
        pipe = subprocess.Popen(cmdline,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
        # communicate() reads all output, closes the pipes and waits for the
        # process, so no child or descriptor is left behind.
        output = pipe.communicate()[0]
        is_text = False
        vmaddr = 0
        for line in output.splitlines():
            line = line.strip()
            if line.startswith('segname'):
                is_text = (line == 'segname __TEXT')
                continue
            if line.startswith('vmaddr') and is_text:
                sv = line.split(' ')
                vmaddr = int(sv[-1], 16)
                break
        self.vmaddr = vmaddr
        return self.vmaddr

    def write_addr_to_pipe(self, offset):
        """Write offset + vmaddr, formatted as hex, to the stdin of atos."""
        slide = self.get_binary_vmaddr()
        self.pipe.stdin.write("0x%x\n" % (int(offset, 16) + slide))

    def open_atos(self):
        """Start "atos -o <binary> -arch <arch>" and store it in self.pipe."""
        if DEBUG:
            print("atos -o %s -arch %s" % (self.binary, self.arch))
        cmdline = ["atos", "-o", self.binary, "-arch", self.arch]
        self.pipe = subprocess.Popen(cmdline,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     universal_newlines=True)

    def symbolize(self, prefix, addr, offset):
        """Return the symbolized line for one frame.

        Args:
          prefix: leading part of the frame line.
          addr: the absolute address as a "0x..." string (only echoed).
          offset: the offset inside the binary as a "0x..." string.

        Returns:
          "<prefix><addr> in <function> <file:line>" when the atos answer is
          well-formed, otherwise "<prefix><addr> in <raw atos answer>".
        """
        self.open_atos()
        self.write_addr_to_pipe(offset)
        self.pipe.stdin.close()
        try:
            atos_line = self.pipe.stdout.readline().rstrip()
        finally:
            # This atos process serves a single address: release its pipes
            # and reap it instead of leaking one child per frame.
            self.pipe.stdout.close()
            self.pipe.stderr.close()
            self.pipe.wait()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print("atos_line:  %s" % atos_line)
        if match:
            function_name = match.group(1)
            # Drop the parenthesized argument list from the function name.
            function_name = re.sub(r"\(.*?\)", "", function_name)
            file_name = fix_filename(match.group(3))
            return "%s%s in %s %s" % (prefix, addr, function_name, file_name)
        else:
            return "%s%s in %s" % (prefix, addr, atos_line)
| |
| |
| # Chain two symbolizers so that the second one is called if the first fails. |
class ChainSymbolizer(Symbolizer):
    """Pair of symbolizers where the second is the fallback of the first."""

    def __init__(self, symbolizer1, symbolizer2):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer1, self.symbolizer2 = symbolizer1, symbolizer2

    def symbolize(self, prefix, addr, offset):
        """Return the first symbolizer's answer, or the second's if it is None."""
        primary = self.symbolizer1.symbolize(prefix, addr, offset)
        if primary is not None:
            return primary
        return self.symbolizer2.symbolize(prefix, addr, offset)
| |
| |
def BreakpadSymbolizerFactory(addr, binary):
    """Create a BreakpadSymbolizer for a binary if a symbol file exists.

    The symbol file name is the binary path followed by the value of the
    BREAKPAD_SUFFIX environment variable.

    Args:
      addr: unused.
      binary: path of the binary named in the frame.

    Returns:
      A BreakpadSymbolizer, or None when BREAKPAD_SUFFIX is unset/empty or
      the symbol file does not exist.
    """
    suffix = os.getenv("BREAKPAD_SUFFIX")
    if not suffix:
        return None
    filename = binary + suffix
    if not os.access(filename, os.F_OK):
        return None
    return BreakpadSymbolizer(filename)
| |
| |
def SystemSymbolizerFactory(system, addr, binary):
    """Create the symbolizer that uses the native tools of the given system.

    Args:
      system: system name; 'Linux' and 'Darwin' are recognized.
      addr: a frame address string, passed on to DarwinSymbolizer.
      binary: path of the binary to symbolize.

    Returns:
      A LinuxSymbolizer or DarwinSymbolizer, or None for any other system.
    """
    if system == 'Linux':
        return LinuxSymbolizer(binary)
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    return None
| |
| |
class BreakpadSymbolizer(Symbolizer):
    """Symbolizes offsets using the records of a Breakpad symbol file.

    The whole file is parsed in the constructor.  self.files holds the FILE
    names by index, self.symbols maps a symbol address to its name,
    self.addresses maps the start address of a line record to a
    (symbol address, size, line, file number) tuple, and self.address_list
    is the sorted list of those start addresses.
    """

    def __init__(self, filename):
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # Read through a context manager so the file is closed right away.
        with open(filename) as sym_file:
            lines = sym_file.readlines()
        self.files = []
        self.symbols = {}
        self.address_list = []
        self.addresses = {}
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the lookup tables from the lines following the MODULE line."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to record.
                continue
            if fragments[0] == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif fragments[0] == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif fragments[0] in ['CFI', 'STACK']:
                pass
            elif fragments[0] == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # Keep a name that was already recorded for this address.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Look up the line record covering an address.

        Args:
          addr: the offset as an int.

        Returns:
          A (symbol name, file name, line number) tuple, or None if no
          record covers the address.
        """
        key = None
        if addr in self.addresses:
            key = addr
        else:
            # Closest record that starts below addr, if there is one.
            index = bisect.bisect_left(self.address_list, addr)
            if index == 0:
                return None
            else:
                key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        else:
            return None

    def symbolize(self, prefix, addr, offset):
        """Return "<prefix><addr> in <function> <file>:<line>", or None.

        The line is only returned, not printed: the caller is responsible
        for the output, so printing here would duplicate the frame.
        """
        res = self.get_sym_file_line(int(offset, 16))
        if res:
            function_name, file_name, line_no = res
            return "%s%s in %s %s:%d" % (
                prefix, addr, function_name, file_name, line_no)
        else:
            return None
| |
| |
def symbolize_line(system, line):
    """Symbolize one line of input if it is a stack frame.

    A frame line looks like
      #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    The symbolizer for each binary is created on first use and cached in the
    module-level symbolizers dictionary.

    Args:
      system: system name passed on to SystemSymbolizerFactory.
      line: one line of input.

    Returns:
      The symbolized line for a frame, or the line itself if it is not a
      frame line.
    """
    match = re.match(
        r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)', line)
    if not match:
        return line
    if DEBUG:
        print(line)
    prefix = match.group(1)
    # match.group(2) is the frame number; it is not needed here.
    addr = match.group(3)
    binary = match.group(4)
    offset = match.group(5)
    if binary not in symbolizers:
        # Prefer a Breakpad symbol file when one is available.
        symbolizers[binary] = (BreakpadSymbolizerFactory(addr, binary) or
                               SystemSymbolizerFactory(system, addr, binary))
    symbolizer = symbolizers[binary]
    result = symbolizer.symbolize(prefix, addr, offset)
    if result is None:
        # The cached symbolizer gave up: from now on back it with the system
        # symbolizer, and retry this frame through the chain.  A successful
        # first answer is returned as is instead of being computed twice.
        chained = ChainSymbolizer(
            symbolizer, SystemSymbolizerFactory(system, addr, binary))
        symbolizers[binary] = chained
        result = chained.symbolize(prefix, addr, offset)
    return result
| |
| |
def main():
    """Filter stdin to stdout, symbolizing the stack frame lines.

    Only 'Linux' and 'Darwin' (as reported by os.uname()) are handled; on
    any other system a message is printed and no input is read.
    """
    system = os.uname()[0]
    if system not in ['Linux', 'Darwin']:
        print(' '.join(['Unknown system: ', system]))
        return
    for raw_line in sys.stdin:
        print(symbolize_line(system, raw_line).rstrip())


if __name__ == '__main__':
    main()