|  | #!/usr/bin/python | 
|  |  | 
|  | """Disassemble the code stored in a tombstone. | 
|  |  | 
|  | The classes in this module use an interface, ProcessLine, so that they can be | 
|  | chained together to do arbitrary processing. The current classes support |
|  | disassembling the bytes embedded in tombstones and printing output to stdout. | 
|  | """ | 
|  |  | 
|  |  | 
|  | import re | 
|  | import subprocess | 
|  | import sys | 
|  | import tempfile | 
|  | import architecture | 
|  |  | 
|  |  | 
# Assembly text that ProcessCodeBlock writes at the top of the scratch file
# before the dumped bytes: it declares _start as a global function symbol and
# opens its definition.
STANDARD_PROLOGUE = """
.type   _start, %function
.globl  _start
_start:
"""


# Prologue that ProcessCodeBlock writes instead of STANDARD_PROLOGUE when the
# ABI is 'arm' and bit 0x20 of cpsr is set.  It appends directives that switch
# the assembler to 16-bit (Thumb) code and open a thumb_start function symbol.
THUMB_PROLOGUE = STANDARD_PROLOGUE + """
.code   16
.thumb_func
.type   thumb_start, %function
thumb_start:
"""
|  |  | 
|  |  | 
def Disassemble(line_generator):
    """Yield the lines of a tombstone with its code dumps disassembled.

    Every input line is yielded as it is read.  Once an "ABI: '<name>'"
    header has been seen, register lists and code blocks in the rest of the
    input are handed to ProcessRegisterList and ProcessCodeBlock, and
    whatever those generators produce is yielded as well.

    Args:
      line_generator: iterator over the lines of the tombstone.  It is shared
        with the helper generators, which consume lines from it directly.

    Yields:
      Output lines, in order.
    """
    abi_pattern = re.compile(r"(ABI: '(.*)')")
    abi = None
    tools = None
    # Global headers: pass everything through until the ABI line shows up.
    for header in line_generator:
        yield header
        found = abi_pattern.search(header)
        if found is None:
            continue
        abi = found.group(2)
        # Resolve the toolchain once here instead of once per code block.
        tools = architecture.Architecture(abi)
        break

    # What follows the ABI header is a mix of:
    #   o lines that are passed through untouched,
    #   o register blocks, opened by a 'pid: ...' line and closed by a
    #     'backtrace:' line,
    #   o code blocks dumped as words, opened by 'code around ...' and closed
    #     by the first line that does not look like a row of words.
    #
    # The only ordering requirement is that the register values appear before
    # the first code block, so register handling is nested inside the loop
    # that searches for code blocks.
    registers_start = re.compile('^pid: ')
    code_start = re.compile('^code around ([a-z0-9]+)|memory near (pc)')
    registers = {}
    for entry in line_generator:
        yield entry
        if registers_start.search(entry) is not None:
            # A new register list replaces whatever was collected before.
            registers = {}
            for passthrough in ProcessRegisterList(line_generator, registers):
                yield passthrough
        block = code_start.search(entry)
        if block is None:
            continue
        # Exactly one of the two capture groups is set, depending on which
        # alternative of the pattern matched.
        anchor = block.group(1) or block.group(2)
        for emitted in ProcessCodeBlock(
                abi, tools, anchor, registers, line_generator):
            yield emitted
|  |  | 
|  |  | 
def ProcessRegisterList(line_generator, rval):
    """Pass a register block through while recording the register values.

    Lines are read from line_generator and yielded unchanged up to and
    including the first line that starts with 'backtrace:'.  Lines that start
    with a space are split into alternating name/value words, which are
    stored in rval.

    Args:
      line_generator: iterator over tombstone lines, positioned just after
        the line that opened the register block.
      rval: dict that is updated in place with name -> value text.

    Yields:
      Each consumed line, unchanged.
    """
    for entry in line_generator:
        yield entry
        if entry.startswith('backtrace:'):
            return
        # Only indented lines carry register name/value pairs.
        if not entry.startswith(' '):
            continue
        fields = entry.split()
        assert len(fields) % 2 == 0
        # Names sit at the even positions, their values right after them.
        for slot, name in enumerate(fields[::2]):
            rval[name] = fields[2 * slot + 1]
|  |  | 
def ProcessCodeBlock(abi, tools, register_name, register_text, line_generator):
    """Replace a dumped block of code words with its disassembly.

    Reads the memory-dump lines that follow a code block header, turns their
    bytes into an assembly file, assembles it, links it at the address of the
    first dumped line and yields the disassembler output.  The line for the
    address held in register_name is prefixed with '--->', every other
    emitted line with four spaces.

    The dump lines themselves are consumed and not yielded.  The first line
    that does not have the expected length ends the block; it is consumed as
    well and a single '\n' is yielded in its place after the disassembly.

    If no dump line follows the header at all, nothing is assembled and the
    line that ended the (empty) block, if any, is yielded unchanged.

    Args:
      abi: ABI name taken from the tombstone header.
      tools: object providing WordToBytes(word) plus Assemble(args),
        Link(args) and Disassemble(args); the results of the last three are
        run as subprocess command lines.
      register_name: key in register_text of the register the dump is
        centered on.
      register_text: dict mapping register names to hexadecimal value text.
        For ABI 'arm' with a non-empty block it must also contain 'cpsr'.
      line_generator: iterator over the tombstone lines, positioned just
        after the code block header.

    Yields:
      Disassembly lines followed by '\n', or the unparsed line for an empty
      block.

    Raises:
      KeyError: a required register is missing from register_text.
      AssertionError: the register width or a dump line address is not what
        the known formats use.
      subprocess.CalledProcessError: the assembler, sed or the linker failed.
    """
    program_counter = register_text[register_name]
    program_counter_val = int(program_counter, 16)
    # Handle the 3 different file formats that we've observed; the width of
    # the register text selects between them.
    if len(program_counter) == 8:
        block_line_len = [67]
        block_num_words = 4
    else:
        assert len(program_counter) == 16
        block_line_len = [57, 73]
        block_num_words = 2

    # Collect the assembly for the dumped bytes before touching any external
    # tool, so an empty block can be detected up front.
    asm_lines = []
    # Retains the hexadecimal text for the start of the block.
    start_address = None
    # Maintains a numeric counter for the address of the current byte.
    current_address = None
    # The line that ended the block, or None if the input ran out.
    terminator = None
    for line in line_generator:
        # Be conservative and stop interpreting if the line length is wrong.
        # We can't count words because spaces can appear in the text
        # representation of the memory.
        if len(line) not in block_line_len:
            terminator = line
            break
        words = line.split()
        # Double check the address at the start of each line.
        if current_address is None:
            start_address = words[0]
            current_address = int(start_address, 16)
        else:
            assert current_address == int(words[0], 16)
        for word in words[1:block_num_words + 1]:
            # WordToBytes handles the byte swapping.
            for byte in tools.WordToBytes(word):
                # Emit a label at the desired program counter.  This makes
                # the disassembler resynchronize at this point, which lets us
                # position the arrow and decode that instruction properly.
                if current_address == program_counter_val:
                    asm_lines.append('program_counter_was_here:\n')
                asm_lines.append('  .byte 0x%s\n' % byte)
                current_address += 1

    if start_address is None:
        # Nothing parseable followed the header, so there is no load address
        # to link at.  Hand back the line we swallowed instead of failing.
        if terminator is not None:
            yield terminator
        return

    # The scratch file is opened in text mode because it is written as str.
    scratch_file = tempfile.NamedTemporaryFile(mode='w', suffix='.s')
    object_file = tempfile.NamedTemporaryFile(suffix='.o')
    linked_file = tempfile.NamedTemporaryFile(suffix='.o')
    disassembler = None
    try:
        # ARM code comes in two flavors: arm and thumb.  Figure out the one
        # to use by peeking in the cpsr.
        if abi == 'arm' and int(register_text['cpsr'], 16) & 0x20:
            scratch_file.write(THUMB_PROLOGUE)
        else:
            scratch_file.write(STANDARD_PROLOGUE)
        scratch_file.writelines(asm_lines)
        scratch_file.flush()

        # Assemble the scratch file and relocate it to the block address with
        # the linker.
        subprocess.check_call(tools.Assemble([
            '-o', object_file.name, scratch_file.name]))

        # Work around ARM data tagging: the bytes below rewrite the
        # NUL-delimited symbol name "$d" to "$q" inside the object file.
        # NOTE(review): presumably this keeps the disassembler from treating
        # the .byte output as data -- confirm.
        if abi.startswith('arm'):
            subprocess.check_call(
                ['sed', '-i', '-e',
                 "s/\\x00\\x24\\x64\\x00/\\x00\\x24\\x71\\x00/",
                 object_file.name])

        subprocess.check_call(tools.Link([
            '-Ttext', '0x' + start_address,
            '-o', linked_file.name, object_file.name]))

        # Read the disassembler output as text, matching the str patterns
        # used below.
        disassembler = subprocess.Popen(
            tools.Disassemble(['-S', linked_file.name]),
            stdout=subprocess.PIPE, universal_newlines=True)
        # Skip some of the annoying assembler headers.
        emit = False
        start_pattern = start_address + ' '
        # objdump padding varies between 32 bit and 64 bit architectures.
        arrow_pattern = re.compile('^[ 0]*%8x:\t' % program_counter_val)
        for line in disassembler.stdout:
            emit = emit or line.startswith(start_pattern)
            if not emit or len(line) <= 1:
                continue
            if 'program_counter_was_here' in line:
                continue
            if arrow_pattern.search(line):
                yield '--->' + line
            else:
                yield '    ' + line
    finally:
        # Reap the child and release the temporary files even when a tool
        # fails or the consumer stops iterating early.
        if disassembler is not None:
            disassembler.stdout.close()
            disassembler.wait()
        linked_file.close()
        object_file.close()
        scratch_file.close()
    yield '\n'
|  |  | 
|  |  | 
def main(argv):
    """Disassemble every tombstone named on the command line to stdout.

    Args:
      argv: full argument vector.  argv[0] is skipped; each later entry is
        opened as a tombstone file and run through Disassemble.
    """
    for fn in argv[1:]:
        # Close each tombstone as soon as it has been processed instead of
        # leaving the handle open until the interpreter exits.
        with open(fn, 'r') as tombstone:
            for line in Disassemble(tombstone):
                # Lines read from the file keep their own terminator, so they
                # are written as-is; this also avoids the Python 2-only print
                # statement.
                sys.stdout.write(line)


if __name__ == '__main__':
    main(sys.argv)