scripts/disassemble_tombstone.py - platform/development.git - Git at Google

 #!/usr/bin/python

 """Disassemble the code stored in a tombstone.

 The classes in this module use an interface, ProcessLine, so that they can be
 chained together to do arbitrary processing. The current classes support
 disassembling the bytes embedded in tombstones and printing output to stdout.
 """


 import re
 import subprocess
 import sys
 import tempfile
 import architecture


 # Assembly preamble written at the top of a scratch file: it declares the
 # global function symbol _start, after which the dumped bytes are emitted.
 STANDARD_PROLOGUE = """
        .type   _start, %function
        .globl  _start
 _start:
 """


 # Preamble used instead of STANDARD_PROLOGUE when ProcessCodeBlock finds bit
 # 0x20 set in the cpsr of an 'arm' tombstone: the standard prologue followed
 # by directives that select 16-bit (Thumb) code and open a thumb_start
 # function.
 THUMB_PROLOGUE = STANDARD_PROLOGUE + """
        .code   16
        .thumb_func
        .type   thumb_start, %function
 thumb_start:
 """


 def Disassemble(line_generator):
   """Yield a tombstone's lines with its code dumps replaced by disassembly.

   Lines are passed through unchanged up to and including the "ABI: '...'"
   header, which selects the toolchain. After that, register lists (starting
   at a 'pid: ' line) are recorded as they pass through, and the word dump
   that follows each 'code around <register>' or 'memory near pc' header is
   replaced by the output of ProcessCodeBlock.

   Args:
     line_generator: Iterable of tombstone lines, such as an open file or a
       list of strings.

   Yields:
     The output lines, in order.
   """
   # Both loops below and the helper generators must advance one shared
   # position in the input. A plain sequence (e.g. a list) would otherwise be
   # restarted from its first line by every new for loop.
   line_generator = iter(line_generator)
   abi_line = re.compile("(ABI: \'(.*)\')")
   abi = None
   tools = None
   # Process global headers
   for line in line_generator:
     yield line
     abi_header = abi_line.search(line)
     if abi_header:
       abi = abi_header.group(2)
       # Look up the tools here so we don't do a lookup for each code block.
       tools = architecture.Architecture(abi)
       break
   # The rest of the file consists of:
   #   o Lines that should pass through unchanged
   #   o Blocks of register values, which follow a 'pid: ...' line and end with
   #     'backtrace:' line
   #   o Blocks of code represented as words, which start with 'code around ...'
   #     and end with a line that doesn't look like a list of words.
   #
   # The only constraint on the ordering of these blocks is that the register
   # values must come before the first code block.
   #
   # It's easiest to nest register processing in the codeblock search loop.
   register_list_re = re.compile('^pid: ')
   codeblock_re = re.compile('^code around ([a-z0-9]+)|memory near (pc)')
   register_text = {}
   for line in line_generator:
     yield line
     if register_list_re.search(line):
       # A new register list replaces whatever was recorded before it.
       register_text = {}
       for output in ProcessRegisterList(line_generator, register_text):
         yield output
     code_match = codeblock_re.search(line)
     if code_match:
       # Only one of the two groups can match; the other contributes ''.
       code_reg = ''.join(code_match.groups(''))
       for output in ProcessCodeBlock(
           abi, tools, code_reg, register_text, line_generator):
         yield output


 def ProcessRegisterList(line_generator, rval):
   """Pass a register list through while recording the register values.

   Args:
     line_generator: Iterator over the remaining tombstone lines. It is
       consumed up to and including the first line starting with 'backtrace:'.
     rval: Dictionary updated in place with register name -> value text.

   Yields:
     Every consumed line, unchanged.
   """
   for text in line_generator:
     yield text
     if text.startswith('backtrace:'):
       break
     if not text.startswith(' '):
       continue
     # Indented lines hold alternating name / value tokens.
     tokens = text.split()
     assert len(tokens) % 2 == 0
     for name, value in zip(tokens[0::2], tokens[1::2]):
       rval[name] = value


 def ProcessCodeBlock(abi, tools, register_name, register_text, line_generator):
   """Disassemble one dumped code block and mark the saved register address.

   Consumes the word-dump lines that follow a code block header, turns their
   bytes into an assembly file of .byte directives, assembles and links it at
   the block's start address, and yields the disassembler's listing with
   '--->' in front of the line at the register's address.

   Args:
     abi: ABI name from the tombstone header. 'arm' enables the cpsr Thumb
       check; any name starting with 'arm' enables the symbol rename below.
     tools: Object providing WordToBytes(word) plus Assemble(args),
       Link(args) and Disassemble(args), whose results are run as commands.
     register_name: Key into register_text for the register the block was
       dumped around.
     register_text: Dictionary of register name -> hexadecimal value text.
     line_generator: Iterator over the remaining tombstone lines. The dump
       lines and the first line that ends the block are consumed.

   Yields:
     The disassembly lines, each prefixed with '--->' or four spaces, followed
     by a single newline. If no dump line is recognized, only the consumed
     terminating line (if any) is yielded.
   """
   program_counter = register_text[register_name]
   program_counter_val = int(program_counter, 16)
   # Handle the 3 different file formats that we've observed. The width of
   # the register text selects the accepted line lengths and words per line.
   if len(program_counter) == 8:
     block_line_len = [67]
     block_num_words = 4
   else:
     assert len(program_counter) == 16
     block_line_len = [57, 73]
     block_num_words = 2
   # Retains the hexadecimal text for the start of the block
   start_address = None
   # Maintains a numeric counter for the address of the current byte
   current_address = None
   # The already-consumed line that ended the block, if there was one.
   terminator = None
   temp_files = []
   disassembler = None
   try:
     # Text mode, so the str prologue and directives can be written on
     # Python 3 as well as Python 2.
     scratch_file = tempfile.NamedTemporaryFile(mode='w', suffix='.s')
     temp_files.append(scratch_file)
     # ARM code comes in two flavors: arm and thumb. Figure out the one
     # to use by peeking in the cpsr.
     if abi == 'arm' and int(register_text['cpsr'], 16) & 0x20:
       scratch_file.write(THUMB_PROLOGUE)
     else:
       scratch_file.write(STANDARD_PROLOGUE)
     # Now generate assembly from the bytes in the code block.
     for line in line_generator:
       # Be conservative and stop interpreting if the line length is wrong
       # We can't count words because spaces can appear in the text
       # representation of the memory.
       if len(line) not in block_line_len:
         terminator = line
         break
       words = line.split()
       # Double check the address at the start of each line
       if current_address is None:
         start_address = words[0]
         current_address = int(start_address, 16)
       else:
         assert current_address == int(words[0], 16)
       for word in words[1:block_num_words+1]:
         # Handle byte swapping
         for byte in tools.WordToBytes(word):
           # Emit a label at the desired program counter.
           # This will cause the disassembler to resynchronize at this point,
           # allowing us to position the arrow and also ensuring that we
           # decode the instruction properly.
           if current_address == program_counter_val:
             scratch_file.write('program_counter_was_here:\n')
           scratch_file.write('  .byte 0x%s\n' % byte)
           current_address += 1
     if start_address is None:
       # Nothing was recognized as dump data, so there is no address to link
       # at. Hand back the line we consumed instead of failing in the linker
       # command construction.
       if terminator is not None:
         yield terminator
       return
     scratch_file.flush()
     # Assemble the scratch file and relocate it to the block address with
     # the linker.
     object_file = tempfile.NamedTemporaryFile(suffix='.o')
     temp_files.append(object_file)
     subprocess.check_call(tools.Assemble([
         '-o', object_file.name, scratch_file.name]))

     # Work around ARM data tagging: the sed expression rewrites the
     # NUL-delimited name '$d' (0x24 0x64) to '$q' (0x24 0x71) in the object.
     # NOTE(review): presumably this stops the disassembler from treating the
     # bytes as data -- confirm against the ARM ELF mapping symbol rules.
     if abi.startswith('arm'):
       subprocess.check_call(
           ['sed', '-i', '-e', "s/\\x00\\x24\\x64\\x00/\\x00\\x24\\x71\\x00/", object_file.name])

     linked_file = tempfile.NamedTemporaryFile(suffix='.o')
     temp_files.append(linked_file)
     cmd = tools.Link([
         '-Ttext', '0x' + start_address, '-o', linked_file.name, object_file.name])
     subprocess.check_call(cmd)
     # universal_newlines makes the pipe yield text lines on Python 3 too.
     disassembler = subprocess.Popen(
         tools.Disassemble(['-S', linked_file.name]),
         stdout=subprocess.PIPE, universal_newlines=True)
     # Skip some of the annoying assembler headers.
     emit = False
     start_pattern = start_address + ' '
     # objdump padding varies between 32 bit and 64 bit architectures, so let
     # the regex absorb any leading blanks or zeros rather than baking a
     # fixed-width pad into the pattern.
     arrow_pattern = re.compile('^[ 0]*%x:\t' % program_counter_val)
     for line in disassembler.stdout:
       emit = emit or line.startswith(start_pattern)
       if emit and len(line) > 1 and 'program_counter_was_here' not in line:
         if arrow_pattern.search(line):
           yield '--->' + line
         else:
           yield '    ' + line
     yield '\n'
   finally:
     # Runs on success, on toolchain errors and when the consumer abandons the
     # generator, so neither the child process nor the temp files leak.
     if disassembler is not None:
       disassembler.stdout.close()
       disassembler.wait()
     for temp_file in temp_files:
       temp_file.close()


 def main(argv):
   """Disassemble each tombstone named on the command line to stdout.

   Args:
     argv: Full argument vector. argv[0] is skipped and every remaining entry
       is opened as a tombstone file.
   """
   for fn in argv[1:]:
     # Close each tombstone as soon as it has been processed.
     with open(fn, 'r') as tombstone:
       for line in Disassemble(tombstone):
         # Lines are written verbatim: no separator or newline is added.
         sys.stdout.write(line)


 if __name__ == '__main__':
   main(sys.argv)
	#!/usr/bin/python

	"""Disassemble the code stored in a tombstone.

	The classes in this module use an interface, ProcessLine, so that they can be
	chained together to do arbitrary processing. The current classes support
	disassembling the bytes embedded in tombstones and printing output to stdout.
	"""


	import re
	import subprocess
	import sys
	import tempfile
	import architecture


	# Assembly preamble written at the top of a scratch file: it declares the
	# global function symbol _start, after which the dumped bytes are emitted.
	STANDARD_PROLOGUE = """
	.type _start, %function
	.globl _start
	_start:
	"""


	# Preamble used instead of STANDARD_PROLOGUE when ProcessCodeBlock finds bit
	# 0x20 set in the cpsr of an 'arm' tombstone: the standard prologue followed
	# by directives that select 16-bit (Thumb) code and open a thumb_start
	# function.
	THUMB_PROLOGUE = STANDARD_PROLOGUE + """
	.code 16
	.thumb_func
	.type thumb_start, %function
	thumb_start:
	"""


	def Disassemble(line_generator):
	  """Yield a tombstone's lines with its code dumps replaced by disassembly.

	  Lines are passed through unchanged up to and including the "ABI: '...'"
	  header, which selects the toolchain. After that, register lists (starting
	  at a 'pid: ' line) are recorded as they pass through, and the word dump
	  that follows each 'code around <register>' or 'memory near pc' header is
	  replaced by the output of ProcessCodeBlock.

	  Args:
	    line_generator: Iterable of tombstone lines, such as an open file or a
	      list of strings.

	  Yields:
	    The output lines, in order.
	  """
	  # Both loops below and the helper generators must advance one shared
	  # position in the input. A plain sequence (e.g. a list) would otherwise be
	  # restarted from its first line by every new for loop.
	  line_generator = iter(line_generator)
	  abi_line = re.compile("(ABI: \'(.*)\')")
	  abi = None
	  tools = None
	  # Process global headers
	  for line in line_generator:
	    yield line
	    abi_header = abi_line.search(line)
	    if abi_header:
	      abi = abi_header.group(2)
	      # Look up the tools here so we don't do a lookup for each code block.
	      tools = architecture.Architecture(abi)
	      break
	  # The rest of the file consists of:
	  #   o Lines that should pass through unchanged
	  #   o Blocks of register values, which follow a 'pid: ...' line and end
	  #     with 'backtrace:' line
	  #   o Blocks of code represented as words, which start with
	  #     'code around ...' and end with a line that doesn't look like a list
	  #     of words.
	  #
	  # The only constraint on the ordering of these blocks is that the register
	  # values must come before the first code block.
	  #
	  # It's easiest to nest register processing in the codeblock search loop.
	  register_list_re = re.compile('^pid: ')
	  # The '|' must be an unescaped alternation: an escaped pipe would only
	  # match a literal '|' character and no header would ever be recognized.
	  codeblock_re = re.compile('^code around ([a-z0-9]+)|memory near (pc)')
	  register_text = {}
	  for line in line_generator:
	    yield line
	    if register_list_re.search(line):
	      # A new register list replaces whatever was recorded before it.
	      register_text = {}
	      for output in ProcessRegisterList(line_generator, register_text):
	        yield output
	    code_match = codeblock_re.search(line)
	    if code_match:
	      # Only one of the two groups can match; the other contributes ''.
	      code_reg = ''.join(code_match.groups(''))
	      for output in ProcessCodeBlock(
	          abi, tools, code_reg, register_text, line_generator):
	        yield output


	def ProcessRegisterList(line_generator, rval):
	  """Pass a register list through while recording the register values.

	  Args:
	    line_generator: Iterator over the remaining tombstone lines. It is
	      consumed up to and including the first line starting with
	      'backtrace:'.
	    rval: Dictionary updated in place with register name -> value text.

	  Yields:
	    Every consumed line, unchanged.
	  """
	  for line in line_generator:
	    yield line
	    if line.startswith('backtrace:'):
	      return
	    # The register list is indented and consists of alternating name, value
	    # pairs.
	    if line.startswith(' '):
	      words = line.split()
	      assert len(words) % 2 == 0
	      for index in range(0, len(words), 2):
	        rval[words[index]] = words[index + 1]


	def ProcessCodeBlock(abi, tools, register_name, register_text, line_generator):
	  """Disassemble one dumped code block and mark the saved register address.

	  Consumes the word-dump lines that follow a code block header, turns their
	  bytes into an assembly file of .byte directives, assembles and links it at
	  the block's start address, and yields the disassembler's listing with
	  '--->' in front of the line at the register's address.

	  Args:
	    abi: ABI name from the tombstone header. 'arm' enables the cpsr Thumb
	      check; any name starting with 'arm' enables the symbol rename below.
	    tools: Object providing WordToBytes(word) plus Assemble(args),
	      Link(args) and Disassemble(args), whose results are run as commands.
	    register_name: Key into register_text for the register the block was
	      dumped around.
	    register_text: Dictionary of register name -> hexadecimal value text.
	    line_generator: Iterator over the remaining tombstone lines. The dump
	      lines and the first line that ends the block are consumed.

	  Yields:
	    The disassembly lines, each prefixed with '--->' or four spaces,
	    followed by a single newline. If no dump line is recognized, only the
	    consumed terminating line (if any) is yielded.
	  """
	  program_counter = register_text[register_name]
	  program_counter_val = int(program_counter, 16)
	  # Handle the 3 different file formats that we've observed. The width of
	  # the register text selects the accepted line lengths and words per line.
	  if len(program_counter) == 8:
	    block_line_len = [67]
	    block_num_words = 4
	  else:
	    assert len(program_counter) == 16
	    block_line_len = [57, 73]
	    block_num_words = 2
	  # Retains the hexadecimal text for the start of the block
	  start_address = None
	  # Maintains a numeric counter for the address of the current byte
	  current_address = None
	  # The already-consumed line that ended the block, if there was one.
	  terminator = None
	  temp_files = []
	  disassembler = None
	  try:
	    # Text mode, so the str prologue and directives can be written on
	    # Python 3 as well as Python 2.
	    scratch_file = tempfile.NamedTemporaryFile(mode='w', suffix='.s')
	    temp_files.append(scratch_file)
	    # ARM code comes in two flavors: arm and thumb. Figure out the one
	    # to use by peeking in the cpsr.
	    if abi == 'arm' and int(register_text['cpsr'], 16) & 0x20:
	      scratch_file.write(THUMB_PROLOGUE)
	    else:
	      scratch_file.write(STANDARD_PROLOGUE)
	    # Now generate assembly from the bytes in the code block.
	    for line in line_generator:
	      # Be conservative and stop interpreting if the line length is wrong
	      # We can't count words because spaces can appear in the text
	      # representation of the memory.
	      if len(line) not in block_line_len:
	        terminator = line
	        break
	      words = line.split()
	      # Double check the address at the start of each line
	      if current_address is None:
	        start_address = words[0]
	        current_address = int(start_address, 16)
	      else:
	        assert current_address == int(words[0], 16)
	      for word in words[1:block_num_words+1]:
	        # Handle byte swapping
	        for byte in tools.WordToBytes(word):
	          # Emit a label at the desired program counter.
	          # This will cause the disassembler to resynchronize at this
	          # point, allowing us to position the arrow and also ensuring that
	          # we decode the instruction properly.
	          if current_address == program_counter_val:
	            scratch_file.write('program_counter_was_here:\n')
	          scratch_file.write(' .byte 0x%s\n' % byte)
	          current_address += 1
	    if start_address is None:
	      # Nothing was recognized as dump data, so there is no address to link
	      # at. Hand back the line we consumed instead of failing in the linker
	      # command construction.
	      if terminator is not None:
	        yield terminator
	      return
	    scratch_file.flush()
	    # Assemble the scratch file and relocate it to the block address with
	    # the linker.
	    object_file = tempfile.NamedTemporaryFile(suffix='.o')
	    temp_files.append(object_file)
	    subprocess.check_call(tools.Assemble([
	        '-o', object_file.name, scratch_file.name]))

	    # Work around ARM data tagging: the sed expression rewrites the
	    # NUL-delimited name '$d' (0x24 0x64) to '$q' (0x24 0x71) in the object.
	    # NOTE(review): presumably this stops the disassembler from treating the
	    # bytes as data -- confirm against the ARM ELF mapping symbol rules.
	    if abi.startswith('arm'):
	      subprocess.check_call(
	          ['sed', '-i', '-e', "s/\\x00\\x24\\x64\\x00/\\x00\\x24\\x71\\x00/", object_file.name])

	    linked_file = tempfile.NamedTemporaryFile(suffix='.o')
	    temp_files.append(linked_file)
	    cmd = tools.Link([
	        '-Ttext', '0x' + start_address, '-o', linked_file.name, object_file.name])
	    subprocess.check_call(cmd)
	    # universal_newlines makes the pipe yield text lines on Python 3 too.
	    disassembler = subprocess.Popen(
	        tools.Disassemble(['-S', linked_file.name]),
	        stdout=subprocess.PIPE, universal_newlines=True)
	    # Skip some of the annoying assembler headers.
	    emit = False
	    start_pattern = start_address + ' '
	    # objdump padding varies between 32 bit and 64 bit architectures, so let
	    # the regex absorb any leading blanks or zeros rather than baking a
	    # fixed-width pad into the pattern.
	    arrow_pattern = re.compile('^[ 0]*%x:\t' % program_counter_val)
	    for line in disassembler.stdout:
	      emit = emit or line.startswith(start_pattern)
	      if emit and len(line) > 1 and 'program_counter_was_here' not in line:
	        if arrow_pattern.search(line):
	          yield '--->' + line
	        else:
	          # Four spaces, the width of the arrow, keep the columns aligned.
	          yield '    ' + line
	    yield '\n'
	  finally:
	    # Runs on success, on toolchain errors and when the consumer abandons
	    # the generator, so neither the child process nor the temp files leak.
	    if disassembler is not None:
	      disassembler.stdout.close()
	      disassembler.wait()
	    for temp_file in temp_files:
	      temp_file.close()


	def main(argv):
	  """Disassemble each tombstone named on the command line to stdout.

	  Args:
	    argv: Full argument vector. argv[0] is skipped and every remaining
	      entry is opened as a tombstone file.
	  """
	  for fn in argv[1:]:
	    # Close each tombstone as soon as it has been processed.
	    with open(fn, 'r') as tombstone:
	      for line in Disassemble(tombstone):
	        # Lines are written verbatim: no separator or newline is added.
	        sys.stdout.write(line)


	if __name__ == '__main__':
	  main(sys.argv)