| # Copyright 2017 syzkaller project authors. All rights reserved. |
| # Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. |
| |
| ''' |
| This module provides classes which implement AST traversal in order to extract |
| items belonging to a struct. |
| ''' |
| |
| import collections |
| import logging |
| |
| from pycparser import c_ast |
| from header_preprocessor import HeaderFilePreprocessor |
| |
| |
| class StructWalkerException(Exception): |
| pass |
| |
| |
| class StructWalker(c_ast.NodeVisitor): |
| ''' |
| Given an ast obtained by parsing a header file, return a hierarchy |
| dictionary. The ast is expected to be of type pycparser.c_ast.FileAST. |
| |
| Usage : |
| |
| >>> import tempfile |
| >>> t = tempfile.NamedTemporaryFile() |
| >>> contents = """ |
| ... #define STRUCT_SIZE 1337 |
| ... struct ARRAY_OF_POINTERS_CONTAINER { |
| ... unsigned int *ptr[10]; |
| ... int **n; |
| ... }; |
| ... struct ARRAY_CONTAINER { |
| ... int g[10]; |
| ... int h[20][30]; |
| ... }; |
| ... struct REGULAR_STRUCT { |
| ... int x; |
| ... char *y; |
| ... void *ptr; |
| ... }; |
| ... struct STRUCT_WITH_STRUCT_PTR { |
| ... struct REGULAR_STRUCT *struct_ptr; |
| ... int z; |
| ... }; |
| ... struct STRUCT_WITH_STRUCT_INST { |
| ... struct REGULAR_STRUCT regular_struct_inst; |
| ... int a; |
| ... }; |
| ... struct STRUCT_WITH_STRUCT_ARRAY { |
| ... struct REGULAR_STRUCT regular_struct_array[100]; |
| ... int b; |
| ... }; |
| ... struct STRUCT_WITH_ANONYMOUS_STRUCT { |
| ... struct { |
| ... int g; |
| ... int h; |
| ... int i; |
| ... } anonymous_struct; |
| ... }; |
| ... struct STRUCT_WITH_ANONYMOUS_UNION { |
| ... union { |
| ... int t; |
| ... char r[100]; |
| ... } anonymous_union; |
| ... }; |
| ... struct STRUCT_WITH_STRUCT_ARRAY_SIZE_MACRO { |
| ... struct REGULAR_STRUCT regular_struct_array[STRUCT_SIZE]; |
| ... }; |
| ... struct STRUCT_WITH_2D_ARRAY_INST { |
| ... struct REGULAR_STRUCT regular_struct_array_2D[10][10]; |
| ... }; |
| ... struct NESTED_ANONYMOUS_STRUCT { |
| ... struct { |
| ... int x; |
| ... struct { |
| ... int y; |
| ... int z; |
| ... } level_2; |
| ... } level_1; |
| ... }; |
| ... """ |
| >>> t.write(contents) ; t.flush() |
| >>> struct_walker = StructWalker(filenames=[t.name]) |
| >>> local_hierarchy = struct_walker.generate_local_hierarchy() |
| >>> for k in local_hierarchy: |
| ... print k |
| ... print local_hierarchy[k] |
| ARRAY_OF_POINTERS_CONTAINER |
| [('unsigned int*[10]', 'ptr'), ('int**', 'n')] |
| STRUCT_WITH_STRUCT_ARRAY_SIZE_MACRO |
| [('struct REGULAR_STRUCT[1337]', 'regular_struct_array')] |
| STRUCT_WITH_2D_ARRAY_INST |
| [('struct REGULAR_STRUCT[10][10]', 'regular_struct_array_2D')] |
| STRUCT_WITH_STRUCT_ARRAY |
| [('struct REGULAR_STRUCT[100]', 'regular_struct_array'), ('int', 'b')] |
| NESTED_ANONYMOUS_STRUCT |
| [('int', 'level_1.x'), ('int', 'level_1.level_2.y'), ('int', 'level_1.level_2.z')] |
| STRUCT_WITH_ANONYMOUS_STRUCT |
| [('int', 'anonymous_struct.g'), ('int', 'anonymous_struct.h'), ('int', 'anonymous_struct.i')] |
| STRUCT_WITH_ANONYMOUS_UNION |
| [('int', 'anonymous_union.t'), ('char[100]', 'anonymous_union.r')] |
| STRUCT_WITH_STRUCT_INST |
| [('struct REGULAR_STRUCT', 'regular_struct_inst'), ('int', 'a')] |
| ARRAY_CONTAINER |
| [('int[10]', 'g'), ('int[20][30]', 'h')] |
| REGULAR_STRUCT |
| [('int', 'x'), ('char*', 'y'), ('void*', 'ptr')] |
| STRUCT_WITH_STRUCT_PTR |
| [('struct REGULAR_STRUCT*', 'struct_ptr'), ('int', 'z')] |
| ''' |
| |
| def __init__(self, ast=None, filenames=[], include_lines='', loglvl=logging.INFO): |
| super(StructWalker, self).__init__() |
| self.ast = ast |
| self.filenames = filenames |
| |
| if not filenames and not ast: |
| raise StructWalkerException('Specify either "filename" or "ast" to create' |
| 'StructParser object') |
| |
| if not self.ast: |
| self.ast = HeaderFilePreprocessor(self.filenames, include_lines=include_lines, |
| loglvl=loglvl).get_ast() |
| |
| self.include_lines = include_lines |
| self.local_structs_hierarchy = {} |
| self._setuplogging(loglvl) |
| |
| def _setuplogging(self, loglvl): |
| self.logger = logging.getLogger(self.__class__.__name__) |
| formatter = logging.Formatter('DEBUG:%(name)s:%(message)s') |
| sh = logging.StreamHandler() |
| sh.setFormatter(formatter) |
| sh.setLevel(loglvl) |
| self.logger.addHandler(sh) |
| self.logger.setLevel(loglvl) |
| |
| def _format_item(self, processed_item): |
| fmt_type = processed_item['type'] |
| fmt_type = ' '.join(fmt_type) |
| |
| self.logger.debug('_format_item : %s', processed_item) |
| |
| if 'is_ptr' in processed_item and 'is_fnptr' not in processed_item: |
| fmt_type = '%s%s' % (fmt_type, '*' * processed_item['is_ptr']) |
| |
| if 'is_array' in processed_item and 'array_size' in processed_item: |
| size_str = str(processed_item['array_size']).replace(', ', '][') |
| fmt_type = '%s%s' % (fmt_type, size_str) |
| |
| fmt_identifier = processed_item['identifier'] |
| |
| return [(fmt_type, fmt_identifier)] |
| |
| def _recursive_process_item(self, item_ast, processed_item, parent): |
| self.logger.debug('--- _recursive_process_item : %s', type(item_ast)) |
| if isinstance(item_ast, c_ast.Decl): |
| processed_item['identifier'] = item_ast.name |
| return self._recursive_process_item(item_ast.type, processed_item, item_ast) |
| |
| elif isinstance(item_ast, c_ast.TypeDecl): |
| return self._recursive_process_item(item_ast.type, processed_item, item_ast) |
| |
| elif isinstance(item_ast, c_ast.IdentifierType): |
| if len(item_ast.names) > 0: |
| processed_item['type'] = item_ast.names |
| return self._format_item(processed_item) |
| |
| elif (isinstance(item_ast, c_ast.Struct) or |
| isinstance(item_ast, c_ast.Union)): |
| if not item_ast.name: |
| nodename, _items_list = self._traverse_ast(item_ast, toplevel=False) |
| try: |
| items_list = [(i[0], '%s.%s' % (parent.declname, i[1])) for i in _items_list] |
| except AttributeError as e: |
| self.logger.info('-- Encountered anonymous_struct/anonymous_union with no name') |
| raise StructWalkerException('Encountered anonymous_struct/anonymous_union with no name') |
| |
| return items_list |
| else: |
| processed_item['type'] = ['struct %s' % (item_ast.name)] |
| return self._format_item(processed_item) |
| |
| elif isinstance(item_ast, c_ast.PtrDecl): |
| if 'is_ptr' not in processed_item: |
| processed_item['is_ptr'] = 0 |
| processed_item['is_ptr'] = processed_item['is_ptr'] + 1 |
| return self._recursive_process_item(item_ast.type, processed_item, item_ast) |
| |
| elif isinstance(item_ast, c_ast.ArrayDecl): |
| processed_item['is_array'] = True |
| if 'array_size' not in processed_item: |
| processed_item['array_size'] = [] |
| processed_item['array_size'].append(int(item_ast.dim.value)) |
| return self._recursive_process_item(item_ast.type, processed_item, item_ast) |
| |
| elif isinstance(item_ast, c_ast.Enum): |
| processed_item['type'] = ['enum %s' % (item_ast.name)] |
| return self._format_item(processed_item) |
| |
| elif isinstance(item_ast, c_ast.FuncDecl): |
| processed_item['is_fnptr'] = True |
| processed_item['type'] = ['void (*)()'] |
| return self._format_item(processed_item) |
| |
| def _traverse_ast(self, node, toplevel=True): |
| items_list = [] |
| |
| # Argument structs are used as types, hence anonymous top-level |
| # structs are ignored. |
| if toplevel and not node.name: |
| return None |
| |
| if not node.children(): |
| return None |
| |
| self.logger.debug('>>> Struct name = %s, coord: %s', node.name, node.coord) |
| for child in node.children(): |
| item = self._recursive_process_item(child[1], {}, None) |
| items_list.extend(item) |
| |
| self.logger.debug('_traverse_ast returns: %s', str((node.name, items_list))) |
| return (node.name, items_list) |
| |
| def visit_Struct(self, node, *a): |
| if node.name in self.local_structs_hierarchy: |
| self.logger.info('Encountered %s again. Ignoring.', repr(node.name)) |
| return |
| |
| try: |
| desc = self._traverse_ast(node) |
| except StructWalkerException as e: |
| self.logger.info('-- Exception raised by StructWalkerException in %s,' |
| 'inspect manually.', |
| repr(node.name)) |
| self.logger.info(str(e)) |
| return |
| |
| if not desc: |
| return |
| |
| struct_name, struct_items = desc |
| self.local_structs_hierarchy[struct_name] = struct_items |
| |
| def generate_local_hierarchy(self): |
| self.visit(self.ast) |
| return self.local_structs_hierarchy |