| # Copyright 2014 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import copy |
| import datetime |
| import logging |
| import os |
| import re |
| import time |
| |
| from lib.dump import Dump |
| from lib.exceptions import EmptyDumpException, InvalidDumpException |
| from lib.exceptions import ObsoleteDumpVersionException, ParsingException |
| from lib.pageframe import PageFrame |
| from lib.range_dict import ExclusiveRangeDict |
| from lib.symbol import procfs |
| |
| |
LOGGER = logging.getLogger('dmprof')

# Column indices of a whitespace-separated stacktrace line:
#   <virtual> <committed> <alloc count> <free count> @ <bucket id>
VIRTUAL = 0
COMMITTED = 1
ALLOC_COUNT = 2
FREE_COUNT = 3
_AT = 4
BUCKET_ID = 5


# Heap Profile Dump versions

# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2+ distinguish mmap regions from malloc chunks.
# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
DUMP_DEEP_2 = 'DUMP_DEEP_2'
DUMP_DEEP_3 = 'DUMP_DEEP_3'
DUMP_DEEP_4 = 'DUMP_DEEP_4'

# Versions that are recognized but no longer parsed.
DUMP_DEEP_OBSOLETE = (
    DUMP_DEEP_1,
    DUMP_DEEP_2,
    DUMP_DEEP_3,
    DUMP_DEEP_4,
)

# DUMP_DEEP_5 doesn't have separate sections for malloc and mmap.
# malloc and mmap are identified in bucket files.
# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'

# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
DUMP_DEEP_6 = 'DUMP_DEEP_6'
| |
| |
class DeepDump(Dump):
  """Represents a heap profile dump.

  Only the DUMP_DEEP_5 and DUMP_DEEP_6 formats are parsed; dumps in an older
  format are rejected with ObsoleteDumpVersionException.
  """

  # Dump file names look like "<prefix>.<pid>.<count>.heap".
  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')

  # A region line in the MMAP_LIST section.
  #   group 2: starting address
  #   group 5: end address
  #   group 7: hooked or unhooked
  #   group 8: additional information
  # Groups 1 and 4 are '(' when the address is parenthesized; the boundary of
  # the current VMA is used instead of the written address in that case.
  _HOOK_PATTERN = re.compile(
      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)

  # Formats of the additional information (group 8 of _HOOK_PATTERN).
  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                               '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                                 '(?P<RESERVED>[0-9]+)')

  # Fallbacks tried when the patterns above do not match.
  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')

  # "Time:" is written either as a formatted date or as a number of seconds.
  _TIME_PATTERN_FORMAT = re.compile(
      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')

  def __init__(self, path, modified_time):
    """Initializes an empty dump; call load_file() to fill it.

    Args:
        path: A dump file path.  It must match "<prefix>.<pid>.<count>.heap"
            because the pid and the count are taken from it.
        modified_time: A timestamp used as the dump time unless the dump
            carries its own "Time:" meta information.
    """
    super(DeepDump, self).__init__()
    self._path = path
    matched = self._PATH_PATTERN.match(path)
    self._pid = int(matched.group(2))
    self._count = int(matched.group(3))
    self._time = modified_time
    self._map = {}
    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
    self._stacktrace_lines = []
    self._global_stats = {}  # used only in apply_policy

    self._run_id = ''
    self._pagesize = 4096
    self._pageframe_length = 0
    self._pageframe_encoding = ''
    self._has_pagecount = False

    self._version = ''
    self._lines = []

  @property
  def path(self):
    return self._path

  @property
  def count(self):
    return self._count

  @property
  def time(self):
    return self._time

  @property
  def iter_map(self):
    """Yields ((start, end), (hook type, region info)) sorted by address."""
    for address_range, region in sorted(self._map.items()):
      yield address_range, region

  @property
  def iter_stacktrace(self):
    """Yields (bucket id, virtual, committed, alloc count, free count)."""
    for line in self._stacktrace_lines:
      words = line.split()
      yield (int(words[BUCKET_ID]),
             int(words[VIRTUAL]),
             int(words[COMMITTED]),
             int(words[ALLOC_COUNT]),
             int(words[FREE_COUNT]))

  def global_stat(self, name):
    """Returns a global stat such as 'total_virtual' or 'total_committed'.

    Raises:
        KeyError if no stat of the given name has been parsed.
    """
    return self._global_stats[name]

  @property
  def run_id(self):
    return self._run_id

  @property
  def pagesize(self):
    return self._pagesize

  @property
  def pageframe_length(self):
    return self._pageframe_length

  @property
  def pageframe_encoding(self):
    return self._pageframe_encoding

  @property
  def has_pagecount(self):
    return self._has_pagecount

  @staticmethod
  def load(path, log_header='Loading a heap profile dump: '):
    """Loads a heap profile dump.

    Args:
        path: A file path string to load.
        log_header: A preceding string for log messages.

    Returns:
        A loaded DeepDump object.

    Raises:
        ParsingException for invalid heap profile dumps.
    """
    # The file's modification time is the default timestamp of the dump.
    dump = DeepDump(path, os.stat(path).st_mtime)
    with open(path, 'r') as f:
      dump.load_file(f, log_header)
    return dump

  def load_file(self, f, log_header):
    """Parses a dump from an iterable of lines.

    An empty dump is logged and ignored; any other parsing error is logged
    and re-raised.

    Args:
        f: An iterable of lines, typically an opened dump file.
        log_header: A preceding string for log messages.

    Raises:
        ParsingException for invalid heap profile dumps.
    """
    # Comment lines starting with '#' are dropped before parsing.
    self._lines = [line for line in f
                   if line and not line.startswith('#')]

    try:
      self._version, ln = self._parse_version()
      self._parse_meta_information()
      if self._version == DUMP_DEEP_6:
        self._parse_mmap_list()
      self._parse_global_stats()
      self._extract_stacktrace_lines(ln)
    except EmptyDumpException:
      LOGGER.info('%s%s ...ignored an empty dump.', log_header, self._path)
    except ParsingException as e:
      LOGGER.error('%s%s ...error %s', log_header, self._path, e)
      raise
    else:
      LOGGER.info('%s%s (version:%s)', log_header, self._path, self._version)

  def _parse_version(self):
    """Parses a version string in self._lines.

    Returns:
        A pair of (a string representing a version of the stacktrace dump,
        and the index of the 'STACKTRACES:' line in self._lines, or
        len(self._lines) if there is no such line).

    Raises:
        EmptyDumpException if there are no lines at all.
        ParsingException for invalid or obsolete dump versions.
    """
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    if not self._lines:
      raise EmptyDumpException('Empty heap dump file.')

    # Skip until an identifiable line.
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: not self._lines[n].startswith(headers))
    if not found:
      raise InvalidDumpException('No version header.')

    # Identify a version.
    version = ''
    header_line = self._lines[ln]
    if header_line.startswith('heap profile: '):
      version = header_line[13:].strip()
      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
        (ln, _) = skip_while(
            ln, len(self._lines),
            lambda n: self._lines[n] != 'STACKTRACES:\n')
      elif version in DUMP_DEEP_OBSOLETE:
        raise ObsoleteDumpVersionException(version)
      else:
        raise InvalidDumpException('Invalid version: %s' % version)
    elif header_line == 'STACKTRACES:\n':
      # Dumps without a 'heap profile: ' header are in the oldest formats.
      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
    elif header_line == 'MMAP_STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_2)

    return (version, ln)

  def _parse_global_stats(self):
    """Parses lines in self._lines as global stats.

    For each stat name, the last two words of its line are stored in
    self._global_stats as '<name>_virtual' and '<name>_committed'.

    Raises:
        InvalidDumpException if the section or one of the stats is missing.
    """
    total_lines = len(self._lines)
    (ln, found) = skip_while(
        0, total_lines,
        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')
    if not found:
      raise InvalidDumpException('No GLOBAL_STATS section.')

    global_stat_names = [
        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
        'nonprofiled-stack', 'nonprofiled-other',
        'profiled-mmap', 'profiled-malloc']

    # The stats are searched for in this order, each one starting from the
    # line of the previous one.
    for prefix in global_stat_names:
      # Comparing a one-element slice keeps blank lines from raising.
      (ln, found) = skip_while(
          ln, total_lines,
          lambda n: self._lines[n].split()[:1] != [prefix])
      if not found:
        raise InvalidDumpException('No global stat: %s' % prefix)
      words = self._lines[ln].split()
      self._global_stats[prefix + '_virtual'] = int(words[-2])
      self._global_stats[prefix + '_committed'] = int(words[-1])

  def _parse_meta_information(self):
    """Parses lines in self._lines for meta information.

    Reads the lines following 'META:' until 'MMAP_LIST:', 'GLOBAL_STATS:' or
    the end of the dump.  Does nothing if there is no 'META:' line.
    """
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'META:\n')
    if not found:
      return
    ln += 1

    total_lines = len(self._lines)
    while ln < total_lines:
      line = self._lines[ln]
      if line.startswith('Time:'):
        matched_format = self._TIME_PATTERN_FORMAT.match(line)
        matched_seconds = self._TIME_PATTERN_SECONDS.match(line)
        if matched_format:
          # time.mktime() interprets the parsed date as local time.
          self._time = time.mktime(datetime.datetime.strptime(
              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
          if matched_format.group(2):
            # The digits after '.' are taken as a number of milliseconds.
            self._time += float(matched_format.group(2)[1:]) / 1000.0
        elif matched_seconds:
          self._time = float(matched_seconds.group(1))
      elif line.startswith(('Reason:', 'CommandLine:')):
        pass  # Nothing to do for 'Reason:' and 'CommandLine:'.
      elif line.startswith('PageSize: '):
        self._pagesize = int(line[10:])
      elif line.startswith(('PageFrame: ', 'PFN: ')):
        if line.startswith('PageFrame: '):
          words = line[11:].split(',')
        else:
          words = line[5:].split(',')
        for word in words:
          # The last option still carries the line's trailing newline.
          word = word.strip()
          if word == '24':
            self._pageframe_length = 24
          elif word == 'Base64':
            self._pageframe_encoding = 'base64'
          elif word == 'PageCount':
            self._has_pagecount = True
      elif line.startswith('RunID: '):
        self._run_id = line[7:].strip()
      elif line.startswith(('MMAP_LIST:', 'GLOBAL_STATS:')):
        # The meta information ends where the next section begins.
        break
      ln += 1

  def _parse_mmap_list(self):
    """Parses lines in self._lines as a mmap list.

    Fills self._map with {(start, end): (hook type, region info)} and updates
    self._procmaps with the /proc/maps entries found in the list.  Parsing
    stops at the first line that is none of a /proc/maps entry, a ' PF: '
    line or a region line.  Does nothing if there is no 'MMAP_LIST:' line.

    Raises:
        InvalidDumpException if the additional information of a region line
        cannot be parsed.
    """
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'MMAP_LIST:\n')
    if not found:
      return

    ln += 1
    self._map = {}
    current_vma = {}
    pageframe_list = []
    total_lines = len(self._lines)
    while ln < total_lines:
      line = self._lines[ln]
      ln += 1

      # A /proc/maps entry starts a new VMA for the following region lines.
      entry = procfs.ProcMaps.parse_line(line)
      if entry:
        current_vma = {}
        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
          for key, value in entry.as_dict().items():
            attr[key] = value
            current_vma[key] = value
        continue

      # Page frames are collected until the region line they belong to.
      if line.startswith(' PF: '):
        for pageframe in line[5:].split():
          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
        continue

      matched = self._HOOK_PATTERN.match(line)
      if not matched:
        break

      hook_type = matched.group(7)
      details = matched.group(8)
      if hook_type == 'hooked':
        submatched = (self._HOOKED_PATTERN.match(details) or
                      self._OLD_HOOKED_PATTERN.match(details))
      elif hook_type == 'unhooked':
        submatched = (self._UNHOOKED_PATTERN.match(details) or
                      self._OLD_UNHOOKED_PATTERN.match(details))
      else:
        # _HOOK_PATTERN ignores case, so e.g. 'HOOKED' can get here.
        submatched = None
      if not submatched:
        raise InvalidDumpException(
            'Invalid mmap list entry: %s' % line.rstrip())

      submatched_dict = submatched.groupdict()
      region_info = {'vma': current_vma}
      if submatched_dict.get('TYPE'):
        region_info['type'] = submatched_dict['TYPE'].strip()
      for group_name, key in (('COMMITTED', 'committed'),
                              ('RESERVED', 'reserved'),
                              ('BUCKETID', 'bucket_id')):
        if submatched_dict.get(group_name):
          region_info[key] = int(submatched_dict[group_name])

      # A parenthesized address is replaced by the boundary of the VMA.
      if matched.group(1) == '(':
        start = current_vma['begin']
      else:
        start = int(matched.group(2), 16)
      if matched.group(4) == '(':
        end = current_vma['end']
      else:
        end = int(matched.group(5), 16)

      # Shrink truncated page frames at both ends by the part of the page
      # outside [start, end).
      if pageframe_list and pageframe_list[0].start_truncated:
        pageframe_list[0].set_size(
            pageframe_list[0].size - start % self._pagesize)
      if pageframe_list and pageframe_list[-1].end_truncated:
        pageframe_list[-1].set_size(
            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
      region_info['pageframe'] = pageframe_list
      pageframe_list = []

      self._map[(start, end)] = (hook_type, region_info)

  def _extract_stacktrace_lines(self, line_number):
    """Extracts the position of stacktrace lines.

    Valid stacktrace lines are stored into self._stacktrace_lines.  They are
    the consecutive valid lines beginning at the first line, at or after
    line_number, whose first word is a number.  Nothing is stored if there is
    no such line.

    Args:
        line_number: A line number to start parsing in lines.

    Raises:
        ParsingException for invalid dump versions.
    """
    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
      total_lines = len(self._lines)

      def is_not_stacktrace_start(n):
        # Blank lines have no first word; they are skipped as well.
        words = self._lines[n].split()
        return not (words and words[0].isdigit())

      # skip_while() is only called with an index inside self._lines.
      stacktrace_start = line_number
      if stacktrace_start < total_lines:
        (stacktrace_start, _) = skip_while(
            stacktrace_start, total_lines, is_not_stacktrace_start)
      stacktrace_end = stacktrace_start
      if stacktrace_end < total_lines:
        (stacktrace_end, _) = skip_while(
            stacktrace_end, total_lines,
            lambda n: self._check_stacktrace_line(self._lines[n]))
      self._stacktrace_lines = self._lines[stacktrace_start:stacktrace_end]

    elif self._version in DUMP_DEEP_OBSOLETE:
      raise ObsoleteDumpVersionException(self._version)

    else:
      raise InvalidDumpException('Invalid version: %s' % self._version)

  @staticmethod
  def _check_stacktrace_line(stacktrace_line):
    """Checks if a given stacktrace_line is valid as stacktrace.

    A valid line has a word at the bucket id column and '@' just before it.

    Args:
        stacktrace_line: A string to be checked.

    Returns:
        True if the given stacktrace_line is valid.
    """
    words = stacktrace_line.split()
    return len(words) > BUCKET_ID and words[_AT] == '@'
| |
| |
class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict.

  The fields of the entry are kept in a dict which is initialized from a
  dummy entry.  Existing fields can be read and overwritten by key; new keys
  cannot be added.
  """

  # Positional arguments, in order: begin, end, readable, writable,
  # executable, private, offset, major, minor, inode, name.
  _DUMMY_ENTRY = procfs.ProcMapsEntry(
      0, 0, '-', '-', '-', '-', 0, '00', '00', 0, '')

  def __init__(self):
    super(ProcMapsEntryAttribute, self).__init__()
    self._entry = self._DUMMY_ENTRY.as_dict()

  def __str__(self):
    return str(self._entry)

  def __repr__(self):
    return 'ProcMapsEntryAttribute%s' % (str(self._entry),)

  def __getitem__(self, key):
    return self._entry[key]

  def __setitem__(self, key, value):
    # Only the fields of the dummy entry are accepted.
    if key in self._entry:
      self._entry[key] = value
    else:
      raise KeyError(key)

  def copy(self):
    """Returns a new attribute holding deep copies of all field values."""
    duplicate = ProcMapsEntryAttribute()
    for key in self._entry:
      duplicate[key] = copy.deepcopy(self._entry[key])
    return duplicate
| |
| |
def skip_while(index, max_index, skipping_condition):
  """Increments |index| until |skipping_condition|(|index|) is False.

  |skipping_condition| is only called with indices below |max_index|, so a
  starting |index| at or past |max_index| returns immediately.

  Args:
      index: An integer index to start from.
      max_index: An integer; indices at or above it are never examined.
      skipping_condition: A function taking an index and returning True while
          the index should be skipped.

  Returns:
      A pair of an integer indicating a line number after skipped, and a
      boolean value which is True if found a line which skipping_condition
      is False for.
  """
  while index < max_index:
    if not skipping_condition(index):
      return index, True
    index += 1
  return index, False