blob: dc37ea0c7764d65f4daf26f22a90aa917172769d [file] [log] [blame]
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import copy
import datetime
import logging
import os
import re
import time
from lib.dump import Dump
from lib.exceptions import EmptyDumpException, InvalidDumpException
from lib.exceptions import ObsoleteDumpVersionException, ParsingException
from lib.pageframe import PageFrame
from lib.range_dict import ExclusiveRangeDict
from lib.symbol import procfs
LOGGER = logging.getLogger('dmprof')
# Word indexes in one whitespace-split stacktrace line:
# "<virtual> <committed> <alloc_count> <free_count> @ <bucket_id> ...".
# _AT is the position of the literal '@' separator (see
# DeepDump.iter_stacktrace and DeepDump._check_stacktrace_line).
VIRTUAL, COMMITTED, ALLOC_COUNT, FREE_COUNT, _AT, BUCKET_ID = range(6)
# Heap Profile Dump versions
# DUMP_DEEP_[1-4] are obsolete.
# DUMP_DEEP_2+ distinct mmap regions and malloc chunks.
# DUMP_DEEP_3+ don't include allocation functions in their stack dumps.
# DUMP_DEEP_4+ support comments with '#' and global stats "nonprofiled-*".
# DUMP_DEEP_[1-2] should be processed by POLICY_DEEP_1.
# DUMP_DEEP_[3-4] should be processed by POLICY_DEEP_2 or POLICY_DEEP_3.
DUMP_DEEP_1 = 'DUMP_DEEP_1'
DUMP_DEEP_2 = 'DUMP_DEEP_2'
DUMP_DEEP_3 = 'DUMP_DEEP_3'
DUMP_DEEP_4 = 'DUMP_DEEP_4'
# Obsolete versions are recognized only to raise a precise exception.
DUMP_DEEP_OBSOLETE = (DUMP_DEEP_1, DUMP_DEEP_2, DUMP_DEEP_3, DUMP_DEEP_4)
# DUMP_DEEP_5 doesn't separate sections for malloc and mmap.
# malloc and mmap are identified in bucket files.
# DUMP_DEEP_5 should be processed by POLICY_DEEP_4.
DUMP_DEEP_5 = 'DUMP_DEEP_5'
# DUMP_DEEP_6 adds a mmap list to DUMP_DEEP_5.
DUMP_DEEP_6 = 'DUMP_DEEP_6'
class DeepDump(Dump):
  """Represents a heap profile dump (versions DUMP_DEEP_5 / DUMP_DEEP_6)."""

  # Dump file names look like "prefix.<pid>.<count>.heap".
  _PATH_PATTERN = re.compile(r'^(.*)\.([0-9]+)\.([0-9]+)\.heap$')

  # One MMAP_LIST region line: " <begin>-<end> hooked|unhooked <details>".
  # An address is wrapped in parentheses when it should be taken from the
  # surrounding /proc/maps entry instead of being given explicitly.
  _HOOK_PATTERN = re.compile(
      r'^ ([ \(])([a-f0-9]+)([ \)])-([ \(])([a-f0-9]+)([ \)])\s+'
      r'(hooked|unhooked)\s+(.+)$', re.IGNORECASE)

  # Details of a hooked region: "[type ]<committed> / <reserved> @ <bucket>".
  _HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                               '(?P<RESERVED>[0-9]+) @ (?P<BUCKETID>[0-9]+)')
  # Details of an unhooked region: "[type ]<committed> / <reserved>".
  _UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+ )?(?P<COMMITTED>[0-9]+) / '
                                 '(?P<RESERVED>[0-9]+)')
  # Older detail formats kept for backward compatibility.
  _OLD_HOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) @ (?P<BUCKETID>[0-9]+)')
  _OLD_UNHOOKED_PATTERN = re.compile(r'(?P<TYPE>.+) (?P<COMMITTED>[0-9]+)')

  # "Time: YYYY/MM/DD hh:mm:ss[.fff]" and "Time: <seconds since epoch>".
  _TIME_PATTERN_FORMAT = re.compile(
      r'^Time: ([0-9]+/[0-9]+/[0-9]+ [0-9]+:[0-9]+:[0-9]+)(\.[0-9]+)?')
  _TIME_PATTERN_SECONDS = re.compile(r'^Time: ([0-9]+)$')

  def __init__(self, path, modified_time):
    """Initializes an empty dump object for |path|.

    Args:
        path: A dump file path string; must match _PATH_PATTERN.
        modified_time: The file's mtime (float).  Used as the dump time
            until/unless a META "Time:" line overrides it.
    """
    super(DeepDump, self).__init__()
    self._path = path
    matched = self._PATH_PATTERN.match(path)
    self._pid = int(matched.group(2))
    self._count = int(matched.group(3))
    self._time = modified_time
    self._map = {}  # (start, end) -> ('hooked'|'unhooked', region_info dict)
    self._procmaps = ExclusiveRangeDict(ProcMapsEntryAttribute)
    self._stacktrace_lines = []
    self._global_stats = {}  # used only in apply_policy

    self._run_id = ''
    self._pagesize = 4096
    self._pageframe_length = 0
    self._pageframe_encoding = ''
    self._has_pagecount = False

    self._version = ''
    self._lines = []

  @property
  def path(self):
    return self._path

  @property
  def count(self):
    return self._count

  @property
  def time(self):
    return self._time

  @property
  def iter_map(self):
    """Yields ((start, end), (hook_state, region_info)) sorted by range."""
    for address_range, region_value in sorted(self._map.items()):
      yield address_range, region_value

  @property
  def iter_stacktrace(self):
    """Yields (bucket_id, virtual, committed, alloc_count, free_count)."""
    for line in self._stacktrace_lines:
      words = line.split()
      yield (int(words[BUCKET_ID]),
             int(words[VIRTUAL]),
             int(words[COMMITTED]),
             int(words[ALLOC_COUNT]),
             int(words[FREE_COUNT]))

  def global_stat(self, name):
    return self._global_stats[name]

  @property
  def run_id(self):
    return self._run_id

  @property
  def pagesize(self):
    return self._pagesize

  @property
  def pageframe_length(self):
    return self._pageframe_length

  @property
  def pageframe_encoding(self):
    return self._pageframe_encoding

  @property
  def has_pagecount(self):
    return self._has_pagecount

  @staticmethod
  def load(path, log_header='Loading a heap profile dump: '):
    """Loads a heap profile dump.

    Args:
        path: A file path string to load.
        log_header: A preceding string for log messages.

    Returns:
        A loaded DeepDump object.

    Raises:
        ParsingException for invalid heap profile dumps.
    """
    # Construct a DeepDump (not the imported Dump base class): load_file
    # and the version/stat/mmap parsers are defined on this class.
    dump = DeepDump(path, os.stat(path).st_mtime)
    with open(path, 'r') as f:
      dump.load_file(f, log_header)
    return dump

  def load_file(self, f, log_header):
    """Parses the opened dump file |f| into this object's fields.

    Args:
        f: An opened file object for the dump.
        log_header: A preceding string for log messages.

    Raises:
        ParsingException (and subclasses) for invalid dumps; an empty
        dump is logged and ignored rather than raised.
    """
    # Keep everything except '#' comment lines; blank lines are kept.
    self._lines = [line for line in f
                   if line and not line.startswith('#')]

    try:
      self._version, ln = self._parse_version()
      self._parse_meta_information()
      if self._version == DUMP_DEEP_6:
        self._parse_mmap_list()
      self._parse_global_stats()
      self._extract_stacktrace_lines(ln)
    except EmptyDumpException:
      LOGGER.info('%s%s ...ignored an empty dump.', log_header, self._path)
    except ParsingException as e:
      LOGGER.error('%s%s ...error %s', log_header, self._path, e)
      raise
    else:
      LOGGER.info('%s%s (version:%s)', log_header, self._path, self._version)

  def _parse_version(self):
    """Parses a version string in self._lines.

    Returns:
        A pair of (a string representing a version of the stacktrace dump,
        and an integer indicating a line number next to the version string).

    Raises:
        ParsingException for invalid dump versions.
    """
    version = ''

    # Skip until an identifiable line.
    headers = ('STACKTRACES:\n', 'MMAP_STACKTRACES:\n', 'heap profile: ')
    if not self._lines:
      raise EmptyDumpException('Empty heap dump file.')
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: not self._lines[n].startswith(headers))
    if not found:
      raise InvalidDumpException('No version header.')

    # Identify a version.
    if self._lines[ln].startswith('heap profile: '):
      # [13:] retains the space before the version name; strip() drops it.
      version = self._lines[ln][13:].strip()
      if version in (DUMP_DEEP_5, DUMP_DEEP_6):
        (ln, _) = skip_while(
            ln, len(self._lines),
            lambda n: self._lines[n] != 'STACKTRACES:\n')
      elif version in DUMP_DEEP_OBSOLETE:
        raise ObsoleteDumpVersionException(version)
      else:
        raise InvalidDumpException('Invalid version: %s' % version)
    elif self._lines[ln] == 'STACKTRACES:\n':
      # A bare section title with no "heap profile:" header is DUMP_DEEP_1.
      raise ObsoleteDumpVersionException(DUMP_DEEP_1)
    elif self._lines[ln] == 'MMAP_STACKTRACES:\n':
      raise ObsoleteDumpVersionException(DUMP_DEEP_2)

    return (version, ln)

  def _parse_global_stats(self):
    """Parses lines in self._lines as global stats.

    Fills self._global_stats with '<name>_virtual' and '<name>_committed'
    integer entries for each known stat name.
    """
    (ln, _) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'GLOBAL_STATS:\n')

    global_stat_names = [
        'total', 'absent', 'file-exec', 'file-nonexec', 'anonymous', 'stack',
        'other', 'nonprofiled-absent', 'nonprofiled-anonymous',
        'nonprofiled-file-exec', 'nonprofiled-file-nonexec',
        'nonprofiled-stack', 'nonprofiled-other',
        'profiled-mmap', 'profiled-malloc']

    for prefix in global_stat_names:
      # Stat lines end with "<virtual> <committed>" as the last two words.
      (ln, _) = skip_while(
          ln, len(self._lines),
          lambda n: self._lines[n].split()[0] != prefix)
      words = self._lines[ln].split()
      self._global_stats[prefix + '_virtual'] = int(words[-2])
      self._global_stats[prefix + '_committed'] = int(words[-1])

  def _parse_meta_information(self):
    """Parses lines in self._lines for meta information.

    Recognizes 'Time:', 'PageSize:', 'PageFrame:'/'PFN:' and 'RunID:'
    entries under a 'META:' section; stops at 'MMAP_LIST:'/'GLOBAL_STATS:'.
    """
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'META:\n')
    if not found:
      return
    ln += 1

    while True:
      if self._lines[ln].startswith('Time:'):
        matched_seconds = self._TIME_PATTERN_SECONDS.match(self._lines[ln])
        matched_format = self._TIME_PATTERN_FORMAT.match(self._lines[ln])
        if matched_format:
          self._time = time.mktime(datetime.datetime.strptime(
              matched_format.group(1), '%Y/%m/%d %H:%M:%S').timetuple())
          if matched_format.group(2):
            # The fraction is treated as milliseconds; this assumes a
            # 3-digit fraction — other lengths would be scaled wrongly.
            self._time += float(matched_format.group(2)[1:]) / 1000.0
        elif matched_seconds:
          self._time = float(matched_seconds.group(1))
      elif self._lines[ln].startswith('Reason:'):
        pass  # Nothing to do for 'Reason:'
      elif self._lines[ln].startswith('PageSize: '):
        self._pagesize = int(self._lines[ln][10:])
      elif self._lines[ln].startswith('CommandLine:'):
        pass
      elif (self._lines[ln].startswith('PageFrame: ') or
            self._lines[ln].startswith('PFN: ')):
        # Comma-separated flags describing the pageframe encoding.
        if self._lines[ln].startswith('PageFrame: '):
          words = self._lines[ln][11:].split(',')
        else:
          words = self._lines[ln][5:].split(',')
        for word in words:
          if word == '24':
            self._pageframe_length = 24
          elif word == 'Base64':
            self._pageframe_encoding = 'base64'
          elif word == 'PageCount':
            self._has_pagecount = True
      elif self._lines[ln].startswith('RunID: '):
        self._run_id = self._lines[ln][7:].strip()
      elif (self._lines[ln].startswith('MMAP_LIST:') or
            self._lines[ln].startswith('GLOBAL_STATS:')):
        # Skip until "MMAP_LIST:" or "GLOBAL_STATS" is found.
        break
      else:
        pass  # Unknown META entries are silently ignored.
      ln += 1

  def _parse_mmap_list(self):
    """Parses lines in self._lines as a mmap list.

    Fills self._map with (start, end) -> (hook_state, region_info) and
    self._procmaps with the /proc/maps entries interleaved in the list.
    """
    (ln, found) = skip_while(
        0, len(self._lines),
        lambda n: self._lines[n] != 'MMAP_LIST:\n')
    if not found:
      return {}
    ln += 1

    self._map = {}
    current_vma = {}
    pageframe_list = []
    while True:
      # A /proc/maps entry line starts a new VMA context.
      entry = procfs.ProcMaps.parse_line(self._lines[ln])
      if entry:
        current_vma = {}
        for _, _, attr in self._procmaps.iter_range(entry.begin, entry.end):
          for key, value in entry.as_dict().items():
            attr[key] = value
            current_vma[key] = value
        ln += 1
        continue

      # " PF: " lines list pageframes belonging to the next region.
      if self._lines[ln].startswith('  PF: '):
        for pageframe in self._lines[ln][6:].split():
          pageframe_list.append(PageFrame.parse(pageframe, self._pagesize))
        ln += 1
        continue

      matched = self._HOOK_PATTERN.match(self._lines[ln])
      if not matched:
        break
      # 2: starting address
      # 5: end address
      # 7: hooked or unhooked
      # 8: additional information
      if matched.group(7) == 'hooked':
        submatched = self._HOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_HOOKED_PATTERN.match(matched.group(8))
      elif matched.group(7) == 'unhooked':
        submatched = self._UNHOOKED_PATTERN.match(matched.group(8))
        if not submatched:
          submatched = self._OLD_UNHOOKED_PATTERN.match(matched.group(8))
      else:
        assert matched.group(7) in ['hooked', 'unhooked']

      submatched_dict = submatched.groupdict()
      region_info = { 'vma': current_vma }
      if submatched_dict.get('TYPE'):
        region_info['type'] = submatched_dict['TYPE'].strip()
      if submatched_dict.get('COMMITTED'):
        region_info['committed'] = int(submatched_dict['COMMITTED'])
      if submatched_dict.get('RESERVED'):
        region_info['reserved'] = int(submatched_dict['RESERVED'])
      if submatched_dict.get('BUCKETID'):
        region_info['bucket_id'] = int(submatched_dict['BUCKETID'])

      # Parenthesized addresses inherit the bounds of the current VMA.
      if matched.group(1) == '(':
        start = current_vma['begin']
      else:
        start = int(matched.group(2), 16)
      if matched.group(4) == '(':
        end = current_vma['end']
      else:
        end = int(matched.group(5), 16)

      # Trim partially-covered first/last pageframes to the region bounds.
      if pageframe_list and pageframe_list[0].start_truncated:
        pageframe_list[0].set_size(
            pageframe_list[0].size - start % self._pagesize)
      if pageframe_list and pageframe_list[-1].end_truncated:
        pageframe_list[-1].set_size(
            pageframe_list[-1].size - (self._pagesize - end % self._pagesize))
      region_info['pageframe'] = pageframe_list
      pageframe_list = []

      self._map[(start, end)] = (matched.group(7), region_info)
      ln += 1

  def _extract_stacktrace_lines(self, line_number):
    """Extracts the position of stacktrace lines.

    Valid stacktrace lines are stored into self._stacktrace_lines.

    Args:
        line_number: A line number to start parsing in lines.

    Raises:
        ParsingException for invalid dump versions.
    """
    if self._version in (DUMP_DEEP_5, DUMP_DEEP_6):
      # Skip down to the first line starting with a number, ...
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: not self._lines[n].split()[0].isdigit())
      stacktrace_start = line_number
      # ... then take the run of well-formed stacktrace lines.
      (line_number, _) = skip_while(
          line_number, len(self._lines),
          lambda n: self._check_stacktrace_line(self._lines[n]))
      self._stacktrace_lines = self._lines[stacktrace_start:line_number]

    elif self._version in DUMP_DEEP_OBSOLETE:
      raise ObsoleteDumpVersionException(self._version)

    else:
      raise InvalidDumpException('Invalid version: %s' % self._version)

  @staticmethod
  def _check_stacktrace_line(stacktrace_line):
    """Checks if a given stacktrace_line is valid as stacktrace.

    Args:
        stacktrace_line: A string to be checked.

    Returns:
        True if the given stacktrace_line is valid.
    """
    words = stacktrace_line.split()
    if len(words) < BUCKET_ID + 1:
      return False
    if words[BUCKET_ID - 1] != '@':
      return False
    return True
class ProcMapsEntryAttribute(ExclusiveRangeDict.RangeAttribute):
  """Represents an entry of /proc/maps in range_dict.ExclusiveRangeDict."""

  # A placeholder entry whose dict form defines the complete key set;
  # __setitem__ rejects any key that is not present here.
  _DUMMY_ENTRY = procfs.ProcMapsEntry(
      0,     # begin
      0,     # end
      '-',   # readable
      '-',   # writable
      '-',   # executable
      '-',   # private
      0,     # offset
      '00',  # major
      '00',  # minor
      0,     # inode
      ''     # name
      )

  def __init__(self):
    super(ProcMapsEntryAttribute, self).__init__()
    self._entry = self._DUMMY_ENTRY.as_dict()

  def __str__(self):
    return str(self._entry)

  def __repr__(self):
    return 'ProcMapsEntryAttribute' + str(self._entry)

  def __getitem__(self, key):
    return self._entry[key]

  def __setitem__(self, key, value):
    # Only keys present in the dummy entry are valid attributes.
    if key not in self._entry:
      raise KeyError(key)
    self._entry[key] = value

  def copy(self):
    """Returns a deep copy of this attribute (new entry dict and values)."""
    new_entry = ProcMapsEntryAttribute()
    # items() (not Python-2-only iteritems()) keeps this 2/3-compatible.
    for key, value in self._entry.items():
      new_entry[key] = copy.deepcopy(value)
    return new_entry
def skip_while(index, max_index, skipping_condition):
  """Increments |index| until |skipping_condition|(|index|) is False.

  |skipping_condition| is never called with an index >= |max_index|, so
  conditions that index into a list of length |max_index| are safe even
  when |index| starts at or beyond the end (the original evaluated the
  condition once before bounds-checking, which could raise IndexError).

  Args:
      index: An integer line number to start from.
      max_index: An exclusive upper bound for |index|.
      skipping_condition: A callable taking an index; True means "skip".

  Returns:
      A pair of an integer indicating a line number after skipped, and a
      boolean value which is True if found a line which skipping_condition
      is False for.
  """
  while index < max_index and skipping_condition(index):
    index += 1
  return index, index < max_index