# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
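"""Policy-based subcommands of dmprof: 'csv', 'json', and 'list'."""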
import datetime
import json
import logging
import sys
from lib.pageframe import PFNCounts
from lib.policy import PolicySet
from lib.subcommand import SubCommand
LOGGER = logging.getLogger('dmprof')
class PolicyCommands(SubCommand):
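  """Base class for the policy-driven subcommands ('csv', 'json', 'list').

  Handles option parsing, dump loading, and per-dump policy application;
  subclasses differ only in how the aggregated sizes are written out.
  """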
def __init__(self, command):
super(PolicyCommands, self).__init__(
'Usage: %%prog %s [-p POLICY] <first-dump> [shared-first-dumps...]' %
command)
self._parser.add_option('-p', '--policy', type='string', dest='policy',
help='profile with POLICY', metavar='POLICY')
self._parser.add_option('--alternative-dirs', dest='alternative_dirs',
metavar='/path/on/target@/path/on/host[:...]',
help='Read files in /path/on/host/ instead of '
'files in /path/on/target/.')
self._parser.add_option('--timestamp', dest='timestamp',
action='store_true', help='Use timestamp.')
self._timestamp = False
def _set_up(self, sys_argv):
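    """Parses arguments and loads dumps, policies, and shared PFN counts.

    Returns:
      A tuple of (policy_set, dumps, pfn_counts_dict, bucket_set).
    """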
options, args = self._parse_args(sys_argv, 1)
dump_path = args[1]
shared_first_dump_paths = args[2:]
alternative_dirs_dict = {}
if options.alternative_dirs:
for alternative_dir_pair in options.alternative_dirs.split(':'):
target_path, host_path = alternative_dir_pair.split('@', 1)
alternative_dirs_dict[target_path] = host_path
(bucket_set, dumps) = SubCommand.load_basic_files(
dump_path, True, alternative_dirs=alternative_dirs_dict)
self._timestamp = options.timestamp
pfn_counts_dict = {}
for shared_first_dump_path in shared_first_dump_paths:
shared_dumps = SubCommand._find_all_dumps(shared_first_dump_path)
for shared_dump in shared_dumps:
pfn_counts = PFNCounts.load(shared_dump)
if pfn_counts.pid not in pfn_counts_dict:
pfn_counts_dict[pfn_counts.pid] = []
pfn_counts_dict[pfn_counts.pid].append(pfn_counts)
policy_set = PolicySet.load(SubCommand._parse_policy_list(options.policy))
return policy_set, dumps, pfn_counts_dict, bucket_set
def _apply_policy(self, dump, pfn_counts_dict, policy, bucket_set,
first_dump_time):
"""Aggregates the total memory size of each component.
Iterate through all stacktraces and attribute them to one of the components
based on the policy. It is important to apply policy in right order.
Args:
dump: A Dump object.
pfn_counts_dict: A dict mapping a pid to a list of PFNCounts.
policy: A Policy object.
bucket_set: A BucketSet object.
first_dump_time: An integer representing time when the first dump is
dumped.
Returns:
A dict mapping components and their corresponding sizes.
"""
LOGGER.info(' %s' % dump.path)
all_pfn_dict = {}
if pfn_counts_dict:
LOGGER.info(' shared with...')
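      # For each process that shares pages, pick the PFNCounts snapshot whose
      # timestamp is closest to this dump (and not from an exiting process),
      # then merge its page frame counts into all_pfn_dict.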
for pid, pfnset_list in pfn_counts_dict.iteritems():
closest_pfnset_index = None
closest_pfnset_difference = 1024.0
for index, pfnset in enumerate(pfnset_list):
time_difference = pfnset.time - dump.time
if time_difference >= 3.0:
break
elif ((time_difference < 0.0 and pfnset.reason != 'Exiting') or
(0.0 <= time_difference and time_difference < 3.0)):
closest_pfnset_index = index
closest_pfnset_difference = time_difference
elif time_difference < 0.0 and pfnset.reason == 'Exiting':
closest_pfnset_index = None
break
        # Index 0 is a valid match, so compare explicitly against None.
        if closest_pfnset_index is not None:
for pfn, count in pfnset_list[closest_pfnset_index].iter_pfn:
all_pfn_dict[pfn] = all_pfn_dict.get(pfn, 0) + count
LOGGER.info(' %s (time difference = %f)' %
(pfnset_list[closest_pfnset_index].path,
closest_pfnset_difference))
else:
LOGGER.info(' (no match with pid:%d)' % pid)
sizes = dict((c, 0) for c in policy.components)
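    # Attribute hooked malloc allocations first, then memory maps; the
    # remaining entries below are derived from the dump's global stats.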
PolicyCommands._accumulate_malloc(dump, policy, bucket_set, sizes)
verify_global_stats = PolicyCommands._accumulate_maps(
dump, all_pfn_dict, policy, bucket_set, sizes)
# TODO(dmikurube): Remove the verifying code when GLOBAL_STATS is removed.
# http://crbug.com/245603.
for verify_key, verify_value in verify_global_stats.iteritems():
dump_value = dump.global_stat('%s_committed' % verify_key)
if dump_value != verify_value:
LOGGER.warn('%25s: %12d != %d (%d)' % (
verify_key, dump_value, verify_value, dump_value - verify_value))
sizes['mmap-no-log'] = (
dump.global_stat('profiled-mmap_committed') -
sizes['mmap-total-log'])
sizes['mmap-total-record'] = dump.global_stat('profiled-mmap_committed')
sizes['mmap-total-record-vm'] = dump.global_stat('profiled-mmap_virtual')
sizes['tc-no-log'] = (
dump.global_stat('profiled-malloc_committed') -
sizes['tc-total-log'])
sizes['tc-total-record'] = dump.global_stat('profiled-malloc_committed')
sizes['tc-unused'] = (
sizes['mmap-tcmalloc'] -
dump.global_stat('profiled-malloc_committed'))
if sizes['tc-unused'] < 0:
LOGGER.warn(' Assuming tc-unused=0 as it is negative: %d (bytes)' %
sizes['tc-unused'])
sizes['tc-unused'] = 0
sizes['tc-total'] = sizes['mmap-tcmalloc']
# TODO(dmikurube): global_stat will be deprecated.
# See http://crbug.com/245603.
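    # Components listed here are copied directly from the dump's GLOBAL_STATS
    # rather than accumulated from stacktraces or maps.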
for key, value in {
'total': 'total_committed',
'filemapped': 'file_committed',
'absent': 'absent_committed',
'file-exec': 'file-exec_committed',
'file-nonexec': 'file-nonexec_committed',
'anonymous': 'anonymous_committed',
'stack': 'stack_committed',
'other': 'other_committed',
'unhooked-absent': 'nonprofiled-absent_committed',
'total-vm': 'total_virtual',
'filemapped-vm': 'file_virtual',
'anonymous-vm': 'anonymous_virtual',
'other-vm': 'other_virtual' }.iteritems():
if key in sizes:
sizes[key] = dump.global_stat(value)
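    # 'mustbezero' is total committed memory minus the profiled-mmap and
    # non-profiled categories below; it should be zero when every mapped
    # region has been accounted for.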
if 'mustbezero' in sizes:
removed_list = (
'profiled-mmap_committed',
'nonprofiled-absent_committed',
'nonprofiled-anonymous_committed',
'nonprofiled-file-exec_committed',
'nonprofiled-file-nonexec_committed',
'nonprofiled-stack_committed',
'nonprofiled-other_committed')
sizes['mustbezero'] = (
dump.global_stat('total_committed') -
sum(dump.global_stat(removed) for removed in removed_list))
if 'total-exclude-profiler' in sizes:
sizes['total-exclude-profiler'] = (
dump.global_stat('total_committed') -
(sizes['mmap-profiler'] + sizes['mmap-type-profiler']))
if 'hour' in sizes:
sizes['hour'] = (dump.time - first_dump_time) / 60.0 / 60.0
if 'minute' in sizes:
sizes['minute'] = (dump.time - first_dump_time) / 60.0
if 'second' in sizes:
if self._timestamp:
sizes['second'] = datetime.datetime.fromtimestamp(dump.time).isoformat()
else:
sizes['second'] = dump.time - first_dump_time
return sizes
@staticmethod
def _accumulate_malloc(dump, policy, bucket_set, sizes):
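    """Attributes hooked malloc allocations in |dump| to policy components.

    Also maintains the 'tc-total-log' / 'other-total-log' subtotals that
    _apply_policy uses to derive the remaining entries.
    """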
for bucket_id, _, committed, _, _ in dump.iter_stacktrace:
bucket = bucket_set.get(bucket_id)
if not bucket or bucket.allocator_type == 'malloc':
component_match = policy.find_malloc(bucket)
elif bucket.allocator_type == 'mmap':
continue
else:
assert False
sizes[component_match] += committed
assert not component_match.startswith('mmap-')
if component_match.startswith('tc-'):
sizes['tc-total-log'] += committed
else:
sizes['other-total-log'] += committed
@staticmethod
def _accumulate_maps(dump, pfn_dict, policy, bucket_set, sizes):
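    """Attributes memory-mapped regions in |dump| to policy components.

    Returns:
      A dict of per-category committed sizes computed from the maps, used
      only to verify the dump's GLOBAL_STATS (http://crbug.com/245603).
    """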
# TODO(dmikurube): Remove the dict when GLOBAL_STATS is removed.
# http://crbug.com/245603.
global_stats = {
'total': 0,
'file-exec': 0,
'file-nonexec': 0,
'anonymous': 0,
'stack': 0,
'other': 0,
'nonprofiled-file-exec': 0,
'nonprofiled-file-nonexec': 0,
'nonprofiled-anonymous': 0,
'nonprofiled-stack': 0,
'nonprofiled-other': 0,
'profiled-mmap': 0,
}
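    # Each entry from iter_map pairs an address range with (status, attrs),
    # where status is 'hooked' or 'unhooked' and attrs carries 'committed',
    # 'vma' and optionally 'pageframe' information.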
for key, value in dump.iter_map:
# TODO(dmikurube): Remove the subtotal code when GLOBAL_STATS is removed.
# It's temporary verification code for transition described in
# http://crbug.com/245603.
committed = 0
if 'committed' in value[1]:
committed = value[1]['committed']
global_stats['total'] += committed
key = 'other'
name = value[1]['vma']['name']
if name.startswith('/'):
if value[1]['vma']['executable'] == 'x':
key = 'file-exec'
else:
key = 'file-nonexec'
elif name == '[stack]':
key = 'stack'
elif name == '':
key = 'anonymous'
global_stats[key] += committed
if value[0] == 'unhooked':
global_stats['nonprofiled-' + key] += committed
if value[0] == 'hooked':
global_stats['profiled-mmap'] += committed
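      # Attribute the region itself: when page frame data is available, size
      # each match per page frame (so shared pages use the merged PFN counts);
      # otherwise fall back to the region's committed size.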
if value[0] == 'unhooked':
if pfn_dict and dump.pageframe_length:
for pageframe in value[1]['pageframe']:
component_match = policy.find_unhooked(value, pageframe, pfn_dict)
sizes[component_match] += pageframe.size
else:
component_match = policy.find_unhooked(value)
sizes[component_match] += int(value[1]['committed'])
elif value[0] == 'hooked':
if pfn_dict and dump.pageframe_length:
for pageframe in value[1]['pageframe']:
component_match, _ = policy.find_mmap(
value, bucket_set, pageframe, pfn_dict)
sizes[component_match] += pageframe.size
assert not component_match.startswith('tc-')
if component_match.startswith('mmap-'):
sizes['mmap-total-log'] += pageframe.size
else:
sizes['other-total-log'] += pageframe.size
else:
component_match, _ = policy.find_mmap(value, bucket_set)
sizes[component_match] += int(value[1]['committed'])
if component_match.startswith('mmap-'):
sizes['mmap-total-log'] += int(value[1]['committed'])
else:
sizes['other-total-log'] += int(value[1]['committed'])
else:
LOGGER.error('Unrecognized mapping status: %s' % value[0])
return global_stats
class CSVCommand(PolicyCommands):
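  """Outputs policy-applied component sizes for each dump as CSV."""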
def __init__(self):
super(CSVCommand, self).__init__('csv')
def do(self, sys_argv):
policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
return self._output(
policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)
def _output(self, policy_set, dumps, pfn_counts_dict, bucket_set, out):
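    """Writes one CSV block per policy.

    Each block has a header row of component names followed by one row per
    dump, with sizes in megabytes (time components excepted) and padded
    with commas to max_components columns.
    """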
max_components = 0
for label in policy_set:
max_components = max(max_components, len(policy_set[label].components))
for label in sorted(policy_set):
components = policy_set[label].components
if len(policy_set) > 1:
out.write('%s%s\n' % (label, ',' * (max_components - 1)))
out.write('%s%s\n' % (
','.join(components), ',' * (max_components - len(components))))
      LOGGER.info('Applying policy %s to...' % label)
for index, dump in enumerate(dumps):
if index == 0:
first_dump_time = dump.time
component_sizes = self._apply_policy(
dump, pfn_counts_dict, policy_set[label], bucket_set,
first_dump_time)
s = []
for c in components:
if c in ('hour', 'minute', 'second'):
if isinstance(component_sizes[c], str):
s.append('%s' % component_sizes[c])
else:
s.append('%05.5f' % (component_sizes[c]))
else:
s.append('%05.5f' % (component_sizes[c] / 1024.0 / 1024.0))
out.write('%s%s\n' % (
','.join(s), ',' * (max_components - len(components))))
bucket_set.clear_component_cache()
return 0
class JSONCommand(PolicyCommands):
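  """Outputs policy-applied component sizes as a JSON_DEEP_2 document."""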
def __init__(self):
super(JSONCommand, self).__init__('json')
def do(self, sys_argv):
policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
return self._output(
policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)
def _output(self, policy_set, dumps, pfn_counts_dict, bucket_set, out):
json_base = {
'version': 'JSON_DEEP_2',
'policies': {},
}
for label in sorted(policy_set):
json_base['policies'][label] = {
'legends': policy_set[label].components,
'snapshots': [],
}
      LOGGER.info('Applying policy %s to...' % label)
for index, dump in enumerate(dumps):
if index == 0:
first_dump_time = dump.time
component_sizes = self._apply_policy(
dump, pfn_counts_dict, policy_set[label], bucket_set,
first_dump_time)
component_sizes['dump_path'] = dump.path
component_sizes['dump_time'] = datetime.datetime.fromtimestamp(
dump.time).strftime('%Y-%m-%d %H:%M:%S')
json_base['policies'][label]['snapshots'].append(component_sizes)
bucket_set.clear_component_cache()
json.dump(json_base, out, indent=2, sort_keys=True)
return 0
class ListCommand(PolicyCommands):
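  """Prints policy-applied component sizes per dump in a readable list."""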
def __init__(self):
super(ListCommand, self).__init__('list')
def do(self, sys_argv):
policy_set, dumps, pfn_counts_dict, bucket_set = self._set_up(sys_argv)
return self._output(
policy_set, dumps, pfn_counts_dict, bucket_set, sys.stdout)
def _output(self, policy_set, dumps, pfn_counts_dict, bucket_set, out):
for label in sorted(policy_set):
      LOGGER.info('Applying policy %s to...' % label)
for dump in dumps:
component_sizes = self._apply_policy(
dump, pfn_counts_dict, policy_set[label], bucket_set, dump.time)
out.write('%s for %s:\n' % (label, dump.path))
for c in policy_set[label].components:
if c in ['hour', 'minute', 'second']:
out.write('%40s %12.3f\n' % (c, component_sizes[c]))
else:
out.write('%40s %12d\n' % (c, component_sizes[c]))
bucket_set.clear_component_cache()
return 0