blob: 2a41bb25106740ffa1ca695a5b50ff81bea5163b [file] [log] [blame]
# Copyright 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import json
import logging
import os
import re
# Logger registered under the name 'dmprof'.
LOGGER = logging.getLogger('dmprof')

# The parent of the directory that contains this file.
BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Location of the policy directory file, 'policies.json', under BASE_PATH.
POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')

# Heap Profile Policy versions

# POLICY_DEEP_1 DOES NOT include allocation_type columns.
# mmap regions are distinguished by mmap frames in the pattern column.
POLICY_DEEP_1 = 'POLICY_DEEP_1'

# POLICY_DEEP_2 DOES include allocation_type columns.
# mmap regions are distinguished by the allocation_type column.
POLICY_DEEP_2 = 'POLICY_DEEP_2'

# POLICY_DEEP_3 is in JSON format.
POLICY_DEEP_3 = 'POLICY_DEEP_3'

# POLICY_DEEP_4 contains typeinfo.
POLICY_DEEP_4 = 'POLICY_DEEP_4'
class Rule(object):
  """Holds a single matching rule read from a policy file.

  Each *_pattern constructor argument is the source text of a regular
  expression.  A non-empty pattern is compiled with an end-of-string anchor
  appended to it; an omitted or empty pattern is stored as None.
  """

  def __init__(self,
               name,
               allocator_type,
               stackfunction_pattern=None,
               stacksourcefile_pattern=None,
               typeinfo_pattern=None,
               mappedpathname_pattern=None,
               mappedpermission_pattern=None,
               sharedwith=None):
    def anchored(source):
      # Falsy sources (None or '') stay None; the others get r'\Z' appended
      # before compilation.
      return re.compile(source + r'\Z') if source else None

    self._name = name
    self._allocator_type = allocator_type
    self._stackfunction_pattern = anchored(stackfunction_pattern)
    self._stacksourcefile_pattern = anchored(stacksourcefile_pattern)
    self._typeinfo_pattern = anchored(typeinfo_pattern)
    self._mappedpathname_pattern = anchored(mappedpathname_pattern)
    self._mappedpermission_pattern = anchored(mappedpermission_pattern)
    # A falsy |sharedwith| is replaced by a fresh empty list.
    self._sharedwith = sharedwith if sharedwith else []

  @property
  def name(self):
    """The rule name given to the constructor."""
    return self._name

  @property
  def allocator_type(self):
    """The allocator type given to the constructor."""
    return self._allocator_type

  @property
  def stackfunction_pattern(self):
    """Compiled stack-function pattern, or None when not specified."""
    return self._stackfunction_pattern

  @property
  def stacksourcefile_pattern(self):
    """Compiled stack-source-file pattern, or None when not specified."""
    return self._stacksourcefile_pattern

  @property
  def typeinfo_pattern(self):
    """Compiled typeinfo pattern, or None when not specified."""
    return self._typeinfo_pattern

  @property
  def mappedpathname_pattern(self):
    """Compiled mapped-pathname pattern, or None when not specified."""
    return self._mappedpathname_pattern

  @property
  def mappedpermission_pattern(self):
    """Compiled mapped-permission pattern, or None when not specified."""
    return self._mappedpermission_pattern

  @property
  def sharedwith(self):
    """The |sharedwith| value given to the constructor, or [] if falsy."""
    return self._sharedwith
class Policy(object):
  """Represents a policy, the parsed content of a policy file.

  A policy holds an ordered list of rules, a version string and a list of
  component names.  The find_* methods walk the rules in list order and
  report the first rule that matches.
  """

  # Matches '//' through the end of the line.  The match is purely textual, so
  # a '//' that appears inside a JSON string value is stripped as well.
  JSON_COMMENT_REGEX = re.compile(r'//.*')

  def __init__(self, rules, version, components):
    self._rules = rules
    self._version = version
    self._components = components

  @property
  def rules(self):
    """The list of rules, in matching order."""
    return self._rules

  @property
  def version(self):
    """The 'version' value of the policy."""
    return self._version

  @property
  def components(self):
    """The 'components' value of the policy."""
    return self._components

  def find_rule(self, component_name):
    """Finds a rule whose name is |component_name|.

    Returns:
        The first rule with that name, or None if there is no such rule.
    """
    for rule in self._rules:
      if rule.name == component_name:
        return rule
    return None

  def find_malloc(self, bucket):
    """Finds a matching component name which a given |bucket| belongs to.

    The result is stored in |bucket.component_cache| and reused on later calls.

    Args:
        bucket: A Bucket object to be searched for.

    Returns:
        A string representing a component name.  'no-bucket' is returned when
        |bucket| is falsy.

    Raises:
        AssertionError: If no 'malloc' rule matches |bucket|.
    """
    assert not bucket or bucket.allocator_type == 'malloc'

    if not bucket:
      return 'no-bucket'
    if bucket.component_cache:
      return bucket.component_cache

    stackfunction = bucket.symbolized_joined_stackfunction
    stacksourcefile = bucket.symbolized_joined_stacksourcefile
    typeinfo = bucket.symbolized_typeinfo
    # A "symbolized" typeinfo that still starts with '0x' is replaced by the
    # bucket's |typeinfo_name| before matching.
    if typeinfo.startswith('0x'):
      typeinfo = bucket.typeinfo_name

    for rule in self._rules:
      if (rule.allocator_type == 'malloc' and
          self._pattern_matches(rule.stackfunction_pattern, stackfunction) and
          self._pattern_matches(rule.stacksourcefile_pattern,
                                stacksourcefile) and
          self._pattern_matches(rule.typeinfo_pattern, typeinfo)):
        bucket.component_cache = rule.name
        return rule.name

    # Raised explicitly instead of 'assert False' so that the failure is not
    # stripped (leaving a silent None return) when running under 'python -O'.
    raise AssertionError('No malloc rule matches the given bucket.')

  def find_mmap(self, region, bucket_set,
                pageframe=None, group_pfn_counts=None):
    """Finds a matching component which a given mmap |region| belongs to.

    It uses |bucket_set| to match with backtraces.  If |pageframe| is given,
    it considers memory sharing among processes.

    NOTE: Don't use Bucket's |component_cache| for mmap regions because they're
    classified not only with bucket information (mappedpathname for example).

    Args:
        region: A tuple representing a memory region.
        bucket_set: A BucketSet object to look up backtraces.
        pageframe: A PageFrame object representing a pageframe maybe including
            a pagecount.
        group_pfn_counts: A dict mapping a PFN to the number of times the
            pageframe is mapped by the known "group (Chrome)" processes.

    Returns:
        A tuple of (component name, bucket).  ('no-bucket', None) is returned
        when |bucket_set| has no bucket for the region.

    Raises:
        AssertionError: If no 'mmap' rule matches |region|.
    """
    assert region[0] == 'hooked'
    bucket = bucket_set.get(region[1]['bucket_id'])
    assert not bucket or bucket.allocator_type == 'mmap'

    if not bucket:
      return 'no-bucket', None

    stackfunction = bucket.symbolized_joined_stackfunction
    stacksourcefile = bucket.symbolized_joined_stacksourcefile
    sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)

    for rule in self._rules:
      if (rule.allocator_type == 'mmap' and
          self._pattern_matches(rule.stackfunction_pattern, stackfunction) and
          self._pattern_matches(rule.stacksourcefile_pattern,
                                stacksourcefile) and
          self._region_matches(rule, region, pageframe, sharedwith)):
        return rule.name, bucket

    # See find_malloc: explicit raise survives 'python -O'.
    raise AssertionError('No mmap rule matches the given region.')

  def find_unhooked(self, region, pageframe=None, group_pfn_counts=None):
    """Finds a matching component which a given unhooked |region| belongs to.

    If |pageframe| is given, it considers memory sharing among processes.

    Args:
        region: A tuple representing a memory region.
        pageframe: A PageFrame object representing a pageframe maybe including
            a pagecount.
        group_pfn_counts: A dict mapping a PFN to the number of times the
            pageframe is mapped by the known "group (Chrome)" processes.

    Returns:
        A string representing a component name.

    Raises:
        AssertionError: If no 'unhooked' rule matches |region|.
    """
    assert region[0] == 'unhooked'
    sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)

    for rule in self._rules:
      if (rule.allocator_type == 'unhooked' and
          self._region_matches(rule, region, pageframe, sharedwith)):
        return rule.name

    # See find_malloc: explicit raise survives 'python -O'.
    raise AssertionError('No unhooked rule matches the given region.')

  @staticmethod
  def load(filename, filetype):
    """Loads a policy file of |filename| in a |filetype|.

    |filename| is joined onto the module-level BASE_PATH before opening.

    Args:
        filename: A filename to be loaded.
        filetype: A string to specify a type of the file.  Only 'json' is
            supported for now.

    Returns:
        A loaded Policy object, or None if |filetype| is not supported.
    """
    with open(os.path.join(BASE_PATH, filename)) as policy_f:
      return Policy.parse(policy_f, filetype)

  @staticmethod
  def parse(policy_f, filetype):
    """Parses a policy file content in a |filetype|.

    Args:
        policy_f: An IO object to be loaded.
        filetype: A string to specify a type of the file.  Only 'json' is
            supported for now.

    Returns:
        A loaded Policy object, or None if |filetype| is not supported.
    """
    if filetype == 'json':
      return Policy._parse_json(policy_f)
    return None

  @staticmethod
  def _parse_json(policy_f):
    """Parses policy file in json format.

    A policy file contains component's names and their stacktrace pattern
    written in regular expression.  Those patterns are matched against each
    symbols of each stacktraces in the order written in the policy file.

    '//' comments are removed with JSON_COMMENT_REGEX before the text is
    handed to the JSON parser.

    Args:
        policy_f: A File/IO object to read.

    Returns:
        A loaded policy object.
    """
    policy_json = Policy.JSON_COMMENT_REGEX.sub('', policy_f.read())
    policy = json.loads(policy_json)

    rules = []
    for rule in policy['rules']:
      # 'stacktrace' is accepted as a fallback key for 'stackfunction'.
      stackfunction = rule.get('stackfunction') or rule.get('stacktrace')
      rules.append(Rule(
          rule['name'],
          rule['allocator'],  # allocator_type
          stackfunction,
          rule.get('stacksourcefile'),
          rule.get('typeinfo'),
          rule.get('mappedpathname'),
          rule.get('mappedpermission'),
          rule.get('sharedwith')))

    return Policy(rules, policy['version'], policy['components'])

  @staticmethod
  def _pattern_matches(pattern, text):
    """Returns True if |pattern| is unset or matches |text| from its start."""
    return not pattern or bool(pattern.match(text))

  @staticmethod
  def _region_matches(rule, region, pageframe, sharedwith):
    """Checks the VMA-related and sharing-related conditions of |rule|.

    The VMA fields of |region| are read only when |rule| actually has the
    corresponding pattern.

    Args:
        rule: A rule to be checked.
        region: A tuple representing a memory region.
        pageframe: The pageframe given to the caller, or None.
        sharedwith: The category computed by _categorize_pageframe.

    Returns:
        True if |region| satisfies the mappedpathname, mappedpermission and
        sharedwith conditions of |rule|.
    """
    if rule.mappedpathname_pattern:
      if not rule.mappedpathname_pattern.match(region[1]['vma']['name']):
        return False

    if rule.mappedpermission_pattern:
      vma = region[1]['vma']
      permission = (vma['readable'] + vma['writable'] +
                    vma['executable'] + vma['private'])
      if not rule.mappedpermission_pattern.match(permission):
        return False

    # The sharing condition is applied only when the rule restricts sharing
    # and a pageframe was supplied.
    return (not rule.sharedwith or
            not pageframe or
            sharedwith in rule.sharedwith)

  @staticmethod
  def _categorize_pageframe(pageframe, group_pfn_counts):
    """Categorizes a pageframe based on its sharing status.

    Args:
        pageframe: A PageFrame object, or None.
        group_pfn_counts: A dict mapping a PFN to the number of times the
            pageframe is mapped by the group processes.  None is treated as
            an empty dict.

    Returns:
        'private' if |pageframe| is not shared with other processes.  'group'
        if |pageframe| is shared only with group (Chrome-related) processes.
        'others' if |pageframe| is shared with non-group processes.
    """
    if not pageframe:
      return 'private'

    # The public find_* methods default |group_pfn_counts| to None; without
    # this guard a pageframe passed alone would fail on None.get().
    if group_pfn_counts is None:
      group_pfn_counts = {}

    if pageframe.pagecount:
      if pageframe.pagecount == 1:
        return 'private'
      if pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1:
        return 'group'
      return 'others'

    # No pagecount available: fall back to whether the PFN is known at all.
    if pageframe.pfn in group_pfn_counts:
      return 'group'
    return 'private'
class PolicySet(object):
  """Represents a set of policies, keyed by their labels.

  Supports len(), iteration over labels and lookup by label.
  """

  def __init__(self, policy_directory):
    self._policy_directory = policy_directory

  @staticmethod
  def load(labels=None):
    """Loads a set of policies via the "default policy directory".

    The "default policy directory" contains pairs of policies and their labels.
    For example, a policy "policy.l0.json" is labeled "l0" in the default
    policy directory "policies.json".

    All policies in the directory are loaded by default.  Policies can be
    limited by |labels|; labels that are not in the directory are skipped.

    Args:
        labels: An array that contains policy labels to be loaded.

    Returns:
        A PolicySet object.
    """
    known = PolicySet._load_default_policy_directory()
    if not labels:
      return PolicySet._load_policies(known)

    # TODO(dmikurube): Load an un-labeled policy file.
    selected = dict((label, known[label]) for label in labels if label in known)
    return PolicySet._load_policies(selected)

  def __len__(self):
    return len(self._policy_directory)

  def __iter__(self):
    for key in self._policy_directory:
      yield key

  def __getitem__(self, label):
    return self._policy_directory[label]

  @staticmethod
  def _load_default_policy_directory():
    """Reads and returns the JSON content of POLICIES_JSON_PATH."""
    with open(POLICIES_JSON_PATH, mode='r') as policies_f:
      return json.load(policies_f)

  @staticmethod
  def _load_policies(directory):
    """Loads every policy listed in |directory| into a new PolicySet.

    Each entry of |directory| provides a 'file' and a 'format'.  Entries for
    which Policy.load returns a falsy value are left out of the result.
    """
    LOGGER.info('Loading policy files.')
    loaded_by_label = {}
    for label in directory:
      entry = directory[label]
      policy_file = entry['file']
      LOGGER.info(' %s: %s' % (label, policy_file))
      policy = Policy.load(policy_file, entry['format'])
      if policy:
        loaded_by_label[label] = policy
    return PolicySet(loaded_by_label)