| # Copyright 2013 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| import json |
| import logging |
| import os |
| import re |
| |
| |
LOGGER = logging.getLogger('dmprof')

# BASE_PATH is the parent of the directory that contains this module.
# Policy file names are joined onto it when they are opened.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
BASE_PATH = os.path.dirname(_THIS_DIR)
POLICIES_JSON_PATH = os.path.join(BASE_PATH, 'policies.json')

# Heap Profile Policy versions

# POLICY_DEEP_1 DOES NOT include allocation_type columns.
# mmap regions are distinguished by mmap frames in the pattern column.
POLICY_DEEP_1 = 'POLICY_DEEP_1'

# POLICY_DEEP_2 DOES include allocation_type columns.
# mmap regions are distinguished by the allocation_type column.
POLICY_DEEP_2 = 'POLICY_DEEP_2'

# POLICY_DEEP_3 is in JSON format.
POLICY_DEEP_3 = 'POLICY_DEEP_3'

# POLICY_DEEP_4 contains typeinfo.
POLICY_DEEP_4 = 'POLICY_DEEP_4'
| |
| |
class Rule(object):
  """One classification rule taken from a policy file.

  A rule carries a component name, the allocator type it applies to, and a
  set of optional regular expressions.  Every pattern string given to the
  constructor is compiled with a trailing \\Z, so a match() on the compiled
  pattern succeeds only when the whole subject string is consumed.  A
  pattern that is omitted (or empty) is exposed as None.
  """

  def __init__(self,
               name,
               allocator_type,
               stackfunction_pattern=None,
               stacksourcefile_pattern=None,
               typeinfo_pattern=None,
               mappedpathname_pattern=None,
               mappedpermission_pattern=None,
               sharedwith=None):
    """Stores the rule's fields, compiling each supplied pattern.

    Args:
        name: The component name this rule assigns.
        allocator_type: The allocator type string the rule applies to.
        stackfunction_pattern: Optional regex source for stack functions.
        stacksourcefile_pattern: Optional regex source for stack source files.
        typeinfo_pattern: Optional regex source for type information.
        mappedpathname_pattern: Optional regex source for mapped pathnames.
        mappedpermission_pattern: Optional regex source for mapped
            permissions.
        sharedwith: Optional list of sharing categories; an empty list is
            stored when it is omitted or empty.
    """
    self._name = name
    self._allocator_type = allocator_type

    # Patterns are compiled in declaration order so that an invalid regex
    # is reported for the first offending argument.
    anchored = Rule._compile_anchored
    self._stackfunction_pattern = anchored(stackfunction_pattern)
    self._stacksourcefile_pattern = anchored(stacksourcefile_pattern)
    self._typeinfo_pattern = anchored(typeinfo_pattern)
    self._mappedpathname_pattern = anchored(mappedpathname_pattern)
    self._mappedpermission_pattern = anchored(mappedpermission_pattern)

    self._sharedwith = sharedwith if sharedwith else []

  @staticmethod
  def _compile_anchored(pattern):
    """Returns |pattern| compiled with a trailing \\Z, or None if it is empty."""
    if not pattern:
      return None
    return re.compile(pattern + r'\Z')

  @property
  def name(self):
    return self._name

  @property
  def allocator_type(self):
    return self._allocator_type

  @property
  def stackfunction_pattern(self):
    return self._stackfunction_pattern

  @property
  def stacksourcefile_pattern(self):
    return self._stacksourcefile_pattern

  @property
  def typeinfo_pattern(self):
    return self._typeinfo_pattern

  @property
  def mappedpathname_pattern(self):
    return self._mappedpathname_pattern

  @property
  def mappedpermission_pattern(self):
    return self._mappedpermission_pattern

  @property
  def sharedwith(self):
    return self._sharedwith
| |
| |
class Policy(object):
  """Represents a policy, the parsed content of a policy file.

  A policy holds an ordered list of rules.  The find_* methods walk the
  rules in order and return the name of the first rule whose conditions
  all hold.
  """

  def __init__(self, rules, version, components):
    self._rules = rules
    self._version = version
    self._components = components

  @property
  def rules(self):
    return self._rules

  @property
  def version(self):
    return self._version

  @property
  def components(self):
    return self._components

  def find_rule(self, component_name):
    """Finds a rule whose name is |component_name|.

    Returns:
        The first rule with that name, or None if there is no such rule.
    """
    for rule in self._rules:
      if rule.name == component_name:
        return rule
    return None

  def find_malloc(self, bucket):
    """Finds a matching component name which a given |bucket| belongs to.

    The result is stored in |bucket.component_cache| and reused on later
    calls for the same bucket.

    Args:
        bucket: A Bucket object to be searched for.

    Returns:
        A string representing a component name ('no-bucket' when |bucket|
        is empty).

    Raises:
        AssertionError: If no malloc rule matches |bucket|.
    """
    assert not bucket or bucket.allocator_type == 'malloc'

    if not bucket:
      return 'no-bucket'
    if bucket.component_cache:
      return bucket.component_cache

    stackfunction = bucket.symbolized_joined_stackfunction
    stacksourcefile = bucket.symbolized_joined_stacksourcefile
    typeinfo = bucket.symbolized_typeinfo
    # A typeinfo that still looks like a raw address falls back to
    # |typeinfo_name|.
    if typeinfo.startswith('0x'):
      typeinfo = bucket.typeinfo_name

    for rule in self._rules:
      if (rule.allocator_type == 'malloc' and
          Policy._accepts(rule.stackfunction_pattern, stackfunction) and
          Policy._accepts(rule.stacksourcefile_pattern, stacksourcefile) and
          Policy._accepts(rule.typeinfo_pattern, typeinfo)):
        bucket.component_cache = rule.name
        return rule.name

    # Raised explicitly (not "assert False") so that the check survives -O.
    raise AssertionError('No malloc rule matches the given bucket.')

  def find_mmap(self, region, bucket_set,
                pageframe=None, group_pfn_counts=None):
    """Finds a matching component which a given mmap |region| belongs to.

    It uses |bucket_set| to match with backtraces.  If |pageframe| is given,
    it considers memory sharing among processes.

    NOTE: Don't use Bucket's |component_cache| for mmap regions because they're
    classified not only with bucket information (mappedpathname for example).

    Args:
        region: A tuple representing a memory region.
        bucket_set: A BucketSet object to look up backtraces.
        pageframe: A PageFrame object representing a pageframe maybe including
            a pagecount.
        group_pfn_counts: A dict mapping a PFN to the number of times the
            pageframe is mapped by the known "group (Chrome)" processes.
            None is treated as an empty dict.

    Returns:
        A (component name, bucket) tuple.  ('no-bucket', None) is returned
        when |bucket_set| has no bucket for the region.

    Raises:
        AssertionError: If no mmap rule matches |region|.
    """
    assert region[0] == 'hooked'
    bucket = bucket_set.get(region[1]['bucket_id'])
    assert not bucket or bucket.allocator_type == 'mmap'

    if not bucket:
      return 'no-bucket', None

    stackfunction = bucket.symbolized_joined_stackfunction
    stacksourcefile = bucket.symbolized_joined_stacksourcefile
    sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)

    for rule in self._rules:
      if (rule.allocator_type == 'mmap' and
          Policy._accepts(rule.stackfunction_pattern, stackfunction) and
          Policy._accepts(rule.stacksourcefile_pattern, stacksourcefile) and
          Policy._vma_matches(rule, region) and
          Policy._sharing_matches(rule, pageframe, sharedwith)):
        return rule.name, bucket

    # Raised explicitly (not "assert False") so that the check survives -O.
    raise AssertionError('No mmap rule matches the given region.')

  def find_unhooked(self, region, pageframe=None, group_pfn_counts=None):
    """Finds a matching component which a given unhooked |region| belongs to.

    If |pageframe| is given, it considers memory sharing among processes.

    Args:
        region: A tuple representing a memory region.
        pageframe: A PageFrame object representing a pageframe maybe including
            a pagecount.
        group_pfn_counts: A dict mapping a PFN to the number of times the
            pageframe is mapped by the known "group (Chrome)" processes.
            None is treated as an empty dict.

    Returns:
        A string representing a component name.

    Raises:
        AssertionError: If no unhooked rule matches |region|.
    """
    assert region[0] == 'unhooked'
    sharedwith = self._categorize_pageframe(pageframe, group_pfn_counts)

    for rule in self._rules:
      if (rule.allocator_type == 'unhooked' and
          Policy._vma_matches(rule, region) and
          Policy._sharing_matches(rule, pageframe, sharedwith)):
        return rule.name

    # Raised explicitly (not "assert False") so that the check survives -O.
    raise AssertionError('No unhooked rule matches the given region.')

  @staticmethod
  def _accepts(pattern, text):
    """Returns True if |pattern| is unset or matches |text|."""
    return not pattern or pattern.match(text) is not None

  @staticmethod
  def _vma_matches(rule, region):
    """Checks |rule|'s mapped pathname/permission patterns against |region|.

    The VMA fields of |region| are read only for patterns the rule actually
    defines.
    """
    if (rule.mappedpathname_pattern and
        not rule.mappedpathname_pattern.match(region[1]['vma']['name'])):
      return False
    if rule.mappedpermission_pattern:
      vma = region[1]['vma']
      permission = (vma['readable'] + vma['writable'] +
                    vma['executable'] + vma['private'])
      if not rule.mappedpermission_pattern.match(permission):
        return False
    return True

  @staticmethod
  def _sharing_matches(rule, pageframe, sharedwith):
    """Checks |rule|'s sharedwith constraint.

    The constraint is applied only when the rule lists sharing categories
    and a |pageframe| was supplied.
    """
    return not rule.sharedwith or not pageframe or sharedwith in rule.sharedwith

  @staticmethod
  def load(filename, filetype):
    """Loads a policy file of |filename| in the format |filetype|.

    Args:
        filename: A filename to be loaded.  It is joined onto BASE_PATH.
        filetype: A string to specify a type of the file.  Only 'json' is
            supported for now.

    Returns:
        A loaded Policy object, or None if |filetype| is not supported.
    """
    with open(os.path.join(BASE_PATH, filename)) as policy_f:
      return Policy.parse(policy_f, filetype)

  @staticmethod
  def parse(policy_f, filetype):
    """Parses a policy file content in the format |filetype|.

    Args:
        policy_f: An IO object to be loaded.
        filetype: A string to specify a type of the file.  Only 'json' is
            supported for now.

    Returns:
        A loaded Policy object, or None if |filetype| is not supported.
    """
    if filetype == 'json':
      return Policy._parse_json(policy_f)
    else:
      return None

  JSON_COMMENT_REGEX = re.compile(r'//.*')

  # Matches either a whole JSON string literal or a // comment.  Listing the
  # string alternative first lets a "//" inside a string be consumed as part
  # of that string instead of being mistaken for a comment.
  _JSON_STRING_OR_COMMENT_REGEX = re.compile(
      r'"(?:\\.|[^"\\])*"|' + JSON_COMMENT_REGEX.pattern)

  @staticmethod
  def _strip_json_comments(text):
    """Removes // comments from |text|, leaving string literals untouched."""
    def keep_strings(match):
      token = match.group(0)
      return token if token.startswith('"') else ''
    return Policy._JSON_STRING_OR_COMMENT_REGEX.sub(keep_strings, text)

  @staticmethod
  def _parse_json(policy_f):
    """Parses policy file in json format.

    A policy file contains component's names and their stacktrace pattern
    written in regular expression.  Those patterns are matched against each
    symbols of each stacktraces in the order written in the policy file.
    // comments are removed before the content is handed to the JSON parser.

    Args:
        policy_f: A File/IO object to read.

    Returns:
        A loaded policy object.
    """
    policy = json.loads(Policy._strip_json_comments(policy_f.read()))

    rules = []
    for rule in policy['rules']:
      rules.append(Rule(
          rule['name'],
          rule['allocator'],  # allocator_type
          # 'stacktrace' is accepted as a fallback key for 'stackfunction'.
          rule.get('stackfunction') or rule.get('stacktrace'),
          rule.get('stacksourcefile'),
          rule.get('typeinfo'),
          rule.get('mappedpathname'),
          rule.get('mappedpermission'),
          rule.get('sharedwith')))

    return Policy(rules, policy['version'], policy['components'])

  @staticmethod
  def _categorize_pageframe(pageframe, group_pfn_counts):
    """Categorizes a pageframe based on its sharing status.

    Args:
        pageframe: A PageFrame object, or None.
        group_pfn_counts: A dict mapping a PFN to a mapping count among the
            group processes.  None is treated as an empty dict.

    Returns:
        'private' if |pageframe| is not shared with other processes.  'group'
        if |pageframe| is shared only with group (Chrome-related) processes.
        'others' if |pageframe| is shared with non-group processes.
    """
    if not pageframe:
      return 'private'

    # Callers may leave |group_pfn_counts| at its None default while still
    # passing a pageframe.
    if group_pfn_counts is None:
      group_pfn_counts = {}

    if pageframe.pagecount:
      if pageframe.pagecount == 1:
        return 'private'
      elif pageframe.pagecount <= group_pfn_counts.get(pageframe.pfn, 0) + 1:
        return 'group'
      else:
        return 'others'
    else:
      if pageframe.pfn in group_pfn_counts:
        return 'group'
      else:
        return 'private'
| |
| |
class PolicySet(object):
  """A collection of loaded policies, addressable by their labels.

  It supports len(), iteration over the labels, and lookup by label.
  """

  def __init__(self, policy_directory):
    self._policy_directory = policy_directory

  @staticmethod
  def load(labels=None):
    """Loads a set of policies via the "default policy directory".

    The "default policy directory" is the JSON file at POLICIES_JSON_PATH.
    It maps each label to a description of a policy file; for example, a
    policy "policy.l0.json" is labeled "l0".

    Every policy in the directory is loaded unless |labels| is given, in
    which case only the listed labels that exist in the directory are loaded.

    Args:
        labels: An array that contains policy labels to be loaded.

    Returns:
        A PolicySet object.
    """
    available = PolicySet._load_default_policy_directory()
    if not labels:
      return PolicySet._load_policies(available)

    # TODO(dmikurube): Load an un-labeled policy file.
    selected = {
        label: available[label] for label in labels if label in available}
    return PolicySet._load_policies(selected)

  def __len__(self):
    return len(self._policy_directory)

  def __iter__(self):
    return iter(self._policy_directory)

  def __getitem__(self, label):
    return self._policy_directory[label]

  @staticmethod
  def _load_default_policy_directory():
    """Reads and returns the JSON content of POLICIES_JSON_PATH."""
    with open(POLICIES_JSON_PATH, mode='r') as policies_f:
      return json.load(policies_f)

  @staticmethod
  def _load_policies(directory):
    """Loads every policy listed in |directory| into a new PolicySet.

    Entries for which Policy.load returns a false value are left out.
    """
    LOGGER.info('Loading policy files.')
    policies = {}
    for label in directory:
      entry = directory[label]
      LOGGER.info(' %s: %s' % (label, entry['file']))
      policy = Policy.load(entry['file'], entry['format'])
      if policy:
        policies[label] = policy
    return PolicySet(policies)