| #!/usr/bin/env python |
| # Copyright 2013 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| # A script to accumulate values from the 'dmprof cat' command into CSV, JSON or a tree. |
| # |
| # Usage: |
| # ./accumulate.py -f <format> -t <template-name> < input.json > output |
| # |
| # <format> is one of "csv", "json", and "tree". If "csv" or "json" is given, |
| # accumulate.py dumps a similar file to "dmprof csv|json". If "tree" is given, |
| # accumulate.py dumps a human-readable breakdown tree. |
| # |
| # <template-name> is a label in templates.json. |
| |
| import datetime |
| import json |
| import logging |
| import optparse |
| import sys |
| |
| from lib.ordered_dict import OrderedDict |
| |
| |
# Logger shared by the whole script; main() sets its level and attaches the
# stream handler.
LOGGER = logging.getLogger('dmprof-accumulate')
| |
| |
def visit_in_template(template, snapshot, depth):
    """Visits all categories via a given template.

    This function is not used.  It's a sample function to traverse a template.
    It writes one line per rule to standard output, indented by one space per
    nesting level, and recurses into every rule the template breaks down.

    Args:
        template: A sequence indexed as (world, breakdown, rules), where rules
            maps a rule name to the sub-template that breaks it down.
        snapshot: A mapping read as snapshot[world]['breakdown'][breakdown],
            whose keys are the rule names.
        depth: The current nesting level; callers start with 0.
    """
    world = template[0]
    breakdown = template[1]
    rules = template[2]

    # Only the rule names are needed, so iterate over the keys.  Writing to
    # sys.stdout directly keeps the output identical on Python 2 and 3 (the
    # old "print (...) + rule" statement is not valid Python 3 behavior).
    for rule in snapshot[world]['breakdown'][breakdown]:
        sys.stdout.write(' ' * depth + rule + '\n')
        if rule in rules:
            visit_in_template(rules[rule], snapshot, depth + 1)
| |
| |
def accumulate(template, snapshot, units_dict, target_units):
    """Accumulates units in a JSON |snapshot| with applying a given |template|.

    Every non-hidden rule of the breakdown selected by |template| gets the sum
    of the sizes of the target units it matched.  A rule that the template
    breaks down further is expanded recursively into a nested tree.

    Args:
        template: A template tree included in a dmprof cat JSON file, indexed
            as (world, breakdown, rules); rules maps a rule name to the
            sub-template that breaks it down.
        snapshot: A snapshot in a dmprof cat JSON file, read as
            snapshot[world]['breakdown'][breakdown][rule].
        units_dict: A dict of units in worlds; units_dict[world][unit_id] is
            the size of that unit.
        target_units: A set of unit ids which are a target of this
            accumulation.

    Returns:
        A tuple (category_tree, total, remainder_units):
          category_tree: An OrderedDict mapping each visited rule to its
              subtotal, or to a nested OrderedDict when the rule is broken
              down.  A nested tree may carry a None key holding the size
              that its own rules did not account for.
          total: The sum of the subtotals of all visited rules.
          remainder_units: The target units matched by no visited rule.

    Mismatches between a rule and its breakdown are reported on sys.stderr.
    """
    world = template[0]
    breakdown = template[1]
    rules = template[2]

    remainder_units = target_units.copy()
    category_tree = OrderedDict()
    total = 0

    for rule, match in snapshot[world]['breakdown'][breakdown].items():
        if 'hidden' in match and match['hidden']:
            continue
        matched_units = set(match['units']).intersection(target_units)
        subtotal = sum(units_dict[world][unit_id] for unit_id in matched_units)
        total += subtotal
        remainder_units = remainder_units.difference(matched_units)
        if rule not in rules:
            # A category matched with |rule| is a leaf of the breakdown tree.
            # It is NOT broken down more.
            category_tree[rule] = subtotal
            continue

        # A category matched with |rule| is broken down more.
        subtemplate = rules[rule]
        subworld = subtemplate[0]
        subbreakdown = subtemplate[1]

        if subworld == world:
            # Break down in the same world: consider units.
            subtree, accounted_total, subremainder_units = accumulate(
                subtemplate, snapshot, units_dict, matched_units)
            category_tree[rule] = subtree
            subremainder_total = sum(
                units_dict[world][unit_id] for unit_id in subremainder_units)
            if subremainder_units:
                # Units of |rule| that no sub-rule claimed.
                subtree[None] = subremainder_total
            if subtotal != accounted_total + subremainder_total:
                sys.stderr.write(
                    'WARNING: Sum of %s:%s is different from %s by %d bytes.'
                    % (subworld, subbreakdown, rule,
                       subtotal - (accounted_total + subremainder_total))
                    + '\n')
        else:
            # Break down in a different world: consider only the total size,
            # because unit ids are not comparable across worlds.
            subtree, accounted_total, _ = accumulate(
                subtemplate, snapshot, units_dict,
                set(units_dict[subworld].keys()))
            category_tree[rule] = subtree
            if subtotal >= accounted_total:
                subtree[None] = subtotal - accounted_total
            else:
                sys.stderr.write(
                    'WARNING: Sum of %s:%s is larger than %s by %d bytes.'
                    % (subworld, subbreakdown, rule,
                       accounted_total - subtotal)
                    + '\n')
                sys.stderr.write(
                    'WARNING: Assuming remainder of %s is 0.' % rule + '\n')
                subtree[None] = 0

    return category_tree, total, remainder_units
| |
| |
def flatten(category_tree, header=''):
    """Flattens a category tree into a flat list.

    Args:
        category_tree: A mapping from rule names to either a value (leaf) or
            a nested mapping of the same form.
        header: The label path of the enclosing category; empty at the root.

    Returns:
        A list of (label, value) pairs in traversal order.  Nested rule names
        are joined with '>'; a falsy rule name (such as the None key used for
        a remainder) is spelled 'remaining'.
    """
    result = []
    # items() works on both Python 2 and 3, unlike iteritems().
    for rule, sub in category_tree.items():
        label = rule or 'remaining'
        path = header + '>' + label if header else label
        if isinstance(sub, (dict, OrderedDict)):
            result.extend(flatten(sub, path))
        else:
            result.append((path, sub))
    return result
| |
| |
def print_category_tree(category_tree, output, depth=0):
    """Prints a category tree in a human-readable format.

    Args:
        category_tree: A mapping from labels to either a number (leaf) or a
            nested mapping of the same form.
        output: A file-like object with a write() method.
        depth: The nesting level of |category_tree|; one space per level.
    """
    # The original Python 2 code printed the indent with a trailing comma,
    # which made the following print emit one separating space.  That space
    # is written explicitly here so the output is unchanged on Python 2 and 3.
    prefix = ' ' * depth + ' '
    for label in category_tree:
        value = category_tree[label]
        if isinstance(value, (dict, OrderedDict)):
            output.write('%s%s:\n' % (prefix, label))
            print_category_tree(value, output, depth + 1)
        else:
            output.write('%s%s: %d\n' % (prefix, label, value))
| |
| |
def flatten_all_category_trees(category_trees):
    """Flattens every category tree and collects the labels they use.

    Args:
        category_trees: An iterable of category trees accepted by flatten().

    Returns:
        A tuple (labels, table): labels is the set of all flattened labels
        seen in any tree, and table is a list with one OrderedDict per tree
        mapping flattened label to its value.
    """
    all_labels = set()
    table = []
    for tree in category_trees:
        row = OrderedDict()
        for flat_label, value in flatten(tree):
            row[flat_label] = value
        # Every key of the row is a label that was seen in this tree.
        all_labels.update(row)
        table.append(row)
    return all_labels, table
| |
| |
def output_csv(output, category_trees, data, first_time, output_exponent):
    """Writes the accumulated category trees to |output| as CSV.

    The header row is 'second' followed by all flattened labels in sorted
    order.  Each following row holds one snapshot: its time relative to
    |first_time|, then the value of every label ('0' when the snapshot does
    not have that label).

    Args:
        output: A file-like object with a write() method.
        category_trees: A list of category trees, one per snapshot.
        data: The parsed input; data['snapshots'][i]['time'] is read for the
            i-th tree.
        first_time: The time subtracted from every snapshot time.
        output_exponent: 'K' divides values by 1024 and 'M' by 1024 * 1024
            (case-insensitive); any other value leaves them unscaled.
    """
    flattened_labels, flattened_table = flatten_all_category_trees(
        category_trees)

    # The divisor depends only on |output_exponent|, so decide it once
    # instead of once per cell.
    exponent = output_exponent.upper()
    if exponent == 'K':
        divisor = 1024.0
    elif exponent == 'M':
        divisor = 1024.0 * 1024.0
    else:
        divisor = None

    sorted_flattened_labels = sorted(flattened_labels)
    output.write(','.join(['second'] + sorted_flattened_labels) + '\n')
    for index, row in enumerate(flattened_table):
        values = [str(data['snapshots'][index]['time'] - first_time)]
        for label in sorted_flattened_labels:
            if label not in row:
                values.append('0')
            elif divisor is None:
                # Unscaled values are written as they are; dividing by 1
                # would turn integers into floats under true division.
                values.append(str(row[label]))
            else:
                values.append(str(row[label] / divisor))
        output.write(','.join(values) + '\n')
| |
| |
def output_json(output, category_trees, data, first_time, template_label):
    """Dumps the accumulated category trees to |output| as JSON.

    Args:
        output: A file-like object that json.dump() can write to.
        category_trees: A list of category trees, one per snapshot.
        data: The parsed input; data['snapshots'][i]['time'] is read for the
            i-th tree.
        first_time: The time subtracted from every snapshot time to get the
            'second' field.
        template_label: The key under 'policies' that holds the result.
    """
    flattened_labels, flattened_table = flatten_all_category_trees(
        category_trees)

    json_snapshots = []
    for index, row in enumerate(flattened_table):
        entry = row.copy()
        snapshot_time = data['snapshots'][index]['time']
        entry['second'] = snapshot_time - first_time
        # fromtimestamp() is called without a timezone, so the formatted
        # dump time is in the local time of the machine running this script.
        dumped_at = datetime.datetime.fromtimestamp(snapshot_time)
        entry['dump_time'] = dumped_at.strftime('%Y-%m-%d %H:%M:%S')
        json_snapshots.append(entry)

    policy = {
        'legends': sorted(flattened_labels),
        'snapshots': json_snapshots,
    }
    json_root = {
        'version': 'JSON_DEEP_2',
        'policies': {template_label: policy},
    }
    json.dump(json_root, output, indent=2, sort_keys=True)
| |
| |
def output_tree(output, category_trees):
    """Writes every category tree to |output| as a human-readable tree.

    Each tree is preceded by a '< Snapshot #N >' line, where N is its index
    in |category_trees|, and followed by an empty line.

    Args:
        output: A file-like object with a write() method.
        category_trees: A list of category trees, one per snapshot.
    """
    # write() replaces the Python-2-only "print >> output" statement; the
    # emitted text is the same.
    for index, category_tree in enumerate(category_trees):
        output.write('< Snapshot #%d >' % index + '\n')
        print_category_tree(category_tree, output, 1)
        output.write('\n')
| |
| |
def do_main(cat_input, output, template_label, output_format, output_exponent):
    """Does the main work: accumulate for every snapshot and print a result.

    Args:
        cat_input: A file-like object holding the JSON of 'dmprof cat'.
        output: A file-like object the result is written to.
        template_label: The name of the template to apply; when falsy, the
            input's 'default_template' is used.
        output_format: One of 'csv', 'json' and 'tree'.
        output_exponent: One of 'B', 'K' and 'M' (case-insensitive).

    Raises:
        NotImplementedError: |output_format| or |output_exponent| is not one
            of the supported values.

    If the template is not found in the input, an error is logged and nothing
    is written.
    """
    if output_format not in ['csv', 'json', 'tree']:
        raise NotImplementedError(
            'The output format "%s" is not implemented.' % output_format)

    if output_exponent.upper() not in ['B', 'K', 'M']:
        raise NotImplementedError(
            'The exponent "%s" is not implemented.' % output_exponent)

    data = json.loads(cat_input.read(), object_pairs_hook=OrderedDict)

    templates = data['templates']
    if not template_label:
        template_label = data['default_template']
    if template_label not in templates:
        LOGGER.error("A template '%s' is not found.", template_label)
        return
    template = templates[template_label]

    category_trees = []
    first_time = None

    for snapshot in data['snapshots']:
        # Compare against None explicitly: a first snapshot taken at time 0
        # is falsy and would otherwise be replaced by a later snapshot's time.
        if first_time is None:
            first_time = snapshot['time']

        # units[world][unit_id] is the first entry of that unit's size list.
        units = {}
        for world_name in snapshot['worlds']:
            world_units = {}
            raw_units = snapshot['worlds'][world_name]['units']
            for unit_id, sizes in raw_units.items():
                world_units[int(unit_id)] = sizes[0]
            units[world_name] = world_units

        category_tree, _, _ = accumulate(
            template, snapshot['worlds'], units,
            set(units[template[0]].keys()))
        category_trees.append(category_tree)

    if output_format == 'csv':
        output_csv(output, category_trees, data, first_time, output_exponent)
    elif output_format == 'json':
        output_json(output, category_trees, data, first_time, template_label)
    elif output_format == 'tree':
        output_tree(output, category_trees)
| |
| |
def main():
    """Sets up logging, parses the command line and runs do_main().

    Input is read from standard input and the result is written to standard
    output.
    """
    # The logger itself passes everything from DEBUG up; the handler then
    # only emits INFO and above, as the bare message.
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.INFO)
    stream_handler.setFormatter(logging.Formatter('%(message)s'))
    LOGGER.setLevel(logging.DEBUG)
    LOGGER.addHandler(stream_handler)

    option_parser = optparse.OptionParser()
    option_parser.add_option(
        '-t', '--template', dest='template', metavar='TEMPLATE',
        help='Apply TEMPLATE to list up.')
    option_parser.add_option(
        '-f', '--format', dest='format', default='csv',
        help='Specify the output format: csv, json or tree.')
    option_parser.add_option(
        '-e', '--exponent', dest='exponent', default='M',
        help='Specify B (bytes), K (kilobytes) or M (megabytes).')

    # The whole sys.argv is passed, so the program name ends up among the
    # positional arguments, which are ignored here.
    parsed_options = option_parser.parse_args(sys.argv)[0]
    do_main(sys.stdin, sys.stdout, parsed_options.template,
            parsed_options.format, parsed_options.exponent)
| |
| |
# Script entry point: the return value of main() is used as the exit status.
if __name__ == '__main__':
    sys.exit(main())