tools/deep_memory_profiler/accumulate.py - platform/external/chromium_org - Git at Google

 #!/usr/bin/env python
 # Copyright 2013 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 # A script to accumulate values from the 'dmprof cat' command into CSV or
 # other formats.
 #
 # Usage:
 #   ./accumulate.py -f <format> -t <template-name> < input.json > output
 #
 # <format> is one of "csv", "json", and "tree". If "csv" or "json" is given,
 # accumulate.py dumps a file similar to the output of "dmprof csv|json". If
 # "tree" is given, accumulate.py dumps a human-readable breakdown tree.
 #
 # <template-name> is a label in templates.json.

 import datetime
 import json
 import logging
 import optparse
 import sys

 from lib.ordered_dict import OrderedDict


 # Logger for this script; main() attaches a stream handler to it.
 LOGGER = logging.getLogger('dmprof-accumulate')


 def visit_in_template(template, snapshot, depth):
   """Walks a template and prints every category name it reaches.

   Nothing in this file calls it; it only demonstrates how a template is
   traversed.

   Args:
       template: A sequence whose first three items are a world name, a
           breakdown name and a dict mapping rule names to sub-templates.
       snapshot: A dict indexed as snapshot[world]['breakdown'][breakdown].
       depth: Nesting level; each level indents the output by two spaces.
   """
   world, breakdown, rules = template[0], template[1], template[2]
   indent = '  ' * depth

   for rule in snapshot[world]['breakdown'][breakdown]:
     print (indent + rule)
     if rule in rules:
       # This rule has a sub-template: descend one level deeper.
       visit_in_template(rules[rule], snapshot, depth + 1)


 def accumulate(template, snapshot, units_dict, target_units):
   """Builds a category tree of sizes by applying |template| to |snapshot|.

   Args:
       template: A template node: a sequence of (world name, breakdown name,
           dict mapping rule names to sub-templates).
       snapshot: A dict indexed as snapshot[world]['breakdown'][breakdown],
           giving a mapping from rule names to match dicts. Each match has a
           'units' list and may have a truthy 'hidden' flag.
       units_dict: A dict indexed as units_dict[world][unit_id] giving a size.
       target_units: A set of unit ids to be accumulated at this level.

   Returns:
       A tuple (category_tree, total, remainder_units): an OrderedDict from
       rule names to sizes or sub-trees, the sum of the sizes matched by the
       non-hidden rules, and the set of target units matched by none of them.
   """
   world, breakdown, rules = template[0], template[1], template[2]

   category_tree = OrderedDict()
   unmatched_units = target_units.copy()
   grand_total = 0

   for rule, match in snapshot[world]['breakdown'][breakdown].iteritems():
     if match.get('hidden'):
       continue

     matched_units = set(match['units']).intersection(target_units)
     subtotal = sum(units_dict[world][unit_id] for unit_id in matched_units)
     grand_total += subtotal
     unmatched_units = unmatched_units.difference(matched_units)

     if rule not in rules:
       # No sub-template for |rule|: it is a leaf of the breakdown tree.
       category_tree[rule] = subtotal
       continue

     # |rule| has a sub-template, so its category is broken down further.
     subtemplate = rules[rule]
     subworld, subbreakdown = subtemplate[0], subtemplate[1]

     if subworld != world:
       # Another world has its own unit ids: only total sizes are comparable.
       subtree, accounted_total, _ = accumulate(
           subtemplate, snapshot, units_dict, set(units_dict[subworld].keys()))
       category_tree[rule] = subtree
       if subtotal >= accounted_total:
         subtree[None] = subtotal - accounted_total
       else:
         excess_message = (
             'WARNING: Sum of %s:%s is larger than %s by %d bytes.' % (
                 subworld, subbreakdown, rule, accounted_total - subtotal))
         print >> sys.stderr, excess_message
         print >> sys.stderr, (
             'WARNING:   Assuming remainder of %s is 0.' % rule)
         subtree[None] = 0
       continue

     # Same world: hand the matched units down and account for each of them.
     subtree, accounted_total, subremainder_units = accumulate(
         subtemplate, snapshot, units_dict, matched_units)
     category_tree[rule] = subtree
     subremainder_total = 0
     if subremainder_units:
       subremainder_total = sum(
           units_dict[world][unit_id] for unit_id in subremainder_units)
       subtree[None] = subremainder_total
     if subtotal != accounted_total + subremainder_total:
       mismatch_message = (
           'WARNING: Sum of %s:%s is different from %s by %d bytes.' % (
               subworld, subbreakdown, rule,
               subtotal - (accounted_total + subremainder_total)))
       print >> sys.stderr, mismatch_message

   return category_tree, grand_total, unmatched_units


 def flatten(category_tree, header=''):
   """Converts a nested category tree into a list of (path, value) pairs.

   Nested rule names are joined with '>' below |header|. A falsy rule name,
   such as the None key used for remainders, is spelled 'remaining'.
   """
   prefix = header + '>' if header else ''
   pairs = []
   for rule, sub in category_tree.iteritems():
     path = prefix + (rule or 'remaining')
     if isinstance(sub, (dict, OrderedDict)):
       # A sub-tree: its entries are listed below the current path.
       pairs += flatten(sub, path)
     else:
       pairs.append((path, sub))
   return pairs


 def print_category_tree(category_tree, output, depth=0):
   """Writes a category tree to |output| as an indented, human-readable list.

   Args:
       category_tree: A dict (or OrderedDict) mapping labels to either a
           number or a nested tree.
       output: A file-like object with a write() method.
       depth: Nesting level of |category_tree|; each level indents by two
           spaces.
   """
   # Two spaces per level, followed by one separating space.
   prefix = '  ' * depth + ' '
   for label in category_tree:
     value = category_tree[label]
     # A falsy label (the None key holding a remainder) is shown as
     # 'remaining', the same name flatten() gives it, instead of 'None'.
     name = label or 'remaining'
     if isinstance(value, (dict, OrderedDict)):
       output.write('%s%s:\n' % (prefix, name))
       print_category_tree(value, output, depth + 1)
     else:
       output.write('%s%s: %d\n' % (prefix, name, value))


 def flatten_all_category_trees(category_trees):
   """Flattens each category tree and gathers every label that occurs.

   Args:
       category_trees: An iterable of category trees accepted by flatten().

   Returns:
       A tuple (labels, table): the set of all flattened labels, and a list
       holding one OrderedDict of label -> value per input tree.
   """
   all_labels = set()
   table = []
   for tree in category_trees:
     row = OrderedDict()
     for label, subtotal in flatten(tree):
       row[label] = subtotal
       all_labels.add(label)
     table.append(row)
   return all_labels, table


 def output_csv(output, category_trees, data, first_time, output_exponent):
   """Prints the category trees as CSV, one row per snapshot.

   The header row is 'second' followed by every flattened label in sorted
   order. Each following row holds the snapshot time relative to |first_time|
   and the value of each label, or '0' where the snapshot lacks the label.

   Args:
       output: A file-like object to print to.
       category_trees: A list of category trees, indexed in parallel with
           data['snapshots'].
       data: A dict whose 'snapshots' list items have a 'time' entry.
       first_time: The time subtracted from each snapshot time.
       output_exponent: 'K' or 'M' (case-insensitive) to divide values by 1024
           or 1024 * 1024; any other value leaves them undivided.
   """
   flattened_labels, flattened_table = flatten_all_category_trees(category_trees)

   # The divisor depends only on |output_exponent|, so it is chosen once here
   # rather than once per cell.
   divisor = {'K': 1024.0, 'M': 1024.0 * 1024.0}.get(output_exponent.upper(), 1)

   sorted_flattened_labels = sorted(flattened_labels)
   print >> output, ','.join(['second'] + sorted_flattened_labels)
   for index, row in enumerate(flattened_table):
     values = [str(data['snapshots'][index]['time'] - first_time)]
     for label in sorted_flattened_labels:
       if label in row:
         values.append(str(row[label] / divisor))
       else:
         values.append('0')
     print >> output, ','.join(values)


 def output_json(output, category_trees, data, first_time, template_label):
   """Dumps the category trees to |output| as a 'JSON_DEEP_2' document.

   Each snapshot row is the flattened tree plus two entries: 'second', the
   snapshot time relative to |first_time|, and 'dump_time', the snapshot time
   formatted as local 'YYYY-MM-DD HH:MM:SS'. The rows and the sorted list of
   labels are stored under policies[template_label].
   """
   flattened_labels, flattened_table = flatten_all_category_trees(category_trees)
   time_format = '%Y-%m-%d %H:%M:%S'

   json_snapshots = []
   for index, row in enumerate(flattened_table):
     entry = row.copy()
     timestamp = data['snapshots'][index]['time']
     entry['second'] = timestamp - first_time
     dumped_at = datetime.datetime.fromtimestamp(timestamp)
     entry['dump_time'] = dumped_at.strftime(time_format)
     json_snapshots.append(entry)

   policy = {
       'legends': sorted(flattened_labels),
       'snapshots': json_snapshots,
   }
   json_root = {
       'version': 'JSON_DEEP_2',
       'policies': {template_label: policy},
   }
   json.dump(json_root, output, indent=2, sort_keys=True)


 def output_tree(output, category_trees):
   """Prints every category tree under a '< Snapshot #N >' heading.

   N counts from 0. Each tree is printed at depth 1 and followed by an empty
   line.
   """
   heading = '< Snapshot #%d >'
   for number, tree in enumerate(category_trees):
     print >> output, heading % number
     print_category_tree(tree, output, 1)
     print >> output


 def do_main(cat_input, output, template_label, output_format, output_exponent):
   """Does the main work: accumulate for every snapshot and print a result.

   Args:
       cat_input: A file-like object yielding 'dmprof cat' JSON.
       output: A file-like object to write the result to.
       template_label: A key in the input's 'templates'; if falsy, the input's
           'default_template' is used.
       output_format: 'csv', 'json' or 'tree'.
       output_exponent: 'B', 'K' or 'M' (case-insensitive); only the CSV
           output uses it.

   Returns:
       1 if the requested template does not exist, otherwise None.

   Raises:
       NotImplementedError: |output_format| or |output_exponent| is unknown.
   """
   if output_format not in ['csv', 'json', 'tree']:
     raise NotImplementedError('The output format \"%s\" is not implemented.' %
                               output_format)

   if output_exponent.upper() not in ['B', 'K', 'M']:
     raise NotImplementedError('The exponent \"%s\" is not implemented.' %
                               output_exponent)

   data = json.loads(cat_input.read(), object_pairs_hook=OrderedDict)

   templates = data['templates']
   if not template_label:
     template_label = data['default_template']
   if template_label not in templates:
     LOGGER.error('A template \'%s\' is not found.', template_label)
     # Report the failure so that a caller can turn it into an exit status.
     return 1
   template = templates[template_label]

   category_trees = []
   first_time = None

   for snapshot in data['snapshots']:
     # Compare against None: a first snapshot taken at time 0 is a valid
     # baseline and must not be replaced by a later snapshot.
     if first_time is None:
       first_time = snapshot['time']

     # Map every world to {integer unit id: first element of its sizes}.
     worlds = snapshot['worlds']
     units = {}
     for world_name in worlds:
       units[world_name] = dict(
           (int(unit_id), sizes[0])
           for unit_id, sizes in worlds[world_name]['units'].iteritems())

     category_tree, _, _ = accumulate(
         template, worlds, units, set(units[template[0]].keys()))
     category_trees.append(category_tree)

   if output_format == 'csv':
     output_csv(output, category_trees, data, first_time, output_exponent)
   elif output_format == 'json':
     output_json(output, category_trees, data, first_time, template_label)
   elif output_format == 'tree':
     output_tree(output, category_trees)


 def main():
   """Parses command-line options and runs do_main on stdin and stdout.

   Returns:
       The value returned by do_main; the entry point below passes it to
       sys.exit().
   """
   LOGGER.setLevel(logging.DEBUG)
   handler = logging.StreamHandler()
   handler.setLevel(logging.INFO)
   handler.setFormatter(logging.Formatter('%(message)s'))
   LOGGER.addHandler(handler)

   parser = optparse.OptionParser()
   parser.add_option('-t', '--template', dest='template',
                     metavar='TEMPLATE',
                     help='Apply TEMPLATE to list up.')
   parser.add_option('-f', '--format', dest='format', default='csv',
                     help='Specify the output format: csv, json or tree.')
   parser.add_option('-e', '--exponent', dest='exponent', default='M',
                     help='Specify B (bytes), K (kilobytes) or M (megabytes).')

   # parse_args() reads sys.argv[1:] by default; passing sys.argv itself would
   # hand the program name to the parser as a positional argument.
   options, _ = parser.parse_args()
   return do_main(sys.stdin, sys.stdout,
                  options.template, options.format, options.exponent)


 if __name__ == '__main__':
   sys.exit(main())
	#!/usr/bin/env python
	# Copyright 2013 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	# A script to accumulate values from the 'dmprof cat' command into CSV or
	# other formats.
	#
	# Usage:
	# ./accumulate.py -f <format> -t <template-name> < input.json > output
	#
	# <format> is one of "csv", "json", and "tree". If "csv" or "json" is given,
	# accumulate.py dumps a file similar to the output of "dmprof csv|json". If
	# "tree" is given, accumulate.py dumps a human-readable breakdown tree.
	#
	# <template-name> is a label in templates.json.

	import datetime
	import json
	import logging
	import optparse
	import sys

	from lib.ordered_dict import OrderedDict


	# Logger for this script; main() attaches a stream handler to it.
	LOGGER = logging.getLogger('dmprof-accumulate')


	def visit_in_template(template, snapshot, depth):
	  """Walks a template and prints every category name it reaches.

	  Nothing in this file calls it; it only demonstrates how a template is
	  traversed.

	  Args:
	      template: A sequence whose first three items are a world name, a
	          breakdown name and a dict mapping rule names to sub-templates.
	      snapshot: A dict indexed as snapshot[world]['breakdown'][breakdown].
	      depth: Nesting level; each level indents the output by one space.
	  """
	  world, breakdown, rules = template[0], template[1], template[2]
	  indent = ' ' * depth

	  for rule in snapshot[world]['breakdown'][breakdown]:
	    print (indent + rule)
	    if rule in rules:
	      # This rule has a sub-template: descend one level deeper.
	      visit_in_template(rules[rule], snapshot, depth + 1)


	def accumulate(template, snapshot, units_dict, target_units):
	  """Builds a category tree of sizes by applying |template| to |snapshot|.

	  Args:
	      template: A template node: a sequence of (world name, breakdown name,
	          dict mapping rule names to sub-templates).
	      snapshot: A dict indexed as snapshot[world]['breakdown'][breakdown],
	          giving a mapping from rule names to match dicts. Each match has a
	          'units' list and may have a truthy 'hidden' flag.
	      units_dict: A dict indexed as units_dict[world][unit_id] giving a size.
	      target_units: A set of unit ids to be accumulated at this level.

	  Returns:
	      A tuple (category_tree, total, remainder_units): an OrderedDict from
	      rule names to sizes or sub-trees, the sum of the sizes matched by the
	      non-hidden rules, and the set of target units matched by none of them.
	  """
	  world, breakdown, rules = template[0], template[1], template[2]

	  category_tree = OrderedDict()
	  unmatched_units = target_units.copy()
	  grand_total = 0

	  for rule, match in snapshot[world]['breakdown'][breakdown].iteritems():
	    if match.get('hidden'):
	      continue

	    matched_units = set(match['units']).intersection(target_units)
	    subtotal = sum(units_dict[world][unit_id] for unit_id in matched_units)
	    grand_total += subtotal
	    unmatched_units = unmatched_units.difference(matched_units)

	    if rule not in rules:
	      # No sub-template for |rule|: it is a leaf of the breakdown tree.
	      category_tree[rule] = subtotal
	      continue

	    # |rule| has a sub-template, so its category is broken down further.
	    subtemplate = rules[rule]
	    subworld, subbreakdown = subtemplate[0], subtemplate[1]

	    if subworld != world:
	      # Another world has its own unit ids: only total sizes are comparable.
	      subtree, accounted_total, _ = accumulate(
	          subtemplate, snapshot, units_dict, set(units_dict[subworld].keys()))
	      category_tree[rule] = subtree
	      if subtotal >= accounted_total:
	        subtree[None] = subtotal - accounted_total
	      else:
	        excess_message = (
	            'WARNING: Sum of %s:%s is larger than %s by %d bytes.' % (
	                subworld, subbreakdown, rule, accounted_total - subtotal))
	        print >> sys.stderr, excess_message
	        print >> sys.stderr, (
	            'WARNING: Assuming remainder of %s is 0.' % rule)
	        subtree[None] = 0
	      continue

	    # Same world: hand the matched units down and account for each of them.
	    subtree, accounted_total, subremainder_units = accumulate(
	        subtemplate, snapshot, units_dict, matched_units)
	    category_tree[rule] = subtree
	    subremainder_total = 0
	    if subremainder_units:
	      subremainder_total = sum(
	          units_dict[world][unit_id] for unit_id in subremainder_units)
	      subtree[None] = subremainder_total
	    if subtotal != accounted_total + subremainder_total:
	      mismatch_message = (
	          'WARNING: Sum of %s:%s is different from %s by %d bytes.' % (
	              subworld, subbreakdown, rule,
	              subtotal - (accounted_total + subremainder_total)))
	      print >> sys.stderr, mismatch_message

	  return category_tree, grand_total, unmatched_units


	def flatten(category_tree, header=''):
	  """Converts a nested category tree into a list of (path, value) pairs.

	  Nested rule names are joined with '>' below |header|. A falsy rule name,
	  such as the None key used for remainders, is spelled 'remaining'.
	  """
	  prefix = header + '>' if header else ''
	  pairs = []
	  for rule, sub in category_tree.iteritems():
	    path = prefix + (rule or 'remaining')
	    if isinstance(sub, (dict, OrderedDict)):
	      # A sub-tree: its entries are listed below the current path.
	      pairs += flatten(sub, path)
	    else:
	      pairs.append((path, sub))
	  return pairs


	def print_category_tree(category_tree, output, depth=0):
	  """Writes a category tree to |output| as an indented, human-readable list.

	  Args:
	      category_tree: A dict (or OrderedDict) mapping labels to either a
	          number or a nested tree.
	      output: A file-like object with a write() method.
	      depth: Nesting level of |category_tree|; each level indents by one
	          space.
	  """
	  # One space per level, followed by one separating space.
	  prefix = ' ' * depth + ' '
	  for label in category_tree:
	    value = category_tree[label]
	    # A falsy label (the None key holding a remainder) is shown as
	    # 'remaining', the same name flatten() gives it, instead of 'None'.
	    name = label or 'remaining'
	    if isinstance(value, (dict, OrderedDict)):
	      output.write('%s%s:\n' % (prefix, name))
	      print_category_tree(value, output, depth + 1)
	    else:
	      output.write('%s%s: %d\n' % (prefix, name, value))


	def flatten_all_category_trees(category_trees):
	  """Flattens each category tree and gathers every label that occurs.

	  Args:
	      category_trees: An iterable of category trees accepted by flatten().

	  Returns:
	      A tuple (labels, table): the set of all flattened labels, and a list
	      holding one OrderedDict of label -> value per input tree.
	  """
	  all_labels = set()
	  table = []
	  for tree in category_trees:
	    row = OrderedDict()
	    for label, subtotal in flatten(tree):
	      row[label] = subtotal
	      all_labels.add(label)
	    table.append(row)
	  return all_labels, table


	def output_csv(output, category_trees, data, first_time, output_exponent):
	  """Prints the category trees as CSV, one row per snapshot.

	  The header row is 'second' followed by every flattened label in sorted
	  order. Each following row holds the snapshot time relative to |first_time|
	  and the value of each label, or '0' where the snapshot lacks the label.

	  Args:
	      output: A file-like object to print to.
	      category_trees: A list of category trees, indexed in parallel with
	          data['snapshots'].
	      data: A dict whose 'snapshots' list items have a 'time' entry.
	      first_time: The time subtracted from each snapshot time.
	      output_exponent: 'K' or 'M' (case-insensitive) to divide values by 1024
	          or 1024 * 1024; any other value leaves them undivided.
	  """
	  flattened_labels, flattened_table = flatten_all_category_trees(category_trees)

	  # The divisor depends only on |output_exponent|, so it is chosen once here
	  # rather than once per cell.
	  divisor = {'K': 1024.0, 'M': 1024.0 * 1024.0}.get(output_exponent.upper(), 1)

	  sorted_flattened_labels = sorted(flattened_labels)
	  print >> output, ','.join(['second'] + sorted_flattened_labels)
	  for index, row in enumerate(flattened_table):
	    values = [str(data['snapshots'][index]['time'] - first_time)]
	    for label in sorted_flattened_labels:
	      if label in row:
	        values.append(str(row[label] / divisor))
	      else:
	        values.append('0')
	    print >> output, ','.join(values)


	def output_json(output, category_trees, data, first_time, template_label):
	  """Dumps the category trees to |output| as a 'JSON_DEEP_2' document.

	  Each snapshot row is the flattened tree plus two entries: 'second', the
	  snapshot time relative to |first_time|, and 'dump_time', the snapshot time
	  formatted as local 'YYYY-MM-DD HH:MM:SS'. The rows and the sorted list of
	  labels are stored under policies[template_label].
	  """
	  flattened_labels, flattened_table = flatten_all_category_trees(category_trees)
	  time_format = '%Y-%m-%d %H:%M:%S'

	  json_snapshots = []
	  for index, row in enumerate(flattened_table):
	    entry = row.copy()
	    timestamp = data['snapshots'][index]['time']
	    entry['second'] = timestamp - first_time
	    dumped_at = datetime.datetime.fromtimestamp(timestamp)
	    entry['dump_time'] = dumped_at.strftime(time_format)
	    json_snapshots.append(entry)

	  policy = {
	      'legends': sorted(flattened_labels),
	      'snapshots': json_snapshots,
	  }
	  json_root = {
	      'version': 'JSON_DEEP_2',
	      'policies': {template_label: policy},
	  }
	  json.dump(json_root, output, indent=2, sort_keys=True)


	def output_tree(output, category_trees):
	  """Prints every category tree under a '< Snapshot #N >' heading.

	  N counts from 0. Each tree is printed at depth 1 and followed by an empty
	  line.
	  """
	  heading = '< Snapshot #%d >'
	  for number, tree in enumerate(category_trees):
	    print >> output, heading % number
	    print_category_tree(tree, output, 1)
	    print >> output


	def do_main(cat_input, output, template_label, output_format, output_exponent):
	  """Does the main work: accumulate for every snapshot and print a result.

	  Args:
	      cat_input: A file-like object yielding 'dmprof cat' JSON.
	      output: A file-like object to write the result to.
	      template_label: A key in the input's 'templates'; if falsy, the input's
	          'default_template' is used.
	      output_format: 'csv', 'json' or 'tree'.
	      output_exponent: 'B', 'K' or 'M' (case-insensitive); only the CSV
	          output uses it.

	  Returns:
	      1 if the requested template does not exist, otherwise None.

	  Raises:
	      NotImplementedError: |output_format| or |output_exponent| is unknown.
	  """
	  if output_format not in ['csv', 'json', 'tree']:
	    raise NotImplementedError('The output format \"%s\" is not implemented.' %
	                              output_format)

	  if output_exponent.upper() not in ['B', 'K', 'M']:
	    raise NotImplementedError('The exponent \"%s\" is not implemented.' %
	                              output_exponent)

	  data = json.loads(cat_input.read(), object_pairs_hook=OrderedDict)

	  templates = data['templates']
	  if not template_label:
	    template_label = data['default_template']
	  if template_label not in templates:
	    LOGGER.error('A template \'%s\' is not found.', template_label)
	    # Report the failure so that a caller can turn it into an exit status.
	    return 1
	  template = templates[template_label]

	  category_trees = []
	  first_time = None

	  for snapshot in data['snapshots']:
	    # Compare against None: a first snapshot taken at time 0 is a valid
	    # baseline and must not be replaced by a later snapshot.
	    if first_time is None:
	      first_time = snapshot['time']

	    # Map every world to {integer unit id: first element of its sizes}.
	    worlds = snapshot['worlds']
	    units = {}
	    for world_name in worlds:
	      units[world_name] = dict(
	          (int(unit_id), sizes[0])
	          for unit_id, sizes in worlds[world_name]['units'].iteritems())

	    category_tree, _, _ = accumulate(
	        template, worlds, units, set(units[template[0]].keys()))
	    category_trees.append(category_tree)

	  if output_format == 'csv':
	    output_csv(output, category_trees, data, first_time, output_exponent)
	  elif output_format == 'json':
	    output_json(output, category_trees, data, first_time, template_label)
	  elif output_format == 'tree':
	    output_tree(output, category_trees)


	def main():
	  """Parses command-line options and runs do_main on stdin and stdout.

	  Returns:
	      The value returned by do_main; the entry point below passes it to
	      sys.exit().
	  """
	  LOGGER.setLevel(logging.DEBUG)
	  handler = logging.StreamHandler()
	  handler.setLevel(logging.INFO)
	  handler.setFormatter(logging.Formatter('%(message)s'))
	  LOGGER.addHandler(handler)

	  parser = optparse.OptionParser()
	  parser.add_option('-t', '--template', dest='template',
	                    metavar='TEMPLATE',
	                    help='Apply TEMPLATE to list up.')
	  parser.add_option('-f', '--format', dest='format', default='csv',
	                    help='Specify the output format: csv, json or tree.')
	  parser.add_option('-e', '--exponent', dest='exponent', default='M',
	                    help='Specify B (bytes), K (kilobytes) or M (megabytes).')

	  # parse_args() reads sys.argv[1:] by default; passing sys.argv itself would
	  # hand the program name to the parser as a positional argument.
	  options, _ = parser.parse_args()
	  return do_main(sys.stdin, sys.stdout,
	                 options.template, options.format, options.exponent)


	if __name__ == '__main__':
	  sys.exit(main())