| # Copyright 2016 The Chromium OS Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| """Computes the metrics for functions, Chrome OS components and benchmarks.""" |
| |
| from collections import defaultdict |
| |
| |
| def ComputeDistanceForFunction(child_functions_statistics_sample, |
| child_functions_statistics_reference): |
| """Computes the distance metric for a function. |
| |
| Args: |
| child_functions_statistics_sample: A dict that has as a key the name of a |
| function and as a value the inclusive count fraction. The keys are |
| the child functions of a sample parent function. |
| child_functions_statistics_reference: A dict that has as a key the name of |
| a function and as a value the inclusive count fraction. The keys are |
| the child functions of a reference parent function. |
| |
| Returns: |
    A float value representing the sum of the inclusive count fraction
    differences over all pairs of common child functions. If a child
    function is present in only one of the data sets, the missing inclusive
    count fraction is treated as 0. This value describes the difference in
    behaviour between the sample and the reference parent function.
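
  Example (hypothetical fractions; 'bar' is missing from the reference and
  'baz' from the sample, so their full fractions are added on top of the
  initial 1.0):
    >>> sample = {'foo': 0.6, 'bar': 0.1}
    >>> reference = {'foo': 0.4, 'baz': 0.2}
    >>> round(ComputeDistanceForFunction(sample, reference), 2)
    1.5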
| """ |
  # We initialize the distance with 1.0 instead of 0.0 to avoid a division by
  # zero when the score is later computed as a fraction of the distance.
| distance = 1.0 |
| |
| for child_function, inclusive_count_fraction_reference in \ |
| child_functions_statistics_reference.iteritems(): |
| inclusive_count_fraction_sample = 0.0 |
| |
| if child_function in child_functions_statistics_sample: |
| inclusive_count_fraction_sample = \ |
| child_functions_statistics_sample[child_function] |
| distance += \ |
| abs(inclusive_count_fraction_sample - |
| inclusive_count_fraction_reference) |
| |
| for child_function, inclusive_count_fraction_sample in \ |
| child_functions_statistics_sample.iteritems(): |
| if child_function not in child_functions_statistics_reference: |
| distance += inclusive_count_fraction_sample |
| |
| return distance |
| |
| |
| def ComputeScoreForFunction(distance, reference_fraction, sample_fraction): |
| """Computes the score for a function. |
| |
| Args: |
| distance: A float value representing the difference in behaviour between |
| the sample and the reference function. |
| reference_fraction: A float value representing the inclusive count |
| fraction of the reference function. |
| sample_fraction: A float value representing the inclusive count |
| fraction of the sample function. |
| |
| Returns: |
    A float value representing the score of the function, computed as the
    product of the two inclusive count fractions divided by the distance.
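
  Example (hypothetical values; a function accounting for 10% of the
  reference profile and 8% of the sample profile, with a distance of 1.5):
    >>> round(ComputeScoreForFunction(1.5, 0.1, 0.08), 4)
    0.0053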
| """ |
| return reference_fraction * sample_fraction / distance |
| |
| |
| def ComputeMetricsForComponents(cwp_function_groups, function_metrics): |
| """Computes the metrics for a set of Chrome OS components. |
| |
  For every Chrome OS group, we compute the number of functions matching the
  group, the cumulative and average distance, and the cumulative and average
  score of all those functions. A function matches a group if the path of the
  file containing its definition contains the common path describing the
  group.
| |
| Args: |
    cwp_function_groups: A list of tuples containing the name of the group
      and a common path describing the group.
| function_metrics: A dict having as a key the name of the function and the |
| name of the file where it is declared concatenated by a ',', and as a |
| value a tuple containing the distance and the score metrics. |
| |
| Returns: |
    A dict containing as a key the name of the group and as a value a tuple
    with the group file path, the number of functions matching the group,
    the cumulative and average distance, and the cumulative and average
    score of all those functions.
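
  Example (hypothetical group and function metrics; the function key has the
  'name,file' format):
    >>> groups = [('kernel', '/kernel/')]
    >>> metrics = {'foo,/kernel/sched.c': (1.2, 0.3)}
    >>> ComputeMetricsForComponents(groups, metrics)['kernel']
    ('/kernel/', 1, 1.2, 1.2, 0.3, 0.3)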
| """ |
  # The tuple layout is (path, count, distance_cum, distance_avg, score_cum,
  # score_avg); the default supplies the initial count and sums.
  function_groups_metrics = defaultdict(lambda: (None, 0, 0.0, 0.0, 0.0, 0.0))
| |
| for function_key, metric in function_metrics.iteritems(): |
| _, function_file = function_key.split(',') |
| |
| for group, common_path in cwp_function_groups: |
| if common_path not in function_file: |
| continue |
| |
      function_distance, function_score = metric
| group_statistic = function_groups_metrics[group] |
| |
| function_count = group_statistic[1] + 1 |
| function_distance_cum = function_distance + group_statistic[2] |
| function_distance_avg = function_distance_cum / float(function_count) |
| function_score_cum = function_score + group_statistic[4] |
| function_score_avg = function_score_cum / float(function_count) |
| |
| function_groups_metrics[group] = \ |
| (common_path, |
| function_count, |
| function_distance_cum, |
| function_distance_avg, |
| function_score_cum, |
| function_score_avg) |
| break |
| |
| return function_groups_metrics |
| |
| |
| def ComputeMetricsForBenchmark(function_metrics): |
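  """Computes the aggregated metrics for a benchmark.

  Args:
    function_metrics: A dict having as a key the function key and as a value
      a tuple with the distance and the score metrics of the function.

  Returns:
    A tuple with the number of functions, the cumulative and average
    distance, and the cumulative and average score of all the functions.

  Example (hypothetical function keys and metric values):
    >>> metrics = {'foo,a.c': (1.5, 0.2), 'bar,b.c': (2.5, 0.3)}
    >>> ComputeMetricsForBenchmark(metrics)
    (2, 4.0, 2.0, 0.5, 0.25)
  """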
  function_count = len(function_metrics)
  distance_cum = 0.0
  score_cum = 0.0
| |
| for distance, score in function_metrics.values(): |
| distance_cum += distance |
| score_cum += score |
| |
| distance_avg = distance_cum / float(function_count) |
| score_avg = score_cum / float(function_count) |
| return function_count, distance_cum, distance_avg, score_cum, score_avg |
| |
| |
| def ComputeFunctionCountForBenchmarkSet(set_function_metrics, cwp_functions, |
| metric_string): |
| """Computes the function count metric pair for the benchmark set. |
| |
  For the function count metric, we count the unique functions covered by the
  set of benchmarks and compute the fraction they represent out of the total
  number of CWP functions given.
| |
  We also compute the same metric pair for every group from the keys of the
  set_function_metrics dict.
| |
| Args: |
| set_function_metrics: A list of dicts having as a key the name of a group |
| and as value a list of functions that match the given group. |
    cwp_functions: A dict having as a key the name of a group and as a value
      the list of CWP functions that match the group.
| metric_string: A tuple of strings that will be mapped to the tuple of metric |
| values in the returned function group dict. This is done for convenience |
| for the JSON output. |
| |
| Returns: |
| A tuple with the metric pair and a dict with the group names and values |
| of the metric pair. The first value of the metric pair represents the |
| function count and the second value the function count fraction. |
| The dict has as a key the name of the group and as a value a dict that |
| maps the metric_string to the values of the metric pair of the group. |
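
  Example (hypothetical groups and function names; 2 of the 4 CWP functions
  are covered):
    >>> set_metrics = [{'kernel': ['foo', 'bar']}, {'kernel': ['bar']}]
    >>> cwp = {'kernel': ['foo', 'bar', 'baz', 'qux']}
    >>> totals, groups = ComputeFunctionCountForBenchmarkSet(
    ...     set_metrics, cwp, ('count', 'fraction'))
    >>> totals
    (2, 0.5)
    >>> groups['kernel'] == {'count': 2, 'fraction': 0.5}
    True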
| """ |
| cwp_functions_count = sum(len(functions) |
| for functions in cwp_functions.itervalues()) |
| set_groups_functions = defaultdict(set) |
| for benchmark_function_metrics in set_function_metrics: |
| for group_name in benchmark_function_metrics: |
| set_groups_functions[group_name] |= \ |
| set(benchmark_function_metrics[group_name]) |
| |
| set_groups_functions_count = {} |
| set_functions_count = 0 |
  for group_name, functions in set_groups_functions.iteritems():
| set_group_functions_count = len(functions) |
| if group_name in cwp_functions: |
| set_groups_functions_count[group_name] = { |
| metric_string[0]: set_group_functions_count, |
| metric_string[1]: |
| set_group_functions_count / float(len(cwp_functions[group_name]))} |
| else: |
| set_groups_functions_count[group_name] = \ |
| {metric_string[0]: set_group_functions_count, metric_string[1]: 0.0} |
| set_functions_count += set_group_functions_count |
| |
| set_functions_count_fraction = \ |
| set_functions_count / float(cwp_functions_count) |
| return (set_functions_count, set_functions_count_fraction), \ |
| set_groups_functions_count |
| |
| |
| def ComputeDistanceForBenchmarkSet(set_function_metrics, cwp_functions, |
| metric_string): |
| """Computes the distance variation metric pair for the benchmark set. |
| |
| For the distance variation metric, we compute the sum of the distance |
| variations of the functions covered by a set of benchmarks. |
  We define the distance variation as the difference between the distance
  value of a function and the ideal distance value (1.0). If a function
  appears in multiple common functions files, we consider only the minimum
  value. We also compute the distance variation per function.
| |
  In addition, we compute the same metric pair for every group from the keys
  of the set_function_metrics dict.
| |
| Args: |
    set_function_metrics: A list of dicts having as a key the name of a group
      and as a value a dict that maps a function key to its distance and
      score metrics.
    cwp_functions: A dict having as a key the name of a group and as a value
      the list of CWP functions that match the group.
| metric_string: A tuple of strings that will be mapped to the tuple of metric |
| values in the returned function group dict. This is done for convenience |
| for the JSON output. |
| |
| Returns: |
    A tuple with the metric pair and a dict with the group names and values
    of the metric pair. The first value of the metric pair represents the
    average distance variation per function and the second value the total
    distance variation. The dict has as a key the name of the group and as a
    value a dict that maps the metric_string to the values of the metric pair
    of the group.
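
  Example (hypothetical metrics; the minimum distance of 1.2 is kept for
  'foo', giving a variation of 1.2 - 1.0 = 0.2):
    >>> set_metrics = [{'kernel': {'foo': (1.4, 0.5)}},
    ...                {'kernel': {'foo': (1.2, 0.6)}}]
    >>> cwp = {'kernel': ['foo', 'bar']}
    >>> totals, groups = ComputeDistanceForBenchmarkSet(
    ...     set_metrics, cwp, ('avg', 'total'))
    >>> [round(value, 2) for value in totals]
    [0.2, 0.2]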
| """ |
| set_unique_functions = defaultdict(lambda: defaultdict(lambda: float('inf'))) |
| set_function_count = 0 |
| total_distance_variation = 0.0 |
| for benchmark_function_metrics in set_function_metrics: |
| for group_name in benchmark_function_metrics: |
| for function_key, metrics in \ |
| benchmark_function_metrics[group_name].iteritems(): |
| previous_distance = \ |
| set_unique_functions[group_name][function_key] |
| min_distance = min(metrics[0], previous_distance) |
| set_unique_functions[group_name][function_key] = min_distance |
| groups_distance_variations = defaultdict(lambda: (0.0, 0.0)) |
| for group_name, functions_distances in set_unique_functions.iteritems(): |
| group_function_count = len(functions_distances) |
| group_distance_variation = \ |
| sum(functions_distances.itervalues()) - float(group_function_count) |
| total_distance_variation += group_distance_variation |
| set_function_count += group_function_count |
| groups_distance_variations[group_name] = \ |
| {metric_string[0]: |
| group_distance_variation / float(group_function_count), |
| metric_string[1]: group_distance_variation} |
| |
| return (total_distance_variation / set_function_count, |
| total_distance_variation), groups_distance_variations |
| |
| |
| def ComputeScoreForBenchmarkSet(set_function_metrics, cwp_functions, |
| metric_string): |
| """Computes the function count metric pair for the benchmark set. |
| |
  For the score metric, we compute the sum of the scores of the functions
  from a set of benchmarks. If a function appears in multiple common
  functions files, we consider only the maximum value. We also compute the
  fraction of this sum out of the ideal sum, i.e. the sum of the scores if
  every function from the CWP data covering the given groups had the ideal
  score of 1.0.
| |
| In addition, we compute the same metric pair for every group from the |
| keys of the set_function_metrics dict. |
| |
| Args: |
    set_function_metrics: A list of dicts having as a key the name of a group
      and as a value a dict that maps a function key to its distance and
      score metrics.
    cwp_functions: A dict having as a key the name of a group and as a value
      the list of CWP functions that match the group.
| metric_string: A tuple of strings that will be mapped to the tuple of metric |
| values in the returned function group dict. This is done for convenience |
| for the JSON output. |
| |
| Returns: |
    A tuple with the metric pair and a dict with the group names and values
    of the metric pair. The first value of the pair is the fraction of the
    sum of the scores out of the ideal sum and the second value is the sum
    of the scores of the functions. The dict has as a key the name of the
    group and as a value a dict that maps the metric_string to the values of
    the metric pair of the group.
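
  Example (hypothetical metrics; the maximum score of 0.6 is kept for 'foo'
  and the ideal sum for the two CWP functions is 2.0):
    >>> set_metrics = [{'kernel': {'foo': (1.4, 0.5)}},
    ...                {'kernel': {'foo': (1.2, 0.6)}}]
    >>> cwp = {'kernel': ['foo', 'bar']}
    >>> totals, groups = ComputeScoreForBenchmarkSet(
    ...     set_metrics, cwp, ('fraction', 'total'))
    >>> [round(value, 2) for value in totals]
    [0.3, 0.6]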
| """ |
| cwp_functions_count = sum(len(functions) |
| for functions in cwp_functions.itervalues()) |
| set_unique_functions = defaultdict(lambda: defaultdict(lambda: 0.0)) |
| total_score = 0.0 |
| |
| for benchmark_function_metrics in set_function_metrics: |
| for group_name in benchmark_function_metrics: |
| for function_key, metrics in \ |
| benchmark_function_metrics[group_name].iteritems(): |
| previous_score = \ |
| set_unique_functions[group_name][function_key] |
| max_score = max(metrics[1], previous_score) |
| set_unique_functions[group_name][function_key] = max_score |
| |
| groups_scores = defaultdict(lambda: (0.0, 0.0)) |
| |
| for group_name, function_scores in set_unique_functions.iteritems(): |
| group_function_count = float(len(cwp_functions[group_name])) |
| group_score = sum(function_scores.itervalues()) |
| total_score += group_score |
| groups_scores[group_name] = { |
| metric_string[0]: group_score / group_function_count, |
| metric_string[1]: group_score |
| } |
| |
| return (total_score / cwp_functions_count, total_score), groups_scores |