| #!/usr/bin/env python |
| # Copyright 2016 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| """Tests for results_stats.""" |
| |
| import os |
| import sys |
| |
| import unittest |
| |
| try: |
| import numpy as np |
| except ImportError: |
| np = None |
| |
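| # Make the parent directory importable so that results_stats can be found
| # inside the statistical_analysis directory when this test is run directly.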
| sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), |
| '..'))) |
| from statistical_analysis import results_stats |
| |
| |
| class StatisticalBenchmarkResultsAnalysisTest(unittest.TestCase): |
| """Unit testing of several functions in results_stats.""" |
| |
| def testGetChartsFromBenchmarkResultJson(self): |
| """Unit test for errors raised when getting the charts element. |
| |
| Also makes sure that the 'trace' element is deleted if it exists. |
| """ |
| input_json_wrong_format = {'charts_wrong': {}} |
| input_json_empty = {'charts': {}} |
| with self.assertRaises(ValueError):
| results_stats.GetChartsFromBenchmarkResultJson(input_json_wrong_format)
| with self.assertRaises(ValueError):
| results_stats.GetChartsFromBenchmarkResultJson(input_json_empty)
| |
| input_json_with_trace = {'charts': |
| {'trace': {}, |
| 'Ex_metric_1': |
| {'Ex_page_1': {'type': 'list_of_scalar_values', |
| 'values': [1, 2]}, |
| 'Ex_page_2': {'type': 'histogram', |
| 'values': [1, 2]}}, |
| 'Ex_metric_2': |
| {'Ex_page_1': {'type': 'list_of_scalar_values'}, |
| 'Ex_page_2': {'type': 'list_of_scalar_values', |
| 'values': [1, 2]}}}} |
| |
| output = (results_stats. |
| GetChartsFromBenchmarkResultJson(input_json_with_trace)) |
| expected_output = {'Ex_metric_1': |
| {'Ex_page_1': {'type': 'list_of_scalar_values', |
| 'values': [1, 2]}}, |
| 'Ex_metric_2': |
| {'Ex_page_2': {'type': 'list_of_scalar_values', |
| 'values': [1, 2]}}} |
| self.assertEqual(output, expected_output) |
| |
| def testCreateBenchmarkResultDict(self): |
| """Unit test for benchmark result dict created from a benchmark json. |
| |
| Creates a json of the format created by tools/perf/run_benchmark and then |
| compares the output dict against an expected predefined output dict. |
| """ |
| metric_names = ['messageloop_start_time', |
| 'open_tabs_time', |
| 'window_display_time'] |
| metric_values = [[55, 72, 60], [54, 42, 65], [44, 89]] |
| |
| input_json = {'charts': {}} |
| for metric, metric_vals in zip(metric_names, metric_values): |
| input_json['charts'][metric] = {'summary': |
| {'values': metric_vals, |
| 'type': 'list_of_scalar_values'}} |
| |
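| # The loop above reproduces the chart layout emitted by run_benchmark,
| # e.g. {'charts': {'messageloop_start_time': {'summary': {'values':
| # [55, 72, 60], 'type': 'list_of_scalar_values'}}, ...}}.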
| output = results_stats.CreateBenchmarkResultDict(input_json) |
| expected_output = {'messageloop_start_time': [55, 72, 60], |
| 'open_tabs_time': [54, 42, 65], |
| 'window_display_time': [44, 89]} |
| |
| self.assertEqual(output, expected_output) |
| |
| def testCreatePagesetBenchmarkResultDict(self): |
| """Unit test for pageset benchmark result dict created from benchmark json. |
| |
| Creates a json of the format created by tools/perf/run_benchmark when it |
| includes a pageset and then compares the output dict against an expected |
| predefined output dict. |
| """ |
| metric_names = ['messageloop_start_time', |
| 'open_tabs_time', |
| 'window_display_time'] |
| metric_values = [[55, 72, 60], [54, 42, 65], [44, 89]] |
| page_names = ['Ex_page_1', 'Ex_page_2'] |
| |
| input_json = {'charts': {}} |
| for metric, metric_vals in zip(metric_names, metric_values): |
| input_json['charts'][metric] = {'summary': |
| {'values': [0, 1, 2, 3], |
| 'type': 'list_of_scalar_values'}} |
| for page in page_names: |
| input_json['charts'][metric][page] = {'values': metric_vals, |
| 'type': 'list_of_scalar_values'} |
| |
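| # Both pages share their metric's values list, so every page in the
| # expected output below carries the same sample.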
| output = results_stats.CreatePagesetBenchmarkResultDict(input_json) |
| expected_output = {'messageloop_start_time': {'Ex_page_1': [55, 72, 60], |
| 'Ex_page_2': [55, 72, 60]}, |
| 'open_tabs_time': {'Ex_page_1': [54, 42, 65], |
| 'Ex_page_2': [54, 42, 65]}, |
| 'window_display_time': {'Ex_page_1': [44, 89], |
| 'Ex_page_2': [44, 89]}} |
| |
| self.assertEqual(output, expected_output) |
| |
| def testCombinePValues(self): |
| """Unit test for Fisher's Method that combines multiple p-values.""" |
| test_p_values = [0.05, 0.04, 0.10, 0.07, 0.01] |
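| # CombinePValues implements Fisher's method: for k independent p-values,
| # X = -2 * sum(ln(p_i)) follows a chi-squared distribution with 2 * k
| # degrees of freedom under the null hypothesis. A minimal reference
| # sketch, assuming scipy is available:
| #   import math
| #   from scipy import stats
| #   chi_squared = -2 * sum(math.log(p) for p in test_p_values)
| #   combined_p = stats.chi2.sf(chi_squared, 2 * len(test_p_values))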
| |
| expected_output = 0.00047334256271885721 |
| output = results_stats.CombinePValues(test_p_values) |
| |
| self.assertEqual(output, expected_output) |
| |
| def CreateRandomNormalDistribution(self, mean=0, size=30):
| """Creates a pseudo-random normally distributed sample for the tests."""
| if not np:
| raise ImportError('This function requires NumPy.')
| |
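| # Seeding the generator with a constant makes every call return the same
| # deterministic sample, which keeps the pre-calculated expected values in
| # the tests below valid.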
| np.random.seed(0) |
| sample = np.random.normal(loc=mean, scale=1, size=size) |
| |
| return sample |
| |
| def testIsNormallyDistributed(self): |
| """Unit test for values returned when testing for normality.""" |
| if not np: |
| self.skipTest("Numpy is not installed.") |
| |
| test_samples = [self.CreateRandomNormalDistribution(0), |
| self.CreateRandomNormalDistribution(1)] |
| |
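| # Expected (answer, p-value) tuples, pre-calculated for the seeded samples.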
| expected_outputs = [(True, 0.5253966450691223), |
| (True, 0.5253913402557373)] |
| for sample, expected_output in zip(test_samples, expected_outputs): |
| output = results_stats.IsNormallyDistributed(sample) |
| |
| self.assertEqual(output, expected_output) |
| |
| def testAreSamplesDifferent(self): |
| """Unit test for values returned after running the statistical tests. |
| |
| Creates two pseudo-random normally distributed samples to run the |
| statistical tests and compares the resulting answer and p-value against |
| their pre-calculated values. |
| """ |
| test_samples = [3 * [0, 0, 2, 4, 4], 3 * [5, 5, 7, 9, 9]] |
| with self.assertRaises(results_stats.SampleSizeError): |
| results_stats.AreSamplesDifferent(test_samples[0], test_samples[1], |
| test=results_stats.MANN) |
| with self.assertRaises(results_stats.NonNormalSampleError): |
| results_stats.AreSamplesDifferent(test_samples[0], test_samples[1], |
| test=results_stats.WELCH) |
| |
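| # Identical samples must never be reported as different; the expected
| # p-value for them is 1.0.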
| test_samples_equal = (20 * [1], 20 * [1]) |
| expected_output_equal = (False, 1.0) |
| output_equal = results_stats.AreSamplesDifferent(test_samples_equal[0], |
| test_samples_equal[1], |
| test=results_stats.MANN) |
| self.assertEqual(output_equal, expected_output_equal) |
| |
| if not np: |
| self.skipTest("Numpy is not installed.") |
| |
| test_samples = [self.CreateRandomNormalDistribution(0), |
| self.CreateRandomNormalDistribution(1)] |
| test_options = results_stats.ALL_TEST_OPTIONS |
| |
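| # Expected (answer, p-value) pairs in ALL_TEST_OPTIONS order:
| # Mann-Whitney U, then Kolmogorov-Smirnov, then Welch's t-test.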
| expected_outputs = [(True, 2 * 0.00068516628052438266), |
| (True, 0.0017459498829507842), |
| (True, 0.00084765230478226514)] |
| |
| for test, expected_output in zip(test_options, expected_outputs): |
| output = results_stats.AreSamplesDifferent(test_samples[0], |
| test_samples[1], |
| test=test) |
| self.assertEqual(output, expected_output) |
| |
| def testAssertThatKeysMatch(self): |
| """Unit test for exception raised when input dicts' metrics don't match.""" |
| differing_input_dicts = [{'messageloop_start_time': [55, 72, 60], |
| 'display_time': [44, 89]}, |
| {'messageloop_start_time': [55, 72, 60]}] |
| with self.assertRaises(results_stats.DictMismatchError): |
| results_stats.AssertThatKeysMatch(differing_input_dicts[0], |
| differing_input_dicts[1]) |
| |
| def testAreBenchmarkResultsDifferent(self):
| """Unit test for statistical test outcome dict."""
| if not np:
| self.skipTest('NumPy is not installed.')
| test_input_dicts = [{'open_tabs_time': |
| self.CreateRandomNormalDistribution(0), |
| 'display_time': |
| self.CreateRandomNormalDistribution(0)}, |
| {'open_tabs_time': |
| self.CreateRandomNormalDistribution(0), |
| 'display_time': |
| self.CreateRandomNormalDistribution(1)}] |
| test_options = results_stats.ALL_TEST_OPTIONS |
| |
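| # One (answer, p-value) tuple per metric, again in ALL_TEST_OPTIONS order.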
| expected_outputs = [{'open_tabs_time': (False, 2 * 0.49704973080841425), |
| 'display_time': (True, 2 * 0.00068516628052438266)}, |
| {'open_tabs_time': (False, 1.0), |
| 'display_time': (True, 0.0017459498829507842)}, |
| {'open_tabs_time': (False, 1.0), |
| 'display_time': (True, 0.00084765230478226514)}] |
| |
| for test, expected_output in zip(test_options, expected_outputs): |
| output = results_stats.AreBenchmarkResultsDifferent(test_input_dicts[0], |
| test_input_dicts[1], |
| test=test) |
| self.assertEqual(output, expected_output) |
| |
| def testArePagesetBenchmarkResultsDifferent(self):
| """Unit test for the per-page statistical test outcome dict."""
| if not np:
| self.skipTest('NumPy is not installed.')
| distributions = (self.CreateRandomNormalDistribution(0), |
| self.CreateRandomNormalDistribution(1)) |
| test_input_dicts = ({'open_tabs_time': {'Ex_page_1': distributions[0], |
| 'Ex_page_2': distributions[0]}, |
| 'display_time': {'Ex_page_1': distributions[1], |
| 'Ex_page_2': distributions[1]}}, |
| {'open_tabs_time': {'Ex_page_1': distributions[0], |
| 'Ex_page_2': distributions[1]}, |
| 'display_time': {'Ex_page_1': distributions[1], |
| 'Ex_page_2': distributions[0]}}) |
| test_options = results_stats.ALL_TEST_OPTIONS |
| |
| expected_outputs = ({'open_tabs_time': # Mann. |
| {'Ex_page_1': (False, 2 * 0.49704973080841425), |
| 'Ex_page_2': (True, 2 * 0.00068516628052438266)}, |
| 'display_time': |
| {'Ex_page_1': (False, 2 * 0.49704973080841425), |
| 'Ex_page_2': (True, 2 * 0.00068516628052438266)}}, |
| {'open_tabs_time': # Kolmogorov. |
| {'Ex_page_1': (False, 1.0), |
| 'Ex_page_2': (True, 0.0017459498829507842)}, |
| 'display_time': |
| {'Ex_page_1': (False, 1.0), |
| 'Ex_page_2': (True, 0.0017459498829507842)}}, |
| {'open_tabs_time': # Welch. |
| {'Ex_page_1': (False, 1.0), |
| 'Ex_page_2': (True, 0.00084765230478226514)}, |
| 'display_time': |
| {'Ex_page_1': (False, 1.0), |
| 'Ex_page_2': (True, 0.00084765230478226514)}}) |
| |
| for test, expected_output in zip(test_options, expected_outputs): |
| output = (results_stats. |
| ArePagesetBenchmarkResultsDifferent(test_input_dicts[0], |
| test_input_dicts[1], |
| test=test)) |
| self.assertEqual(output, expected_output) |
| |
| |
| if __name__ == '__main__': |
| sys.exit(unittest.main()) |