catapult/dashboard/dashboard/ttest_test.py - platform/external/chromium-trace - Git at Google

 # Copyright 2015 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 import unittest

 import mock

 from dashboard import ttest


 class TTestTest(unittest.TestCase):
   """Tests for the t-test functions.

   setUp swaps ttest._TABLE and ttest._TWO_TAIL for small fixed values, so
   the p-values checked below depend only on the numbers in this file.
   """

   def setUp(self):
     """Sets the t-table values for the tests below."""
     # NOTE(review): judging by the _LookupPValue expectations in this file,
     # each row is (degrees of freedom, t-values) and position i of a row
     # lines up with position i of the p-values patched into _TWO_TAIL.
     fake_table = [
         (1, [0, 6.314, 12.71, 31.82, 63.66, 318.31]),
         (2, [0, 2.920, 4.303, 6.965, 9.925, 22.327]),
         (3, [0, 2.353, 3.182, 4.541, 5.841, 10.215]),
         (4, [0, 2.132, 2.776, 3.747, 4.604, 7.173]),
         (10, [0, 1.372, 1.812, 2.228, 2.764, 3.169]),
         (100, [0, 1.290, 1.660, 1.984, 2.364, 2.626]),
     ]
     fake_two_tail = [1, 0.2, 0.1, 0.05, 0.02, 0.01]
     patches = (
         ('_TABLE', fake_table),
         ('_TWO_TAIL', fake_two_tail),
     )
     for attr_name, fake_value in patches:
       patcher = mock.patch.object(ttest, attr_name, fake_value)
       patcher.start()
       # Registered as a cleanup so the patch is undone after every test.
       self.addCleanup(patcher.stop)

   def testWelchsTTest(self):
     """Tests the t value and degrees of freedom output of Welch's t-test."""
     # The t-value can be checked with scipy.stats.ttest_ind(equal_var=False),
     # which outputs -6.32455532034 for these samples. This implementation
     # produces slightly different results, hence the wide delta on t.
     result = ttest.WelchsTTest([2, 3, 2, 3, 2, 3], [4, 5, 4, 5, 4, 5])
     self.assertAlmostEqual(10.0, result.df)
     self.assertAlmostEqual(-6.325, result.t, delta=1.0)

   def testWelchsTTest_EmptySample_RaisesError(self):
     """An error should be raised when an empty sample is passed in."""
     # Either sample, or both, being empty must be rejected.
     empty_cases = (
         ([], []),
         ([], [1, 2, 3]),
         ([1, 2, 3], []),
     )
     for sample1, sample2 in empty_cases:
       with self.assertRaises(RuntimeError):
         ttest.WelchsTTest(sample1, sample2)

   def testTTest_EqualSamples_PValueIsOne(self):
     """Checks that t = 0 and p = 1 when the samples are the same."""
     result = ttest.WelchsTTest([1, 2, 3], [1, 2, 3])
     self.assertEqual(0, result.t)
     self.assertEqual(1, result.p)

   def testTTest_VeryDifferentSamples_PValueIsLow(self):
     """Checks that p is very low when the samples are clearly different."""
     sample_high = [100, 101, 100, 101, 100]
     sample_low = [1, 2, 1, 2, 1, 2, 1, 2]
     result = ttest.WelchsTTest(sample_high, sample_low)
     # The first sample has the larger mean, so t is large and positive.
     self.assertGreaterEqual(result.t, 250)
     # assertLessEqual(first, second) checks first <= second, so the p-value
     # must come first. With the bound first, every p-value in the patched
     # table (all >= 0.01) would pass and the check would prove nothing.
     self.assertLessEqual(result.p, 0.01)

   def testTTest_DifferentVariance(self):
     """Verifies that higher variance -> higher p value."""
     # Both pairs of samples have the same means; only the spread differs.
     result_low_var = ttest.WelchsTTest([2, 3, 2, 3], [4, 5, 4, 5])
     result_high_var = ttest.WelchsTTest([1, 4, 1, 4], [3, 6, 3, 6])
     self.assertLess(result_low_var.p, result_high_var.p)

   def testTTest_DifferentSampleSize(self):
     """Verifies that smaller sample size -> higher p value."""
     result_larger_sample = ttest.WelchsTTest([2, 3, 2, 3], [4, 5, 4, 5])
     result_smaller_sample = ttest.WelchsTTest([2, 3, 2, 3], [4, 5])
     self.assertLess(result_larger_sample.p, result_smaller_sample.p)

   def testTTest_DifferentMeanDifference(self):
     """Verifies that smaller difference between means -> higher p value."""
     result_far_means = ttest.WelchsTTest([2, 3, 2, 3], [5, 6, 5, 6])
     result_near_means = ttest.WelchsTTest([2, 3, 2, 3], [3, 4, 3, 4])
     self.assertLess(result_far_means.p, result_near_means.p)

   def testTValue(self):
     """Tests calculation of the t-value using Welch's formula."""
     # Results can be verified by directly plugging variables into Welch's
     # equation (e.g. using a calculator or the Python interpreter).
     stats1 = ttest.SampleStats(mean=0.299, var=0.05, size=150)
     stats2 = ttest.SampleStats(mean=0.307, var=0.08, size=165)
     # Note that a negative t-value is obtained when the first sample has a
     # smaller mean than the second, otherwise a positive value is returned.
     self.assertAlmostEqual(-0.27968236, ttest._TValue(stats1, stats2))
     self.assertAlmostEqual(0.27968236, ttest._TValue(stats2, stats1))

   def testTValue_ConstantSamples_ResultIsInfinity(self):
     """If there is no variation, infinity is used as the t-statistic value."""
     stats = ttest.SampleStats(mean=1.0, var=0, size=10)
     self.assertEqual(float('inf'), ttest._TValue(stats, stats))

   def testDegreesOfFreedom(self):
     """Tests calculation of estimated degrees of freedom."""
     # The formula used to estimate degrees of freedom for independent-samples
     # t-test is called the Welch-Satterthwaite equation. Note that since the
     # Welch-Satterthwaite equation gives an estimate of degrees of freedom,
     # the result is a floating-point number and not an integer.
     stats1 = ttest.SampleStats(mean=0.299, var=0.05, size=150)
     stats2 = ttest.SampleStats(mean=0.307, var=0.08, size=165)
     self.assertAlmostEqual(
         307.19879975, ttest._DegreesOfFreedom(stats1, stats2))

   def testDegreesOfFreedom_ZeroVariance_ResultIsOne(self):
     """The lowest possible value is returned for df if variance is zero."""
     stats = ttest.SampleStats(mean=1.0, var=0, size=10)
     self.assertEqual(1.0, ttest._DegreesOfFreedom(stats, stats))

   def testDegreesOfFreedom_SmallSample_RaisesError(self):
     """Degrees of freedom can't be calculated if sample size is too small."""
     size_0 = ttest.SampleStats(mean=0, var=0, size=0)
     size_1 = ttest.SampleStats(mean=1.0, var=0, size=1)
     size_5 = ttest.SampleStats(mean=2.0, var=0.5, size=5)

     # An error is raised if the size of one of the samples is too small,
     # regardless of which argument position the small sample is in.
     too_small_pairs = (
         (size_0, size_5),
         (size_1, size_5),
         (size_5, size_0),
         (size_5, size_1),
     )
     for stats1, stats2 in too_small_pairs:
       with self.assertRaises(RuntimeError):
         ttest._DegreesOfFreedom(stats1, stats2)

     # If both of the samples have a variance of 0, no error is raised.
     self.assertEqual(1.0, ttest._DegreesOfFreedom(size_1, size_1))


 class LookupPValueTest(unittest.TestCase):
   """Tests for ttest._LookupPValue against a small, fixed t-table."""

   def setUp(self):
     """Sets the t-table values for the tests below."""
     fake_table = [
         (1, [0, 6.314, 12.71, 31.82, 63.66, 318.31]),
         (2, [0, 2.920, 4.303, 6.965, 9.925, 22.327]),
         (3, [0, 2.353, 3.182, 4.541, 5.841, 10.215]),
         (4, [0, 2.132, 2.776, 3.747, 4.604, 7.173]),
         (10, [0, 1.372, 1.812, 2.228, 2.764, 3.169]),
         (100, [0, 1.290, 1.660, 1.984, 2.364, 2.626]),
     ]
     fake_two_tail = [1, 0.2, 0.1, 0.05, 0.02, 0.01]
     patches = (
         ('_TABLE', fake_table),
         ('_TWO_TAIL', fake_two_tail),
     )
     for attr_name, fake_value in patches:
       patcher = mock.patch.object(ttest, attr_name, fake_value)
       patcher.start()
       # Registered as a cleanup so the patch is undone after every test.
       self.addCleanup(patcher.stop)

   def testLookupPValue_ExactMatchInTable(self):
     """Tests looking up an entry that is in the table."""
     # The positive and negative t-value are expected to give the same p.
     for t_value in (3.182, -3.182):
       self.assertEqual(0.1, ttest._LookupPValue(t_value, 3.0))

   def testLookupPValue_TValueBetweenTwoValues_SmallerColumnIsUsed(self):
     # 3.1 lies between 2.920 and 4.303 in the df=2 row, so the lower of the
     # two, 2.920 (the second column), decides the p-value.
     for t_value in (3.1, -3.1):
       self.assertEqual(0.2, ttest._LookupPValue(t_value, 2.0))

   def testLookup_DFBetweenTwoValues_SmallerRowIsUsed(self):
     # df=45 lies between the rows for 10 and 100; 2.228 is in the df=10 row.
     for t_value in (2.228, -2.228):
       self.assertEqual(0.05, ttest._LookupPValue(t_value, 45.0))

   def testLookup_DFAndTValueBetweenTwoValues_SmallerRowAndColumnIsUsed(self):
     # Neither df=45 nor t=2.0 appears in the table.
     for t_value in (2.0, -2.0):
       self.assertEqual(0.1, ttest._LookupPValue(t_value, 45.0))

   def testLookupPValue_LargeTValue_LastColumnIsUsed(self):
     # The smallest possible p-value will be used when t is large.
     for t_value in (500.0, -500.0):
       self.assertEqual(0.01, ttest._LookupPValue(t_value, 1.0))

   def testLookupPValue_ZeroTValue_FirstColumnIsUsed(self):
     # The largest possible p-value will be used when t is zero.
     for df in (1.0, 2.0):
       self.assertEqual(1.0, ttest._LookupPValue(0.0, df))

   def testLookupPValue_SmallTValue_FirstColumnIsUsed(self):
     # The largest possible p-value will be used when t is almost zero.
     for t_value in (0.1, -0.1):
       self.assertEqual(1.0, ttest._LookupPValue(t_value, 2.0))

   def testLookupPValue_LargeDegreesOfFreedom_LastRowIsUsed(self):
     # The last row of the table should be used.
     p_value = ttest._LookupPValue(2.365, 100.0)
     self.assertEqual(0.02, p_value)


 # Run every test in this module when the file is executed directly.
 if __name__ == '__main__':
   unittest.main()
	# Copyright 2015 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	import unittest

	import mock

	from dashboard import ttest


	class TTestTest(unittest.TestCase):
	  """Tests for the t-test functions.

	  setUp swaps ttest._TABLE and ttest._TWO_TAIL for small fixed values, so
	  the p-values checked below depend only on the numbers in this file.
	  """

	  def setUp(self):
	    """Sets the t-table values for the tests below."""
	    # NOTE(review): judging by the _LookupPValue expectations in this file,
	    # each row is (degrees of freedom, t-values) and position i of a row
	    # lines up with position i of the p-values patched into _TWO_TAIL.
	    fake_table = [
	        (1, [0, 6.314, 12.71, 31.82, 63.66, 318.31]),
	        (2, [0, 2.920, 4.303, 6.965, 9.925, 22.327]),
	        (3, [0, 2.353, 3.182, 4.541, 5.841, 10.215]),
	        (4, [0, 2.132, 2.776, 3.747, 4.604, 7.173]),
	        (10, [0, 1.372, 1.812, 2.228, 2.764, 3.169]),
	        (100, [0, 1.290, 1.660, 1.984, 2.364, 2.626]),
	    ]
	    fake_two_tail = [1, 0.2, 0.1, 0.05, 0.02, 0.01]
	    patches = (
	        ('_TABLE', fake_table),
	        ('_TWO_TAIL', fake_two_tail),
	    )
	    for attr_name, fake_value in patches:
	      patcher = mock.patch.object(ttest, attr_name, fake_value)
	      patcher.start()
	      # Registered as a cleanup so the patch is undone after every test.
	      self.addCleanup(patcher.stop)

	  def testWelchsTTest(self):
	    """Tests the t value and degrees of freedom output of Welch's t-test."""
	    # The t-value can be checked with scipy.stats.ttest_ind(equal_var=False),
	    # which outputs -6.32455532034 for these samples. This implementation
	    # produces slightly different results, hence the wide delta on t.
	    result = ttest.WelchsTTest([2, 3, 2, 3, 2, 3], [4, 5, 4, 5, 4, 5])
	    self.assertAlmostEqual(10.0, result.df)
	    self.assertAlmostEqual(-6.325, result.t, delta=1.0)

	  def testWelchsTTest_EmptySample_RaisesError(self):
	    """An error should be raised when an empty sample is passed in."""
	    # Either sample, or both, being empty must be rejected.
	    empty_cases = (
	        ([], []),
	        ([], [1, 2, 3]),
	        ([1, 2, 3], []),
	    )
	    for sample1, sample2 in empty_cases:
	      with self.assertRaises(RuntimeError):
	        ttest.WelchsTTest(sample1, sample2)

	  def testTTest_EqualSamples_PValueIsOne(self):
	    """Checks that t = 0 and p = 1 when the samples are the same."""
	    result = ttest.WelchsTTest([1, 2, 3], [1, 2, 3])
	    self.assertEqual(0, result.t)
	    self.assertEqual(1, result.p)

	  def testTTest_VeryDifferentSamples_PValueIsLow(self):
	    """Checks that p is very low when the samples are clearly different."""
	    sample_high = [100, 101, 100, 101, 100]
	    sample_low = [1, 2, 1, 2, 1, 2, 1, 2]
	    result = ttest.WelchsTTest(sample_high, sample_low)
	    # The first sample has the larger mean, so t is large and positive.
	    self.assertGreaterEqual(result.t, 250)
	    # assertLessEqual(first, second) checks first <= second, so the p-value
	    # must come first. With the bound first, every p-value in the patched
	    # table (all >= 0.01) would pass and the check would prove nothing.
	    self.assertLessEqual(result.p, 0.01)

	  def testTTest_DifferentVariance(self):
	    """Verifies that higher variance -> higher p value."""
	    # Both pairs of samples have the same means; only the spread differs.
	    result_low_var = ttest.WelchsTTest([2, 3, 2, 3], [4, 5, 4, 5])
	    result_high_var = ttest.WelchsTTest([1, 4, 1, 4], [3, 6, 3, 6])
	    self.assertLess(result_low_var.p, result_high_var.p)

	  def testTTest_DifferentSampleSize(self):
	    """Verifies that smaller sample size -> higher p value."""
	    result_larger_sample = ttest.WelchsTTest([2, 3, 2, 3], [4, 5, 4, 5])
	    result_smaller_sample = ttest.WelchsTTest([2, 3, 2, 3], [4, 5])
	    self.assertLess(result_larger_sample.p, result_smaller_sample.p)

	  def testTTest_DifferentMeanDifference(self):
	    """Verifies that smaller difference between means -> higher p value."""
	    result_far_means = ttest.WelchsTTest([2, 3, 2, 3], [5, 6, 5, 6])
	    result_near_means = ttest.WelchsTTest([2, 3, 2, 3], [3, 4, 3, 4])
	    self.assertLess(result_far_means.p, result_near_means.p)

	  def testTValue(self):
	    """Tests calculation of the t-value using Welch's formula."""
	    # Results can be verified by directly plugging variables into Welch's
	    # equation (e.g. using a calculator or the Python interpreter).
	    stats1 = ttest.SampleStats(mean=0.299, var=0.05, size=150)
	    stats2 = ttest.SampleStats(mean=0.307, var=0.08, size=165)
	    # Note that a negative t-value is obtained when the first sample has a
	    # smaller mean than the second, otherwise a positive value is returned.
	    self.assertAlmostEqual(-0.27968236, ttest._TValue(stats1, stats2))
	    self.assertAlmostEqual(0.27968236, ttest._TValue(stats2, stats1))

	  def testTValue_ConstantSamples_ResultIsInfinity(self):
	    """If there is no variation, infinity is used as the t-statistic value."""
	    stats = ttest.SampleStats(mean=1.0, var=0, size=10)
	    self.assertEqual(float('inf'), ttest._TValue(stats, stats))

	  def testDegreesOfFreedom(self):
	    """Tests calculation of estimated degrees of freedom."""
	    # The formula used to estimate degrees of freedom for independent-samples
	    # t-test is called the Welch-Satterthwaite equation. Note that since the
	    # Welch-Satterthwaite equation gives an estimate of degrees of freedom,
	    # the result is a floating-point number and not an integer.
	    stats1 = ttest.SampleStats(mean=0.299, var=0.05, size=150)
	    stats2 = ttest.SampleStats(mean=0.307, var=0.08, size=165)
	    self.assertAlmostEqual(
	        307.19879975, ttest._DegreesOfFreedom(stats1, stats2))

	  def testDegreesOfFreedom_ZeroVariance_ResultIsOne(self):
	    """The lowest possible value is returned for df if variance is zero."""
	    stats = ttest.SampleStats(mean=1.0, var=0, size=10)
	    self.assertEqual(1.0, ttest._DegreesOfFreedom(stats, stats))

	  def testDegreesOfFreedom_SmallSample_RaisesError(self):
	    """Degrees of freedom can't be calculated if sample size is too small."""
	    size_0 = ttest.SampleStats(mean=0, var=0, size=0)
	    size_1 = ttest.SampleStats(mean=1.0, var=0, size=1)
	    size_5 = ttest.SampleStats(mean=2.0, var=0.5, size=5)

	    # An error is raised if the size of one of the samples is too small,
	    # regardless of which argument position the small sample is in.
	    too_small_pairs = (
	        (size_0, size_5),
	        (size_1, size_5),
	        (size_5, size_0),
	        (size_5, size_1),
	    )
	    for stats1, stats2 in too_small_pairs:
	      with self.assertRaises(RuntimeError):
	        ttest._DegreesOfFreedom(stats1, stats2)

	    # If both of the samples have a variance of 0, no error is raised.
	    self.assertEqual(1.0, ttest._DegreesOfFreedom(size_1, size_1))


	class LookupPValueTest(unittest.TestCase):
	  """Tests for ttest._LookupPValue against a small, fixed t-table."""

	  def setUp(self):
	    """Sets the t-table values for the tests below."""
	    fake_table = [
	        (1, [0, 6.314, 12.71, 31.82, 63.66, 318.31]),
	        (2, [0, 2.920, 4.303, 6.965, 9.925, 22.327]),
	        (3, [0, 2.353, 3.182, 4.541, 5.841, 10.215]),
	        (4, [0, 2.132, 2.776, 3.747, 4.604, 7.173]),
	        (10, [0, 1.372, 1.812, 2.228, 2.764, 3.169]),
	        (100, [0, 1.290, 1.660, 1.984, 2.364, 2.626]),
	    ]
	    fake_two_tail = [1, 0.2, 0.1, 0.05, 0.02, 0.01]
	    patches = (
	        ('_TABLE', fake_table),
	        ('_TWO_TAIL', fake_two_tail),
	    )
	    for attr_name, fake_value in patches:
	      patcher = mock.patch.object(ttest, attr_name, fake_value)
	      patcher.start()
	      # Registered as a cleanup so the patch is undone after every test.
	      self.addCleanup(patcher.stop)

	  def testLookupPValue_ExactMatchInTable(self):
	    """Tests looking up an entry that is in the table."""
	    # The positive and negative t-value are expected to give the same p.
	    for t_value in (3.182, -3.182):
	      self.assertEqual(0.1, ttest._LookupPValue(t_value, 3.0))

	  def testLookupPValue_TValueBetweenTwoValues_SmallerColumnIsUsed(self):
	    # 3.1 lies between 2.920 and 4.303 in the df=2 row, so the lower of the
	    # two, 2.920 (the second column), decides the p-value.
	    for t_value in (3.1, -3.1):
	      self.assertEqual(0.2, ttest._LookupPValue(t_value, 2.0))

	  def testLookup_DFBetweenTwoValues_SmallerRowIsUsed(self):
	    # df=45 lies between the rows for 10 and 100; 2.228 is in the df=10 row.
	    for t_value in (2.228, -2.228):
	      self.assertEqual(0.05, ttest._LookupPValue(t_value, 45.0))

	  def testLookup_DFAndTValueBetweenTwoValues_SmallerRowAndColumnIsUsed(self):
	    # Neither df=45 nor t=2.0 appears in the table.
	    for t_value in (2.0, -2.0):
	      self.assertEqual(0.1, ttest._LookupPValue(t_value, 45.0))

	  def testLookupPValue_LargeTValue_LastColumnIsUsed(self):
	    # The smallest possible p-value will be used when t is large.
	    for t_value in (500.0, -500.0):
	      self.assertEqual(0.01, ttest._LookupPValue(t_value, 1.0))

	  def testLookupPValue_ZeroTValue_FirstColumnIsUsed(self):
	    # The largest possible p-value will be used when t is zero.
	    for df in (1.0, 2.0):
	      self.assertEqual(1.0, ttest._LookupPValue(0.0, df))

	  def testLookupPValue_SmallTValue_FirstColumnIsUsed(self):
	    # The largest possible p-value will be used when t is almost zero.
	    for t_value in (0.1, -0.1):
	      self.assertEqual(1.0, ttest._LookupPValue(t_value, 2.0))

	  def testLookupPValue_LargeDegreesOfFreedom_LastRowIsUsed(self):
	    # The last row of the table should be used.
	    p_value = ttest._LookupPValue(2.365, 100.0)
	    self.assertEqual(0.02, p_value)


	# Run every test in this module when the file is executed directly.
	if __name__ == '__main__':
	unittest.main()