# blob: bcabf9cecb42d8a020d7a449bec0f221fa2bb82a [file] [log] [blame]
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import unittest
import mock
from dashboard import ttest
class TTestTest(unittest.TestCase):
  """Tests for the t-test functions."""

  def setUp(self):
    """Sets the t-table values for the tests below.

    ttest._TABLE and ttest._TWO_TAIL are replaced with a small fixed table so
    that the p-values looked up by these tests are known in advance. Both
    patches are undone through addCleanup when each test finishes.
    """
    table_patch = mock.patch.object(
        ttest, '_TABLE',
        [
            (1, [0, 6.314, 12.71, 31.82, 63.66, 318.31]),
            (2, [0, 2.920, 4.303, 6.965, 9.925, 22.327]),
            (3, [0, 2.353, 3.182, 4.541, 5.841, 10.215]),
            (4, [0, 2.132, 2.776, 3.747, 4.604, 7.173]),
            (10, [0, 1.372, 1.812, 2.228, 2.764, 3.169]),
            (100, [0, 1.290, 1.660, 1.984, 2.364, 2.626]),
        ])
    table_patch.start()
    self.addCleanup(table_patch.stop)
    two_tail_patch = mock.patch.object(
        ttest, '_TWO_TAIL',
        [1, 0.2, 0.1, 0.05, 0.02, 0.01])
    two_tail_patch.start()
    self.addCleanup(two_tail_patch.stop)

  def testWelchsTTest(self):
    """Tests the t value and degrees of freedom output of Welch's t-test."""
    # The t-value can be checked with scipy.stats.ttest_ind(equal_var=False).
    # However the t-value output by scipy.stats.ttest_ind is -6.32455532034.
    # This implementation produces slightly different results, which is why
    # the t-value is only compared with a generous delta.
    result = ttest.WelchsTTest([2, 3, 2, 3, 2, 3], [4, 5, 4, 5, 4, 5])
    self.assertAlmostEqual(10.0, result.df)
    self.assertAlmostEqual(-6.325, result.t, delta=1.0)

  def testWelchsTTest_EmptySample_RaisesError(self):
    """An error should be raised when an empty sample is passed in."""
    with self.assertRaises(RuntimeError):
      ttest.WelchsTTest([], [])
    with self.assertRaises(RuntimeError):
      ttest.WelchsTTest([], [1, 2, 3])
    with self.assertRaises(RuntimeError):
      ttest.WelchsTTest([1, 2, 3], [])

  def testTTest_EqualSamples_PValueIsOne(self):
    """Checks that t = 0 and p = 1 when the samples are the same."""
    result = ttest.WelchsTTest([1, 2, 3], [1, 2, 3])
    self.assertEqual(0, result.t)
    self.assertEqual(1, result.p)

  def testTTest_VeryDifferentSamples_PValueIsLow(self):
    """Checks that p is very low when the samples are clearly different."""
    result = ttest.WelchsTTest([100, 101, 100, 101, 100],
                               [1, 2, 1, 2, 1, 2, 1, 2])
    # The first sample lies far above the second, so t must be large and
    # positive.
    self.assertGreaterEqual(result.t, 250)
    # 0.01 is the smallest p-value in the patched two-tail row. The bound goes
    # second because assertLessEqual(a, b) checks a <= b; with the arguments
    # the other way round the check would also accept p = 1.0.
    self.assertLessEqual(result.p, 0.01)

  def testTTest_DifferentVariance(self):
    """Verifies that higher variance -> higher p value."""
    result_low_var = ttest.WelchsTTest([2, 3, 2, 3], [4, 5, 4, 5])
    result_high_var = ttest.WelchsTTest([1, 4, 1, 4], [3, 6, 3, 6])
    self.assertLess(result_low_var.p, result_high_var.p)

  def testTTest_DifferentSampleSize(self):
    """Verifies that smaller sample size -> higher p value."""
    result_larger_sample = ttest.WelchsTTest([2, 3, 2, 3], [4, 5, 4, 5])
    result_smaller_sample = ttest.WelchsTTest([2, 3, 2, 3], [4, 5])
    self.assertLess(result_larger_sample.p, result_smaller_sample.p)

  def testTTest_DifferentMeanDifference(self):
    """Verifies that smaller difference between means -> higher p value."""
    result_far_means = ttest.WelchsTTest([2, 3, 2, 3], [5, 6, 5, 6])
    result_near_means = ttest.WelchsTTest([2, 3, 2, 3], [3, 4, 3, 4])
    self.assertLess(result_far_means.p, result_near_means.p)

  def testTValue(self):
    """Tests calculation of the t-value using Welch's formula."""
    # Results can be verified by directly plugging variables into Welch's
    # equation (e.g. using a calculator or the Python interpreter).
    stats1 = ttest.SampleStats(mean=0.299, var=0.05, size=150)
    stats2 = ttest.SampleStats(mean=0.307, var=0.08, size=165)
    # Note that a negative t-value is obtained when the first sample has a
    # smaller mean than the second, otherwise a positive value is returned.
    self.assertAlmostEqual(-0.27968236, ttest._TValue(stats1, stats2))
    self.assertAlmostEqual(0.27968236, ttest._TValue(stats2, stats1))

  def testTValue_ConstantSamples_ResultIsInfinity(self):
    """If there is no variation, infinity is used as the t-statistic value."""
    stats = ttest.SampleStats(mean=1.0, var=0, size=10)
    self.assertEqual(float('inf'), ttest._TValue(stats, stats))

  def testDegreesOfFreedom(self):
    """Tests calculation of estimated degrees of freedom."""
    # The formula used to estimate degrees of freedom for independent-samples
    # t-test is called the Welch-Satterthwaite equation. Note that since the
    # Welch-Satterthwaite equation gives an estimate of degrees of freedom,
    # the result is a floating-point number and not an integer.
    stats1 = ttest.SampleStats(mean=0.299, var=0.05, size=150)
    stats2 = ttest.SampleStats(mean=0.307, var=0.08, size=165)
    self.assertAlmostEqual(
        307.19879975, ttest._DegreesOfFreedom(stats1, stats2))

  def testDegreesOfFreedom_ZeroVariance_ResultIsOne(self):
    """The lowest possible value is returned for df if variance is zero."""
    stats = ttest.SampleStats(mean=1.0, var=0, size=10)
    self.assertEqual(1.0, ttest._DegreesOfFreedom(stats, stats))

  def testDegreesOfFreedom_SmallSample_RaisesError(self):
    """Degrees of freedom can't be calculated if sample size is too small."""
    size_0 = ttest.SampleStats(mean=0, var=0, size=0)
    size_1 = ttest.SampleStats(mean=1.0, var=0, size=1)
    size_5 = ttest.SampleStats(mean=2.0, var=0.5, size=5)
    # An error is raised if the size of one of the samples is too small.
    with self.assertRaises(RuntimeError):
      ttest._DegreesOfFreedom(size_0, size_5)
    with self.assertRaises(RuntimeError):
      ttest._DegreesOfFreedom(size_1, size_5)
    with self.assertRaises(RuntimeError):
      ttest._DegreesOfFreedom(size_5, size_0)
    with self.assertRaises(RuntimeError):
      ttest._DegreesOfFreedom(size_5, size_1)
    # If both of the samples have a variance of 0, no error is raised.
    self.assertEqual(1.0, ttest._DegreesOfFreedom(size_1, size_1))
class LookupPValueTest(unittest.TestCase):
  """Tests for ttest._LookupPValue against a small patched t-table."""

  def setUp(self):
    """Swaps in fixed t-table fixtures for the duration of each test."""
    t_table = [
        (1, [0, 6.314, 12.71, 31.82, 63.66, 318.31]),
        (2, [0, 2.920, 4.303, 6.965, 9.925, 22.327]),
        (3, [0, 2.353, 3.182, 4.541, 5.841, 10.215]),
        (4, [0, 2.132, 2.776, 3.747, 4.604, 7.173]),
        (10, [0, 1.372, 1.812, 2.228, 2.764, 3.169]),
        (100, [0, 1.290, 1.660, 1.984, 2.364, 2.626]),
    ]
    two_tail_p_values = [1, 0.2, 0.1, 0.05, 0.02, 0.01]
    fixtures = (
        ('_TABLE', t_table),
        ('_TWO_TAIL', two_tail_p_values),
    )
    # Start each patch and immediately register its undo, so a patch is
    # removed again even when a later step of the test fails.
    for attribute, replacement in fixtures:
      patcher = mock.patch.object(ttest, attribute, replacement)
      patcher.start()
      self.addCleanup(patcher.stop)

  def testLookupPValue_ExactMatchInTable(self):
    """A t-value found verbatim in the table yields that column's p-value."""
    for t_value in (3.182, -3.182):
      self.assertEqual(0.1, ttest._LookupPValue(t_value, 3.0))

  def testLookupPValue_TValueBetweenTwoValues_SmallerColumnIsUsed(self):
    # In the df=2 row, 3.1 falls between 2.920 and 4.303; the column of the
    # lower of the two (2.920) supplies the p-value.
    for t_value in (3.1, -3.1):
      self.assertEqual(0.2, ttest._LookupPValue(t_value, 2.0))

  def testLookup_DFBetweenTwoValues_SmallerRowIsUsed(self):
    for t_value in (2.228, -2.228):
      self.assertEqual(0.05, ttest._LookupPValue(t_value, 45.0))

  def testLookup_DFAndTValueBetweenTwoValues_SmallerRowAndColumnIsUsed(self):
    for t_value in (2.0, -2.0):
      self.assertEqual(0.1, ttest._LookupPValue(t_value, 45.0))

  def testLookupPValue_LargeTValue_LastColumnIsUsed(self):
    # A large t maps to the smallest p-value available.
    for t_value in (500.0, -500.0):
      self.assertEqual(0.01, ttest._LookupPValue(t_value, 1.0))

  def testLookupPValue_ZeroTValue_FirstColumnIsUsed(self):
    # A t of exactly zero maps to the largest p-value available.
    for degrees_of_freedom in (1.0, 2.0):
      self.assertEqual(1.0, ttest._LookupPValue(0.0, degrees_of_freedom))

  def testLookupPValue_SmallTValue_FirstColumnIsUsed(self):
    # A t close to zero maps to the largest p-value available.
    for t_value in (0.1, -0.1):
      self.assertEqual(1.0, ttest._LookupPValue(t_value, 2.0))

  def testLookupPValue_LargeDegreesOfFreedom_LastRowIsUsed(self):
    # The final table row is the one consulted here.
    looked_up = ttest._LookupPValue(2.365, 100.0)
    self.assertEqual(0.02, looked_up)
# Run every test in this module when the file is executed directly as a script.
if __name__ == '__main__':
  unittest.main()