blob: c4cc1f062a721b9a6821fdc13e87db7467907869 [file] [log] [blame]
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""An experimental alternative to find_change_points.FindChangePoints.
The general approach that this function takes is similar to that
of skiaperf.com. For the implementation that this is based on, see:
http://goo.gl/yikYZY
"""
import math
from dashboard import math_utils
def FindStep(data_series, score_threshold=4.0):
"""Finds a point in the data series where there appears to be step.
Args:
data_series: A list of ordered (x, y) pairs.
score_threshold: The minimum "regression size" score for any
step to be considered sufficiently large.
Returns:
The x-value at which there appears to be a step, or None.
"""
if len(data_series) < 2:
return None
x_values, y_values = zip(*data_series)
step_index = _MinimizeDistanceFromStep(y_values)
score = _RegressionSizeScore(y_values, step_index)
if score > score_threshold:
return x_values[step_index]
return None
def Steppiness(values, step_index):
"""Returns a number between 0 and 1 that indicates how step-like |values| is.
Args:
values: A list of numbers. Should have non-zero variance, non-zero length.
step_index: An index in values.
Returns:
A number between zero and one that indicates how similar the shape of the
given series is to a step function.
"""
normalized = _Normalize(values)
return 1.0 - _DistanceFromStep(normalized, step_index)
def _Normalize(values):
"""Makes a series with the same shape but with variance = 1, mean = 0."""
mean = math_utils.Mean(values)
zeroed = [x - mean for x in values]
stddev = math_utils.StandardDeviation(zeroed)
return [math_utils.Divide(x, stddev) for x in zeroed]
def _MinimizeDistanceFromStep(values):
"""Returns the step index which minimizes difference from a step function."""
indices = range(1, len(values))
return min(indices, key=lambda i: _DistanceFromStep(values, i))
def _RegressionSizeScore(values, step_index):
"""Returns the "regression size" score at the given point."""
left, right = values[:step_index], values[step_index:]
step_size = abs(math_utils.Mean(left) - math_utils.Mean(right))
if not step_size:
return 0.0
distance = _DistanceFromStep(values, step_index)
if not distance:
return float('inf')
return step_size / distance
def _DistanceFromStep(values, step_index):
"""Returns the root-mean-square deviation from a step function."""
step_values = _StepFunctionValues(values, step_index)
return _RootMeanSquareDeviation(values, step_values)
def _StepFunctionValues(values, step_index):
"""Makes values for a step function corresponding to |values|."""
left, right = values[:step_index], values[step_index:]
step_left = len(left) * [math_utils.Mean(left)]
step_right = len(right) * [math_utils.Mean(right)]
return step_left + step_right
def _RootMeanSquareDeviation(values1, values2):
"""Returns the root-mean-square deviation between two lists of values."""
if len(values1) == 0:
return 0.0
return math.sqrt(_SumOfSquaredResiduals(values1, values2) / len(values1))
def _SumOfSquaredResiduals(values1, values2):
"""Returns the sum of the squared deviations between corresponding values."""
return sum((v1 - v2) ** 2 for v1, v2 in zip(values1, values2))