catapult/dashboard/dashboard/debug_alert.py - platform/external/chromium-trace - Git at Google

 # Copyright 2015 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """Provides an interface for debugging the anomaly detection function."""

 import json
 import urllib

 from dashboard import datastore_hooks
 from dashboard import find_anomalies
 from dashboard import find_change_points
 from dashboard import request_handler
 from dashboard import utils
 from dashboard.models import anomaly
 from dashboard.models import anomaly_config
 from dashboard.models import graph_data

 # Default number of points before and after a point to analyze.
 _NUM_BEFORE = 40
 _NUM_AFTER = 10


 class QueryParameterError(Exception):
   pass


 class DebugAlertHandler(request_handler.RequestHandler):
   """Request handler for the /debug_alert page."""

   def get(self):
     """Displays UI for debugging the anomaly detection function.

     Request parameters:
       test_path: Full test path (Master/bot/suite/chart) for test with alert.
       rev: A revision (Row id number) to center the graph on.
       num_before: Maximum number of points after the given revision to get.
       num_after: Maximum number of points before the given revision.
       config: Config parameters for in JSON form.

     Outputs:
       A HTML page with a chart (if test_path is given) and a form.
     """
     try:
       test = self._GetTest()
       num_before, num_after = self._GetNumBeforeAfter()
       config_name = self._GetConfigName(test)
       config_dict = anomaly_config.CleanConfigDict(self._GetConfigDict(test))
     except QueryParameterError as e:
       self.RenderHtml('debug_alert.html', {'error': e.message})
       return

     revision = self.request.get('rev')
     if revision:
       rows = _FetchRowsAroundRev(test, int(revision), num_before, num_after)
     else:
       rows = _FetchLatestRows(test, num_before)

     chart_series = _ChartSeries(rows)
     lookup = _RevisionList(rows)

     # Get the anomaly data from the new anomaly detection module. This will
     # also be passed to the template so that it can be shown on the page.
     change_points = SimulateAlertProcessing(chart_series, **config_dict)
     anomaly_indexes = [c.x_value for c in change_points]
     anomaly_points = [(i, chart_series[i][1]) for i in anomaly_indexes]
     anomaly_segments = _AnomalySegmentSeries(change_points)

     plot_data = _GetPlotData(chart_series, anomaly_points, anomaly_segments)

     # Render the debug_alert page with all of the parameters filled in.
     self.RenderHtml('debug_alert.html', {
         'test_path': test.test_path,
         'rev': revision or '',
         'num_before': num_before,
         'num_after': num_after,
         'sheriff_name': 'None' if not test.sheriff else test.sheriff.id(),
         'config_name': config_name,
         'config_json': json.dumps(config_dict, indent=2, sort_keys=True),
         'plot_data': json.dumps(plot_data),
         'lookup': json.dumps(lookup),
         'anomalies': json.dumps([c.AsDict() for c in change_points]),
         'csv_url': _CsvUrl(test.test_path, rows),
         'graph_url': _GraphUrl(test, revision),
         'stored_anomalies': _FetchStoredAnomalies(test, lookup),
     })

   def post(self):
     """A POST request to this endpoint does the same thing as a GET request."""
     return self.get()

   def _GetTest(self):
     test_path = self.request.get('test_path')
     if not test_path:
       raise QueryParameterError('No test specified.')
     test = utils.TestKey(test_path).get()
     if not test:
       raise QueryParameterError('Test "%s" not found.' % test_path)
     return test

   def _GetNumBeforeAfter(self):
     try:
       num_before = int(self.request.get('num_before', _NUM_BEFORE))
       num_after = int(self.request.get('num_after', _NUM_AFTER))
     except ValueError:
       raise QueryParameterError('Invalid "num_before" or "num_after".')
     return num_before, num_after

   def _GetConfigName(self, test):
     """Gets the name of the custom anomaly threshold, just for display."""
     if test.overridden_anomaly_config:
       return test.overridden_anomaly_config.string_id()
     if self.request.get('config'):
       return 'Custom config'
     return 'Default config'

   def _GetConfigDict(self, test):
     """Gets the name of the anomaly threshold dict to use."""
     input_config_json = self.request.get('config')
     if not input_config_json:
       return anomaly_config.GetAnomalyConfigDict(test)
     try:
       return json.loads(input_config_json)
     except ValueError:
       raise QueryParameterError('Invalid JSON.')


 def SimulateAlertProcessing(chart_series, **config_dict):
   """Finds the same alerts as would be found normally as points are added.

   Each time a new point is added to a data series on dashboard, the
   FindChangePoints function is called with some points from that series.
   In order to simulate this here, we need to repeatedly call FindChangePoints.

   Args:
     chart_series: A list of (x, y) pairs.
     **config_dict: An alert threshold config dict.

   Returns:
     A list of find_change_points.ChangePoint objects, one for each alert found.
   """
   all_change_points = []
   highest_x = None  # This is used to avoid finding duplicate alerts.
   # The number of points that are passed in to FindChangePoints normally may
   # depend on either the specific "max_window_size" value or another default
   # used in find_anomalies.
   window = config_dict.get('max_window_size', find_anomalies.DEFAULT_NUM_POINTS)
   for end in range(1, len(chart_series)):
     start = max(0, end - window)
     series = chart_series[start:end]
     change_points = find_change_points.FindChangePoints(series, **config_dict)
     change_points = [c for c in change_points if c.x_value > highest_x]
     if change_points:
       highest_x = max(c.x_value for c in change_points)
       all_change_points.extend(change_points)
   return all_change_points


 def _AnomalySegmentSeries(change_points):
   """Makes a list of data series for showing segments next to anomalies.

   Args:
     change_points: A list of find_change_points.ChangePoint objects.

   Returns:
     A list of data series (lists of pairs) to be graphed by Flot.
   """
   # We make a separate series for each anomaly, since segments may overlap.
   anomaly_series_list = []

   for change_point in change_points:
     anomaly_series = []

     # In a Flot data series, null is treated as a special value which
     # indicates a discontinuity. We want to end each segment with null
     # so that they show up as separate segments on the graph.
     anomaly_series.append([change_point.window_start, None])

     for x in range(change_point.window_start + 1, change_point.x_value):
       anomaly_series.append([x, change_point.median_before])
     anomaly_series.append([change_point.x_value, None])

     for x in range(change_point.x_value + 1, change_point.window_end + 1):
       anomaly_series.append([x, change_point.median_after])
     anomaly_series.append([change_point.window_end, None])
     anomaly_series_list.append(anomaly_series)

   return anomaly_series_list


 def _GetPlotData(chart_series, anomaly_points, anomaly_segments):
   """Returns data to embed on the front-end for the chart.

   Args:
     chart_series: A series, i.e. a list of (index, value) pairs.
     anomaly_points: A series which contains the list of points where the
         anomalies were detected.
     anomaly_segments: A list of series, each of which represents one segment,
         which is a horizontal line across a range of values used in finding
         an anomaly.

   Returns:
     A list of data series, in the format accepted by Flot, which can be
     serialized as JSON and embedded on the page.
   """
   data = [
       {
           'data': chart_series,
           'color': '#666',
           'lines': {'show': True},
           'points': {'show': False},
       },
       {
           'data': anomaly_points,
           'color': '#f90',
           'lines': {'show': False},
           'points': {'show': True, 'radius': 4}
       },
   ]
   for series in anomaly_segments:
     data.append({
         'data': series,
         'color': '#f90',
         'lines': {'show': True},
         'points': {'show': False},
     })
   return data


 def _ChartSeries(rows):
   """Returns a data series and index to revision map."""
   return [(i, r.value) for i, r in enumerate(rows)]


 def _RevisionList(rows):
   """Returns a list of revisions."""
   return [r.revision for r in rows]


 def _FetchLatestRows(test, num_points):
   """Does a query for the latest Row entities in the given test.

   Args:
     test: A Test entity to fetch Row entities for.
     num_points: Number of points to fetch.

   Returns:
     A list of Row entities, ordered by revision. The number to fetch is limited
     to the number that is expected to be processed at once by GASP.
   """
   assert utils.IsInternalUser() or not test.internal_only
   datastore_hooks.SetSinglePrivilegedRequest()
   q = graph_data.Row.query(projection=['revision', 'value'])
   q = q.filter(graph_data.Row.parent_test == test.key)
   q = q.order(-graph_data.Row.revision)
   rows = list(reversed(q.fetch(limit=num_points)))
   return rows


 def _FetchRowsAroundRev(test, revision, num_before, num_after):
   """Fetches Row entities before and after a given revision.

   Args:
     test: A Test entity.
     revision: A Row ID.
     num_before: Maximum number of Rows before |revision| to fetch.
     num_after: Max number of Rows starting from |revision| to fetch.

   Returns:
     A list of Row entities ordered by ID. The Row entities will have at least
     the "revision" and "value" properties, which are the only ones relevant
     to their use in this module.
   """
   assert utils.IsInternalUser() or not test.internal_only
   query = graph_data.Row.query(projection=['revision', 'value'])
   query = query.filter(graph_data.Row.parent_test == test.key)

   before_query = query.filter(graph_data.Row.revision < revision)
   before_query = before_query.order(-graph_data.Row.revision)
   datastore_hooks.SetSinglePrivilegedRequest()
   rows_before = list(reversed(before_query.fetch(limit=num_before)))

   after_query = query.filter(graph_data.Row.revision >= revision)
   after_query = after_query.order(graph_data.Row.revision)
   datastore_hooks.SetSinglePrivilegedRequest()
   rows_at_and_after = after_query.fetch(num_after)

   return rows_before + rows_at_and_after


 def _FetchStoredAnomalies(test, revisions):
   """Makes a list of data about Anomaly entities for a Test."""
   stored_anomalies = anomaly.Anomaly.query().filter(
       anomaly.Anomaly.test == test.key).fetch()

   stored_anomaly_dicts = []
   for a in stored_anomalies:
     if a.end_revision > revisions[0]:
       stored_anomaly_dicts.append({
           'revision': a.end_revision,
           'median_before': a.median_before_anomaly,
           'median_after': a.median_after_anomaly,
           'percent_changed': a.percent_changed,
           'bug_id': _GetDisplayBugId(a.bug_id),
           'timestamp': a.timestamp,
       })
   return stored_anomaly_dicts


 def _CsvUrl(test_path, rows):
   """Constructs an URL for requesting data from /graph_csv for |rows|."""
   # Using a list of pairs ensures a predictable order for the parameters.
   params = [('test_path', test_path)]
   if rows:
     params += [
         ('num_points', len(rows)),
         ('rev', rows[-1].revision),
     ]
   return '/graph_csv?%s' % urllib.urlencode(params)


 def _GraphUrl(test, revision):
   """Constructs an URL for requesting data from /graph_csv for |rows|."""
   params = [
       ('masters', test.master_name),
       ('bots', test.bot_name),
       ('tests', '/'.join(test.test_path.split('/')[2:])),
   ]
   if revision:
     params.append(('rev', revision))
   return '/report?%s' % urllib.urlencode(params)


 def _GetDisplayBugId(bug_id):
   """Returns a display string for the given bug ID property of an anomaly."""
   special_ids = {-1: 'INVALID', -2: 'IGNORE', None: 'NONE'}
   return special_ids.get(bug_id, str(bug_id))
	# Copyright 2015 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""Provides an interface for debugging the anomaly detection function."""

	import json
	import urllib

	from dashboard import datastore_hooks
	from dashboard import find_anomalies
	from dashboard import find_change_points
	from dashboard import request_handler
	from dashboard import utils
	from dashboard.models import anomaly
	from dashboard.models import anomaly_config
	from dashboard.models import graph_data

	# Default number of points before and after a point to analyze.
	_NUM_BEFORE = 40
	_NUM_AFTER = 10


	class QueryParameterError(Exception):
	pass


	class DebugAlertHandler(request_handler.RequestHandler):
	"""Request handler for the /debug_alert page."""

	def get(self):
	"""Displays UI for debugging the anomaly detection function.

	Request parameters:
	test_path: Full test path (Master/bot/suite/chart) for test with alert.
	rev: A revision (Row id number) to center the graph on.
	num_before: Maximum number of points after the given revision to get.
	num_after: Maximum number of points before the given revision.
	config: Config parameters for in JSON form.

	Outputs:
	A HTML page with a chart (if test_path is given) and a form.
	"""
	try:
	test = self._GetTest()
	num_before, num_after = self._GetNumBeforeAfter()
	config_name = self._GetConfigName(test)
	config_dict = anomaly_config.CleanConfigDict(self._GetConfigDict(test))
	except QueryParameterError as e:
	self.RenderHtml('debug_alert.html', {'error': e.message})
	return

	revision = self.request.get('rev')
	if revision:
	rows = _FetchRowsAroundRev(test, int(revision), num_before, num_after)
	else:
	rows = _FetchLatestRows(test, num_before)

	chart_series = _ChartSeries(rows)
	lookup = _RevisionList(rows)

	# Get the anomaly data from the new anomaly detection module. This will
	# also be passed to the template so that it can be shown on the page.
	change_points = SimulateAlertProcessing(chart_series, **config_dict)
	anomaly_indexes = [c.x_value for c in change_points]
	anomaly_points = [(i, chart_series[i][1]) for i in anomaly_indexes]
	anomaly_segments = _AnomalySegmentSeries(change_points)

	plot_data = _GetPlotData(chart_series, anomaly_points, anomaly_segments)

	# Render the debug_alert page with all of the parameters filled in.
	self.RenderHtml('debug_alert.html', {
	'test_path': test.test_path,
	'rev': revision or '',
	'num_before': num_before,
	'num_after': num_after,
	'sheriff_name': 'None' if not test.sheriff else test.sheriff.id(),
	'config_name': config_name,
	'config_json': json.dumps(config_dict, indent=2, sort_keys=True),
	'plot_data': json.dumps(plot_data),
	'lookup': json.dumps(lookup),
	'anomalies': json.dumps([c.AsDict() for c in change_points]),
	'csv_url': _CsvUrl(test.test_path, rows),
	'graph_url': _GraphUrl(test, revision),
	'stored_anomalies': _FetchStoredAnomalies(test, lookup),
	})

	def post(self):
	"""A POST request to this endpoint does the same thing as a GET request."""
	return self.get()

	def _GetTest(self):
	test_path = self.request.get('test_path')
	if not test_path:
	raise QueryParameterError('No test specified.')
	test = utils.TestKey(test_path).get()
	if not test:
	raise QueryParameterError('Test "%s" not found.' % test_path)
	return test

	def _GetNumBeforeAfter(self):
	try:
	num_before = int(self.request.get('num_before', _NUM_BEFORE))
	num_after = int(self.request.get('num_after', _NUM_AFTER))
	except ValueError:
	raise QueryParameterError('Invalid "num_before" or "num_after".')
	return num_before, num_after

	def _GetConfigName(self, test):
	"""Gets the name of the custom anomaly threshold, just for display."""
	if test.overridden_anomaly_config:
	return test.overridden_anomaly_config.string_id()
	if self.request.get('config'):
	return 'Custom config'
	return 'Default config'

	def _GetConfigDict(self, test):
	"""Gets the name of the anomaly threshold dict to use."""
	input_config_json = self.request.get('config')
	if not input_config_json:
	return anomaly_config.GetAnomalyConfigDict(test)
	try:
	return json.loads(input_config_json)
	except ValueError:
	raise QueryParameterError('Invalid JSON.')


	def SimulateAlertProcessing(chart_series, **config_dict):
	"""Finds the same alerts as would be found normally as points are added.

	Each time a new point is added to a data series on dashboard, the
	FindChangePoints function is called with some points from that series.
	In order to simulate this here, we need to repeatedly call FindChangePoints.

	Args:
	chart_series: A list of (x, y) pairs.
	**config_dict: An alert threshold config dict.

	Returns:
	A list of find_change_points.ChangePoint objects, one for each alert found.
	"""
	all_change_points = []
	highest_x = None # This is used to avoid finding duplicate alerts.
	# The number of points that are passed in to FindChangePoints normally may
	# depend on either the specific "max_window_size" value or another default
	# used in find_anomalies.
	window = config_dict.get('max_window_size', find_anomalies.DEFAULT_NUM_POINTS)
	for end in range(1, len(chart_series)):
	start = max(0, end - window)
	series = chart_series[start:end]
	change_points = find_change_points.FindChangePoints(series, **config_dict)
	change_points = [c for c in change_points if c.x_value > highest_x]
	if change_points:
	highest_x = max(c.x_value for c in change_points)
	all_change_points.extend(change_points)
	return all_change_points


	def _AnomalySegmentSeries(change_points):
	"""Makes a list of data series for showing segments next to anomalies.

	Args:
	change_points: A list of find_change_points.ChangePoint objects.

	Returns:
	A list of data series (lists of pairs) to be graphed by Flot.
	"""
	# We make a separate series for each anomaly, since segments may overlap.
	anomaly_series_list = []

	for change_point in change_points:
	anomaly_series = []

	# In a Flot data series, null is treated as a special value which
	# indicates a discontinuity. We want to end each segment with null
	# so that they show up as separate segments on the graph.
	anomaly_series.append([change_point.window_start, None])

	for x in range(change_point.window_start + 1, change_point.x_value):
	anomaly_series.append([x, change_point.median_before])
	anomaly_series.append([change_point.x_value, None])

	for x in range(change_point.x_value + 1, change_point.window_end + 1):
	anomaly_series.append([x, change_point.median_after])
	anomaly_series.append([change_point.window_end, None])
	anomaly_series_list.append(anomaly_series)

	return anomaly_series_list


	def _GetPlotData(chart_series, anomaly_points, anomaly_segments):
	"""Returns data to embed on the front-end for the chart.

	Args:
	chart_series: A series, i.e. a list of (index, value) pairs.
	anomaly_points: A series which contains the list of points where the
	anomalies were detected.
	anomaly_segments: A list of series, each of which represents one segment,
	which is a horizontal line across a range of values used in finding
	an anomaly.

	Returns:
	A list of data series, in the format accepted by Flot, which can be
	serialized as JSON and embedded on the page.
	"""
	data = [
	{
	'data': chart_series,
	'color': '#666',
	'lines': {'show': True},
	'points': {'show': False},
	},
	{
	'data': anomaly_points,
	'color': '#f90',
	'lines': {'show': False},
	'points': {'show': True, 'radius': 4}
	},
	]
	for series in anomaly_segments:
	data.append({
	'data': series,
	'color': '#f90',
	'lines': {'show': True},
	'points': {'show': False},
	})
	return data


	def _ChartSeries(rows):
	"""Returns a data series and index to revision map."""
	return [(i, r.value) for i, r in enumerate(rows)]


	def _RevisionList(rows):
	"""Returns a list of revisions."""
	return [r.revision for r in rows]


	def _FetchLatestRows(test, num_points):
	"""Does a query for the latest Row entities in the given test.

	Args:
	test: A Test entity to fetch Row entities for.
	num_points: Number of points to fetch.

	Returns:
	A list of Row entities, ordered by revision. The number to fetch is limited
	to the number that is expected to be processed at once by GASP.
	"""
	assert utils.IsInternalUser() or not test.internal_only
	datastore_hooks.SetSinglePrivilegedRequest()
	q = graph_data.Row.query(projection=['revision', 'value'])
	q = q.filter(graph_data.Row.parent_test == test.key)
	q = q.order(-graph_data.Row.revision)
	rows = list(reversed(q.fetch(limit=num_points)))
	return rows


	def _FetchRowsAroundRev(test, revision, num_before, num_after):
	"""Fetches Row entities before and after a given revision.

	Args:
	test: A Test entity.
	revision: A Row ID.
	num_before: Maximum number of Rows before \|revision\| to fetch.
	num_after: Max number of Rows starting from \|revision\| to fetch.

	Returns:
	A list of Row entities ordered by ID. The Row entities will have at least
	the "revision" and "value" properties, which are the only ones relevant
	to their use in this module.
	"""
	assert utils.IsInternalUser() or not test.internal_only
	query = graph_data.Row.query(projection=['revision', 'value'])
	query = query.filter(graph_data.Row.parent_test == test.key)

	before_query = query.filter(graph_data.Row.revision < revision)
	before_query = before_query.order(-graph_data.Row.revision)
	datastore_hooks.SetSinglePrivilegedRequest()
	rows_before = list(reversed(before_query.fetch(limit=num_before)))

	after_query = query.filter(graph_data.Row.revision >= revision)
	after_query = after_query.order(graph_data.Row.revision)
	datastore_hooks.SetSinglePrivilegedRequest()
	rows_at_and_after = after_query.fetch(num_after)

	return rows_before + rows_at_and_after


	def _FetchStoredAnomalies(test, revisions):
	"""Makes a list of data about Anomaly entities for a Test."""
	stored_anomalies = anomaly.Anomaly.query().filter(
	anomaly.Anomaly.test == test.key).fetch()

	stored_anomaly_dicts = []
	for a in stored_anomalies:
	if a.end_revision > revisions[0]:
	stored_anomaly_dicts.append({
	'revision': a.end_revision,
	'median_before': a.median_before_anomaly,
	'median_after': a.median_after_anomaly,
	'percent_changed': a.percent_changed,
	'bug_id': _GetDisplayBugId(a.bug_id),
	'timestamp': a.timestamp,
	})
	return stored_anomaly_dicts


	def _CsvUrl(test_path, rows):
	"""Constructs an URL for requesting data from /graph_csv for \|rows\|."""
	# Using a list of pairs ensures a predictable order for the parameters.
	params = [('test_path', test_path)]
	if rows:
	params += [
	('num_points', len(rows)),
	('rev', rows[-1].revision),
	]
	return '/graph_csv?%s' % urllib.urlencode(params)


	def _GraphUrl(test, revision):
	"""Constructs an URL for requesting data from /graph_csv for \|rows\|."""
	params = [
	('masters', test.master_name),
	('bots', test.bot_name),
	('tests', '/'.join(test.test_path.split('/')[2:])),
	]
	if revision:
	params.append(('rev', revision))
	return '/report?%s' % urllib.urlencode(params)


	def _GetDisplayBugId(bug_id):
	"""Returns a display string for the given bug ID property of an anomaly."""
	special_ids = {-1: 'INVALID', -2: 'IGNORE', None: 'NONE'}
	return special_ids.get(bug_id, str(bug_id))