catapult/dashboard/dashboard/auto_bisect.py - platform/external/chromium-trace - Git at Google

 # Copyright 2015 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.

 """URL endpoint for a cron job to automatically run bisects."""

 import datetime
 import logging

 from dashboard import can_bisect
 from dashboard import datastore_hooks
 from dashboard import request_handler
 from dashboard import start_try_job
 from dashboard import utils
 from dashboard.models import anomaly
 from dashboard.models import graph_data
 from dashboard.models import try_job

 # Days to wait before each successive bisect restart. The entry used for a
 # job is the one at index (run_count - 1); failed jobs whose run_count exceeds
 # the length of this list are deleted instead of being restarted.
 _BISECT_RESTART_PERIOD_DAYS = [0, 1, 7, 14]


 class AutoBisectHandler(request_handler.RequestHandler):
   """Cron-triggered URL endpoint that automatically re-runs bisects."""

   def get(self):
     """Handles a GET request exactly the same way as a POST request."""
     self.post()

   def post(self):
     """Shows bisect job stats, or restarts failed bisect jobs.

     If "stats" appears in the query string, a summary of the started and
     failed bisect jobs is rendered and nothing is restarted. Otherwise the
     failed jobs are restarted, and the monitoring metric is ticked only when
     every attempted restart was triggered successfully.
     """
     wants_stats = 'stats' in self.request.query_string
     if not wants_stats:
       datastore_hooks.SetPrivilegedRequest()
       if _RestartFailedBisectJobs():
         utils.TickMonitoringCustomMetric('RestartFailedBisectJobs')
       return
     self.RenderHtml('result.html', _PrintStartedAndFailedBisectJobs())


 class NotBisectableError(Exception):
   """Raised when a bisect job could not be started automatically."""


 def _RestartFailedBisectJobs():
   """Restarts failed bisect jobs that are due for another attempt.

   Every TryJob whose status is 'failed' is examined:
    - Jobs whose run count is not positive are left untouched.
    - Jobs that ran out of retries are deleted; if such a job has a bug ID, a
      comment is first passed to start_try_job.LogBisectResult.
    - Jobs that still have retries left and are due for a restart are re-run.
      The first retry re-uses the existing config; later retries build a new
      config, and are skipped for jobs that have no bug ID.

   Returns:
     True if all bisect jobs that were retried were successfully triggered,
     and False otherwise.
   """
   failed_jobs = try_job.TryJob.query(try_job.TryJob.status == 'failed').fetch()
   all_successful = True
   for job in failed_jobs:
     if job.run_count <= 0:
       continue

     if job.run_count > len(_BISECT_RESTART_PERIOD_DAYS):
       # Out of retries: report on the bug (if any) and drop the job.
       if job.bug_id:
         # The trailing space after the first sentence matters: the two
         # literals are concatenated into one message.
         comment = ('Failed to run bisect %s times. '
                    'Stopping automatic restart for this job.' %
                    job.run_count)
         start_try_job.LogBisectResult(job.bug_id, comment)
       job.key.delete()
       continue

     if not _IsBisectJobDueForRestart(job):
       continue

     if job.run_count == 1:
       # First retry: start the bisect right away with the same config.
       try:
         start_try_job.PerformBisect(job)
       except request_handler.InvalidInputError as e:
         logging.error(e.message)
         all_successful = False
     elif job.bug_id:
       # Later retries: try the bisect again with a different config.
       if not _RestartBisect(job):
         all_successful = False
   return all_successful


 def _RestartBisect(bisect_job):
   """Restarts a bisect job after regenerating its config from its run count.

   A fresh TryJob is built for the job's bug, with the run count used to vary
   the chosen parameters. Its config, bot and use_buildbucket values are
   copied onto the existing job, which is saved and then started.

   Args:
     bisect_job: TryJob entity with initialized bot name and config.

   Returns:
     True if the bisect was successfully triggered and False otherwise.
   """
   try:
     replacement = _MakeBisectTryJob(bisect_job.bug_id, bisect_job.run_count)
   except NotBisectableError:
     return False

   bisect_job.config = replacement.config
   bisect_job.bot = replacement.bot
   bisect_job.use_buildbucket = replacement.use_buildbucket
   bisect_job.put()

   try:
     start_try_job.PerformBisect(bisect_job)
   except request_handler.InvalidInputError as e:
     logging.error(e.message)
     return False
   else:
     return True


 def StartNewBisectForBug(bug_id):
   """Attempts to start a bisect job for the alerts associated with a bug.

   Args:
     bug_id: A bug ID number.

   Returns:
     If successful, a dict containing "issue_id" and "issue_url" for the
     bisect job. Otherwise, a dict containing "error", with some description
     of the reason why a job wasn't started.
   """
   try:
     new_job = _MakeBisectTryJob(bug_id)
   except NotBisectableError as e:
     return {'error': e.message}

   # The job is saved before it is started; its key is kept so that the
   # entity can be removed again if the bisect could not be started.
   job_key = new_job.put()
   try:
     result = start_try_job.PerformBisect(new_job)
   except request_handler.InvalidInputError as e:
     result = {'error': e.message}

   if 'error' in result:
     job_key.delete()
   return result


 def _MakeBisectTryJob(bug_id, run_count=0):
   """Builds a bisect TryJob with automatically selected parameters.

   The revision range, test, metric and bisect bot are all derived from the
   Anomaly entities associated with the bug.

   Args:
     bug_id: A bug ID which some alerts are associated with.
     run_count: An integer; this is supposed to represent the number of times
         that a bisect has been tried for this bug; it is used to try different
         config parameters on different re-try attempts.

   Returns:
     A TryJob entity, which has not yet been put in the datastore.

   Raises:
     NotBisectableError: A valid bisect config could not be created.
   """
   bug_anomalies = anomaly.Anomaly.query(
       anomaly.Anomaly.bug_id == bug_id).fetch()
   if not bug_anomalies:
     raise NotBisectableError('No Anomaly alerts found for this bug.')

   # The revision range is validated before a test is chosen, so a bad range
   # is reported in preference to a missing test.
   good_revision, bad_revision = _ChooseRevisionRange(bug_anomalies)
   if not can_bisect.IsValidRevisionForBisect(good_revision):
     raise NotBisectableError('Invalid "good" revision: %s.' % good_revision)
   if not can_bisect.IsValidRevisionForBisect(bad_revision):
     raise NotBisectableError('Invalid "bad" revision: %s.' % bad_revision)

   # The run count selects which anomaly's test is tried first.
   chosen_test = _ChooseTest(bug_anomalies, run_count)
   if not (chosen_test and
           can_bisect.IsValidTestForBisect(chosen_test.test_path)):
     raise NotBisectableError('Could not select a test.')

   metric = start_try_job.GuessMetric(chosen_test.test_path)

   bot_name = start_try_job.GuessBisectBot(
       chosen_test.master_name, chosen_test.bot_name)
   if not (bot_name and '_' in bot_name):
     raise NotBisectableError('Could not select a bisect bot.')

   use_recipe = bool(start_try_job.GetBisectDirectorForTester(bot_name))

   config_args = {
       'bisect_bot': bot_name,
       'master_name': chosen_test.master_name,
       'suite': chosen_test.suite_name,
       'metric': metric,
       'good_revision': good_revision,
       'bad_revision': bad_revision,
       'repeat_count': 10,
       'max_time_minutes': 20,
       'bug_id': bug_id,
       'use_archive': 'true',
       'use_buildbucket': use_recipe,
   }
   bisect_config = start_try_job.GetBisectConfig(**config_args)
   if 'error' in bisect_config:
     raise NotBisectableError('Could not make a valid config.')

   return try_job.TryJob(
       bot=bot_name,
       config=utils.BisectConfigPythonString(bisect_config),
       bug_id=bug_id,
       master_name=chosen_test.master_name,
       internal_only=chosen_test.internal_only,
       job_type='bisect',
       use_buildbucket=use_recipe)


 def _IsBisectJobDueForRestart(bisect_job):
   """Returns whether enough time has passed to restart the bisect job.

   The required wait, in days, is the entry of _BISECT_RESTART_PERIOD_DAYS at
   index (run_count - 1). The job is due when its last_ran_timestamp is at
   least that far behind datetime.datetime.now().
   """
   wait_days = _BISECT_RESTART_PERIOD_DAYS[bisect_job.run_count - 1]
   cutoff = datetime.datetime.now() - datetime.timedelta(days=wait_days)
   return bisect_job.last_ran_timestamp <= cutoff


 def _ChooseTest(anomalies, index=0):
   """Picks the Test to bisect on from a list of anomalies.

   The Test that is chosen determines the command and metric name of the
   bisect. Anomalies are ordered by _CompareAnomalyBisectability, and the
   first one (starting at the given index) whose test path is valid for
   bisect wins.

   Ideally, the choice of bisect bot to use should be based on bisect bot queue
   length, and the choice of metric should be based on regression size and noise
   level.

   However, we don't choose bisect bot and metric independently, since some
   regressions only happen for some tests on some platforms; we should generally
   only bisect with a given bisect bot on a given metric if we know that the
   regression showed up on that platform for that metric.

   Note that the given list is sorted in place, and that anomalies positioned
   before the starting index (after sorting) are not considered.

   Args:
     anomalies: A non-empty list of Anomaly entities.
     index: Index of the first Anomaly entity to look at. If this is greater
         than the number of Anomalies, it will wrap around. This is used to
         make it easier to get different suggestions for what test to use given
         the same list of alerts.

   Returns:
     A Test entity, or None if no valid Test could be chosen.
   """
   if not anomalies:
     return None
   start = index % len(anomalies)
   anomalies.sort(cmp=_CompareAnomalyBisectability)
   for candidate in anomalies[start:]:
     test_key = candidate.test
     if not can_bisect.IsValidTestForBisect(utils.TestPath(test_key)):
       continue
     return test_key.get()
   return None


 def _CompareAnomalyBisectability(a1, a2):
   """Orders two Anomalies by how suitable their Tests are for bisecting.

   Currently only percent_changed is considered: the Anomaly with the larger
   value is the better one.

   TODO(qyearsley): Take other factors into account:
    - Consider bisect bot queue length. Note: If there's a simple API to fetch
      this from build.chromium.org, that would be best; even if there is not,
      it would be possible to fetch the HTML page for the builder and check the
      pending list from that.
    - Prefer some platforms over others. For example, bisects on Linux may run
      faster; also, if we fetch info from build.chromium.org, we can check recent
      failures.
    - Consider test run time. This may not be feasible without using a list
      of approximate test run times for different test suites.
    - Consider stddev of test; less noise -> more reliable bisect.

   Args:
     a1: The first Anomaly entity.
     a2: The second Anomaly entity.

   Returns:
     Negative integer if a1 is better than a2, positive integer if a2 is better
     than a1, or zero if they're equally good.
   """
   first_change = a1.percent_changed
   second_change = a2.percent_changed
   if first_change > second_change:
     return -1
   if first_change < second_change:
     return 1
   return 0


 def _ChooseRevisionRange(anomalies):
   """Chooses a revision range to use for a bisect job.

   The chosen range is the intersection of the anomalies' ranges: "good" is
   the largest (start_revision - 1) and "bad" is the smallest end_revision.
   One is subtracted because the first number in the chosen range is the "last
   known good" revision, whereas the start_revision property of an Anomaly is
   the "first possible bad" revision.

   Args:
     anomalies: A non-empty list of Anomaly entities.

   Returns:
     A pair (good, bad) of revision values, each one obtained by passing the
     chosen revision through _GetRevisionForBisect.

   Raises:
     NotBisectableError: The chosen good revision is not smaller than the
         chosen bad revision, so no valid range exists.
   """
   # (revision, test) tuples are compared, so anomalies with equal revisions
   # are ranked by their test values.
   last_good_candidates = [(a.start_revision - 1, a.test) for a in anomalies]
   first_bad_candidates = [(a.end_revision, a.test) for a in anomalies]
   good_rev, good_test = max(last_good_candidates)
   bad_rev, bad_test = min(first_bad_candidates)
   if not good_rev < bad_rev:
     raise NotBisectableError(
         'Good rev %s not smaller than bad rev %s.' % (good_rev, bad_rev))
   good_rev = _GetRevisionForBisect(good_rev, good_test)
   bad_rev = _GetRevisionForBisect(bad_rev, bad_test)
   return (good_rev, bad_rev)


 def _GetRevisionForBisect(revision, test_key):
   """Gets a start or end revision value which can be used when bisecting.

   The Row with the given ID is looked up under the test's container key. If
   it exists and has an a_default_rev attribute that names another attribute
   of the Row, the value of that other attribute is returned; otherwise the
   given revision is returned unchanged.

   Note: This logic is parallel to that in elements/chart-container.html
   in the method getRevisionForBisect.

   Args:
     revision: The ID of a Row, not necessarily an actual revision number.
     test_key: The ndb.Key for a Test.

   Returns:
     An int or string value which can be used when bisecting.
   """
   container_key = utils.GetTestContainerKey(test_key)
   row = graph_data.Row.get_by_id(revision, parent=container_key)
   if not row or not hasattr(row, 'a_default_rev'):
     return revision
   default_rev_name = row.a_default_rev
   if not hasattr(row, default_rev_name):
     return revision
   return getattr(row, default_rev_name)


 def _PrintStartedAndFailedBisectJobs():
   """Builds the template values listing failed and started bisect jobs.

   Returns:
     A dict with a 'headline' and a 'results' list holding one entry for the
     failed jobs followed by one for the started jobs, each ordered by run
     count.
   """
   sections = (
       ('Failed jobs', 'failed'),
       ('Started jobs', 'started'),
   )
   results = []
   for title, status in sections:
     jobs = try_job.TryJob.query(try_job.TryJob.status == status).fetch()
     jobs.sort(key=lambda j: j.run_count)
     results.append(_JobsListResult(title, jobs))
   return {'headline': 'Bisect Jobs', 'results': results}


 def _JobsListResult(title, jobs):
   """Returns one item in a list of results to be displayed on result.html.

   Args:
     title: Label for the item; the number of jobs is appended to it.
     jobs: A list of TryJob entities, each rendered on its own line.

   Returns:
     A dict with the keys 'name', 'value' and 'class'.
   """
   name = '%s: %d' % (title, len(jobs))
   job_lines = [_JobLine(job) for job in jobs]
   return {
       'name': name,
       'value': '\n'.join(job_lines),
       'class': 'results-pre',
   }


 def _JobLine(job):
   """Returns a string with information about one TryJob entity.

   Args:
     job: An object with run_count, bug_id and config attributes. The bug ID
         may be None and the config may be empty.

   Returns:
     A one-line summary of the job; newlines are removed from the config, and
     'No config.' is used when the job has no config.
   """
   config = job.config.replace('\n', '') if job.config else 'No config.'
   # The bug ID is formatted with %s rather than %d: %d raises a TypeError
   # for a job that has no bug ID, which would break the whole stats page.
   return 'Run count %d. Bug ID %s. %s' % (job.run_count, job.bug_id, config)
	# Copyright 2015 The Chromium Authors. All rights reserved.
	# Use of this source code is governed by a BSD-style license that can be
	# found in the LICENSE file.

	"""URL endpoint for a cron job to automatically run bisects."""

	import datetime
	import logging

	from dashboard import can_bisect
	from dashboard import datastore_hooks
	from dashboard import request_handler
	from dashboard import start_try_job
	from dashboard import utils
	from dashboard.models import anomaly
	from dashboard.models import graph_data
	from dashboard.models import try_job

	# Days to wait before each successive bisect restart. The entry used for a
	# job is the one at index (run_count - 1); failed jobs whose run_count exceeds
	# the length of this list are deleted instead of being restarted.
	_BISECT_RESTART_PERIOD_DAYS = [0, 1, 7, 14]


	class AutoBisectHandler(request_handler.RequestHandler):
	  """Cron-triggered URL endpoint that automatically re-runs bisects."""

	  def get(self):
	    """Handles a GET request exactly the same way as a POST request."""
	    self.post()

	  def post(self):
	    """Shows bisect job stats, or restarts failed bisect jobs.

	    If "stats" appears in the query string, a summary of the started and
	    failed bisect jobs is rendered and nothing is restarted. Otherwise the
	    failed jobs are restarted, and the monitoring metric is ticked only when
	    every attempted restart was triggered successfully.
	    """
	    wants_stats = 'stats' in self.request.query_string
	    if not wants_stats:
	      datastore_hooks.SetPrivilegedRequest()
	      if _RestartFailedBisectJobs():
	        utils.TickMonitoringCustomMetric('RestartFailedBisectJobs')
	      return
	    self.RenderHtml('result.html', _PrintStartedAndFailedBisectJobs())


	class NotBisectableError(Exception):
	  """Raised when a bisect job could not be started automatically."""


	def _RestartFailedBisectJobs():
	  """Restarts failed bisect jobs that are due for another attempt.

	  Every TryJob whose status is 'failed' is examined:
	   - Jobs whose run count is not positive are left untouched.
	   - Jobs that ran out of retries are deleted; if such a job has a bug ID, a
	     comment is first passed to start_try_job.LogBisectResult.
	   - Jobs that still have retries left and are due for a restart are re-run.
	     The first retry re-uses the existing config; later retries build a new
	     config, and are skipped for jobs that have no bug ID.

	  Returns:
	    True if all bisect jobs that were retried were successfully triggered,
	    and False otherwise.
	  """
	  failed_jobs = try_job.TryJob.query(try_job.TryJob.status == 'failed').fetch()
	  all_successful = True
	  for job in failed_jobs:
	    if job.run_count <= 0:
	      continue

	    if job.run_count > len(_BISECT_RESTART_PERIOD_DAYS):
	      # Out of retries: report on the bug (if any) and drop the job.
	      if job.bug_id:
	        # The trailing space after the first sentence matters: the two
	        # literals are concatenated into one message.
	        comment = ('Failed to run bisect %s times. '
	                   'Stopping automatic restart for this job.' %
	                   job.run_count)
	        start_try_job.LogBisectResult(job.bug_id, comment)
	      job.key.delete()
	      continue

	    if not _IsBisectJobDueForRestart(job):
	      continue

	    if job.run_count == 1:
	      # First retry: start the bisect right away with the same config.
	      try:
	        start_try_job.PerformBisect(job)
	      except request_handler.InvalidInputError as e:
	        logging.error(e.message)
	        all_successful = False
	    elif job.bug_id:
	      # Later retries: try the bisect again with a different config.
	      if not _RestartBisect(job):
	        all_successful = False
	  return all_successful


	def _RestartBisect(bisect_job):
	  """Restarts a bisect job after regenerating its config from its run count.

	  A fresh TryJob is built for the job's bug, with the run count used to vary
	  the chosen parameters. Its config, bot and use_buildbucket values are
	  copied onto the existing job, which is saved and then started.

	  Args:
	    bisect_job: TryJob entity with initialized bot name and config.

	  Returns:
	    True if the bisect was successfully triggered and False otherwise.
	  """
	  try:
	    replacement = _MakeBisectTryJob(bisect_job.bug_id, bisect_job.run_count)
	  except NotBisectableError:
	    return False

	  bisect_job.config = replacement.config
	  bisect_job.bot = replacement.bot
	  bisect_job.use_buildbucket = replacement.use_buildbucket
	  bisect_job.put()

	  try:
	    start_try_job.PerformBisect(bisect_job)
	  except request_handler.InvalidInputError as e:
	    logging.error(e.message)
	    return False
	  else:
	    return True


	def StartNewBisectForBug(bug_id):
	  """Attempts to start a bisect job for the alerts associated with a bug.

	  Args:
	    bug_id: A bug ID number.

	  Returns:
	    If successful, a dict containing "issue_id" and "issue_url" for the
	    bisect job. Otherwise, a dict containing "error", with some description
	    of the reason why a job wasn't started.
	  """
	  try:
	    new_job = _MakeBisectTryJob(bug_id)
	  except NotBisectableError as e:
	    return {'error': e.message}

	  # The job is saved before it is started; its key is kept so that the
	  # entity can be removed again if the bisect could not be started.
	  job_key = new_job.put()
	  try:
	    result = start_try_job.PerformBisect(new_job)
	  except request_handler.InvalidInputError as e:
	    result = {'error': e.message}

	  if 'error' in result:
	    job_key.delete()
	  return result


	def _MakeBisectTryJob(bug_id, run_count=0):
	  """Builds a bisect TryJob with automatically selected parameters.

	  The revision range, test, metric and bisect bot are all derived from the
	  Anomaly entities associated with the bug.

	  Args:
	    bug_id: A bug ID which some alerts are associated with.
	    run_count: An integer; this is supposed to represent the number of times
	        that a bisect has been tried for this bug; it is used to try different
	        config parameters on different re-try attempts.

	  Returns:
	    A TryJob entity, which has not yet been put in the datastore.

	  Raises:
	    NotBisectableError: A valid bisect config could not be created.
	  """
	  bug_anomalies = anomaly.Anomaly.query(
	      anomaly.Anomaly.bug_id == bug_id).fetch()
	  if not bug_anomalies:
	    raise NotBisectableError('No Anomaly alerts found for this bug.')

	  # The revision range is validated before a test is chosen, so a bad range
	  # is reported in preference to a missing test.
	  good_revision, bad_revision = _ChooseRevisionRange(bug_anomalies)
	  if not can_bisect.IsValidRevisionForBisect(good_revision):
	    raise NotBisectableError('Invalid "good" revision: %s.' % good_revision)
	  if not can_bisect.IsValidRevisionForBisect(bad_revision):
	    raise NotBisectableError('Invalid "bad" revision: %s.' % bad_revision)

	  # The run count selects which anomaly's test is tried first.
	  chosen_test = _ChooseTest(bug_anomalies, run_count)
	  if not (chosen_test and
	          can_bisect.IsValidTestForBisect(chosen_test.test_path)):
	    raise NotBisectableError('Could not select a test.')

	  metric = start_try_job.GuessMetric(chosen_test.test_path)

	  bot_name = start_try_job.GuessBisectBot(
	      chosen_test.master_name, chosen_test.bot_name)
	  if not (bot_name and '_' in bot_name):
	    raise NotBisectableError('Could not select a bisect bot.')

	  use_recipe = bool(start_try_job.GetBisectDirectorForTester(bot_name))

	  config_args = {
	      'bisect_bot': bot_name,
	      'master_name': chosen_test.master_name,
	      'suite': chosen_test.suite_name,
	      'metric': metric,
	      'good_revision': good_revision,
	      'bad_revision': bad_revision,
	      'repeat_count': 10,
	      'max_time_minutes': 20,
	      'bug_id': bug_id,
	      'use_archive': 'true',
	      'use_buildbucket': use_recipe,
	  }
	  bisect_config = start_try_job.GetBisectConfig(**config_args)
	  if 'error' in bisect_config:
	    raise NotBisectableError('Could not make a valid config.')

	  return try_job.TryJob(
	      bot=bot_name,
	      config=utils.BisectConfigPythonString(bisect_config),
	      bug_id=bug_id,
	      master_name=chosen_test.master_name,
	      internal_only=chosen_test.internal_only,
	      job_type='bisect',
	      use_buildbucket=use_recipe)


	def _IsBisectJobDueForRestart(bisect_job):
	  """Returns whether enough time has passed to restart the bisect job.

	  The required wait, in days, is the entry of _BISECT_RESTART_PERIOD_DAYS at
	  index (run_count - 1). The job is due when its last_ran_timestamp is at
	  least that far behind datetime.datetime.now().
	  """
	  wait_days = _BISECT_RESTART_PERIOD_DAYS[bisect_job.run_count - 1]
	  cutoff = datetime.datetime.now() - datetime.timedelta(days=wait_days)
	  return bisect_job.last_ran_timestamp <= cutoff


	def _ChooseTest(anomalies, index=0):
	  """Picks the Test to bisect on from a list of anomalies.

	  The Test that is chosen determines the command and metric name of the
	  bisect. Anomalies are ordered by _CompareAnomalyBisectability, and the
	  first one (starting at the given index) whose test path is valid for
	  bisect wins.

	  Ideally, the choice of bisect bot to use should be based on bisect bot queue
	  length, and the choice of metric should be based on regression size and noise
	  level.

	  However, we don't choose bisect bot and metric independently, since some
	  regressions only happen for some tests on some platforms; we should generally
	  only bisect with a given bisect bot on a given metric if we know that the
	  regression showed up on that platform for that metric.

	  Note that the given list is sorted in place, and that anomalies positioned
	  before the starting index (after sorting) are not considered.

	  Args:
	    anomalies: A non-empty list of Anomaly entities.
	    index: Index of the first Anomaly entity to look at. If this is greater
	        than the number of Anomalies, it will wrap around. This is used to
	        make it easier to get different suggestions for what test to use given
	        the same list of alerts.

	  Returns:
	    A Test entity, or None if no valid Test could be chosen.
	  """
	  if not anomalies:
	    return None
	  start = index % len(anomalies)
	  anomalies.sort(cmp=_CompareAnomalyBisectability)
	  for candidate in anomalies[start:]:
	    test_key = candidate.test
	    if not can_bisect.IsValidTestForBisect(utils.TestPath(test_key)):
	      continue
	    return test_key.get()
	  return None


	def _CompareAnomalyBisectability(a1, a2):
	  """Orders two Anomalies by how suitable their Tests are for bisecting.

	  Currently only percent_changed is considered: the Anomaly with the larger
	  value is the better one.

	  TODO(qyearsley): Take other factors into account:
	   - Consider bisect bot queue length. Note: If there's a simple API to fetch
	     this from build.chromium.org, that would be best; even if there is not,
	     it would be possible to fetch the HTML page for the builder and check the
	     pending list from that.
	   - Prefer some platforms over others. For example, bisects on Linux may run
	     faster; also, if we fetch info from build.chromium.org, we can check recent
	     failures.
	   - Consider test run time. This may not be feasible without using a list
	     of approximate test run times for different test suites.
	   - Consider stddev of test; less noise -> more reliable bisect.

	  Args:
	    a1: The first Anomaly entity.
	    a2: The second Anomaly entity.

	  Returns:
	    Negative integer if a1 is better than a2, positive integer if a2 is better
	    than a1, or zero if they're equally good.
	  """
	  first_change = a1.percent_changed
	  second_change = a2.percent_changed
	  if first_change > second_change:
	    return -1
	  if first_change < second_change:
	    return 1
	  return 0


	def _ChooseRevisionRange(anomalies):
	  """Chooses a revision range to use for a bisect job.

	  The chosen range is the intersection of the anomalies' ranges: "good" is
	  the largest (start_revision - 1) and "bad" is the smallest end_revision.
	  One is subtracted because the first number in the chosen range is the "last
	  known good" revision, whereas the start_revision property of an Anomaly is
	  the "first possible bad" revision.

	  Args:
	    anomalies: A non-empty list of Anomaly entities.

	  Returns:
	    A pair (good, bad) of revision values, each one obtained by passing the
	    chosen revision through _GetRevisionForBisect.

	  Raises:
	    NotBisectableError: The chosen good revision is not smaller than the
	        chosen bad revision, so no valid range exists.
	  """
	  # (revision, test) tuples are compared, so anomalies with equal revisions
	  # are ranked by their test values.
	  last_good_candidates = [(a.start_revision - 1, a.test) for a in anomalies]
	  first_bad_candidates = [(a.end_revision, a.test) for a in anomalies]
	  good_rev, good_test = max(last_good_candidates)
	  bad_rev, bad_test = min(first_bad_candidates)
	  if not good_rev < bad_rev:
	    raise NotBisectableError(
	        'Good rev %s not smaller than bad rev %s.' % (good_rev, bad_rev))
	  good_rev = _GetRevisionForBisect(good_rev, good_test)
	  bad_rev = _GetRevisionForBisect(bad_rev, bad_test)
	  return (good_rev, bad_rev)


	def _GetRevisionForBisect(revision, test_key):
	  """Gets a start or end revision value which can be used when bisecting.

	  The Row with the given ID is looked up under the test's container key. If
	  it exists and has an a_default_rev attribute that names another attribute
	  of the Row, the value of that other attribute is returned; otherwise the
	  given revision is returned unchanged.

	  Note: This logic is parallel to that in elements/chart-container.html
	  in the method getRevisionForBisect.

	  Args:
	    revision: The ID of a Row, not necessarily an actual revision number.
	    test_key: The ndb.Key for a Test.

	  Returns:
	    An int or string value which can be used when bisecting.
	  """
	  container_key = utils.GetTestContainerKey(test_key)
	  row = graph_data.Row.get_by_id(revision, parent=container_key)
	  if not row or not hasattr(row, 'a_default_rev'):
	    return revision
	  default_rev_name = row.a_default_rev
	  if not hasattr(row, default_rev_name):
	    return revision
	  return getattr(row, default_rev_name)


	def _PrintStartedAndFailedBisectJobs():
	  """Builds the template values listing failed and started bisect jobs.

	  Returns:
	    A dict with a 'headline' and a 'results' list holding one entry for the
	    failed jobs followed by one for the started jobs, each ordered by run
	    count.
	  """
	  sections = (
	      ('Failed jobs', 'failed'),
	      ('Started jobs', 'started'),
	  )
	  results = []
	  for title, status in sections:
	    jobs = try_job.TryJob.query(try_job.TryJob.status == status).fetch()
	    jobs.sort(key=lambda j: j.run_count)
	    results.append(_JobsListResult(title, jobs))
	  return {'headline': 'Bisect Jobs', 'results': results}


	def _JobsListResult(title, jobs):
	  """Returns one item in a list of results to be displayed on result.html.

	  Args:
	    title: Label for the item; the number of jobs is appended to it.
	    jobs: A list of TryJob entities, each rendered on its own line.

	  Returns:
	    A dict with the keys 'name', 'value' and 'class'.
	  """
	  name = '%s: %d' % (title, len(jobs))
	  job_lines = [_JobLine(job) for job in jobs]
	  return {
	      'name': name,
	      'value': '\n'.join(job_lines),
	      'class': 'results-pre',
	  }


	def _JobLine(job):
	  """Returns a string with information about one TryJob entity.

	  Args:
	    job: An object with run_count, bug_id and config attributes. The bug ID
	        may be None and the config may be empty.

	  Returns:
	    A one-line summary of the job; newlines are removed from the config, and
	    'No config.' is used when the job has no config.
	  """
	  config = job.config.replace('\n', '') if job.config else 'No config.'
	  # The bug ID is formatted with %s rather than %d: %d raises a TypeError
	  # for a job that has no bug ID, which would break the whole stats page.
	  return 'Run count %d. Bug ID %s. %s' % (job.run_count, job.bug_id, config)