autotest: don't allow retry in retrying provision to raise error correctly.
This CL raise a retryableException in auto_update() for callers to
decide whether they want to retry provision.
BUG=chromium:731274
TEST=Ran unittest.
Change-Id: I0a97aee70c97718708e7b8a103ac3e4e364d31a3
Reviewed-on: https://chromium-review.googlesource.com/528430
Commit-Ready: Xixuan Wu <xixuan@chromium.org>
Tested-by: Xixuan Wu <xixuan@chromium.org>
Reviewed-by: Don Garrett <dgarrett@chromium.org>
diff --git a/client/common_lib/cros/dev_server.py b/client/common_lib/cros/dev_server.py
index fbc13fc..99495ed 100644
--- a/client/common_lib/cros/dev_server.py
+++ b/client/common_lib/cros/dev_server.py
@@ -150,6 +150,9 @@
"""Raised when the dev server returns a non-200 HTTP response."""
pass
+class RetryableProvisionException(DevServerException):
+ """Raised when provision fails due to a retryable reason."""
+ pass
class DevServerOverloadException(Exception):
"""Raised when the dev server returns a 502 HTTP response."""
@@ -2049,12 +2052,10 @@
@param force_original: Whether to force stateful update with the
original payload.
- @return A set (is_success, is_retryable) in which:
- 1. is_success indicates whether this auto_update succeeds.
- 2. is_retryable indicates whether we should retry auto_update if
- if it fails.
+ @return is_success, which indicates whether this auto_update succeeds.
@raise DevServerException if auto_update fails and is not retryable.
+ @raise RetryableProvisionException if it fails and is retryable.
"""
kwargs = {'host_name': host_name,
'build_name': build_name,
@@ -2196,8 +2197,8 @@
'dut_host_name': host_name}
c.increment(fields=f)
- if is_au_success or retry_with_another_devserver:
- return (is_au_success, retry_with_another_devserver)
+ if is_au_success:
+ return is_au_success
# If errors happen in the CrOS AU process, report the first error
# since the following errors might be caused by the first error.
@@ -2205,7 +2206,12 @@
# auto-update logs, or killing auto-update processes, just report
# them together.
if error_list:
- raise DevServerException(error_msg % (host_name, error_list[0]))
+ if retry_with_another_devserver:
+ raise RetryableProvisionException(
+ error_msg % (host_name, error_list[0]))
+ else:
+ raise DevServerException(
+ error_msg % (host_name, error_list[0]))
else:
raise DevServerException(error_msg % (
host_name, ('RPC calls after the whole auto-update '
diff --git a/server/hosts/cros_host.py b/server/hosts/cros_host.py
index 0a7b379..e7b7e5c 100644
--- a/server/hosts/cros_host.py
+++ b/server/hosts/cros_host.py
@@ -689,7 +689,10 @@
monarch_fields['host'] = self.hostname
c.increment(fields=monarch_fields)
- return devserver.auto_update(
+ # Won't retry auto_update in a retry of auto-update.
+ # In other words, we only retry auto-update once with a different
+ # devservers.
+ devserver.auto_update(
self.hostname, build,
original_board=self.get_board().replace(
ds_constants.BOARD_PREFIX, ''),
@@ -789,32 +792,33 @@
force_original = self.get_chromeos_release_milestone() is None
- success, retryable = devserver.auto_update(
- self.hostname, build,
- original_board=self.get_board().replace(
- ds_constants.BOARD_PREFIX, ''),
- original_release_version=self.get_release_version(),
- log_dir=self.job.resultdir,
- force_update=force_update,
- full_update=force_full_update,
- force_original=force_original)
- if not success and retryable:
- # It indicates that last provision failed due to devserver load
- # issue, so another devserver is resolved to kick off provision
- # job once again and only once.
- logging.debug('Provision failed due to devserver issue,'
- 'retry it with another devserver.')
+ try:
+ devserver.auto_update(
+ self.hostname, build,
+ original_board=self.get_board().replace(
+ ds_constants.BOARD_PREFIX, ''),
+ original_release_version=self.get_release_version(),
+ log_dir=self.job.resultdir,
+ force_update=force_update,
+ full_update=force_full_update,
+ force_original=force_original)
+ except dev_server.RetryableProvisionException:
+ # It indicates that last provision failed due to devserver load
+ # issue, so another devserver is resolved to kick off provision
+ # job once again and only once.
+ logging.debug('Provision failed due to devserver issue,'
+ 'retry it with another devserver.')
- # Check first whether this DUT is completely offline. If so, skip
- # the following provision tries.
- logging.debug('Checking whether host %s is online.', self.hostname)
- if utils.ping(self.hostname, tries=1, deadline=1) == 0:
- self._retry_auto_update_with_new_devserver(
- build, devserver, force_update, force_full_update,
- force_original)
- else:
- raise error.AutoservError(
- 'No answer to ping from %s' % self.hostname)
+ # Check first whether this DUT is completely offline. If so, skip
+ # the following provision tries.
+ logging.debug('Checking whether host %s is online.', self.hostname)
+ if utils.ping(self.hostname, tries=1, deadline=1) == 0:
+ self._retry_auto_update_with_new_devserver(
+ build, devserver, force_update, force_full_update,
+ force_original)
+ else:
+ raise error.AutoservError(
+ 'No answer to ping from %s' % self.hostname)
# The reason to resolve a new devserver in function machine_install
# is mostly because that the update_url there may has a strange format,