autotest: don't allow retry in retrying provision to raise error correctly.

This CL raise a retryableException in auto_update() for callers to
decide whether they want to retry provision.

BUG=chromium:731274
TEST=Ran unittest.

Change-Id: I0a97aee70c97718708e7b8a103ac3e4e364d31a3
Reviewed-on: https://chromium-review.googlesource.com/528430
Commit-Ready: Xixuan Wu <xixuan@chromium.org>
Tested-by: Xixuan Wu <xixuan@chromium.org>
Reviewed-by: Don Garrett <dgarrett@chromium.org>
diff --git a/client/common_lib/cros/dev_server.py b/client/common_lib/cros/dev_server.py
index fbc13fc..99495ed 100644
--- a/client/common_lib/cros/dev_server.py
+++ b/client/common_lib/cros/dev_server.py
@@ -150,6 +150,9 @@
     """Raised when the dev server returns a non-200 HTTP response."""
     pass
 
+class RetryableProvisionException(DevServerException):
+    """Raised when provision fails due to a retryable reason."""
+    pass
 
 class DevServerOverloadException(Exception):
     """Raised when the dev server returns a 502 HTTP response."""
@@ -2049,12 +2052,10 @@
         @param force_original: Whether to force stateful update with the
                                original payload.
 
-        @return A set (is_success, is_retryable) in which:
-            1. is_success indicates whether this auto_update succeeds.
-            2. is_retryable indicates whether we should retry auto_update if
-               if it fails.
+        @return is_success, which indicates whether this auto_update succeeds.
 
         @raise DevServerException if auto_update fails and is not retryable.
+        @raise RetryableProvisionException if it fails and is retryable.
         """
         kwargs = {'host_name': host_name,
                   'build_name': build_name,
@@ -2196,8 +2197,8 @@
              'dut_host_name': host_name}
         c.increment(fields=f)
 
-        if is_au_success or retry_with_another_devserver:
-            return (is_au_success, retry_with_another_devserver)
+        if is_au_success:
+            return is_au_success
 
         # If errors happen in the CrOS AU process, report the first error
         # since the following errors might be caused by the first error.
@@ -2205,7 +2206,12 @@
         # auto-update logs, or killing auto-update processes, just report
         # them together.
         if error_list:
-            raise DevServerException(error_msg % (host_name, error_list[0]))
+            if retry_with_another_devserver:
+                raise RetryableProvisionException(
+                        error_msg % (host_name, error_list[0]))
+            else:
+                raise DevServerException(
+                        error_msg % (host_name, error_list[0]))
         else:
             raise DevServerException(error_msg % (
                         host_name, ('RPC calls after the whole auto-update '
diff --git a/server/hosts/cros_host.py b/server/hosts/cros_host.py
index 0a7b379..e7b7e5c 100644
--- a/server/hosts/cros_host.py
+++ b/server/hosts/cros_host.py
@@ -689,7 +689,10 @@
         monarch_fields['host'] = self.hostname
         c.increment(fields=monarch_fields)
 
-        return devserver.auto_update(
+        # Won't retry auto_update in a retry of auto-update.
+        # In other words, we only retry auto-update once with a different
+        # devservers.
+        devserver.auto_update(
                 self.hostname, build,
                 original_board=self.get_board().replace(
                         ds_constants.BOARD_PREFIX, ''),
@@ -789,32 +792,33 @@
 
         force_original = self.get_chromeos_release_milestone() is None
 
-        success, retryable = devserver.auto_update(
-                self.hostname, build,
-                original_board=self.get_board().replace(
-                        ds_constants.BOARD_PREFIX, ''),
-                original_release_version=self.get_release_version(),
-                log_dir=self.job.resultdir,
-                force_update=force_update,
-                full_update=force_full_update,
-                force_original=force_original)
-        if not success and retryable:
-          # It indicates that last provision failed due to devserver load
-          # issue, so another devserver is resolved to kick off provision
-          # job once again and only once.
-          logging.debug('Provision failed due to devserver issue,'
-                        'retry it with another devserver.')
+        try:
+            devserver.auto_update(
+                    self.hostname, build,
+                    original_board=self.get_board().replace(
+                            ds_constants.BOARD_PREFIX, ''),
+                    original_release_version=self.get_release_version(),
+                    log_dir=self.job.resultdir,
+                    force_update=force_update,
+                    full_update=force_full_update,
+                    force_original=force_original)
+        except dev_server.RetryableProvisionException:
+            # It indicates that last provision failed due to devserver load
+            # issue, so another devserver is resolved to kick off provision
+            # job once again and only once.
+            logging.debug('Provision failed due to devserver issue,'
+                          'retry it with another devserver.')
 
-          # Check first whether this DUT is completely offline. If so, skip
-          # the following provision tries.
-          logging.debug('Checking whether host %s is online.', self.hostname)
-          if utils.ping(self.hostname, tries=1, deadline=1) == 0:
-              self._retry_auto_update_with_new_devserver(
-                      build, devserver, force_update, force_full_update,
-                      force_original)
-          else:
-              raise error.AutoservError(
-                      'No answer to ping from %s' % self.hostname)
+            # Check first whether this DUT is completely offline. If so, skip
+            # the following provision tries.
+            logging.debug('Checking whether host %s is online.', self.hostname)
+            if utils.ping(self.hostname, tries=1, deadline=1) == 0:
+                self._retry_auto_update_with_new_devserver(
+                        build, devserver, force_update, force_full_update,
+                        force_original)
+            else:
+                raise error.AutoservError(
+                        'No answer to ping from %s' % self.hostname)
 
         # The reason to resolve a new devserver in function machine_install
         # is mostly because that the update_url there may has a strange format,