autotest: Install stateful with the same build of current rootfs.

This CL changes the devserver side to add a new feature to the cros-flash-based
provision framework: installing the stateful partition with the same build as
the current rootfs partition on a host if it has a very old build installed.

Also this CL adds a metric to monitor this case.

BUG=chromium:658374
TEST=Ran autoupdate_EndToEndTest on a host first, then repaired it with this
new feature. Verified that without this feature the repair fails, and that
with this feature the repair succeeds.

Change-Id: I066cbff0f8b29800116340d7b2ae923c22da7075
Reviewed-on: https://chromium-review.googlesource.com/415500
Commit-Ready: Xixuan Wu <xixuan@chromium.org>
Tested-by: Xixuan Wu <xixuan@chromium.org>
Reviewed-by: Xixuan Wu <xixuan@chromium.org>
diff --git a/client/common_lib/cros/dev_server.py b/client/common_lib/cros/dev_server.py
index 60d87ee..5f177dc 100644
--- a/client/common_lib/cros/dev_server.py
+++ b/client/common_lib/cros/dev_server.py
@@ -1943,14 +1943,36 @@
         return False
 
 
-    def auto_update(self, host_name, build_name, log_dir=None,
+    def _parse_buildname_safely(self, build_name):
+        """Parse a given buildname safely.
+
+        @param build_name: the build name to be parsed.
+
+        @return: a tuple (board, build_type, milestone)
+        """
+        try:
+            board, build_type, milestone, _ = server_utils.ParseBuildName(
+                    build_name)
+        except server_utils.ParseBuildNameException:
+            logging.warning('Unable to parse build name %s for metrics. '
+                            'Continuing anyway.', build_name)
+            board, build_type, milestone = ('', '', '')
+
+        return board, build_type, milestone
+
+
+    def auto_update(self, host_name, build_name, original_board=None,
+                    original_release_version=None, log_dir=None,
                     force_update=False, full_update=False):
         """Auto-update a CrOS host.
 
-        @param host_name:    The hostname of the DUT to auto-update.
-        @param build_name:   The build name to be auto-updated on the DUT.
-        @param log_dir:      The log directory to store auto-update logs from
-                             devserver.
+        @param host_name: The hostname of the DUT to auto-update.
+        @param build_name:  The build name to be auto-updated on the DUT.
+        @param original_board: The original board of the DUT to auto-update.
+        @param original_release_version: The release version of the DUT's
+            current build.
+        @param log_dir: The log directory to store auto-update logs from
+            devserver.
         @param force_update: Force an update even if the version installed
                              is the same. Default: False.
         @param full_update:  If True, do not run stateful update, directly
@@ -1977,16 +1999,40 @@
                                   AUTO_UPDATE_LOG_DIR) if log_dir else None
         error_list = []
         retry_with_another_devserver = False
+        board, build_type, milestone = self._parse_buildname_safely(build_name)
 
         for au_attempt in range(AU_RETRY_LIMIT):
             logging.debug('Start CrOS auto-update for host %s at %d time(s).',
                           host_name, au_attempt + 1)
-            # No matter _start_auto_update succeeds or fails, the auto-update
+            # No matter _trigger_auto_update succeeds or fails, the auto-update
             # track_status_file should be cleaned, and the auto-update execute
             # log should be collected to directory sysinfo. Also, the error
-            # raised by _start_auto_update should be displayed.
+            # raised by _trigger_auto_update should be displayed.
             try:
-                response = self._trigger_auto_update(**kwargs)
+                # Try update with stateful.tgz of old release version in the
+                # last try of auto-update.
+                if (au_attempt > 0 and au_attempt  == AU_RETRY_LIMIT - 1 and
+                    original_release_version):
+                    # Monitor this case in monarch
+                    original_build = '%s/%s' % (original_board,
+                                                original_release_version)
+                    c = metrics.Counter(
+                            'chromeos/autotest/provision/'
+                            'cros_update_with_original_build')
+                    f = {'dev_server': ImageServer.get_server_name(self.url()),
+                         'board': board,
+                         'build_type': build_type,
+                         'milestone': milestone,
+                         'original_build': original_build}
+                    c.increment(fields=f)
+
+                    logging.debug('Try updating stateful partition of the '
+                                  'host with the same version of its current '
+                                  'rootfs partition: %s', original_build)
+                    response = self._trigger_auto_update(
+                            original_build=original_build, **kwargs)
+                else:
+                    response = self._trigger_auto_update(**kwargs)
             except DevServerException as e:
                 logging.debug(error_msg_attempt, au_attempt+1, str(e))
                 error_list.append(str(e))
@@ -2049,15 +2095,6 @@
                             'AU failed, trying IP instead of hostname: %s',
                             host_name_ip)
 
-        # Upload data to metrics
-        try:
-            board, build_type, milestone, _ = server_utils.ParseBuildName(
-                build_name)
-        except server_utils.ParseBuildNameException:
-            logging.warning('Unable to parse build name %s for metrics. '
-                            'Continuing anyway.', build_name)
-            board, build_type, milestone = ('', '', '')
-
         # Note: To avoid reaching or exceeding the monarch field cardinality
         # limit, we avoid a metric that includes both dut hostname and other
         # high cardinality fields.
diff --git a/client/common_lib/cros/dev_server_unittest.py b/client/common_lib/cros/dev_server_unittest.py
index 7e3a7d7..9851ca1 100755
--- a/client/common_lib/cros/dev_server_unittest.py
+++ b/client/common_lib/cros/dev_server_unittest.py
@@ -587,7 +587,7 @@
         errors.
 
         Func auto_update() should call 'handler_cleanup' and 'collect_au_log'
-        even if '_start_auto_update()' failed.
+        even if '_trigger_auto_update()' failed.
         """
         self.mox.StubOutWithMock(time, 'sleep')
         self.mox.StubOutWithMock(__builtin__, 'open')
@@ -603,7 +603,7 @@
         self.mox.ReplayAll()
         self.assertRaises(dev_server.DevServerException,
                           self.dev_server.auto_update,
-                          '100.0.0.0', 'build', 'path/')
+                          '100.0.0.0', 'build', log_dir='path/')
 
 
     def testCleanUpErrorInAutoUpdate(self):
@@ -611,7 +611,7 @@
         errors.
 
         Func auto_update() should call 'handler_cleanup' and 'collect_au_log'
-        no matter '_start_auto_update()' succeeds or fails.
+        no matter '_trigger_auto_update()' succeeds or fails.
         """
         self.mox.StubOutWithMock(time, 'sleep')
         self.mox.StubOutWithMock(__builtin__, 'open')
@@ -628,7 +628,7 @@
         self.mox.ReplayAll()
         self.assertRaises(dev_server.DevServerException,
                           self.dev_server.auto_update,
-                          '100.0.0.0', 'build', 'path/')
+                          '100.0.0.0', 'build', log_dir='path/')
 
 
     def testCollectLogErrorInAutoUpdate(self):
@@ -648,7 +648,7 @@
         self.mox.ReplayAll()
         self.assertRaises(dev_server.DevServerException,
                           self.dev_server.auto_update,
-                          '100.0.0.0', 'build', 'path/')
+                          '100.0.0.0', 'build', log_dir='path/')
 
 
     def testGetAUStatusErrorAndCleanUpErrorInAutoUpdate(self):
@@ -656,7 +656,7 @@
         and handler_cleanup errors.
 
         Func auto_update() should call 'handler_cleanup' and 'collect_au_log'
-        even if '_start_auto_update()' fails.
+        even if '_trigger_auto_update()' fails.
         """
         self.mox.StubOutWithMock(time, 'sleep')
         self.mox.StubOutWithMock(__builtin__, 'open')
@@ -673,7 +673,7 @@
         self.mox.ReplayAll()
         self.assertRaises(dev_server.DevServerException,
                           self.dev_server.auto_update,
-                          '100.0.0.0', 'build', 'path/')
+                          '100.0.0.0', 'build', log_dir='path/')
 
 
     def testGetAUStatusErrorAndCleanUpErrorAndCollectLogErrorInAutoUpdate(self):
@@ -681,7 +681,7 @@
         handler_cleanup, and collect_au_log errors.
 
         Func auto_update() should call 'handler_cleanup' and 'collect_au_log'
-        even if '_start_auto_update()' fails.
+        even if '_trigger_auto_update()' fails.
         """
         self.mox.StubOutWithMock(time, 'sleep')
         kwargs={'cros_au_error': False, 'get_au_status_error': True,
@@ -696,7 +696,7 @@
         self.mox.ReplayAll()
         self.assertRaises(dev_server.DevServerException,
                           self.dev_server.auto_update,
-                          '100.0.0.0', 'build', 'path/')
+                          '100.0.0.0', 'build', log_dir='path/')
 
 
     def testGetAUStatusErrorAndCleanUpErrorAndCollectLogErrorAndKillErrorInAutoUpdate(self):
@@ -704,7 +704,7 @@
         handler_cleanup, collect_au_log, and kill_au_proc errors.
 
         Func auto_update() should call 'handler_cleanup' and 'collect_au_log'
-        even if '_start_auto_update()' fails.
+        even if '_trigger_auto_update()' fails.
         """
         self.mox.StubOutWithMock(time, 'sleep')
 
@@ -720,7 +720,7 @@
         self.mox.ReplayAll()
         self.assertRaises(dev_server.DevServerException,
                           self.dev_server.auto_update,
-                          '100.0.0.0', 'build', 'path/')
+                          '100.0.0.0', 'build', log_dir='path/')
 
 
     def testSuccessfulTriggerDownloadSync(self):
diff --git a/server/hosts/cros_host.py b/server/hosts/cros_host.py
index 489da49..76fba3d 100644
--- a/server/hosts/cros_host.py
+++ b/server/hosts/cros_host.py
@@ -739,8 +739,13 @@
                         ).increment(fields=monarch_fields)
 
         success, retryable = devserver.auto_update(
-                self.hostname, build, log_dir=self.job.resultdir,
-                force_update=force_update, full_update=force_full_update)
+                self.hostname, build,
+                original_board=self.get_board().replace(
+                        ds_constants.BOARD_PREFIX, ''),
+                original_release_version=self.get_release_version(),
+                log_dir=self.job.resultdir,
+                force_update=force_update,
+                full_update=force_full_update)
         if not success and retryable:
           # It indicates that last provision failed due to devserver load
           # issue, so another devserver is resolved to kick off provision
@@ -768,9 +773,13 @@
               c.increment(fields=monarch_fields)
 
               devserver.auto_update(
-                    self.hostname, build, log_dir=self.job.resultdir,
-                    force_update=force_update, full_update=force_full_update)
-
+                      self.hostname, build,
+                      original_board=self.get_board().replace(
+                              ds_constants.BOARD_PREFIX, ''),
+                      original_release_version=self.get_release_version(),
+                      log_dir=self.job.resultdir,
+                      force_update=force_update,
+                      full_update=force_full_update)
 
         # The reason to resolve a new devserver in function machine_install
         # is mostly because that the update_url there may has a strange format,