Add support for clobbered blocks

In ext4 filesystems, some blocks, such as the superblock (block 0),
may be modified even when the filesystem is mounted R/O. We need to
exclude such blocks from integrity verification. In addition, such
blocks should always be written to the target by copying instead of
patching.

Bug: http://b/20939131
Change-Id: I657025b7b1ad50d4365e7b18dc39308facfe864e
(cherry picked from commit ff7778166bd13a90c89fa333591ee2037f587a11)
diff --git a/tools/releasetools/blockimgdiff.py b/tools/releasetools/blockimgdiff.py
index 544b1d0..0a387ec 100644
--- a/tools/releasetools/blockimgdiff.py
+++ b/tools/releasetools/blockimgdiff.py
@@ -82,6 +82,7 @@
   """A zero-length image."""
   blocksize = 4096
   care_map = RangeSet()
+  clobbered_blocks = RangeSet()
   total_blocks = 0
   file_map = {}
   def ReadRangeSet(self, ranges):
@@ -114,6 +115,7 @@
 
     self.total_blocks = len(self.data) / self.blocksize
     self.care_map = RangeSet(data=(0, self.total_blocks))
+    self.clobbered_blocks = RangeSet()
 
     zero_blocks = []
     nonzero_blocks = []
@@ -135,6 +137,8 @@
     return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]
 
   def TotalSha1(self):
+    # DataImage always carries empty clobbered_blocks.
+    assert self.clobbered_blocks.size() == 0
     return sha1(self.data).hexdigest()
 
 
@@ -184,6 +188,10 @@
 #      (Typically a domain is a file, and the key in file_map is the
 #      pathname.)
 #
+#    clobbered_blocks: a RangeSet of the blocks that contain data but
+#      may be altered by the FS. They need to be excluded when
+#      verifying the partition integrity.
+#
 #    ReadRangeSet(): a function that takes a RangeSet and returns the
 #      data contained in the image blocks of that RangeSet.  The data
 #      is returned as a list or tuple of strings; concatenating the
@@ -193,7 +201,7 @@
 #
 #    TotalSha1(): a function that returns (as a hex string) the SHA-1
 #      hash of all the data in the image (ie, all the blocks in the
-#      care_map)
+#      care_map minus clobbered_blocks).
 #
 # When creating a BlockImageDiff, the src image may be None, in which
 # case the list of transfers produced will never read from the
@@ -844,6 +852,12 @@
                  "zero", self.transfers)
         continue
 
+      elif tgt_fn == "__COPY":
+        # "__COPY" domain includes all the blocks not contained in any
+        # file and that need to be copied unconditionally to the target.
+        Transfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
+        continue
+
       elif tgt_fn in self.src.file_map:
         # Look for an exact pathname match in the source.
         Transfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
diff --git a/tools/releasetools/common.py b/tools/releasetools/common.py
index ab58fa7..8ab98e7 100644
--- a/tools/releasetools/common.py
+++ b/tools/releasetools/common.py
@@ -1148,6 +1148,9 @@
     self.partition = partition
     self.check_first_block = check_first_block
 
+    # Due to http://b/20939131, check_first_block is disabled temporarily.
+    assert not self.check_first_block
+
     if version is None:
       version = 1
       if OPTIONS.info_dict:
@@ -1181,18 +1184,18 @@
     if not self.src:
       script.Print("Image %s will be patched unconditionally." % (partition,))
     else:
+      ranges = self.src.care_map.subtract(self.src.clobbered_blocks)
+      ranges_str = ranges.to_string_raw()
       if self.version >= 3:
         script.AppendExtra(('if (range_sha1("%s", "%s") == "%s" || '
                             'block_image_verify("%s", '
                             'package_extract_file("%s.transfer.list"), '
                             '"%s.new.dat", "%s.patch.dat")) then') % (
-                            self.device, self.src.care_map.to_string_raw(),
-                            self.src.TotalSha1(),
+                            self.device, ranges_str, self.src.TotalSha1(),
                             self.device, partition, partition, partition))
       else:
         script.AppendExtra('if range_sha1("%s", "%s") == "%s" then' % (
-            self.device, self.src.care_map.to_string_raw(),
-            self.src.TotalSha1()))
+                           self.device, ranges_str, self.src.TotalSha1()))
       script.Print('Verified %s image...' % (partition,))
       script.AppendExtra('else')
 
@@ -1240,6 +1243,9 @@
 
     return ctx.hexdigest()
 
+  # TODO(tbao): Due to http://b/20939131, block 0 may be changed without
+  # remounting R/W. Will change the checking to a finer-grained way to
+  # mask off those bits.
   def _CheckFirstBlock(self, script):
     r = rangelib.RangeSet((0, 1))
     srchash = self._HashBlocks(self.src, r)
diff --git a/tools/releasetools/ota_from_target_files.py b/tools/releasetools/ota_from_target_files.py
index 900eaec..c4d0c1b 100755
--- a/tools/releasetools/ota_from_target_files.py
+++ b/tools/releasetools/ota_from_target_files.py
@@ -475,7 +475,13 @@
       path = add_img_to_target_files.BuildVendor(
           tmpdir, info_dict, block_list=mappath)
 
-  return sparse_img.SparseImage(path, mappath)
+  # Bug: http://b/20939131
+  # In ext4 filesystems, block 0 might be changed even when mounted
+  # R/O. We add it to clobbered_blocks so that it will be written to the
+  # target unconditionally. Note that it is still part of care_map.
+  clobbered_blocks = "0"
+
+  return sparse_img.SparseImage(path, mappath, clobbered_blocks)
 
 
 def WriteFullOTAPackage(input_zip, output_zip):
@@ -773,7 +779,6 @@
         OPTIONS.info_dict.get("blockimgdiff_versions", "1").split(","))
 
   system_diff = common.BlockDifference("system", system_tgt, system_src,
-                                       check_first_block=True,
                                        version=blockimgdiff_version)
 
   if HasVendorPartition(target_zip):
@@ -784,7 +789,6 @@
     vendor_tgt = GetImage("vendor", OPTIONS.target_tmp,
                           OPTIONS.target_info_dict)
     vendor_diff = common.BlockDifference("vendor", vendor_tgt, vendor_src,
-                                         check_first_block=True,
                                          version=blockimgdiff_version)
   else:
     vendor_diff = None
diff --git a/tools/releasetools/sparse_img.py b/tools/releasetools/sparse_img.py
index b97bb84..2ac97ac 100644
--- a/tools/releasetools/sparse_img.py
+++ b/tools/releasetools/sparse_img.py
@@ -21,10 +21,17 @@
 
 
 class SparseImage(object):
-  """Wraps a sparse image file (and optional file map) into an image
-  object suitable for passing to BlockImageDiff."""
+  """Wraps a sparse image file into an image object.
 
-  def __init__(self, simg_fn, file_map_fn=None):
+  Wraps a sparse image file (and optional file map and clobbered_blocks) into
+  an image object suitable for passing to BlockImageDiff. file_map contains
+  the mapping between files and their blocks. clobbered_blocks contains the set
+  of blocks that should be always written to the target regardless of the old
+  contents (i.e. copying instead of patching). clobbered_blocks should be in
+  the form of a string like "0" or "0 1-5 8".
+  """
+
+  def __init__(self, simg_fn, file_map_fn=None, clobbered_blocks=None):
     self.simg_f = f = open(simg_fn, "rb")
 
     header_bin = f.read(28)
@@ -57,6 +64,7 @@
     pos = 0   # in blocks
     care_data = []
     self.offset_map = offset_map = []
+    self.clobbered_blocks = rangelib.RangeSet(data=clobbered_blocks)
 
     for i in range(total_chunks):
       header_bin = f.read(12)
@@ -103,7 +111,7 @@
     self.offset_index = [i[0] for i in offset_map]
 
     if file_map_fn:
-      self.LoadFileBlockMap(file_map_fn)
+      self.LoadFileBlockMap(file_map_fn, self.clobbered_blocks)
     else:
       self.file_map = {"__DATA": self.care_map}
 
@@ -111,9 +119,10 @@
     return [d for d in self._GetRangeData(ranges)]
 
   def TotalSha1(self):
-    """Return the SHA-1 hash of all data in the 'care' regions of this image."""
+    """Return the SHA-1 hash of all data in the 'care' regions but not in
+    clobbered_blocks of this image."""
     h = sha1()
-    for d in self._GetRangeData(self.care_map):
+    for d in self._GetRangeData(self.care_map.subtract(self.clobbered_blocks)):
       h.update(d)
     return h.hexdigest()
 
@@ -156,7 +165,7 @@
           yield fill_data * (this_read * (self.blocksize >> 2))
         to_read -= this_read
 
-  def LoadFileBlockMap(self, fn):
+  def LoadFileBlockMap(self, fn, clobbered_blocks):
     remaining = self.care_map
     self.file_map = out = {}
 
@@ -166,14 +175,20 @@
         ranges = rangelib.RangeSet.parse(ranges)
         out[fn] = ranges
         assert ranges.size() == ranges.intersect(remaining).size()
+
+        # Currently we assume that blocks in clobbered_blocks are not part of
+        # any file.
+        assert not clobbered_blocks.overlaps(ranges)
         remaining = remaining.subtract(ranges)
 
+    remaining = remaining.subtract(clobbered_blocks)
+
     # For all the remaining blocks in the care_map (ie, those that
-    # aren't part of the data for any file), divide them into blocks
-    # that are all zero and blocks that aren't.  (Zero blocks are
-    # handled specially because (1) there are usually a lot of them
-    # and (2) bsdiff handles files with long sequences of repeated
-    # bytes especially poorly.)
+    # aren't part of the data for any file nor part of the clobbered_blocks),
+    # divide them into blocks that are all zero and blocks that aren't.
+    # (Zero blocks are handled specially because (1) there are usually
+    # a lot of them and (2) bsdiff handles files with long sequences of
+    # repeated bytes especially poorly.)
 
     zero_blocks = []
     nonzero_blocks = []
@@ -203,6 +218,7 @@
 
     out["__ZERO"] = rangelib.RangeSet(data=zero_blocks)
     out["__NONZERO"] = rangelib.RangeSet(data=nonzero_blocks)
+    out["__COPY"] = clobbered_blocks
 
   def ResetFileMap(self):
     """Throw away the file map and treat the entire image as