Delta-compress register maps in stack maps.

The register maps tend to be similar from one stack map to the next,
so instead of encoding each map again in full, store only the modified
registers.

The dex register bitmap now stores the delta: if a register has been
modified since the previous stack map, its bit is set.
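
As a rough sketch of the encoding side (illustrative only - Location,
EncodedMap and EncodeDelta are made-up stand-ins for DexRegisterLocation
and the BitTable builders, not the actual ART types):

  #include <cstddef>
  #include <cstdint>
  #include <vector>

  // Simplified stand-in for DexRegisterLocation: a kind plus a value.
  struct Location {
    int32_t kind = -1;   // -1 plays the role of "None" (vreg not set).
    int32_t value = 0;
  };

  inline bool operator!=(const Location& a, const Location& b) {
    return a.kind != b.kind || a.value != b.value;
  }

  // One delta-encoded register map: one bit per register, plus the new
  // locations of only those registers whose bit is set.
  struct EncodedMap {
    std::vector<bool> changed;
    std::vector<Location> values;
  };

  // Encode the current locations as a delta against the previously emitted
  // ones; 'previous' is updated so the next call diffs against this map.
  EncodedMap EncodeDelta(const std::vector<Location>& current,
                         std::vector<Location>* previous) {
    previous->resize(current.size());
    EncodedMap out;
    out.changed.resize(current.size(), false);
    for (size_t i = 0; i < current.size(); ++i) {
      if (current[i] != (*previous)[i]) {
        out.changed[i] = true;
        out.values.push_back(current[i]);
        (*previous)[i] = current[i];
      }
    }
    return out;
  }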

The decoding logic scans backwards through the stack maps until it
eventually finds the most recent value of each register (sketched below).

This CL saves ~2.5% of .oat file size (~10% of stackmap size).

Due to the backward scan, dex register decoding becomes slower by a
factor of about 2.5, but it still beats the old algorithm from before
the refactoring.
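
And a rough sketch of the backward-scan decoding, reusing the made-up
types from the sketch above; the real decoder in stack_map.cc marks
unresolved registers with a kInvalid sentinel instead of a separate
'resolved' array, and bounds the scan by kMaxDexRegisterMapSearchDistance:

  // Reconstruct the register locations at stack map 'stack_map_index' by
  // scanning backwards and keeping the most recent value of each register.
  std::vector<Location> DecodeAt(const std::vector<EncodedMap>& maps,
                                 size_t stack_map_index,
                                 size_t num_registers) {
    std::vector<Location> result(num_registers);   // Defaults to "None".
    std::vector<bool> resolved(num_registers, false);
    size_t remaining = num_registers;
    for (int32_t s = static_cast<int32_t>(stack_map_index);
         s >= 0 && remaining != 0; --s) {
      const EncodedMap& map = maps[s];
      size_t value_index = 0;
      for (size_t r = 0; r < map.changed.size(); ++r) {
        if (!map.changed[r]) {
          continue;
        }
        if (r < num_registers && !resolved[r]) {
          // First hit while scanning backwards is the most recent value.
          result[r] = map.values[value_index];
          resolved[r] = true;
          remaining--;
        }
        value_index++;
      }
    }
    return result;  // Registers never mentioned keep the default "None".
  }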

Test: test-art-host-gtest-stack_map_test
Change-Id: Id5217a329eb757954e0c9447f38b05ec34118f84
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 3685ab2..094b75d 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -46,6 +46,13 @@
                                         uint8_t inlining_depth) {
   DCHECK(!in_stack_map_) << "Mismatched Begin/End calls";
   in_stack_map_ = true;
+  // num_dex_registers_ is the constant per-method number of registers.
+  // However, we initially do not know what the value is, so initialize it lazily.
+  if (num_dex_registers_ == 0) {
+    num_dex_registers_ = num_dex_registers;
+  } else if (num_dex_registers > 0) {
+    DCHECK_EQ(num_dex_registers_, num_dex_registers) << "Inconsistent register count";
+  }
 
   current_stack_map_ = StackMapEntry {
     .packed_native_pc = StackMap::PackNativePc(native_pc_offset, instruction_set_),
@@ -85,7 +92,6 @@
       }
       CHECK_EQ(stack_map.HasInlineInfo(), (inlining_depth != 0));
       CHECK_EQ(code_info.GetInlineDepthOf(stack_map), inlining_depth);
-      CHECK_EQ(stack_map.HasDexRegisterMap(), (num_dex_registers != 0));
     });
   }
 }
@@ -102,16 +108,14 @@
         inline_infos_.Dedup(current_inline_infos_.data(), current_inline_infos_.size());
   }
 
+  // Generate delta-compressed dex register map.
+  CreateDexRegisterMap();
+
   stack_maps_.Add(current_stack_map_);
 }
 
 void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) {
   current_dex_registers_.push_back(DexRegisterLocation(kind, value));
-
-  // We have collected all the dex registers for StackMap/InlineInfo - create the map.
-  if (current_dex_registers_.size() == expected_num_dex_registers_) {
-    CreateDexRegisterMap();
-  }
 }
 
 void StackMapStream::AddInvoke(InvokeType invoke_type, uint32_t dex_method_index) {
@@ -142,14 +146,15 @@
   in_inline_info_ = true;
   DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size());
 
+  expected_num_dex_registers_ += num_dex_registers;
+
   InlineInfoEntry entry = {
     .is_last = InlineInfo::kMore,
     .dex_pc = dex_pc,
     .method_info_index = kNoValue,
     .art_method_hi = kNoValue,
     .art_method_lo = kNoValue,
-    .dex_register_mask_index = kNoValue,
-    .dex_register_map_index = kNoValue,
+    .num_dex_registers = static_cast<uint32_t>(expected_num_dex_registers_),
   };
   if (EncodeArtMethodInInlineInfo(method)) {
     entry.art_method_hi = High32Bits(reinterpret_cast<uintptr_t>(method));
@@ -164,9 +169,6 @@
   }
   current_inline_infos_.push_back(entry);
 
-  current_dex_registers_.clear();
-  expected_num_dex_registers_ = num_dex_registers;
-
   if (kVerifyStackMaps) {
     size_t stack_map_index = stack_maps_.size();
     size_t depth = current_inline_infos_.size() - 1;
@@ -182,7 +184,6 @@
         CHECK_EQ(method_infos_[inline_info.GetMethodInfoIndex()],
                  method->GetDexMethodIndexUnchecked());
       }
-      CHECK_EQ(inline_info.HasDexRegisterMap(), (num_dex_registers != 0));
     });
   }
 }
@@ -193,56 +194,68 @@
   DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size());
 }
 
-// Create dex register map (bitmap + indices + catalogue entries)
-// based on the currently accumulated list of DexRegisterLocations.
+// Create delta-compressed dex register map based on the current list of DexRegisterLocations.
+// All dex registers for a stack map are concatenated - inlined registers are just appended.
 void StackMapStream::CreateDexRegisterMap() {
-  // Create mask and map based on current registers.
+  // These are fields rather than local variables so that we can reuse the reserved memory.
   temp_dex_register_mask_.ClearAllBits();
   temp_dex_register_map_.clear();
+
+  // Ensure that the arrays that hold previous state are big enough to be safely indexed below.
+  if (previous_dex_registers_.size() < current_dex_registers_.size()) {
+    previous_dex_registers_.resize(current_dex_registers_.size(), DexRegisterLocation::None());
+    dex_register_timestamp_.resize(current_dex_registers_.size(), 0u);
+  }
+
+  // Set a bit in the mask for each register that has changed since the previous stack map.
+  // Modified registers are stored in the catalogue, and the catalogue index is added to the list.
   for (size_t i = 0; i < current_dex_registers_.size(); i++) {
     DexRegisterLocation reg = current_dex_registers_[i];
-    if (reg.IsLive()) {
-      DexRegisterEntry entry = DexRegisterEntry {
+    // Distance is the difference between this stack map's index and the index of the last modification.
+    uint32_t distance = stack_maps_.size() - dex_register_timestamp_[i];
+    if (previous_dex_registers_[i] != reg || distance > kMaxDexRegisterMapSearchDistance) {
+      DexRegisterEntry entry = DexRegisterEntry{
         .kind = static_cast<uint32_t>(reg.GetKind()),
         .packed_value = DexRegisterInfo::PackValue(reg.GetKind(), reg.GetValue()),
       };
+      uint32_t index = reg.IsLive() ? dex_register_catalog_.Dedup(&entry) : kNoValue;
       temp_dex_register_mask_.SetBit(i);
-      temp_dex_register_map_.push_back(dex_register_catalog_.Dedup(&entry));
+      temp_dex_register_map_.push_back(index);
+      previous_dex_registers_[i] = reg;
+      dex_register_timestamp_[i] = stack_maps_.size();
     }
   }
 
-  // Set the mask and map for the current StackMap/InlineInfo.
-  uint32_t mask_index = StackMap::kNoValue;  // Represents mask with all zero bits.
+  // Set the mask and map for the current StackMap (which includes inlined registers).
   if (temp_dex_register_mask_.GetNumberOfBits() != 0) {
-    mask_index = dex_register_masks_.Dedup(temp_dex_register_mask_.GetRawStorage(),
-                                           temp_dex_register_mask_.GetNumberOfBits());
+    current_stack_map_.dex_register_mask_index =
+        dex_register_masks_.Dedup(temp_dex_register_mask_.GetRawStorage(),
+                                  temp_dex_register_mask_.GetNumberOfBits());
   }
-  uint32_t map_index = dex_register_maps_.Dedup(temp_dex_register_map_.data(),
-                                                temp_dex_register_map_.size());
-  if (!current_inline_infos_.empty()) {
-    current_inline_infos_.back().dex_register_mask_index = mask_index;
-    current_inline_infos_.back().dex_register_map_index = map_index;
-  } else {
-    current_stack_map_.dex_register_mask_index = mask_index;
-    current_stack_map_.dex_register_map_index = map_index;
+  if (!current_dex_registers_.empty()) {
+    current_stack_map_.dex_register_map_index =
+        dex_register_maps_.Dedup(temp_dex_register_map_.data(),
+                                 temp_dex_register_map_.size());
   }
 
   if (kVerifyStackMaps) {
     size_t stack_map_index = stack_maps_.size();
-    int32_t depth = current_inline_infos_.size() - 1;
+    uint32_t depth = current_inline_infos_.size();
     // We need to make copy of the current registers for later (when the check is run).
-    auto expected_dex_registers = std::make_shared<std::vector<DexRegisterLocation>>(
+    auto expected_dex_registers = std::make_shared<dchecked_vector<DexRegisterLocation>>(
         current_dex_registers_.begin(), current_dex_registers_.end());
     dchecks_.emplace_back([=](const CodeInfo& code_info) {
       StackMap stack_map = code_info.GetStackMapAt(stack_map_index);
-      size_t num_dex_registers = expected_dex_registers->size();
-      DexRegisterMap map = (depth == -1)
-        ? code_info.GetDexRegisterMapOf(stack_map, num_dex_registers)
-        : code_info.GetDexRegisterMapAtDepth(depth, stack_map, num_dex_registers);
-      CHECK_EQ(map.size(), num_dex_registers);
-      for (size_t r = 0; r < num_dex_registers; r++) {
-        CHECK_EQ(expected_dex_registers->at(r), map.Get(r));
+      uint32_t expected_reg = 0;
+      for (DexRegisterLocation reg : code_info.GetDexRegisterMapOf(stack_map)) {
+        CHECK_EQ((*expected_dex_registers)[expected_reg++], reg);
       }
+      for (uint32_t d = 0; d < depth; d++) {
+        for (DexRegisterLocation reg : code_info.GetDexRegisterMapAtDepth(d, stack_map)) {
+          CHECK_EQ((*expected_dex_registers)[expected_reg++], reg);
+        }
+      }
+      CHECK_EQ(expected_reg, expected_dex_registers->size());
     });
   }
 }
@@ -290,6 +303,7 @@
   dex_register_masks_.Encode(&out_, &bit_offset);
   dex_register_maps_.Encode(&out_, &bit_offset);
   dex_register_catalog_.Encode(&out_, &bit_offset);
+  EncodeVarintBits(&out_, &bit_offset, num_dex_registers_);
 
   return UnsignedLeb128Size(out_.size()) +  out_.size();
 }
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index d634c70..02fb6cb 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -55,6 +55,8 @@
         in_inline_info_(false),
         current_inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
         current_dex_registers_(allocator->Adapter(kArenaAllocStackMapStream)),
+        previous_dex_registers_(allocator->Adapter(kArenaAllocStackMapStream)),
+        dex_register_timestamp_(allocator->Adapter(kArenaAllocStackMapStream)),
         temp_dex_register_mask_(allocator, 32, true, kArenaAllocStackMapStream),
         temp_dex_register_map_(allocator->Adapter(kArenaAllocStackMapStream)) {
   }
@@ -113,8 +115,7 @@
     uint32_t method_info_index;
     uint32_t art_method_hi;
     uint32_t art_method_lo;
-    uint32_t dex_register_mask_index;
-    uint32_t dex_register_map_index;
+    uint32_t num_dex_registers;
   };
 
   // The fields must be uint32_t and mirror the InvokeInfo accessor in stack_map.h!
@@ -147,6 +148,7 @@
   BitmapTableBuilder dex_register_masks_;
   BitTableBuilder<uint32_t> dex_register_maps_;
   BitTableBuilder<DexRegisterEntry> dex_register_catalog_;
+  uint32_t num_dex_registers_ = 0;  // TODO: Make this const and get the value in the constructor.
   ScopedArenaVector<uint8_t> out_;
 
   BitTableBuilder<uint32_t> method_infos_;
@@ -159,6 +161,8 @@
   StackMapEntry current_stack_map_;
   ScopedArenaVector<InlineInfoEntry> current_inline_infos_;
   ScopedArenaVector<DexRegisterLocation> current_dex_registers_;
+  ScopedArenaVector<DexRegisterLocation> previous_dex_registers_;
+  ScopedArenaVector<uint32_t> dex_register_timestamp_;  // Stack map index of last change.
   size_t expected_num_dex_registers_;
 
   // Temporary variables used in CreateDexRegisterMap.
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 77aa3ef..0be276c 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -358,13 +358,6 @@
     ASSERT_EQ(Kind::kConstant, location1.GetKind());
     ASSERT_EQ(0, location0.GetValue());
     ASSERT_EQ(-2, location1.GetValue());
-
-    // Test that the inline info dex register map deduplicated to the same offset as the stack map
-    // one.
-    ASSERT_TRUE(stack_map.HasInlineInfo());
-    InlineInfo inline_info = code_info.GetInlineInfoAtDepth(stack_map, 0);
-    EXPECT_EQ(inline_info.GetDexRegisterMapIndex(),
-              stack_map.GetDexRegisterMapIndex());
   }
 }
 
@@ -466,13 +459,9 @@
   ASSERT_EQ(2, dex_registers2.GetMachineRegister(0));
   ASSERT_EQ(-2, dex_registers2.GetConstant(1));
 
-  // Verify dex register map offsets.
-  ASSERT_EQ(sm0.GetDexRegisterMapIndex(),
-            sm1.GetDexRegisterMapIndex());
-  ASSERT_NE(sm0.GetDexRegisterMapIndex(),
-            sm2.GetDexRegisterMapIndex());
-  ASSERT_NE(sm1.GetDexRegisterMapIndex(),
-            sm2.GetDexRegisterMapIndex());
+  // Verify dex register mask offsets.
+  ASSERT_FALSE(sm1.HasDexRegisterMaskIndex());  // No delta.
+  ASSERT_TRUE(sm2.HasDexRegisterMaskIndex());  // Has delta.
 }
 
 TEST(StackMapTest, TestNoDexRegisterMap) {
@@ -649,8 +638,6 @@
     ASSERT_EQ(80, dex_registers2.GetStackOffsetInBytes(0));
     ASSERT_EQ(10, dex_registers2.GetConstant(1));
     ASSERT_EQ(5, dex_registers2.GetMachineRegister(2));
-
-    ASSERT_FALSE(if1_2.HasDexRegisterMap());
   }
 
   {
@@ -682,8 +669,6 @@
     ASSERT_EQ(10u, if2_2.GetDexPc());
     ASSERT_TRUE(if2_2.EncodesArtMethod());
 
-    ASSERT_FALSE(if2_0.HasDexRegisterMap());
-
     DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(1, sm3, 1);
     ASSERT_EQ(2, dex_registers1.GetMachineRegister(0));
 
diff --git a/libartbase/base/bit_memory_region.h b/libartbase/base/bit_memory_region.h
index a3d3ee4..b4764fd 100644
--- a/libartbase/base/bit_memory_region.h
+++ b/libartbase/base/bit_memory_region.h
@@ -151,6 +151,20 @@
     StoreBits(bit_offset + bit, src.LoadBits(bit, num_bits), num_bits);
   }
 
+  // Count the number of set bits within the given bit range.
+  ALWAYS_INLINE size_t PopCount(size_t bit_offset, size_t bit_length) const {
+    DCHECK_LE(bit_offset, bit_size_);
+    DCHECK_LE(bit_length, bit_size_ - bit_offset);
+    size_t count = 0;
+    size_t bit = 0;
+    constexpr size_t kNumBits = BitSizeOf<uint32_t>();
+    for (; bit + kNumBits <= bit_length; bit += kNumBits) {
+      count += POPCOUNT(LoadBits(bit_offset + bit, kNumBits));
+    }
+    count += POPCOUNT(LoadBits(bit_offset + bit, bit_length - bit));
+    return count;
+  }
+
   ALWAYS_INLINE bool Equals(const BitMemoryRegion& other) const {
     return data_ == other.data_ &&
            bit_start_ == other.bit_start_ &&
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 3188087..3973edc 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -753,7 +753,7 @@
       kByteKindInlineInfoMethodIndexIdx,
       kByteKindInlineInfoDexPc,
       kByteKindInlineInfoArtMethod,
-      kByteKindInlineInfoDexRegisterMap,
+      kByteKindInlineInfoNumDexRegisters,
       kByteKindInlineInfoIsLast,
       kByteKindCount,
       // Special ranges for std::accumulate convenience.
@@ -855,8 +855,8 @@
                inline_info_bits,
                "inline info");
           Dump(os,
-               "InlineInfoDexRegisterMap      ",
-               bits[kByteKindInlineInfoDexRegisterMap],
+               "InlineInfoNumDexRegisters     ",
+               bits[kByteKindInlineInfoNumDexRegisters],
                inline_info_bits,
                "inline info");
           Dump(os,
@@ -1757,8 +1757,8 @@
                 inline_infos.NumColumnBits(InlineInfo::kArtMethodHi) * num_inline_infos +
                 inline_infos.NumColumnBits(InlineInfo::kArtMethodLo) * num_inline_infos);
             stats_.AddBits(
-                Stats::kByteKindInlineInfoDexRegisterMap,
-                inline_infos.NumColumnBits(InlineInfo::kDexRegisterMapIndex) * num_inline_infos);
+                Stats::kByteKindInlineInfoNumDexRegisters,
+                inline_infos.NumColumnBits(InlineInfo::kNumberOfDexRegisters) * num_inline_infos);
             stats_.AddBits(Stats::kByteKindInlineInfoIsLast, num_inline_infos);
           }
         }
diff --git a/runtime/dex_register_location.h b/runtime/dex_register_location.h
index c6d4ad2..a20dccb 100644
--- a/runtime/dex_register_location.h
+++ b/runtime/dex_register_location.h
@@ -29,6 +29,7 @@
 class DexRegisterLocation {
  public:
   enum class Kind : int32_t {
+    kInvalid = -2,       // only used internally during register map decoding.
     kNone = -1,          // vreg has not been set.
     kInStack,            // vreg is on the stack, value holds the stack offset.
     kConstant,           // vreg is a constant value.
@@ -40,9 +41,8 @@
 
   DexRegisterLocation(Kind kind, int32_t value) : kind_(kind), value_(value) {}
 
-  static DexRegisterLocation None() {
-    return DexRegisterLocation(Kind::kNone, 0);
-  }
+  static DexRegisterLocation None() { return DexRegisterLocation(Kind::kNone, 0); }
+  static DexRegisterLocation Invalid() { return DexRegisterLocation(Kind::kInvalid, 0); }
 
   bool IsLive() const { return kind_ != Kind::kNone; }
 
diff --git a/runtime/stack.cc b/runtime/stack.cc
index bd0d5d6..0b3441a 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -236,7 +236,9 @@
   size_t depth_in_stack_map = current_inlining_depth_ - 1;
 
   DexRegisterMap dex_register_map = IsInInlinedFrame()
-      ? code_info.GetDexRegisterMapAtDepth(depth_in_stack_map, stack_map, number_of_dex_registers)
+      ? code_info.GetDexRegisterMapAtDepth(depth_in_stack_map,
+                                           stack_map,
+                                           number_of_dex_registers)
       : code_info.GetDexRegisterMapOf(stack_map, number_of_dex_registers);
 
   if (!dex_register_map.IsValid()) {
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index a5749b8..59a89e1 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -25,6 +25,69 @@
 
 namespace art {
 
+// Scan backward to determine the dex register locations at the given stack map.
+// All registers for a stack map are concatenated - inlined registers are just appended,
+// so 'first_dex_register' lets us select a sub-range to decode.
+void CodeInfo::DecodeDexRegisterMap(uint32_t stack_map_index,
+                                    uint32_t first_dex_register,
+                                    /*out*/ DexRegisterMap* map) const {
+  // Count remaining work so we know when we have finished.
+  uint32_t remaining_registers = map->size();
+
+  // Keep scanning backwards and collect the most recent location of each register.
+  for (int32_t s = stack_map_index; s >= 0 && remaining_registers != 0; s--) {
+    StackMap stack_map = GetStackMapAt(s);
+    DCHECK_LE(stack_map_index - s, kMaxDexRegisterMapSearchDistance) << "Unbounded search";
+
+    // The mask specifies which registers were modified in this stack map.
+    // NB: the mask can be shorter than expected if trailing zero bits were removed.
+    uint32_t mask_index = stack_map.GetDexRegisterMaskIndex();
+    if (mask_index == StackMap::kNoValue) {
+      continue;  // Nothing changed at this stack map.
+    }
+    BitMemoryRegion mask = dex_register_masks_.GetBitMemoryRegion(mask_index);
+    if (mask.size_in_bits() <= first_dex_register) {
+      continue;  // Nothing changed at or after the first register we are interested in.
+    }
+
+    // The map stores one catalogue index per modified register location.
+    uint32_t map_index = stack_map.GetDexRegisterMapIndex();
+    DCHECK_NE(map_index, StackMap::kNoValue);
+
+    // Skip the initial registers we are not interested in (to get to the inlined registers).
+    map_index += mask.PopCount(0, first_dex_register);
+    mask = mask.Subregion(first_dex_register, mask.size_in_bits() - first_dex_register);
+
+    // Update registers that we see for the first time (i.e. their most recent value).
+    DexRegisterLocation* regs = map->data();
+    const uint32_t end = std::min<uint32_t>(map->size(), mask.size_in_bits());
+    const size_t kNumBits = BitSizeOf<uint32_t>();
+    for (uint32_t reg = 0; reg < end; reg += kNumBits) {
+      // Process the mask in chunks of kNumBits for performance.
+      uint32_t bits = mask.LoadBits(reg, std::min<uint32_t>(end - reg, kNumBits));
+      while (bits != 0) {
+        uint32_t bit = CTZ(bits);
+        if (regs[reg + bit].GetKind() == DexRegisterLocation::Kind::kInvalid) {
+          regs[reg + bit] = GetDexRegisterCatalogEntry(dex_register_maps_.Get(map_index));
+          remaining_registers--;
+        }
+        map_index++;
+        bits ^= 1u << bit;  // Clear the bit.
+      }
+    }
+  }
+
+  // Set any remaining registers to None (the default state at the first stack map).
+  if (remaining_registers != 0) {
+    DexRegisterLocation* regs = map->data();
+    for (uint32_t r = 0; r < map->size(); r++) {
+      if (regs[r].GetKind() == DexRegisterLocation::Kind::kInvalid) {
+        regs[r] = DexRegisterLocation::None();
+      }
+    }
+  }
+}
+
 std::ostream& operator<<(std::ostream& stream, const DexRegisterLocation& reg) {
   using Kind = DexRegisterLocation::Kind;
   switch (reg.GetKind()) {
@@ -42,6 +105,8 @@
       return stream << "f" << reg.GetValue() << "/hi";
     case Kind::kConstant:
       return stream << "#" << reg.GetValue();
+    case Kind::kInvalid:
+      return stream << "Invalid";
     default:
       return stream << "DexRegisterLocation(" << static_cast<uint32_t>(reg.GetKind())
                     << "," << reg.GetValue() << ")";
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 6da0021..ff70b6c 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -39,6 +39,12 @@
 // (signed) values.
 static constexpr ssize_t kFrameSlotSize = 4;
 
+// The delta compression of dex register maps means we need to scan the stack maps backwards.
+// We compress the data in such a way that there is an upper bound on the search distance.
+// A max distance of 0 means each stack map must be fully defined and no backward scan is allowed.
+// If this value is changed, the oat file version should be incremented (for the DCHECK to pass).
+static constexpr size_t kMaxDexRegisterMapSearchDistance = 32;
+
 class ArtMethod;
 class CodeInfo;
 
@@ -49,12 +55,14 @@
 // If the size is small enough, it keeps the data on the stack.
 class DexRegisterMap {
  public:
-  // Create map for given number of registers and initialize all locations to None.
-  explicit DexRegisterMap(size_t count) : count_(count), regs_small_{} {
+  using iterator = DexRegisterLocation*;
+
+  // Create a map for the given number of registers and initialize all locations to the given value.
+  DexRegisterMap(size_t count, DexRegisterLocation value) : count_(count), regs_small_{} {
     if (count_ <= kSmallCount) {
-      std::fill_n(regs_small_.begin(), count, DexRegisterLocation::None());
+      std::fill_n(regs_small_.begin(), count, value);
     } else {
-      regs_large_.resize(count, DexRegisterLocation::None());
+      regs_large_.resize(count, value);
     }
   }
 
@@ -62,6 +70,9 @@
     return count_ <= kSmallCount ? regs_small_.data() : regs_large_.data();
   }
 
+  iterator begin() { return data(); }
+  iterator end() { return data() + count_; }
+
   size_t size() const { return count_; }
 
   bool IsValid() const { return count_ != 0; }
@@ -192,7 +203,7 @@
  * The row referenced from the StackMap holds information at depth 0.
  * Following rows hold information for further depths.
  */
-class InlineInfo : public BitTable<7>::Accessor {
+class InlineInfo : public BitTable<6>::Accessor {
  public:
   BIT_TABLE_HEADER()
   BIT_TABLE_COLUMN(0, IsLast)  // Determines if there are further rows for further depths.
@@ -200,7 +211,7 @@
   BIT_TABLE_COLUMN(2, MethodInfoIndex)
   BIT_TABLE_COLUMN(3, ArtMethodHi)  // High bits of ArtMethod*.
   BIT_TABLE_COLUMN(4, ArtMethodLo)  // Low bits of ArtMethod*.
-  BIT_TABLE_COLUMN(5, DexRegisterMaskIndex)
+  BIT_TABLE_COLUMN(5, NumberOfDexRegisters)  // Includes outer levels and the main method.
   BIT_TABLE_COLUMN(6, DexRegisterMapIndex)
 
   static constexpr uint32_t kLast = -1;
@@ -220,10 +231,6 @@
     return reinterpret_cast<ArtMethod*>((hi << 32) | lo);
   }
 
-  ALWAYS_INLINE bool HasDexRegisterMap() const {
-    return HasDexRegisterMapIndex();
-  }
-
   void Dump(VariableIndentationOutputStream* vios,
             const CodeInfo& info,
             const StackMap& stack_map,
@@ -338,7 +345,9 @@
   }
 
   ALWAYS_INLINE DexRegisterLocation GetDexRegisterCatalogEntry(size_t index) const {
-    return DexRegisterInfo(&dex_register_catalog_, index).GetLocation();
+    return (index == StackMap::kNoValue)
+      ? DexRegisterLocation::None()
+      : DexRegisterInfo(&dex_register_catalog_, index).GetLocation();
   }
 
   uint32_t GetNumberOfStackMaps() const {
@@ -350,19 +359,30 @@
   }
 
   ALWAYS_INLINE DexRegisterMap GetDexRegisterMapOf(StackMap stack_map,
-                                                   size_t num_dex_registers) const {
-    return DecodeDexRegisterMap(stack_map.GetDexRegisterMaskIndex(),
-                                stack_map.GetDexRegisterMapIndex(),
-                                num_dex_registers);
+                                                   size_t vregs ATTRIBUTE_UNUSED = 0) const {
+    if (stack_map.HasDexRegisterMap()) {
+      DexRegisterMap map(number_of_dex_registers_, DexRegisterLocation::Invalid());
+      DecodeDexRegisterMap(stack_map.Row(), /* first_dex_register */ 0, &map);
+      return map;
+    }
+    return DexRegisterMap(0, DexRegisterLocation::None());
   }
 
   ALWAYS_INLINE DexRegisterMap GetDexRegisterMapAtDepth(uint8_t depth,
                                                         StackMap stack_map,
-                                                        size_t num_dex_registers) const {
-    InlineInfo inline_info = GetInlineInfoAtDepth(stack_map, depth);
-    return DecodeDexRegisterMap(inline_info.GetDexRegisterMaskIndex(),
-                                inline_info.GetDexRegisterMapIndex(),
-                                num_dex_registers);
+                                                        size_t vregs ATTRIBUTE_UNUSED = 0) const {
+    if (stack_map.HasDexRegisterMap()) {
+      // The register counts are commutative and include all outer levels.
+      // This allows us to determine the range [first, last) in just two lookups.
+      // If we are at depth 0 (the first inlinee), the count from the main method is used.
+      uint32_t first = (depth == 0) ? number_of_dex_registers_
+          : GetInlineInfoAtDepth(stack_map, depth - 1).GetNumberOfDexRegisters();
+      uint32_t last = GetInlineInfoAtDepth(stack_map, depth).GetNumberOfDexRegisters();
+      DexRegisterMap map(last - first, DexRegisterLocation::Invalid());
+      DecodeDexRegisterMap(stack_map.Row(), first, &map);
+      return map;
+    }
+    return DexRegisterMap(0, DexRegisterLocation::None());
   }
 
   InlineInfo GetInlineInfo(size_t index) const {
@@ -421,8 +441,6 @@
         if (other.GetDexPc() == dex_pc &&
             other.GetNativePcOffset(kRuntimeISA) ==
                 stack_map.GetNativePcOffset(kRuntimeISA)) {
-          DCHECK_EQ(other.GetDexRegisterMapIndex(),
-                    stack_map.GetDexRegisterMapIndex());
           if (i < e - 2) {
             // Make sure there are not three identical stack maps following each other.
             DCHECK_NE(
@@ -469,23 +487,10 @@
             const MethodInfo& method_info) const;
 
  private:
-  ALWAYS_INLINE DexRegisterMap DecodeDexRegisterMap(uint32_t mask_index,
-                                                    uint32_t map_index,
-                                                    uint32_t num_dex_registers) const {
-    DexRegisterMap map(map_index == StackMap::kNoValue ? 0 : num_dex_registers);
-    if (mask_index != StackMap::kNoValue) {
-      BitMemoryRegion mask = dex_register_masks_.GetBitMemoryRegion(mask_index);
-      num_dex_registers = std::min<uint32_t>(num_dex_registers, mask.size_in_bits());
-      DexRegisterLocation* regs = map.data();
-      for (uint32_t r = 0; r < mask.size_in_bits(); r++) {
-        if (mask.LoadBit(r) /* is_live */) {
-          DCHECK_LT(r, map.size());
-          regs[r] = GetDexRegisterCatalogEntry(dex_register_maps_.Get(map_index++));
-        }
-      }
-    }
-    return map;
-  }
+  // Scan backward to determine dex register locations at given stack map.
+  void DecodeDexRegisterMap(uint32_t stack_map_index,
+                            uint32_t first_dex_register,
+                            /*out*/ DexRegisterMap* map) const;
 
   void Decode(const uint8_t* data) {
     size_t non_header_size = DecodeUnsignedLeb128(&data);
@@ -500,6 +505,7 @@
     dex_register_masks_.Decode(region, &bit_offset);
     dex_register_maps_.Decode(region, &bit_offset);
     dex_register_catalog_.Decode(region, &bit_offset);
+    number_of_dex_registers_ = DecodeVarintBits(region, &bit_offset);
     CHECK_EQ(non_header_size, BitsToBytesRoundUp(bit_offset)) << "Invalid CodeInfo";
   }
 
@@ -512,6 +518,7 @@
   BitTable<1> dex_register_masks_;
   BitTable<1> dex_register_maps_;
   BitTable<DexRegisterInfo::kCount> dex_register_catalog_;
+  uint32_t number_of_dex_registers_;  // Excludes any inlined methods.
 
   friend class OatDumper;
 };