Add Kind column to stack maps.

Add a 'Kind' column to the stack maps which marks special stack map
types, and use it at run-time for extra sanity checks.

It will also allow us to binary search the stack maps.
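
For illustration, a sketch of what that lookup could become (a hypothetical
follow-up, not part of this change): catch stack maps are appended at the
end of the table and can now be recognized via GetKind(), and the remaining
maps are emitted in native-pc order, so GetStackMapForNativePcOffset()
could use a lower-bound binary search instead of a linear scan:

  StackMap CodeInfo::GetStackMapForNativePcOffset(uint32_t pc,
                                                  InstructionSet isa) const {
    // Exclude the unsorted catch stack maps at the end of the table.
    size_t end = GetNumberOfStackMaps();
    while (end > 0 &&
           GetStackMapAt(end - 1).GetKind() == StackMap::Kind::Catch) {
      --end;
    }
    // Lower-bound binary search on the sorted prefix.
    size_t lo = 0;
    size_t hi = end;
    while (lo < hi) {
      size_t mid = lo + (hi - lo) / 2;
      if (GetStackMapAt(mid).GetNativePcOffset(isa) < pc) {
        lo = mid + 1;
      } else {
        hi = mid;
      }
    }
    // Several maps may share a pc (e.g. a debug map next to a safepoint
    // map), so walk the equal range for an acceptable kind.
    for (; lo < end; ++lo) {
      StackMap stack_map = GetStackMapAt(lo);
      if (stack_map.GetNativePcOffset(isa) != pc) {
        break;
      }
      StackMap::Kind kind = static_cast<StackMap::Kind>(stack_map.GetKind());
      if (kind == StackMap::Kind::Default || kind == StackMap::Kind::OSR) {
        return stack_map;
      }
    }
    return StackMap();  // No match; an invalid stack map.
  }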

The column increases .oat file size by 0.2%.

Test: test-art-host-gtest-stack_map_test
Change-Id: I2a9143afa0e32bb06174604ca81a64c41fed232f
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index b3feb78..45a81cf 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1055,7 +1055,8 @@
 
 void CodeGenerator::RecordPcInfo(HInstruction* instruction,
                                  uint32_t dex_pc,
-                                 SlowPathCode* slow_path) {
+                                 SlowPathCode* slow_path,
+                                 bool native_debug_info) {
   if (instruction != nullptr) {
     // The code generated for some type conversions
     // may call the runtime, thus normally requiring a subsequent
@@ -1120,12 +1121,25 @@
     outer_dex_pc = outer_environment->GetDexPc();
     outer_environment_size = outer_environment->Size();
   }
+
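+  // Compute the stack map kind up front; the "osr" flag is reused below to
+  // validate the suspend check's environment in debug builds.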
+  HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
+  bool osr =
+      instruction->IsSuspendCheck() &&
+      (info != nullptr) &&
+      graph_->IsCompilingOsr() &&
+      (inlining_depth == 0);
+  StackMap::Kind kind = native_debug_info
+      ? StackMap::Kind::Debug
+      : (osr ? StackMap::Kind::OSR : StackMap::Kind::Default);
   stack_map_stream->BeginStackMapEntry(outer_dex_pc,
                                        native_pc,
                                        register_mask,
                                        locations->GetStackMask(),
                                        outer_environment_size,
-                                       inlining_depth);
+                                       inlining_depth,
+                                       kind);
   EmitEnvironment(environment, slow_path);
   // Record invoke info, the common case for the trampoline is super and static invokes. Only
   // record these to reduce oat file size.
@@ -1138,19 +1150,9 @@
   }
   stack_map_stream->EndStackMapEntry();
 
-  HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
-  if (instruction->IsSuspendCheck() &&
-      (info != nullptr) &&
-      graph_->IsCompilingOsr() &&
-      (inlining_depth == 0)) {
+  if (osr) {
     DCHECK_EQ(info->GetSuspendCheck(), instruction);
-    // We duplicate the stack map as a marker that this stack map can be an OSR entry.
-    // Duplicating it avoids having the runtime recognize and skip an OSR stack map.
     DCHECK(info->IsIrreducible());
-    stack_map_stream->BeginStackMapEntry(
-        dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0);
-    EmitEnvironment(instruction->GetEnvironment(), slow_path);
-    stack_map_stream->EndStackMapEntry();
     if (kIsDebugBuild) {
       for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
         HInstruction* in_environment = environment->GetInstructionAt(i);
@@ -1167,14 +1169,6 @@
         }
       }
     }
-  } else if (kIsDebugBuild) {
-    // Ensure stack maps are unique, by checking that the native pc in the stack map
-    // last emitted is different than the native pc of the stack map just emitted.
-    size_t number_of_stack_maps = stack_map_stream->GetNumberOfStackMaps();
-    if (number_of_stack_maps > 1) {
-      DCHECK_NE(stack_map_stream->GetStackMapNativePcOffset(number_of_stack_maps - 1),
-                stack_map_stream->GetStackMapNativePcOffset(number_of_stack_maps - 2));
-    }
   }
 }
 
@@ -1196,12 +1190,11 @@
       // Ensure that we do not collide with the stack map of the previous instruction.
       GenerateNop();
     }
-    RecordPcInfo(instruction, dex_pc, slow_path);
+    RecordPcInfo(instruction, dex_pc, slow_path, /* native_debug_info */ true);
   }
 }
 
 void CodeGenerator::RecordCatchBlockInfo() {
-  ArenaAllocator* allocator = graph_->GetAllocator();
   StackMapStream* stack_map_stream = GetStackMapStream();
 
   for (HBasicBlock* block : *block_order_) {
@@ -1213,28 +1206,24 @@
     uint32_t num_vregs = graph_->GetNumberOfVRegs();
     uint32_t inlining_depth = 0;  // Inlining of catch blocks is not supported at the moment.
     uint32_t native_pc = GetAddressOf(block);
-    uint32_t register_mask = 0;   // Not used.
-
-    // The stack mask is not used, so we leave it empty.
-    ArenaBitVector* stack_mask =
-        ArenaBitVector::Create(allocator, 0, /* expandable */ true, kArenaAllocCodeGenerator);
 
     stack_map_stream->BeginStackMapEntry(dex_pc,
                                          native_pc,
-                                         register_mask,
-                                         stack_mask,
+                                         /* register_mask */ 0,
+                                         /* stack_mask */ nullptr,
                                          num_vregs,
-                                         inlining_depth);
+                                         inlining_depth,
+                                         StackMap::Kind::Catch);
 
     HInstruction* current_phi = block->GetFirstPhi();
     for (size_t vreg = 0; vreg < num_vregs; ++vreg) {
-    while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) {
-      HInstruction* next_phi = current_phi->GetNext();
-      DCHECK(next_phi == nullptr ||
-             current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber())
-          << "Phis need to be sorted by vreg number to keep this a linear-time loop.";
-      current_phi = next_phi;
-    }
+      while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) {
+        HInstruction* next_phi = current_phi->GetNext();
+        DCHECK(next_phi == nullptr ||
+               current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber())
+            << "Phis need to be sorted by vreg number to keep this a linear-time loop.";
+        current_phi = next_phi;
+      }
 
       if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) {
         stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0);
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index b3c29aa..03ae498 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -323,7 +323,10 @@
   }
 
   // Record native to dex mapping for a suspend point.  Required by runtime.
-  void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
+  void RecordPcInfo(HInstruction* instruction,
+                    uint32_t dex_pc,
+                    SlowPathCode* slow_path = nullptr,
+                    bool native_debug_info = false);
   // Check whether we have already recorded mapping at this PC.
   bool HasStackMapAtCurrentPc();
   // Record extra stack maps if we support native debugging.
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 094b75d..cd11549 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -43,7 +43,8 @@
                                         uint32_t register_mask,
                                         BitVector* stack_mask,
                                         uint32_t num_dex_registers,
-                                        uint8_t inlining_depth) {
+                                        uint8_t inlining_depth,
+                                        StackMap::Kind kind) {
   DCHECK(!in_stack_map_) << "Mismatched Begin/End calls";
   in_stack_map_ = true;
   // num_dex_registers_ is the constant per-method number of registers.
@@ -55,6 +56,7 @@
   }
 
   current_stack_map_ = StackMapEntry {
+    .kind = static_cast<uint32_t>(kind),
     .packed_native_pc = StackMap::PackNativePc(native_pc_offset, instruction_set_),
     .dex_pc = dex_pc,
     .register_mask_index = kNoValue,
@@ -81,8 +83,19 @@
     // Create lambda method, which will be executed at the very end to verify data.
     // Parameters and local variables will be captured(stored) by the lambda "[=]".
     dchecks_.emplace_back([=](const CodeInfo& code_info) {
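+      // Check that the runtime lookup methods, which now filter on the
+      // kind, would find exactly the entry being emitted here.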
+      if (kind == StackMap::Kind::Default || kind == StackMap::Kind::OSR) {
+        StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset,
+                                                                    instruction_set_);
+        CHECK_EQ(stack_map.Row(), stack_map_index);
+      } else if (kind == StackMap::Kind::Catch) {
+        StackMap stack_map = code_info.GetCatchStackMapForDexPc(dex_pc);
+        CHECK_EQ(stack_map.Row(), stack_map_index);
+      }
       StackMap stack_map = code_info.GetStackMapAt(stack_map_index);
       CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), native_pc_offset);
+      CHECK_EQ(stack_map.GetKind(), static_cast<uint32_t>(kind));
       CHECK_EQ(stack_map.GetDexPc(), dex_pc);
       CHECK_EQ(code_info.GetRegisterMaskOf(stack_map), register_mask);
       BitMemoryRegion seen_stack_mask = code_info.GetStackMaskOf(stack_map);
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 02fb6cb..0686847 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -27,11 +27,10 @@
 #include "dex_register_location.h"
 #include "method_info.h"
 #include "nodes.h"
+#include "stack_map.h"
 
 namespace art {
 
-class CodeInfo;
-
 /**
  * Collects and builds stack maps for a method. All the stack maps
  * for a method are placed in a CodeInfo object.
@@ -66,7 +65,8 @@
                           uint32_t register_mask,
                           BitVector* sp_mask,
                           uint32_t num_dex_registers,
-                          uint8_t inlining_depth);
+                          uint8_t inlining_depth,
+                          StackMap::Kind kind = StackMap::Kind::Default);
   void EndStackMapEntry();
 
   void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value);
@@ -99,6 +99,8 @@
 
   // The fields must be uint32_t and mirror the StackMap accessor in stack_map.h!
   struct StackMapEntry {
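+    // Keep "kind" first: field order must match the column order in stack_map.h.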
+    uint32_t kind;
     uint32_t packed_native_pc;
     uint32_t dex_pc;
     uint32_t register_mask_index;
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 9adc4c5..2594faf 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -425,12 +425,12 @@
   stream.AddDexRegisterEntry(Kind::kConstant, -2);   // Large location.
   stream.EndStackMapEntry();
   // Second stack map, which should share the same dex register map.
-  stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.BeginStackMapEntry(0, 65 * kPcAlign, 0x3, &sp_mask, number_of_dex_registers, 0);
   stream.AddDexRegisterEntry(Kind::kInRegister, 0);  // Short location.
   stream.AddDexRegisterEntry(Kind::kConstant, -2);   // Large location.
   stream.EndStackMapEntry();
   // Third stack map (doesn't share the dex register map).
-  stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask, number_of_dex_registers, 0);
+  stream.BeginStackMapEntry(0, 66 * kPcAlign, 0x3, &sp_mask, number_of_dex_registers, 0);
   stream.AddDexRegisterEntry(Kind::kInRegister, 2);  // Short location.
   stream.AddDexRegisterEntry(Kind::kConstant, -2);   // Large location.
   stream.EndStackMapEntry();
@@ -528,7 +528,7 @@
   sp_mask1.SetBit(4);
 
   // First stack map.
-  stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask1, 2, 2);
+  stream.BeginStackMapEntry(0, 10 * kPcAlign, 0x3, &sp_mask1, 2, 2);
   stream.AddDexRegisterEntry(Kind::kInStack, 0);
   stream.AddDexRegisterEntry(Kind::kConstant, 4);
 
diff --git a/runtime/oat.h b/runtime/oat.h
index 40f4edd..22c6a39 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  // Last oat version changed reason: compiler support invoke-custom
-  static constexpr uint8_t kOatVersion[] = { '1', '4', '8', '\0' };
+  // Last oat version changed reason: Add Kind column to stack maps.
+  static constexpr uint8_t kOatVersion[] = { '1', '4', '9', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index ea358c6..faa196f 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -157,16 +157,25 @@
  * - Knowing the inlining information,
  * - Knowing the values of dex registers.
  */
-class StackMap : public BitTable<7>::Accessor {
+class StackMap : public BitTable<8>::Accessor {
  public:
+  enum Kind {
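+    // Default is -1 so that it is encoded as the BitTable's "no value" and
+    // the column takes (almost) no space in the common case.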
+    Default = -1,
+    Catch = 0,
+    OSR = 1,
+    Debug = 2,
+  };
   BIT_TABLE_HEADER()
-  BIT_TABLE_COLUMN(0, PackedNativePc)
-  BIT_TABLE_COLUMN(1, DexPc)
-  BIT_TABLE_COLUMN(2, RegisterMaskIndex)
-  BIT_TABLE_COLUMN(3, StackMaskIndex)
-  BIT_TABLE_COLUMN(4, InlineInfoIndex)
-  BIT_TABLE_COLUMN(5, DexRegisterMaskIndex)
-  BIT_TABLE_COLUMN(6, DexRegisterMapIndex)
+  BIT_TABLE_COLUMN(0, Kind)
+  BIT_TABLE_COLUMN(1, PackedNativePc)
+  BIT_TABLE_COLUMN(2, DexPc)
+  BIT_TABLE_COLUMN(3, RegisterMaskIndex)
+  BIT_TABLE_COLUMN(4, StackMaskIndex)
+  BIT_TABLE_COLUMN(5, InlineInfoIndex)
+  BIT_TABLE_COLUMN(6, DexRegisterMaskIndex)
+  BIT_TABLE_COLUMN(7, DexRegisterMapIndex)
 
   ALWAYS_INLINE uint32_t GetNativePcOffset(InstructionSet instruction_set) const {
     return UnpackNativePc(Get<kPackedNativePc>(), instruction_set);
@@ -415,19 +422,18 @@
   StackMap GetStackMapForDexPc(uint32_t dex_pc) const {
     for (size_t i = 0, e = GetNumberOfStackMaps(); i < e; ++i) {
       StackMap stack_map = GetStackMapAt(i);
-      if (stack_map.GetDexPc() == dex_pc) {
+      if (stack_map.GetDexPc() == dex_pc && stack_map.GetKind() != StackMap::Kind::Debug) {
         return stack_map;
       }
     }
     return StackMap();
   }
 
-  // Searches the stack map list backwards because catch stack maps are stored
-  // at the end.
+  // Searches the stack map list backwards because catch stack maps are stored at the end.
   StackMap GetCatchStackMapForDexPc(uint32_t dex_pc) const {
     for (size_t i = GetNumberOfStackMaps(); i > 0; --i) {
       StackMap stack_map = GetStackMapAt(i - 1);
-      if (stack_map.GetDexPc() == dex_pc) {
+      if (stack_map.GetDexPc() == dex_pc && stack_map.GetKind() == StackMap::Kind::Catch) {
         return stack_map;
       }
     }
@@ -435,41 +441,26 @@
   }
 
   StackMap GetOsrStackMapForDexPc(uint32_t dex_pc) const {
-    size_t e = GetNumberOfStackMaps();
-    if (e == 0) {
-      // There cannot be OSR stack map if there is no stack map.
-      return StackMap();
-    }
-    // Walk over all stack maps. If two consecutive stack maps are identical, then we
-    // have found a stack map suitable for OSR.
-    for (size_t i = 0; i < e - 1; ++i) {
+    for (size_t i = 0, e = GetNumberOfStackMaps(); i < e; ++i) {
       StackMap stack_map = GetStackMapAt(i);
-      if (stack_map.GetDexPc() == dex_pc) {
-        StackMap other = GetStackMapAt(i + 1);
-        if (other.GetDexPc() == dex_pc &&
-            other.GetNativePcOffset(kRuntimeISA) ==
-                stack_map.GetNativePcOffset(kRuntimeISA)) {
-          if (i < e - 2) {
-            // Make sure there are not three identical stack maps following each other.
-            DCHECK_NE(
-                stack_map.GetNativePcOffset(kRuntimeISA),
-                GetStackMapAt(i + 2).GetNativePcOffset(kRuntimeISA));
-          }
-          return stack_map;
-        }
+      if (stack_map.GetDexPc() == dex_pc && stack_map.GetKind() == StackMap::Kind::OSR) {
+        return stack_map;
       }
     }
     return StackMap();
   }
 
-  StackMap GetStackMapForNativePcOffset(uint32_t native_pc_offset) const {
+  StackMap GetStackMapForNativePcOffset(uint32_t pc, InstructionSet isa = kRuntimeISA) const {
     // TODO: Safepoint stack maps are sorted by native_pc_offset but catch stack
     //       maps are not. If we knew that the method does not have try/catch,
     //       we could do binary search.
     for (size_t i = 0, e = GetNumberOfStackMaps(); i < e; ++i) {
       StackMap stack_map = GetStackMapAt(i);
-      if (stack_map.GetNativePcOffset(kRuntimeISA) == native_pc_offset) {
-        return stack_map;
+      if (stack_map.GetNativePcOffset(isa) == pc) {
+        StackMap::Kind kind = static_cast<StackMap::Kind>(stack_map.GetKind());
+        if (kind == StackMap::Kind::Default || kind == StackMap::Kind::OSR) {
+          return stack_map;
+        }
       }
     }
     return StackMap();