Decode only the needed tables from CodeInfo.

Most use cases need only the first two bit tables from CodeInfo.
Add a flag to the decode method so that only those two are loaded,
and move the InlineInfo table up so that it is the second table
(hence the oat version bump). Previously, we touched only the table
headers of the remaining tables, but skipping even that makes a
measurable difference.
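
A minimal usage sketch (method_header and native_pc_offset are
placeholder names; the flag and the accessors are the ones used in
this change):

  // Decode only the StackMap and InlineInfo tables; the register
  // and dex-register tables are left unloaded.
  CodeInfo code_info(method_header, CodeInfo::DecodeFlags::InlineInfoOnly);
  StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
  BitTableRange<InlineInfo> inline_infos = code_info.GetInlineInfosOf(stack_map);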

This speeds up the pmd benchmark by over 10%.

Test: test-art-host-gtest
Change-Id: I7740081bf18205dd69864503b5bcec7de5e1a901
diff --git a/compiler/debug/method_debug_info.h b/compiler/debug/method_debug_info.h
index d0b03ec..729c403 100644
--- a/compiler/debug/method_debug_info.h
+++ b/compiler/debug/method_debug_info.h
@@ -41,7 +41,7 @@
   uint64_t code_address;
   uint32_t code_size;
   uint32_t frame_size_in_bytes;
-  const void* code_info;
+  const uint8_t* code_info;
   ArrayRef<const uint8_t> cfi;
 };
 
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index d74d7b6..da6c711 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -310,9 +310,9 @@
   EncodeUnsignedLeb128(&out_, num_dex_registers_);
   BitMemoryWriter<ScopedArenaVector<uint8_t>> out(&out_, out_.size() * kBitsPerByte);
   stack_maps_.Encode(out);
+  inline_infos_.Encode(out);
   register_masks_.Encode(out);
   stack_masks_.Encode(out);
-  inline_infos_.Encode(out);
   dex_register_masks_.Encode(out);
   dex_register_maps_.Encode(out);
   dex_register_catalog_.Encode(out);
diff --git a/libartbase/base/bit_table.h b/libartbase/base/bit_table.h
index ee47721..1c7614b 100644
--- a/libartbase/base/bit_table.h
+++ b/libartbase/base/bit_table.h
@@ -93,6 +93,7 @@
   }
 
   ALWAYS_INLINE uint32_t Get(uint32_t row, uint32_t column = 0) const {
+    DCHECK_NE(header_bit_size_, 0u) << "Table has not been loaded";
     DCHECK_LT(row, num_rows_);
     DCHECK_LT(column, kNumColumns);
     size_t offset = row * NumRowBits() + column_offset_[column];
@@ -100,6 +101,7 @@
   }
 
   ALWAYS_INLINE BitMemoryRegion GetBitMemoryRegion(uint32_t row, uint32_t column = 0) const {
+    DCHECK_NE(header_bit_size_, 0u) << "Table has not been loaded";
     DCHECK_LT(row, num_rows_);
     DCHECK_LT(column, kNumColumns);
     size_t offset = row * NumRowBits() + column_offset_[column];
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index 4521e63..271d37d 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -1310,7 +1310,7 @@
                     const CodeItemDataAccessor& code_item_accessor) {
     if (IsMethodGeneratedByOptimizingCompiler(oat_method, code_item_accessor)) {
       // The optimizing compiler outputs its CodeInfo data in the vmap table.
-      const void* raw_code_info = oat_method.GetVmapTable();
+      const uint8_t* raw_code_info = oat_method.GetVmapTable();
       if (raw_code_info != nullptr) {
         CodeInfo code_info(raw_code_info);
         DCHECK(code_item_accessor.HasCodeItem());
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 0c61965..a5ebce5 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -201,7 +201,7 @@
       DCHECK(current_code != nullptr);
       DCHECK(current_code->IsOptimized());
       uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
-      CodeInfo code_info(current_code);
+      CodeInfo code_info(current_code, CodeInfo::DecodeFlags::InlineInfoOnly);
       MethodInfo method_info = current_code->GetOptimizedMethodInfo();
       StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
       DCHECK(stack_map.IsValid());
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 9cae3ae..be4e4e6 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -343,7 +343,7 @@
     uintptr_t outer_pc_offset = current_code->NativeQuickPcOffset(outer_pc);
 
     if (current_code->IsOptimized()) {
-      CodeInfo code_info(current_code);
+      CodeInfo code_info(current_code, CodeInfo::DecodeFlags::InlineInfoOnly);
       StackMap stack_map = code_info.GetStackMapForNativePcOffset(outer_pc_offset);
       DCHECK(stack_map.IsValid());
       BitTableRange<InlineInfo> inline_infos = code_info.GetInlineInfosOf(stack_map);
diff --git a/runtime/oat.h b/runtime/oat.h
index f8ec665..3fa5a63 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  // Last oat version changed reason: Remove InvokeInfo from stack maps.
-  static constexpr uint8_t kOatVersion[] = { '1', '5', '4', '\0' };
+  // Last oat version changed reason: Move InlineInfo bit table.
+  static constexpr uint8_t kOatVersion[] = { '1', '5', '5', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_quick_method_header.cc b/runtime/oat_quick_method_header.cc
index 0b239c1..79c3ad6 100644
--- a/runtime/oat_quick_method_header.cc
+++ b/runtime/oat_quick_method_header.cc
@@ -44,7 +44,7 @@
     return dex::kDexNoIndex;
   } else {
     DCHECK(IsOptimized());
-    CodeInfo code_info(this);
+    CodeInfo code_info(this, CodeInfo::DecodeFlags::InlineInfoOnly);
     StackMap stack_map = code_info.GetStackMapForNativePcOffset(sought_offset);
     if (stack_map.IsValid()) {
       return stack_map.GetDexPc();
@@ -69,7 +69,7 @@
   DCHECK(!method->IsNative());
   DCHECK(IsOptimized());
   // Search for the dex-to-pc mapping in stack maps.
-  CodeInfo code_info(this);
+  CodeInfo code_info(this, CodeInfo::DecodeFlags::InlineInfoOnly);
 
   // All stack maps are stored in the same CodeItem section, safepoint stack
   // maps first, then catch stack maps. We use `is_for_catch_handler` to select
diff --git a/runtime/stack.cc b/runtime/stack.cc
index c4851e1..85b1ea0 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -795,7 +795,7 @@
             // JNI methods cannot have any inlined frames.
             && !method->IsNative()) {
           DCHECK_NE(cur_quick_frame_pc_, 0u);
-          CodeInfo code_info(cur_oat_quick_method_header_);
+          CodeInfo code_info(cur_oat_quick_method_header_, CodeInfo::DecodeFlags::InlineInfoOnly);
           uint32_t native_pc_offset =
               cur_oat_quick_method_header_->NativeQuickPcOffset(cur_quick_frame_pc_);
           StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset);
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index 9fa9d84..62b9f35 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -27,11 +27,11 @@
 
 namespace art {
 
-CodeInfo::CodeInfo(const OatQuickMethodHeader* header)
-  : CodeInfo(header->GetOptimizedCodeInfoPtr()) {
+CodeInfo::CodeInfo(const OatQuickMethodHeader* header, DecodeFlags flags)
+  : CodeInfo(header->GetOptimizedCodeInfoPtr(), flags) {
 }
 
-void CodeInfo::Decode(const uint8_t* data) {
+void CodeInfo::Decode(const uint8_t* data, DecodeFlags flags) {
   const uint8_t* begin = data;
   frame_size_in_bytes_ = DecodeUnsignedLeb128(&data);
   core_spill_mask_ = DecodeUnsignedLeb128(&data);
@@ -39,9 +39,12 @@
   number_of_dex_registers_ = DecodeUnsignedLeb128(&data);
   BitMemoryReader reader(data, /* bit_offset */ 0);
   stack_maps_.Decode(reader);
+  inline_infos_.Decode(reader);
+  if (flags & DecodeFlags::InlineInfoOnly) {
+    return;
+  }
   register_masks_.Decode(reader);
   stack_masks_.Decode(reader);
-  inline_infos_.Decode(reader);
   dex_register_masks_.Decode(reader);
   dex_register_maps_.Decode(reader);
   dex_register_catalog_.Decode(reader);
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 26b95b0..928f0f2 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -268,15 +268,22 @@
  */
 class CodeInfo {
  public:
-  explicit CodeInfo(const void* data) {
-    Decode(reinterpret_cast<const uint8_t*>(data));
+  enum DecodeFlags {
+    Default = 0,
+    // Limits decoding to the main stack map table and the inline info table.
+    // This is sufficient for many use cases and makes header decoding faster.
+    InlineInfoOnly = 1,
+  };
+
+  explicit CodeInfo(const uint8_t* data, DecodeFlags flags = DecodeFlags::Default) {
+    Decode(data, flags);
   }
 
   explicit CodeInfo(MemoryRegion region) : CodeInfo(region.begin()) {
     DCHECK_EQ(Size(), region.size());
   }
 
-  explicit CodeInfo(const OatQuickMethodHeader* header);
+  explicit CodeInfo(const OatQuickMethodHeader* header, DecodeFlags flags = DecodeFlags::Default);
 
   size_t Size() const {
     return BitsToBytesRoundUp(size_in_bits_);
@@ -421,20 +428,20 @@
                             uint32_t first_dex_register,
                             /*out*/ DexRegisterMap* map) const;
 
-  void Decode(const uint8_t* data);
+  void Decode(const uint8_t* data, DecodeFlags flags);
 
   uint32_t frame_size_in_bytes_;
   uint32_t core_spill_mask_;
   uint32_t fp_spill_mask_;
   uint32_t number_of_dex_registers_;
   BitTable<StackMap> stack_maps_;
+  BitTable<InlineInfo> inline_infos_;
   BitTable<RegisterMask> register_masks_;
   BitTable<MaskInfo> stack_masks_;
-  BitTable<InlineInfo> inline_infos_;
   BitTable<MaskInfo> dex_register_masks_;
   BitTable<DexRegisterMapInfo> dex_register_maps_;
   BitTable<DexRegisterInfo> dex_register_catalog_;
-  uint32_t size_in_bits_;
+  uint32_t size_in_bits_ = 0;
 };
 
 #undef ELEMENT_BYTE_OFFSET_AFTER