ART: Generate switch targets from successor blocks

This patch generates switch targets from the successor blocks
in GenSmallPackedSwitch and GenSmallSparseSwitch for all quick targets.
On x86, we create a new packed switch table that stores basic block
ids instead of dex offsets, and we override MarkPackedCaseLabels and
InsertCaseLabel so they no longer need to call FindBlock.
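
For illustration only, a minimal, self-contained sketch (not part of
the patch) of the idea: the switch MIR's successor list already pairs
each case key with its target basic block id, so case branches can be
emitted by walking that list instead of recomputing targets via
FindBlock. SuccessorInfo, Block and EmitCaseCompare are hypothetical
stand-ins for ART's SuccessorBlockInfo, BasicBlock and OpCmpImmBranch.

  #include <cassert>
  #include <cstdint>
  #include <cstdio>
  #include <vector>

  // Hypothetical stand-ins for ART's SuccessorBlockInfo and BasicBlock.
  struct SuccessorInfo {
    int block;     // target basic block id
    int32_t key;   // switch case key
  };

  struct Block {
    std::vector<SuccessorInfo> successors;
  };

  // Hypothetical emit hook; the patch itself calls OpCmpImmBranch
  // against block_label_list_[case_block_id].
  static void EmitCaseCompare(int32_t key, int target_block_id) {
    std::printf("if (src == %d) goto block %d\n", key, target_block_id);
  }

  // For a packed switch, the i-th successor carries key starting_key + i,
  // so the case targets fall out of the successor list directly.
  static void GenPackedFromSuccessors(const Block& bb, int32_t starting_key) {
    int i = 0;
    for (const SuccessorInfo& succ : bb.successors) {
      assert(succ.key == starting_key + i);
      EmitCaseCompare(starting_key + i, succ.block);
      ++i;
    }
  }

  int main() {
    Block bb{{{7, 10}, {9, 11}, {12, 12}}};  // block ids 7, 9, 12; keys 10..12
    GenPackedFromSuccessors(bb, 10);
    return 0;
  }

The x86 path applies the same idea one step later: ConvertPackedSwitchTable
rewrites targets[] to hold these block ids, so MarkPackedCaseLabels and
InsertCaseLabel can look up labels by block id rather than by dex offset.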

Change-Id: Ibb5983db582f0965aba787b520bd106522453564
Signed-off-by: Chao-ying Fu <chao-ying.fu@intel.com>
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index c00f90b..c1c0d32 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -2212,43 +2212,53 @@
 }
 
 void Mir2Lir::GenSmallPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
+  BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb);
+  DCHECK(bb != nullptr);
+  ArenaVector<SuccessorBlockInfo*>::const_iterator succ_bb_iter = bb->successor_blocks.cbegin();
   const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
   const uint16_t entries = table[1];
   // Chained cmp-and-branch.
   const int32_t* as_int32 = reinterpret_cast<const int32_t*>(&table[2]);
   int32_t starting_key = as_int32[0];
-  const int32_t* targets = &as_int32[1];
   rl_src = LoadValue(rl_src, kCoreReg);
   int i = 0;
-  for (; i < entries; i++) {
+  for (; i < entries; ++i, ++succ_bb_iter) {
     if (!InexpensiveConstantInt(starting_key + i, Instruction::Code::IF_EQ)) {
       // Switch to using a temp and add.
       break;
     }
-    BasicBlock* case_block =
-        mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
-    OpCmpImmBranch(kCondEq, rl_src.reg, starting_key + i, &block_label_list_[case_block->id]);
+    SuccessorBlockInfo* successor_block_info = *succ_bb_iter;
+    DCHECK(successor_block_info != nullptr);
+    int case_block_id = successor_block_info->block;
+    DCHECK_EQ(starting_key + i, successor_block_info->key);
+    OpCmpImmBranch(kCondEq, rl_src.reg, starting_key + i, &block_label_list_[case_block_id]);
   }
   if (i < entries) {
     // The rest do not seem to be inexpensive. Try to allocate a temp and use add.
     RegStorage key_temp = AllocTypedTemp(false, kCoreReg, false);
     if (key_temp.Valid()) {
       LoadConstantNoClobber(key_temp, starting_key + i);
-      for (; i < entries - 1; i++) {
-        BasicBlock* case_block =
-            mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
-        OpCmpBranch(kCondEq, rl_src.reg, key_temp, &block_label_list_[case_block->id]);
+      for (; i < entries - 1; ++i, ++succ_bb_iter) {
+        SuccessorBlockInfo* successor_block_info = *succ_bb_iter;
+        DCHECK(successor_block_info != nullptr);
+        int case_block_id = successor_block_info->block;
+        DCHECK_EQ(starting_key + i, successor_block_info->key);
+        OpCmpBranch(kCondEq, rl_src.reg, key_temp, &block_label_list_[case_block_id]);
         OpRegImm(kOpAdd, key_temp, 1);  // Increment key.
       }
-      BasicBlock* case_block =
-          mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
-      OpCmpBranch(kCondEq, rl_src.reg, key_temp, &block_label_list_[case_block->id]);
+      SuccessorBlockInfo* successor_block_info = *succ_bb_iter;
+      DCHECK(successor_block_info != nullptr);
+      int case_block_id = successor_block_info->block;
+      DCHECK_EQ(starting_key + i, successor_block_info->key);
+      OpCmpBranch(kCondEq, rl_src.reg, key_temp, &block_label_list_[case_block_id]);
     } else {
       // No free temp, just finish the old loop.
-      for (; i < entries; i++) {
-        BasicBlock* case_block =
-            mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
-        OpCmpImmBranch(kCondEq, rl_src.reg, starting_key + i, &block_label_list_[case_block->id]);
+      for (; i < entries; ++i, ++succ_bb_iter) {
+        SuccessorBlockInfo* successor_block_info = *succ_bb_iter;
+        DCHECK(successor_block_info != nullptr);
+        int case_block_id = successor_block_info->block;
+        DCHECK_EQ(starting_key + i, successor_block_info->key);
+        OpCmpImmBranch(kCondEq, rl_src.reg, starting_key + i, &block_label_list_[case_block_id]);
       }
     }
   }
@@ -2257,7 +2267,7 @@
 void Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
   const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
   if (cu_->verbose) {
-    DumpSparseSwitchTable(table);
+    DumpPackedSwitchTable(table);
   }
 
   const uint16_t entries = table[1];
@@ -2270,18 +2280,20 @@
 }
 
 void Mir2Lir::GenSmallSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
+  BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb);
+  DCHECK(bb != nullptr);
   const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
   const uint16_t entries = table[1];
   // Chained cmp-and-branch.
-  const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]);
-  const int32_t* targets = &keys[entries];
   rl_src = LoadValue(rl_src, kCoreReg);
-  for (int i = 0; i < entries; i++) {
-    int key = keys[i];
-    BasicBlock* case_block =
-        mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
-    OpCmpImmBranch(kCondEq, rl_src.reg, key, &block_label_list_[case_block->id]);
+  int i = 0;
+  for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
+    int case_block_id = successor_block_info->block;
+    int key = successor_block_info->key;
+    OpCmpImmBranch(kCondEq, rl_src.reg, key, &block_label_list_[case_block_id]);
+    i++;
   }
+  DCHECK_EQ(i, entries);
 }
 
 void Mir2Lir::GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index 886b238..b610dd9 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -680,7 +680,7 @@
     int AssignSwitchTablesOffset(CodeOffset offset);
     int AssignFillArrayDataOffset(CodeOffset offset);
     virtual LIR* InsertCaseLabel(DexOffset vaddr, int keyVal);
-    void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec);
+    virtual void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec);
     void MarkSparseCaseLabels(Mir2Lir::SwitchTable* tab_rec);
 
     // Handle bookkeeping to convert a wide RegLocation to a narrow RegLocation.  No code generated.
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index a808459..be10d93 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -30,23 +30,88 @@
  * pairs.
  */
 void X86Mir2Lir::GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
+  GenSmallSparseSwitch(mir, table_offset, rl_src);
+}
+
+/*
+ * We override InsertCaseLabel because its first parameter represents
+ * a basic block id instead of a dex offset.
+ */
+LIR* X86Mir2Lir::InsertCaseLabel(DexOffset bbid, int keyVal) {
+  LIR* boundary_lir = &block_label_list_[bbid];
+  LIR* res = boundary_lir;
   if (cu_->verbose) {
-    DumpSparseSwitchTable(table);
+    // Only pay the expense if we're pretty-printing.
+    LIR* new_label = static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR));
+    BasicBlock* bb = mir_graph_->GetBasicBlock(bbid);
+    DCHECK(bb != nullptr);
+    new_label->dalvik_offset = bb->start_offset;
+    new_label->opcode = kPseudoCaseLabel;
+    new_label->operands[0] = keyVal;
+    new_label->flags.fixup = kFixupLabel;
+    DCHECK(!new_label->flags.use_def_invalid);
+    new_label->u.m.def_mask = &kEncodeAll;
+    InsertLIRAfter(boundary_lir, new_label);
+    res = new_label;
   }
+  return res;
+}
+
+void X86Mir2Lir::MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) {
+  const uint16_t* table = tab_rec->table;
+  const int32_t* targets = reinterpret_cast<const int32_t*>(&table[4]);
   int entries = table[1];
-  const int32_t* keys = reinterpret_cast<const int32_t*>(&table[2]);
-  const int32_t* targets = &keys[entries];
-  rl_src = LoadValue(rl_src, kCoreReg);
+  int low_key = s4FromSwitchData(&table[2]);
   for (int i = 0; i < entries; i++) {
-    int key = keys[i];
-    BasicBlock* case_block =
-        mir_graph_->FindBlock(current_dalvik_offset_ + targets[i]);
-    OpCmpImmBranch(kCondEq, rl_src.reg, key, &block_label_list_[case_block->id]);
+    // The value at targets[i] is a basic block id, instead of a dex offset.
+    tab_rec->targets[i] = InsertCaseLabel(targets[i], i + low_key);
   }
 }
 
 /*
+ * We create a new packed switch table that stores basic block ids
+ * in targets[] by examining successor blocks.
+ * Note that the original packed switch table stores dex offsets in targets[].
+ */
+const uint16_t* X86Mir2Lir::ConvertPackedSwitchTable(MIR* mir, const uint16_t* table) {
+  /*
+   * The original packed switch data format:
+   *  ushort ident = 0x0100  magic value
+   *  ushort size            number of entries in the table
+   *  int first_key          first (and lowest) switch case value
+   *  int targets[size]      branch targets, relative to switch opcode
+   *
+   * Total size is (4+size*2) 16-bit code units.
+   *
+   * Note that the new packed switch data format is the same as the original
+   * format, except that targets[] are basic block ids.
+   *
+   */
+  BasicBlock* bb = mir_graph_->GetBasicBlock(mir->bb);
+  DCHECK(bb != nullptr);
+  // Get the number of entries.
+  int entries = table[1];
+  const int32_t* as_int32 = reinterpret_cast<const int32_t*>(&table[2]);
+  int32_t starting_key = as_int32[0];
+  // Create a new table.
+  int size = sizeof(uint16_t) * (4 + entries * 2);
+  uint16_t* new_table = reinterpret_cast<uint16_t*>(arena_->Alloc(size, kArenaAllocMisc));
+  // Copy ident, size, and first_key to the new table.
+  memcpy(new_table, table, sizeof(uint16_t) * 4);
+  // Get the new targets.
+  int32_t* new_targets = reinterpret_cast<int32_t*>(&new_table[4]);
+  // Find out targets for each entry.
+  int i = 0;
+  for (SuccessorBlockInfo* successor_block_info : bb->successor_blocks) {
+    DCHECK_EQ(starting_key + i, successor_block_info->key);
+    // Save target basic block id.
+    new_targets[i++] = successor_block_info->block;
+  }
+  DCHECK_EQ(i, entries);
+  return new_table;
+}
+
+/*
  * Code pattern will look something like:
  *
  * mov  r_val, ..
@@ -63,10 +128,8 @@
  * done:
  */
 void X86Mir2Lir::GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) {
-  const uint16_t* table = mir_graph_->GetTable(mir, table_offset);
-  if (cu_->verbose) {
-    DumpPackedSwitchTable(table);
-  }
+  const uint16_t* old_table = mir_graph_->GetTable(mir, table_offset);
+  const uint16_t* table = ConvertPackedSwitchTable(mir, old_table);
   // Add the table to the list - we'll process it later
   SwitchTable* tab_rec =
       static_cast<SwitchTable*>(arena_->Alloc(sizeof(SwitchTable), kArenaAllocData));
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 26641f8..09e1482 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -264,8 +264,11 @@
                                      int first_bit, int second_bit) OVERRIDE;
   void GenNegDouble(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
   void GenNegFloat(RegLocation rl_dest, RegLocation rl_src) OVERRIDE;
+  const uint16_t* ConvertPackedSwitchTable(MIR* mir, const uint16_t* table);
   void GenLargePackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
   void GenLargeSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src) OVERRIDE;
+  LIR* InsertCaseLabel(DexOffset vaddr, int keyVal) OVERRIDE;
+  void MarkPackedCaseLabels(Mir2Lir::SwitchTable* tab_rec) OVERRIDE;
 
   /**
    * @brief Implement instanceof a final class with x86 specific code.