X86_64 jump tables for PackedSwitch

Implement PackedSwitch using a jump table of signed 32-bit offsets from the
table base to the case blocks. The table is emitted into the constant area
and addressed RIP-relatively.
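
As a sketch, the emitted dispatch sequence looks roughly like the
following (register names are placeholders; the bias-removing lea is
skipped when lower_bound is zero):

    lea    temp, [value - lower_bound]  ; remove the bias
    cmp    temp, num_entries - 1        ; one unsigned compare catches both
    ja     default_block                ;   too-low and too-high values
    lea    base, [rip + jump_table]     ; table lives in the constant area
    movsxd temp, dword [base + temp*4]  ; load the signed 32-bit entry
    add    temp, base                   ; entries are relative to the table base
    jmp    temp

The table itself can only be written during Finalize(), once all case
labels are bound, so each HPackedSwitch registers a fixup and the tables
are appended to the constant area just before it is added to the code.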

Bug: 24092914
Bug: 21119474
Change-Id: I83430086c03ef728d30d79b4022607e9245ef98f
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 21120a0..f0d9420 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -670,7 +670,8 @@
         constant_area_start_(0),
         method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+        pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
@@ -5322,31 +5323,43 @@
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
   int32_t lower_bound = switch_instr->GetStartValue();
   int32_t num_entries = switch_instr->GetNumEntries();
   LocationSummary* locations = switch_instr->GetLocations();
-  CpuRegister value_reg = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
+  CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
+  CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
+
+  // Remove the bias, if needed.
+  Register value_reg_out = value_reg_in.AsRegister();
+  if (lower_bound != 0) {
+    __ leal(temp_reg, Address(value_reg_in, -lower_bound));
+    value_reg_out = temp_reg.AsRegister();
+  }
+  CpuRegister value_reg(value_reg_out);
+
+  // Is the value in range?
   HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+  __ cmpl(value_reg, Immediate(num_entries - 1));
+  __ j(kAbove, codegen_->GetLabelOf(default_block));
 
-  // Create a series of compare/jumps.
-  const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
-  for (int i = 0; i < num_entries; i++) {
-    int32_t case_value = lower_bound + i;
-    if (case_value == 0) {
-      __ testl(value_reg, value_reg);
-    } else {
-      __ cmpl(value_reg, Immediate(case_value));
-    }
-    __ j(kEqual, codegen_->GetLabelOf(successors[i]));
-  }
+  // The value is in the range of the table.
+  // Load the address of the jump table in the constant area.
+  __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
 
-  // And the default for any other value.
-  if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
-      __ jmp(codegen_->GetLabelOf(default_block));
-  }
+  // Load the (signed) offset from the jump table.
+  __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
+
+  // Add the offset to the table base to compute the target address.
+  __ addq(temp_reg, base_reg);
+
+  // And jump.
+  __ jmp(temp_reg);
 }
 
 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
@@ -5372,15 +5385,85 @@
   }
 }
 
+/**
+ * Class to handle late fixup of offsets into constant area.
+ */
+class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
+ public:
+  RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
+      : codegen_(&codegen), offset_into_constant_area_(offset) {}
+
+ protected:
+  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
+
+  CodeGeneratorX86_64* codegen_;
+
+ private:
+  void Process(const MemoryRegion& region, int pos) OVERRIDE {
+    // Patch the correct offset for the instruction.  We use the address of the
+    // 'next' instruction, which is 'pos' (patch the 4 bytes before).
+    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
+    int32_t relative_position = constant_offset - pos;
+
+    // Patch in the right value.
+    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
+  }
+
+  // Location in constant area that the fixup refers to.
+  size_t offset_into_constant_area_;
+};
+
+/**
+ * Class to handle late fixup of offsets to a jump table that will be created in the
+ * constant area.
+ */
+class JumpTableRIPFixup : public RIPFixup {
+ public:
+  JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
+      : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
+
+  void CreateJumpTable() {
+    X86_64Assembler* assembler = codegen_->GetAssembler();
+
+    // Ensure that the reference to the jump table has the correct offset.
+    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
+    SetOffset(offset_in_constant_table);
+
+    // Compute the offset from the start of the function to this jump table.
+    const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
+
+    // Populate the jump table with the correct values.
+    int32_t num_entries = switch_instr_->GetNumEntries();
+    HBasicBlock* block = switch_instr_->GetBlock();
+    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
+    // Each entry is the target's position minus the position of the table.
+    for (int32_t i = 0; i < num_entries; i++) {
+      HBasicBlock* b = successors[i];
+      Label* l = codegen_->GetLabelOf(b);
+      DCHECK(l->IsBound());
+      int32_t offset_to_block = l->Position() - current_table_offset;
+      assembler->AppendInt32(offset_to_block);
+    }
+  }
+
+ private:
+  const HPackedSwitch* switch_instr_;
+};
+
 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
   // Generate the constant area if needed.
   X86_64Assembler* assembler = GetAssembler();
-  if (!assembler->IsConstantAreaEmpty()) {
-    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8
-    // byte values.  If used for vectors at a later time, this will need to be
-    // updated to 16 bytes with the appropriate offset.
+  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
+    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
     assembler->Align(4, 0);
     constant_area_start_ = assembler->CodeSize();
+
+    // Populate any jump tables.
+    for (auto jump_table : fixups_to_jump_tables_) {
+      jump_table->CreateJumpTable();
+    }
+
+    // And now add the constant area to the generated code.
     assembler->AddConstantArea();
   }
 
@@ -5388,31 +5471,6 @@
   CodeGenerator::Finalize(allocator);
 }
 
-/**
- * Class to handle late fixup of offsets into constant area.
- */
-class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
-  public:
-    RIPFixup(const CodeGeneratorX86_64& codegen, int offset)
-      : codegen_(codegen), offset_into_constant_area_(offset) {}
-
-  private:
-    void Process(const MemoryRegion& region, int pos) OVERRIDE {
-      // Patch the correct offset for the instruction.  We use the address of the
-      // 'next' instruction, which is 'pos' (patch the 4 bytes before).
-      int constant_offset = codegen_.ConstantAreaStart() + offset_into_constant_area_;
-      int relative_position = constant_offset - pos;
-
-      // Patch in the right value.
-      region.StoreUnaligned<int32_t>(pos - 4, relative_position);
-    }
-
-    const CodeGeneratorX86_64& codegen_;
-
-    // Location in constant area that the fixup refers to.
-    int offset_into_constant_area_;
-};
-
 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
   AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
   return Address::RIP(fixup);
@@ -5453,6 +5511,16 @@
   GetMoveResolver()->EmitNativeCode(&parallel_move);
 }
 
+Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
+  // Create a fixup, which will be used both to create the jump table and to address it.
+  JumpTableRIPFixup* table_fixup =
+      new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
+
+  // Remember the fixup, so that the jump table can be populated in Finalize().
+  fixups_to_jump_tables_.push_back(table_fixup);
+  return Address::RIP(table_fixup);
+}
+
 #undef __
 
 }  // namespace x86_64
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index d6a6a7e..dc86a48 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -234,6 +234,9 @@
   DISALLOW_COPY_AND_ASSIGN(InstructionCodeGeneratorX86_64);
 };
 
+// Class for fixups to jump tables.
+class JumpTableRIPFixup;
+
 class CodeGeneratorX86_64 : public CodeGenerator {
  public:
   CodeGeneratorX86_64(HGraph* graph,
@@ -354,6 +357,7 @@
 
   // Load a 64 bit value into a register in the most efficient manner.
   void Load64BitValue(CpuRegister dest, int64_t value);
+  Address LiteralCaseTable(HPackedSwitch* switch_instr);
 
   // Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
   void Store64BitValueToStack(Location dest, int64_t value);
@@ -391,6 +395,9 @@
   // We will fix this up in the linker later to have the right value.
   static constexpr int32_t kDummy32BitOffset = 256;
 
+  // Fixups for jump tables need to be handled specially.
+  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
 };
 
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 6e7d74d..9eb5e67 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -3122,7 +3122,14 @@
   }
 }
 
-int ConstantArea::AddInt32(int32_t v) {
+size_t ConstantArea::AppendInt32(int32_t v) {
+  size_t result = buffer_.size() * elem_size_;
+  buffer_.push_back(v);
+  return result;
+}
+
+size_t ConstantArea::AddInt32(int32_t v) {
+  // Look for an existing match.
   for (size_t i = 0, e = buffer_.size(); i < e; i++) {
     if (v == buffer_[i]) {
       return i * elem_size_;
@@ -3130,12 +3137,10 @@
   }
 
   // Didn't match anything.
-  int result = buffer_.size() * elem_size_;
-  buffer_.push_back(v);
-  return result;
+  return AppendInt32(v);
 }
 
-int ConstantArea::AddInt64(int64_t v) {
+size_t ConstantArea::AddInt64(int64_t v) {
   int32_t v_low = v;
   int32_t v_high = v >> 32;
   if (buffer_.size() > 1) {
@@ -3148,18 +3153,18 @@
   }
 
   // Didn't match anything.
-  int result = buffer_.size() * elem_size_;
+  size_t result = buffer_.size() * elem_size_;
   buffer_.push_back(v_low);
   buffer_.push_back(v_high);
   return result;
 }
 
-int ConstantArea::AddDouble(double v) {
+size_t ConstantArea::AddDouble(double v) {
   // Treat the value as a 64-bit integer value.
   return AddInt64(bit_cast<int64_t, double>(v));
 }
 
-int ConstantArea::AddFloat(float v) {
+size_t ConstantArea::AddFloat(float v) {
   // Treat the value as a 32-bit integer value.
   return AddInt32(bit_cast<int32_t, float>(v));
 }
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 255f551..01d28e3 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -269,36 +269,40 @@
  * Class to handle constant area values.
  */
 class ConstantArea {
-  public:
-    ConstantArea() {}
+ public:
+  ConstantArea() {}
 
-    // Add a double to the constant area, returning the offset into
-    // the constant area where the literal resides.
-    int AddDouble(double v);
+  // Add a double to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AddDouble(double v);
 
-    // Add a float to the constant area, returning the offset into
-    // the constant area where the literal resides.
-    int AddFloat(float v);
+  // Add a float to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AddFloat(float v);
 
-    // Add an int32_t to the constant area, returning the offset into
-    // the constant area where the literal resides.
-    int AddInt32(int32_t v);
+  // Add an int32_t to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AddInt32(int32_t v);
 
-    // Add an int64_t to the constant area, returning the offset into
-    // the constant area where the literal resides.
-    int AddInt64(int64_t v);
+  // Add an int32_t to the end of the constant area without looking for an
+  // existing match, returning the offset where the literal resides.
+  size_t AppendInt32(int32_t v);
 
-    int GetSize() const {
-      return buffer_.size() * elem_size_;
-    }
+  // Add an int64_t to the constant area, returning the offset into
+  // the constant area where the literal resides.
+  size_t AddInt64(int64_t v);
 
-    const std::vector<int32_t>& GetBuffer() const {
-      return buffer_;
-    }
+  size_t GetSize() const {
+    return buffer_.size() * elem_size_;
+  }
 
-  private:
-    static constexpr size_t elem_size_ = sizeof(int32_t);
-    std::vector<int32_t> buffer_;
+  const std::vector<int32_t>& GetBuffer() const {
+    return buffer_;
+  }
+
+ private:
+  static constexpr size_t elem_size_ = sizeof(int32_t);
+  std::vector<int32_t> buffer_;
 };
 
 
@@ -806,19 +810,27 @@
 
   // Add a double to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddDouble(double v) { return constant_area_.AddDouble(v); }
+  size_t AddDouble(double v) { return constant_area_.AddDouble(v); }
 
   // Add a float to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddFloat(float v)   { return constant_area_.AddFloat(v); }
+  size_t AddFloat(float v)   { return constant_area_.AddFloat(v); }
 
   // Add an int32_t to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddInt32(int32_t v) { return constant_area_.AddInt32(v); }
+  size_t AddInt32(int32_t v) {
+    return constant_area_.AddInt32(v);
+  }
+
+  // Add an int32_t to the end of the constant area without looking for an
+  // existing match, returning the offset where the literal resides.
+  size_t AppendInt32(int32_t v) {
+    return constant_area_.AppendInt32(v);
+  }
 
   // Add an int64_t to the constant area, returning the offset into
   // the constant area where the literal resides.
-  int AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
+  size_t AddInt64(int64_t v) { return constant_area_.AddInt64(v); }
 
   // Add the contents of the constant area to the assembler buffer.
   void AddConstantArea();
@@ -826,6 +838,9 @@
   // Is the constant area empty? Return true if there are no literals in the constant area.
   bool IsConstantAreaEmpty() const { return constant_area_.GetSize() == 0; }
 
+  // Return the current size of the constant area.
+  size_t ConstantAreaSize() const { return constant_area_.GetSize(); }
+
   //
   // Heap poisoning.
   //