X86_64 QBE: use RIP addressing

Take advantage of RIP addressing in 64-bit mode to improve code
generation for accesses to the constant area as well as for packed
switches.  This avoids materializing the address of the start of the
method, which is only needed in 32-bit mode.
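
For reference, a minimal sketch (not part of this change) of how the
RIP-relative form is encoded; it mirrors the EmitModrmDisp() change
below and relies on the standard x86-64 rule that mod=00 with r/m=0b101
selects [RIP + disp32].  The helper name is illustrative only:

    #include <cstdint>
    #include <vector>

    // Sketch: emit the ModRM byte and disp32 for a RIP-relative operand.
    // There is no 8-bit displacement form and no SIB byte, which is why
    // the size estimate for kRIPReg operands always adds 4 bytes.
    void EmitRipModrmDisp(std::vector<uint8_t>* code, uint8_t reg_or_opcode, int32_t disp) {
      code->push_back((0x0 << 6) | (reg_or_opcode << 3) | 0x5);  // mod=00, r/m=101.
      for (int shift = 0; shift < 32; shift += 8) {
        code->push_back((disp >> shift) & 0xFF);                 // Little-endian disp32.
      }
    }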

To do this, we add a new 'pseudo-register', kRIPReg, to minimize the
changes needed to generate the new addressing mode.
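
The displacement itself is only known once instruction sizes are fixed,
so the assembler patches it during fixup.  A sketch of that computation,
matching the assembler change below (the CPU adds disp32 to the address
of the next instruction); the helper name is illustrative only:

    #include <cstdint>

    // Sketch: resolve a RIP-relative disp32 during assembly.  The
    // displacement is measured from the end of the current instruction,
    // so subtract the instruction's offset plus its size from the
    // target's offset.
    int32_t ResolveRipDisp32(uint32_t target_offset, uint32_t insn_offset, uint32_t insn_size) {
      return static_cast<int32_t>(target_offset - (insn_offset + insn_size));
    }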

Change-Id: Ia28c93f98b09939806d91ff0bd7392e58996d108
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h
index 3b3170e..a3fe8ad 100644
--- a/compiler/dex/compiler_enums.h
+++ b/compiler/dex/compiler_enums.h
@@ -606,7 +606,7 @@
 };
 std::ostream& operator<<(std::ostream& os, const SelectInstructionKind& kind);
 
-// LIR fixup kinds for Arm
+// LIR fixup kinds for Arm and X86.
 enum FixupKind {
   kFixupNone,
   kFixupLabel,             // For labels we just adjust the offset.
@@ -624,6 +624,7 @@
   kFixupMovImmHST,         // kThumb2MovImm16HST.
   kFixupAlign4,            // Align to 4-byte boundary.
   kFixupA53Erratum835769,  // Cortex A53 Erratum 835769.
+  kFixupSwitchTable,       // X86_64 packed switch table.
 };
 std::ostream& operator<<(std::ostream& os, const FixupKind& kind);
 
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 0021754..066041c 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -538,9 +538,12 @@
         bx_offset = tab_rec->anchor->offset + 4;
         break;
       case kX86:
-      case kX86_64:
         bx_offset = 0;
         break;
+      case kX86_64:
+        // RIP-relative: table entries are offsets from the switch table itself.
+        bx_offset = tab_rec->offset;
+        break;
       case kArm64:
       case kMips:
         bx_offset = tab_rec->anchor->offset;
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 84d68d2..ad2ed01 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -553,7 +553,7 @@
 }
 
 static bool NeedsRex(int32_t raw_reg) {
-  return RegStorage::RegNum(raw_reg) > 7;
+  return raw_reg != kRIPReg && RegStorage::RegNum(raw_reg) > 7;
 }
 
 static uint8_t LowRegisterBits(int32_t raw_reg) {
@@ -689,7 +689,13 @@
           entry->opcode != kX86Lea32RM && entry->opcode != kX86Lea64RM) {
         DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), UINT64_C(0)) << entry->name;
       }
-      size += IS_SIMM8(displacement) ? 1 : 4;
+      if (raw_base == kRIPReg) {
+        DCHECK(cu_->target64) <<
+          "Attempt to use a 64-bit RIP adressing with instruction " << entry->name;
+        size += 4;
+      } else {
+        size += IS_SIMM8(displacement) ? 1 : 4;
+      }
     }
   }
   size += entry->skeleton.immediate_bytes;
@@ -1022,14 +1028,24 @@
 
 void X86Mir2Lir::EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp) {
   DCHECK_LT(reg_or_opcode, 8);
-  DCHECK_LT(base, 8);
-  uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg_or_opcode << 3) | base;
-  code_buffer_.push_back(modrm);
-  if (base == rs_rX86_SP_32.GetRegNum()) {
-    // Special SIB for SP base
-    code_buffer_.push_back(0 << 6 | rs_rX86_SP_32.GetRegNum() << 3 | rs_rX86_SP_32.GetRegNum());
+  if (base == kRIPReg) {
+    // x86_64 RIP-relative addressing always uses a 32-bit displacement.
+    uint8_t modrm = (0x0 << 6) | (reg_or_opcode << 3) | 0x5;
+    code_buffer_.push_back(modrm);
+    code_buffer_.push_back(disp & 0xFF);
+    code_buffer_.push_back((disp >> 8) & 0xFF);
+    code_buffer_.push_back((disp >> 16) & 0xFF);
+    code_buffer_.push_back((disp >> 24) & 0xFF);
+  } else {
+    DCHECK_LT(base, 8);
+    uint8_t modrm = (ModrmForDisp(base, disp) << 6) | (reg_or_opcode << 3) | base;
+    code_buffer_.push_back(modrm);
+    if (base == rs_rX86_SP_32.GetRegNum()) {
+      // Special SIB for SP base
+      code_buffer_.push_back(0 << 6 | rs_rX86_SP_32.GetRegNum() << 3 | rs_rX86_SP_32.GetRegNum());
+    }
+    EmitDisp(base, disp);
   }
-  EmitDisp(base, disp);
 }
 
 void X86Mir2Lir::EmitModrmSibDisp(uint8_t reg_or_opcode, uint8_t base, uint8_t index,
@@ -1141,7 +1157,7 @@
   CheckValidByteRegister(entry, raw_reg);
   EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base);
   uint8_t low_reg = LowRegisterBits(raw_reg);
-  uint8_t low_base = LowRegisterBits(raw_base);
+  uint8_t low_base = (raw_base == kRIPReg) ? raw_base : LowRegisterBits(raw_base);
   EmitModrmDisp(low_reg, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
@@ -1758,12 +1774,29 @@
             LIR *target_lir = lir->target;
             DCHECK(target_lir != NULL);
             CodeOffset target = target_lir->offset;
-            lir->operands[2] = target;
-            int newSize = GetInsnSize(lir);
-            if (newSize != lir->flags.size) {
-              lir->flags.size = newSize;
-              res = kRetryAll;
+            // Handle 64-bit RIP addressing.
+            if (lir->operands[1] == kRIPReg) {
+              // The offset is relative to the next instruction.
+              lir->operands[2] = target - (lir->offset + lir->flags.size);
+            } else {
+              lir->operands[2] = target;
+              int newSize = GetInsnSize(lir);
+              if (newSize != lir->flags.size) {
+                lir->flags.size = newSize;
+                res = kRetryAll;
+              }
             }
+          } else if (lir->flags.fixup == kFixupSwitchTable) {
+            DCHECK(cu_->target64);
+            DCHECK_EQ(lir->opcode, kX86Lea64RM) << "Unknown instruction: "
+                << X86Mir2Lir::EncodingMap[lir->opcode].name;
+            DCHECK_EQ(lir->operands[1], static_cast<int>(kRIPReg));
+            // Grab the target offset from the saved data.
+            Mir2Lir::EmbeddedData* tab_rec =
+                reinterpret_cast<Mir2Lir::EmbeddedData*>(UnwrapPointer(lir->operands[4]));
+            CodeOffset target = tab_rec->offset;
+            // Handle 64-bit RIP addressing.
+            // The offset is relative to the next instruction.
+            lir->operands[2] = target - (lir->offset + lir->flags.size);
           }
           break;
       }
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index be10d93..544ac3b 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -142,25 +142,7 @@
 
   // Get the switch value
   rl_src = LoadValue(rl_src, kCoreReg);
-  // NewLIR0(kX86Bkpt);
 
-  // Materialize a pointer to the switch table
-  RegStorage start_of_method_reg;
-  if (base_of_code_ != nullptr) {
-    // We can use the saved value.
-    RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-    if (rl_method.wide) {
-      rl_method = LoadValueWide(rl_method, kCoreReg);
-    } else {
-      rl_method = LoadValue(rl_method, kCoreReg);
-    }
-    start_of_method_reg = rl_method.reg;
-    store_method_addr_used_ = true;
-  } else {
-    start_of_method_reg = AllocTempRef();
-    NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg());
-  }
-  DCHECK_EQ(start_of_method_reg.Is64Bit(), cu_->target64);
   int low_key = s4FromSwitchData(&table[2]);
   RegStorage keyReg;
   // Remove the bias, if necessary
@@ -170,19 +152,49 @@
     keyReg = AllocTemp();
     OpRegRegImm(kOpSub, keyReg, rl_src.reg, low_key);
   }
+
   // Bounds check - if < 0 or >= size continue following switch
   OpRegImm(kOpCmp, keyReg, size - 1);
   LIR* branch_over = OpCondBranch(kCondHi, NULL);
 
-  // Load the displacement from the switch table
-  RegStorage disp_reg = AllocTemp();
-  NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(),
-          2, WrapPointer(tab_rec));
-  // Add displacement to start of method
-  OpRegReg(kOpAdd, start_of_method_reg, cu_->target64 ? As64BitReg(disp_reg) : disp_reg);
+  RegStorage addr_for_jump;
+  if (cu_->target64) {
+    RegStorage table_base = AllocTempWide();
+    // Load the address of the table into table_base.
+    LIR* lea = RawLIR(current_dalvik_offset_, kX86Lea64RM, table_base.GetReg(), kRIPReg,
+                      256, 0, WrapPointer(tab_rec));
+    lea->flags.fixup = kFixupSwitchTable;
+    AppendLIR(lea);
+
+    // Load the jump offset out of the table.
+    addr_for_jump = AllocTempWide();
+    NewLIR5(kX86MovsxdRA, addr_for_jump.GetReg(), table_base.GetReg(), keyReg.GetReg(), 2, 0);
+
+    // Add the table base to the offset to form the jump address.
+    OpRegReg(kOpAdd, addr_for_jump, table_base);
+  } else {
+    // Materialize a pointer to the switch table.
+    RegStorage start_of_method_reg;
+    if (base_of_code_ != nullptr) {
+      // We can use the saved value.
+      RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+      rl_method = LoadValue(rl_method, kCoreReg);
+      start_of_method_reg = rl_method.reg;
+      store_method_addr_used_ = true;
+    } else {
+      start_of_method_reg = AllocTempRef();
+      NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg());
+    }
+    // Load the displacement from the switch table.
+    addr_for_jump = AllocTemp();
+    NewLIR5(kX86PcRelLoadRA, addr_for_jump.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(),
+            2, WrapPointer(tab_rec));
+    // Add displacement to start of method.
+    OpRegReg(kOpAdd, addr_for_jump, start_of_method_reg);
+  }
+
   // ..and go!
-  LIR* switch_branch = NewLIR1(kX86JmpR, start_of_method_reg.GetReg());
-  tab_rec->anchor = switch_branch;
+  tab_rec->anchor = NewLIR1(kX86JmpR, addr_for_jump.GetReg());
 
   /* branch_over target here */
   LIR* target = NewLIR0(kPseudoTargetLabel);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 80cdc83..85ab92b 100755
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -1289,6 +1289,18 @@
 }
 
 LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
+  if (cu_->target64) {
+    // We can do this directly using RIP addressing.
+    // We don't know the proper offset for the value, so pick one that will
+    // force a 4-byte offset.  We will fix this up in the assembler later to
+    // have the right value.
+    ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
+    LIR* res = NewLIR3(kX86Mov32RM, reg.GetReg(), kRIPReg, 256);
+    res->target = target;
+    res->flags.fixup = kFixupLoad;
+    return res;
+  }
+
   CHECK(base_of_code_ != nullptr);
 
   // Address the start of the method
@@ -1309,7 +1321,6 @@
                     0, 0, target);
   res->target = target;
   res->flags.fixup = kFixupLoad;
-  store_method_addr_used_ = true;
   return res;
 }
 
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index 998aeff..ae80e9f 100755
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -206,7 +206,7 @@
   RegStorage::InvalidReg(),  // kSelf - Thread pointer.
   RegStorage::InvalidReg(),  // kSuspend - Used to reduce suspend checks for some targets.
   RegStorage::InvalidReg(),  // kLr - no register as the return address is pushed on entry.
-  RegStorage::InvalidReg(),  // kPc - TODO: RIP based addressing.
+  RegStorage(kRIPReg),       // kPc
   rs_rX86_SP_32,             // kSp
   rs_rDI,                    // kArg0
   rs_rSI,                    // kArg1
@@ -662,6 +662,12 @@
     xp_reg_info->SetIsTemp(true);
   }
 
+  // Special handling for x86_64 RIP addressing.
+  if (cu_->target64) {
+    RegisterInfo* info = new (arena_) RegisterInfo(RegStorage(kRIPReg), kEncodeNone);
+    reginfo_map_[kRIPReg] = info;
+  }
+
   // Alias single precision xmm to double xmms.
   // TODO: as needed, add larger vector sizes - alias all to the largest.
   for (RegisterInfo* info : reg_pool_->sp_regs_) {
@@ -1608,9 +1614,6 @@
 }
 
 void X86Mir2Lir::AppendOpcodeWithConst(X86OpCode opcode, int reg, MIR* mir) {
-  // The literal pool needs position independent logic.
-  store_method_addr_used_ = true;
-
   // To deal with correct memory ordering, reverse order of constants.
   int32_t constants[4];
   constants[3] = mir->dalvikInsn.arg[0];
@@ -1624,20 +1627,28 @@
     data_target = AddVectorLiteral(constants);
   }
 
-  // Address the start of the method.
-  RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-  if (rl_method.wide) {
-    rl_method = LoadValueWide(rl_method, kCoreReg);
-  } else {
-    rl_method = LoadValue(rl_method, kCoreReg);
-  }
-
   // Load the proper value from the literal area.
   // We don't know the proper offset for the value, so pick one that will force
-  // 4 byte offset.  We will fix this up in the assembler later to have the right
-  // value.
+  // 4 byte offset.  We will fix this up in the assembler later to have the
+  // right value.
+  LIR* load;
   ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-  LIR *load = NewLIR3(opcode, reg, rl_method.reg.GetReg(), 256 /* bogus */);
+  if (cu_->target64) {
+    load = NewLIR3(opcode, reg, kRIPReg, 256 /* bogus */);
+  } else {
+    // Address the start of the method.
+    RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+    if (rl_method.wide) {
+      rl_method = LoadValueWide(rl_method, kCoreReg);
+    } else {
+      rl_method = LoadValue(rl_method, kCoreReg);
+    }
+
+    load = NewLIR3(opcode, reg, rl_method.reg.GetReg(), 256 /* bogus */);
+
+    // The literal pool needs position independent logic.
+    store_method_addr_used_ = true;
+  }
   load->flags.fixup = kFixupLoad;
   load->target = data_target;
 }
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index ad3222c..3b58698 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -570,32 +570,36 @@
     if (is_fp) {
       DCHECK(r_dest.IsDouble());
       if (value == 0) {
-        return NewLIR2(kX86XorpsRR, low_reg_val, low_reg_val);
-      } else if (base_of_code_ != nullptr) {
+        return NewLIR2(kX86XorpdRR, low_reg_val, low_reg_val);
+      } else if (base_of_code_ != nullptr || cu_->target64) {
         // We will load the value from the literal area.
         LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
         if (data_target == NULL) {
           data_target = AddWideData(&literal_list_, val_lo, val_hi);
         }
 
-        // Address the start of the method
-        RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
-        if (rl_method.wide) {
-          rl_method = LoadValueWide(rl_method, kCoreReg);
-        } else {
-          rl_method = LoadValue(rl_method, kCoreReg);
-        }
-
         // Load the proper value from the literal area.
-        // We don't know the proper offset for the value, so pick one that will force
-        // 4 byte offset.  We will fix this up in the assembler later to have the right
-        // value.
+        // We don't know the proper offset for the value, so pick one that
+        // will force 4 byte offset.  We will fix this up in the assembler
+        // later to have the right value.
         ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
-        res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val),
-                           kDouble, kNotVolatile);
+        if (cu_->target64) {
+          res = NewLIR3(kX86MovsdRM, low_reg_val, kRIPReg, 256 /* bogus */);
+        } else {
+          // Address the start of the method.
+          RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low);
+          if (rl_method.wide) {
+            rl_method = LoadValueWide(rl_method, kCoreReg);
+          } else {
+            rl_method = LoadValue(rl_method, kCoreReg);
+          }
+
+          res = LoadBaseDisp(rl_method.reg, 256 /* bogus */, RegStorage::FloatSolo64(low_reg_val),
+                             kDouble, kNotVolatile);
+          store_method_addr_used_ = true;
+        }
         res->target = data_target;
         res->flags.fixup = kFixupLoad;
-        store_method_addr_used_ = true;
       } else {
         if (r_dest.IsPair()) {
           if (val_lo == 0) {
@@ -960,12 +964,14 @@
     curr_bb = iter.Next();
   }
 
-  // Did we need a pointer to the method code?
+  // Did we need a pointer to the method code?  Not in 64-bit mode.
+  base_of_code_ = nullptr;
+
+  // store_method_addr_ must be false for x86_64, since RIP addressing is used.
+  CHECK(!(cu_->target64 && store_method_addr_));
   if (store_method_addr_) {
-    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, cu_->target64 == true);
+    base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempBackend, false);
     DCHECK(base_of_code_ != nullptr);
-  } else {
-    base_of_code_ = nullptr;
   }
 }
 
@@ -994,19 +1000,22 @@
       AnalyzeFPInstruction(opcode, bb, mir);
       break;
     case kMirOpConstVector:
-      store_method_addr_ = true;
+      if (!cu_->target64) {
+        store_method_addr_ = true;
+      }
       break;
     case kMirOpPackedMultiply:
     case kMirOpPackedShiftLeft:
     case kMirOpPackedSignedShiftRight:
-    case kMirOpPackedUnsignedShiftRight: {
-      // Byte emulation requires constants from the literal pool.
-      OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
-      if (opsize == kSignedByte || opsize == kUnsignedByte) {
-        store_method_addr_ = true;
+    case kMirOpPackedUnsignedShiftRight:
+      if (!cu_->target64) {
+        // Byte emulation requires constants from the literal pool.
+        OpSize opsize = static_cast<OpSize>(mir->dalvikInsn.vC >> 16);
+        if (opsize == kSignedByte || opsize == kUnsignedByte) {
+          store_method_addr_ = true;
+        }
       }
       break;
-    }
     default:
       // Ignore the rest.
       break;
@@ -1016,6 +1025,7 @@
 void X86Mir2Lir::AnalyzeMIR(int opcode, BasicBlock* bb, MIR* mir) {
   // Looking for
   // - Do we need a pointer to the code (used for packed switches and double lits)?
+  //   64-bit mode uses RIP addressing instead.
 
   switch (opcode) {
     // Instructions referencing doubles.
@@ -1038,7 +1048,9 @@
     // Packed switches and array fills need a pointer to the base of the method.
     case Instruction::FILL_ARRAY_DATA:
     case Instruction::PACKED_SWITCH:
-      store_method_addr_ = true;
+      if (!cu_->target64) {
+        store_method_addr_ = true;
+      }
       break;
     case Instruction::INVOKE_STATIC:
     case Instruction::INVOKE_STATIC_RANGE:
@@ -1115,7 +1127,8 @@
 
 void X86Mir2Lir::AnalyzeInvokeStatic(int opcode, BasicBlock* bb, MIR* mir) {
   UNUSED(opcode, bb);
-  // For now this is only actual for x86-32.
+
+  // 64-bit RIP addressing doesn't need store_method_addr_ set.
   if (cu_->target64) {
     return;
   }
diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h
index 76a67c4..3e0a852 100644
--- a/compiler/dex/quick/x86/x86_lir.h
+++ b/compiler/dex/quick/x86/x86_lir.h
@@ -217,6 +217,9 @@
   xr14 = RegStorage::k128BitSolo | 14,
   xr15 = RegStorage::k128BitSolo | 15,
 
+  // Special value for 64-bit RIP addressing.
+  kRIPReg = 255,
+
   // TODO: as needed, add 256, 512 and 1024-bit xmm views.
 };