x86_64: Disable all optimizations and fix bugs

This disables all optimizations for the x86_64 backend until it is
mature enough, and fixes several bugs so that the ART tests still pass:

- Emit an empty REX prefix where needed so that SPL/BPL/SIL/DIL can be
  addressed as byte registers (new modrm_is_reg_reg flag in the prefix
  emitters), and encode the RegCond register as REX_B.
- Pass wide runtime-helper arguments in a single 64-bit register on
  x86_64 instead of a register pair, and fix the FP/core argument
  register selection for x86_64.
- Route byte-register checks through a new IsByteRegister() helper.
- Use a 64-bit temporary when addressing the stack on 64-bit targets,
  add the 64-bit kOpCmp immediate encoding, and pick the LEA opcode
  from the destination register width.
- Annotate high-word Dalvik register accesses only for 32-bit code and
  update the location of rl_src in GenIntToLong.
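
For reference, the byte-register REX rule that the prefix change relies
on can be sketched as follows. This is illustrative only, not the ART
emitter; the helper name and the plain register numbers are assumptions
made for the sketch:

    #include <cstdint>

    // Register numbers follow the hardware encoding: 0-3 = AL/CL/DL/BL,
    // 4-7 = SPL/BPL/SIL/DIL (reachable only with a REX prefix; without
    // one, these encodings select AH/CH/DH/BH), 8-15 = R8B-R15B.
    uint8_t ByteRegRexPrefix(int reg_num, bool reg_in_modrm_rm) {
      uint8_t rex = 0;
      if (reg_num >= 8) {
        rex |= reg_in_modrm_rm ? 0x41 : 0x44;  // REX.B or REX.R
      } else if (reg_num >= 4) {
        rex |= 0x40;  // empty REX: selects SPL/BPL/SIL/DIL, not AH/CH/DH/BH
      }
      return rex;  // 0 means no REX byte is emitted at all
    }

The ">= 4" comparison in EmitPrefix corresponds to the second branch
above; the previous "> 4" comparison skipped register number 4 (SPL).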

Change-Id: I43217378d6889bb04f4d064f8d53cb3ff4c20aa0
Signed-off-by: Chao-ying Fu <chao-ying.fu@intel.com>
Signed-off-by: Serguei Katkov <serguei.i.katkov@intel.com>
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc
index d453797..035bd66 100644
--- a/compiler/dex/frontend.cc
+++ b/compiler/dex/frontend.cc
@@ -885,15 +885,13 @@
         (1 << kBBOpt) |
         (1 << kMatch) |
         (1 << kPromoteCompilerTemps));
-  }
-
-  if (cu.instruction_set == kArm64 || cu.instruction_set == kX86_64) {
-    // TODO(Arm64): enable optimizations once backend is mature enough.
+  } else if (cu.instruction_set == kX86_64) {
     // TODO(X86_64): enable optimizations once backend is mature enough.
     cu.disable_opt = ~(uint32_t)0;
-    if (cu.instruction_set == kArm64) {
-      cu.enable_debug |= (1 << kDebugCodegenDump);
-    }
+  } else if (cu.instruction_set == kArm64) {
+    // TODO(Arm64): enable optimizations once backend is mature enough.
+    cu.disable_opt = ~(uint32_t)0;
+    cu.enable_debug |= (1 << kDebugCodegenDump);
   }
 
   cu.StartTimingSplit("BuildMIRGraph");
diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc
index 8f6d716..f9081ce 100644
--- a/compiler/dex/quick/gen_common.cc
+++ b/compiler/dex/quick/gen_common.cc
@@ -456,7 +456,8 @@
      * this is an uncommon operation and isn't especially performance
      * critical.
      */
-    RegStorage r_src = AllocTemp();
+    // This addresses the stack, which may lie outside the low 4GB of the address space.
+    RegStorage r_src = cu_->target64 ? AllocTempWide() : AllocTemp();
     RegStorage r_dst = AllocTemp();
     RegStorage r_idx = AllocTemp();
     RegStorage r_val;
diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc
index 2af847c..a90a06e 100644
--- a/compiler/dex/quick/gen_invoke.cc
+++ b/compiler/dex/quick/gen_invoke.cc
@@ -155,7 +155,12 @@
   if (arg0.wide == 0) {
     LoadValueDirectFixed(arg0, TargetReg(kArg0));
   } else {
-    RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+    RegStorage r_tmp;
+    if (cu_->instruction_set == kX86_64) {
+      r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg());
+    } else {
+      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1));
+    }
     LoadValueDirectWideFixed(arg0, r_tmp);
   }
   ClobberCallerSave();
@@ -181,7 +186,12 @@
   if (arg1.wide == 0) {
     LoadValueDirectFixed(arg1, TargetReg(kArg1));
   } else {
-    RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+    RegStorage r_tmp;
+    if (cu_->instruction_set == kX86_64) {
+      r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg());
+    } else {
+      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2));
+    }
     LoadValueDirectWideFixed(arg1, r_tmp);
   }
   LoadConstant(TargetReg(kArg0), arg0);
@@ -279,6 +289,12 @@
         LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg1));
       } else if (cu_->instruction_set == kArm64) {
         LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1));
+      } else if (cu_->instruction_set == kX86_64) {
+        if (arg0.fp) {
+          LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg0));
+        } else {
+          LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg0) : TargetReg(kArg1));
+        }
       } else {
         LoadValueDirectFixed(arg1, TargetReg(kArg1));
       }
@@ -423,7 +439,12 @@
   if (arg2.wide == 0) {
     LoadValueDirectFixed(arg2, TargetReg(kArg2));
   } else {
-    RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+    RegStorage r_tmp;
+    if (cu_->instruction_set == kX86_64) {
+      r_tmp = RegStorage::Solo64(TargetReg(kArg2).GetReg());
+    } else {
+      r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3));
+    }
     LoadValueDirectWideFixed(arg2, r_tmp);
   }
   LoadConstant(TargetReg(kArg0), arg0);
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index d37ee67..fb6bd94 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -685,6 +685,7 @@
       return ComputeSize(entry, lir->operands[4], lir->operands[1], lir->operands[0],
                          true, false, false, lir->operands[3]);
     case kRegCond:  // lir operands - 0: reg, 1: cond
+      // Note: the reg is passed as REX_R here but emitted as REX_B; the prefix size is the same.
       return ComputeSize(entry, lir->operands[0], NO_REG, NO_REG,
                          false, entry->skeleton.r8_form, false, 0);
     case kMemCond:  // lir operands - 0: base, 1: disp, 2: cond
@@ -802,7 +803,7 @@
 
 void X86Mir2Lir::EmitPrefix(const X86EncodingMap* entry,
                             int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b,
-                            bool r8_form) {
+                            bool r8_form, bool modrm_is_reg_reg) {
   // REX.WRXB
   // W - 64-bit operand
   // R - MODRM.reg
@@ -813,8 +814,13 @@
   bool x = NeedsRex(raw_reg_x);
   bool b = NeedsRex(raw_reg_b);
   uint8_t rex = 0;
-  if (r8_form && RegStorage::RegNum(raw_reg_r) > 4) {
-    rex |= 0x40;  // REX.0000
+  if (r8_form) {
+    // Emit an empty REX (0x40) so byte encodings 4-7 select SPL/BPL/SIL/DIL, not AH/CH/DH/BH.
+    if (RegStorage::RegNum(raw_reg_r) >= 4) {
+      rex |= 0x40;  // REX.0000
+    } else if (modrm_is_reg_reg && RegStorage::RegNum(raw_reg_b) >= 4) {
+      rex |= 0x40;  // REX.0000
+    }
   }
   if (w) {
     rex |= 0x48;  // REX.W000
@@ -876,8 +882,8 @@
 
 void X86Mir2Lir::EmitPrefixAndOpcode(const X86EncodingMap* entry,
                                      int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b,
-                                     bool r8_form) {
-  EmitPrefix(entry, raw_reg_r, raw_reg_x, raw_reg_b, r8_form);
+                                     bool r8_form, bool modrm_is_reg_reg) {
+  EmitPrefix(entry, raw_reg_r, raw_reg_x, raw_reg_b, r8_form, modrm_is_reg_reg);
   EmitOpcode(entry);
 }
 
@@ -971,7 +977,7 @@
 
 void X86Mir2Lir::EmitNullary(const X86EncodingMap* entry) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false, false);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
@@ -979,7 +985,7 @@
 
 void X86Mir2Lir::EmitOpRegOpcode(const X86EncodingMap* entry, int32_t raw_reg) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg, false);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg, false, false);
   // There's no 3-byte instruction with +rd
   DCHECK(entry->skeleton.opcode != 0x0F ||
          (entry->skeleton.extra_opcode1 != 0x38 && entry->skeleton.extra_opcode1 != 0x3A));
@@ -992,7 +998,7 @@
 
 void X86Mir2Lir::EmitOpReg(const X86EncodingMap* entry, int32_t raw_reg) {
   CheckValidByteRegister(entry, raw_reg);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form, true);
   uint8_t low_reg = LowRegisterBits(raw_reg);
   uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
   code_buffer_.push_back(modrm);
@@ -1002,7 +1008,7 @@
 
 void X86Mir2Lir::EmitOpMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_base, false);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_base, false, false);
   code_buffer_.push_back(entry->skeleton.opcode);
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
@@ -1016,7 +1022,7 @@
 void X86Mir2Lir::EmitOpArray(const X86EncodingMap* entry, int32_t raw_base, int32_t raw_index,
                              int scale, int32_t disp) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base, false);
+  EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base, false, false);
   uint8_t low_index = LowRegisterBits(raw_index);
   uint8_t low_base = LowRegisterBits(raw_base);
   EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp);
@@ -1027,7 +1033,7 @@
 void X86Mir2Lir::EmitMemReg(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
                             int32_t raw_reg) {
   CheckValidByteRegister(entry, raw_reg);
-  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base, entry->skeleton.r8_form, false);
   uint8_t low_reg = LowRegisterBits(raw_reg);
   uint8_t low_base = LowRegisterBits(raw_base);
   EmitModrmDisp(low_reg, low_base, disp);
@@ -1045,7 +1051,7 @@
 void X86Mir2Lir::EmitRegArray(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_base,
                               int32_t raw_index, int scale, int32_t disp) {
   CheckValidByteRegister(entry, raw_reg);
-  EmitPrefixAndOpcode(entry, raw_reg, raw_index, raw_base, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, raw_reg, raw_index, raw_base, entry->skeleton.r8_form, false);
   uint8_t low_reg = LowRegisterBits(raw_reg);
   uint8_t low_index = LowRegisterBits(raw_index);
   uint8_t low_base = LowRegisterBits(raw_base);
@@ -1064,7 +1070,7 @@
 void X86Mir2Lir::EmitMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
                             int32_t imm) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base, false);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base, false, false);
   uint8_t low_base = LowRegisterBits(raw_base);
   EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
@@ -1075,7 +1081,7 @@
                               int32_t raw_base, int32_t raw_index, int scale, int32_t disp,
                               int32_t imm) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base, false);
+  EmitPrefixAndOpcode(entry, NO_REG, raw_index, raw_base, false, false);
   uint8_t low_index = LowRegisterBits(raw_index);
   uint8_t low_base = LowRegisterBits(raw_base);
   EmitModrmSibDisp(entry->skeleton.modrm_opcode, low_base, low_index, scale, disp);
@@ -1086,7 +1092,7 @@
 void X86Mir2Lir::EmitRegThread(const X86EncodingMap* entry, int32_t raw_reg, int32_t disp) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
   DCHECK_NE(entry->skeleton.prefix1, 0);
-  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, NO_REG, false);
+  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, NO_REG, false, false);
   uint8_t low_reg = LowRegisterBits(raw_reg);
   EmitModrmThread(low_reg);
   code_buffer_.push_back(disp & 0xFF);
@@ -1101,7 +1107,7 @@
 void X86Mir2Lir::EmitRegReg(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2) {
   CheckValidByteRegister(entry, raw_reg1);
   CheckValidByteRegister(entry, raw_reg2);
-  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, entry->skeleton.r8_form, true);
   uint8_t low_reg1 = LowRegisterBits(raw_reg1);
   uint8_t low_reg2 = LowRegisterBits(raw_reg2);
   uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2;
@@ -1114,7 +1120,7 @@
 void X86Mir2Lir::EmitRegRegImm(const X86EncodingMap* entry, int32_t raw_reg1, int32_t raw_reg2,
                                int32_t imm) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, false);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, false, true);
   uint8_t low_reg1 = LowRegisterBits(raw_reg1);
   uint8_t low_reg2 = LowRegisterBits(raw_reg2);
   uint8_t modrm = (3 << 6) | (low_reg1 << 3) | low_reg2;
@@ -1128,7 +1134,7 @@
                                int32_t raw_reg, int32_t raw_base, int disp, int32_t imm) {
   DCHECK(!RegStorage::IsFloat(raw_reg));
   CheckValidByteRegister(entry, raw_reg);
-  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, raw_reg, NO_REG, raw_base, entry->skeleton.r8_form, false);
   uint8_t low_reg = LowRegisterBits(raw_reg);
   uint8_t low_base = LowRegisterBits(raw_base);
   EmitModrmDisp(low_reg, low_base, disp);
@@ -1145,7 +1151,7 @@
 
 void X86Mir2Lir::EmitRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) {
   CheckValidByteRegister(entry, raw_reg);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form, true);
   if (RegStorage::RegNum(raw_reg) == rs_rAX.GetRegNum() && entry->skeleton.ax_opcode != 0) {
     code_buffer_.push_back(entry->skeleton.ax_opcode);
   } else {
@@ -1158,7 +1164,8 @@
 }
 
 void X86Mir2Lir::EmitThreadImm(const X86EncodingMap* entry, int32_t disp, int32_t imm) {
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false);
+  DCHECK_EQ(false, entry->skeleton.r8_form);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false, false);
   EmitModrmThread(entry->skeleton.modrm_opcode);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1170,7 +1177,7 @@
 
 void X86Mir2Lir::EmitMovRegImm(const X86EncodingMap* entry, int32_t raw_reg, int64_t imm) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, false);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, false, true);
   uint8_t low_reg = LowRegisterBits(raw_reg);
   code_buffer_.push_back(0xB8 + low_reg);
   switch (entry->skeleton.immediate_bytes) {
@@ -1198,7 +1205,7 @@
 
 void X86Mir2Lir::EmitShiftRegImm(const X86EncodingMap* entry, int32_t raw_reg, int32_t imm) {
   CheckValidByteRegister(entry, raw_reg);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form, true);
   if (imm != 1) {
     code_buffer_.push_back(entry->skeleton.opcode);
   } else {
@@ -1221,7 +1228,7 @@
 void X86Mir2Lir::EmitShiftRegCl(const X86EncodingMap* entry, int32_t raw_reg, int32_t raw_cl) {
   CheckValidByteRegister(entry, raw_reg);
   DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl));
-  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form, true);
   code_buffer_.push_back(entry->skeleton.opcode);
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
@@ -1237,7 +1244,7 @@
                                 int32_t displacement, int32_t raw_cl) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
   DCHECK_EQ(rs_rCX.GetRegNum(), RegStorage::RegNum(raw_cl));
-  EmitPrefix(entry, NO_REG, NO_REG, raw_base, false);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_base, false, false);
   code_buffer_.push_back(entry->skeleton.opcode);
   DCHECK_NE(0x0F, entry->skeleton.opcode);
   DCHECK_EQ(0, entry->skeleton.extra_opcode1);
@@ -1251,7 +1258,7 @@
 void X86Mir2Lir::EmitShiftMemImm(const X86EncodingMap* entry, int32_t raw_base, int32_t disp,
                                  int32_t imm) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefix(entry, NO_REG, NO_REG, raw_base, false);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_base, false, false);
   if (imm != 1) {
     code_buffer_.push_back(entry->skeleton.opcode);
   } else {
@@ -1272,7 +1279,7 @@
 
 void X86Mir2Lir::EmitRegCond(const X86EncodingMap* entry, int32_t raw_reg, int32_t cc) {
   CheckValidByteRegister(entry, raw_reg);
-  EmitPrefix(entry, raw_reg, NO_REG, NO_REG, entry->skeleton.r8_form);
+  EmitPrefix(entry, NO_REG, NO_REG, raw_reg, entry->skeleton.r8_form, true);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0x0F, entry->skeleton.opcode);
   code_buffer_.push_back(0x0F);
@@ -1315,7 +1322,7 @@
                                 int32_t cc) {
   // Generate prefix and opcode without the condition.
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, false);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_reg2, false, true);
 
   // Now add the condition. The last byte of opcode is the one that receives it.
   DCHECK_GE(cc, 0);
@@ -1341,7 +1348,7 @@
                                 int32_t disp, int32_t cc) {
   // Generate prefix and opcode without the condition.
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_base, false);
+  EmitPrefixAndOpcode(entry, raw_reg1, NO_REG, raw_base, false, false);
 
   // Now add the condition. The last byte of opcode is the one that receives it.
   DCHECK_GE(cc, 0);
@@ -1376,7 +1383,7 @@
   } else {
     DCHECK(entry->opcode == kX86JmpR);
     DCHECK_EQ(false, entry->skeleton.r8_form);
-    EmitPrefix(entry, NO_REG, NO_REG, rel, false);
+    EmitPrefix(entry, NO_REG, NO_REG, rel, false, true);
     code_buffer_.push_back(entry->skeleton.opcode);
     uint8_t low_reg = LowRegisterBits(rel);
     uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | low_reg;
@@ -1404,7 +1411,7 @@
 
 void X86Mir2Lir::EmitCallMem(const X86EncodingMap* entry, int32_t raw_base, int32_t disp) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base, false);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, raw_base, false, false);
   uint8_t low_base = LowRegisterBits(raw_base);
   EmitModrmDisp(entry->skeleton.modrm_opcode, low_base, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
@@ -1413,7 +1420,7 @@
 
 void X86Mir2Lir::EmitCallImmediate(const X86EncodingMap* entry, int32_t disp) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false, false);
   DCHECK_EQ(4, entry->skeleton.immediate_bytes);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1425,7 +1432,7 @@
 void X86Mir2Lir::EmitCallThread(const X86EncodingMap* entry, int32_t disp) {
   DCHECK_EQ(false, entry->skeleton.r8_form);
   DCHECK_NE(entry->skeleton.prefix1, 0);
-  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false);
+  EmitPrefixAndOpcode(entry, NO_REG, NO_REG, NO_REG, false, false);
   EmitModrmThread(entry->skeleton.modrm_opcode);
   code_buffer_.push_back(disp & 0xFF);
   code_buffer_.push_back((disp >> 8) & 0xFF);
@@ -1450,7 +1457,7 @@
   }
   if (entry->opcode == kX86PcRelLoadRA) {
     DCHECK_EQ(false, entry->skeleton.r8_form);
-    EmitPrefix(entry, raw_reg, raw_index, raw_base_or_table, false);
+    EmitPrefix(entry, raw_reg, raw_index, raw_base_or_table, false, false);
     code_buffer_.push_back(entry->skeleton.opcode);
     DCHECK_NE(0x0F, entry->skeleton.opcode);
     DCHECK_EQ(0, entry->skeleton.extra_opcode1);
@@ -1479,7 +1486,7 @@
 void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, int32_t raw_reg, int32_t offset) {
   DCHECK_EQ(entry->opcode, kX86StartOfMethod) << entry->name;
   DCHECK_EQ(false, entry->skeleton.r8_form);
-  EmitPrefix(entry, raw_reg, NO_REG, NO_REG, false);
+  EmitPrefix(entry, raw_reg, NO_REG, NO_REG, false, false);
   code_buffer_.push_back(0xE8);  // call +0
   code_buffer_.push_back(0);
   code_buffer_.push_back(0);
diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h
index 6ae553d..a92608f 100644
--- a/compiler/dex/quick/x86/codegen_x86.h
+++ b/compiler/dex/quick/x86/codegen_x86.h
@@ -397,10 +397,11 @@
   void CheckValidByteRegister(const X86EncodingMap* entry, int32_t raw_reg);
   void EmitPrefix(const X86EncodingMap* entry,
                   int32_t raw_reg_r, int32_t raw_reg_x, int32_t raw_reg_b,
-                  bool r8_form);
+                  bool r8_form_r, bool modrm_is_reg_reg);
   void EmitOpcode(const X86EncodingMap* entry);
   void EmitPrefixAndOpcode(const X86EncodingMap* entry,
-                           int32_t reg_r, int32_t reg_x, int32_t reg_b, bool r8_form);
+                           int32_t reg_r, int32_t reg_x, int32_t reg_b, bool r8_form,
+                           bool modrm_is_reg_reg);
   void EmitDisp(uint8_t base, int32_t disp);
   void EmitModrmThread(uint8_t reg_or_opcode);
   void EmitModrmDisp(uint8_t reg_or_opcode, uint8_t base, int32_t disp);
@@ -464,6 +465,12 @@
   virtual RegStorage AllocateByteRegister();
 
   /*
+   * @brief Check if a register is byte addressable.
+   * @returns true if the register can be used as an 8-bit operand, false otherwise.
+   */
+  bool IsByteRegister(RegStorage reg);
+
+  /*
    * @brief generate inline code for fast case of Strng.indexOf.
    * @param info Call parameters
    * @param zero_based 'true' if the index into the string is 0.
diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc
index ced6400..f6f0617 100644
--- a/compiler/dex/quick/x86/fp_x86.cc
+++ b/compiler/dex/quick/x86/fp_x86.cc
@@ -381,7 +381,7 @@
     branch = NewLIR2(kX86Jcc8, 0, kX86CondPE);
   }
   // If the result reg can't be byte accessed, use a jump and move instead of a set.
-  if (rl_result.reg.GetReg() >= rs_rX86_SP.GetReg()) {
+  if (!IsByteRegister(rl_result.reg)) {
     LIR* branch2 = NULL;
     if (unordered_gt) {
       branch2 = NewLIR2(kX86Jcc8, 0, kX86CondA);
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 4a77df2..05b5e43 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -882,10 +882,9 @@
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage result_reg = rl_result.reg;
 
-  // SETcc only works with EAX..EDX.
-  if (result_reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
+  // For 32-bit, SETcc only works with EAX..EDX.
+  if (!IsByteRegister(result_reg)) {
     result_reg = AllocateByteRegister();
-    DCHECK_LT(result_reg.GetRegNum(), rs_rX86_SP.GetRegNum());
   }
   NewLIR2(kX86Set8R, result_reg.GetReg(), kX86CondZ);
   NewLIR2(kX86Movzx8RR, rl_result.reg.GetReg(), result_reg.GetReg());
@@ -1386,9 +1385,9 @@
   if (!Gen64Bit()) {
     x86op = GetOpcode(op, rl_dest, rl_src, true);
     lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET);
+    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is64bit */);
   }
-  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
-                          true /* is_load */, true /* is64bit */);
 }
 
 void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) {
@@ -1423,11 +1422,11 @@
   if (!Gen64Bit()) {
     x86op = GetOpcode(op, rl_dest, rl_src, true);
     lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg());
+    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                            true /* is_load */, true /* is64bit */);
+    AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
+                            false /* is_load */, true /* is64bit */);
   }
-  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
-                          true /* is_load */, true /* is64bit */);
-  AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2,
-                          false /* is_load */, true /* is64bit */);
   FreeTemp(rl_src.reg);
 }
 
@@ -1760,8 +1759,7 @@
     rl_src = LoadValue(rl_src, reg_class);
   }
   // If the src reg can't be byte accessed, move it to a temp first.
-  if ((size == kSignedByte || size == kUnsignedByte) &&
-      rl_src.reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
+  if ((size == kSignedByte || size == kUnsignedByte) && !IsByteRegister(rl_src.reg)) {
     RegStorage temp = AllocTemp();
     OpRegCopy(temp, rl_src.reg);
     StoreBaseIndexedDisp(rl_array.reg, rl_index.reg, scale, data_offset, temp, size);
@@ -2240,10 +2238,9 @@
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   RegStorage result_reg = rl_result.reg;
 
-  // SETcc only works with EAX..EDX.
-  if (result_reg == object.reg || result_reg.GetRegNum() >= rs_rX86_SP.GetRegNum()) {
+  // For 32-bit, SETcc only works with EAX..EDX.
+  if (result_reg == object.reg || !IsByteRegister(result_reg)) {
     result_reg = AllocateByteRegister();
-    DCHECK_LT(result_reg.GetRegNum(), rs_rX86_SP.GetRegNum());
   }
 
   // Assume that there is no match.
@@ -2355,7 +2352,7 @@
   /* kArg0 is ref, kArg2 is class. If ref==null, use directly as bool result. */
   RegLocation rl_result = GetReturn(kRefReg);
 
-  // SETcc only works with EAX..EDX.
+  // For 32-bit, SETcc only works with EAX..EDX.
   DCHECK_LT(rl_result.reg.GetRegNum(), 4);
 
   // Is the class NULL?
@@ -2655,6 +2652,7 @@
     Mir2Lir::GenIntToLong(rl_dest, rl_src);
     return;
   }
+  rl_src = UpdateLoc(rl_src);
   RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
   if (rl_src.location == kLocPhysReg) {
     NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg());
diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc
index d1ba239..483d8cf 100644
--- a/compiler/dex/quick/x86/target_x86.cc
+++ b/compiler/dex/quick/x86/target_x86.cc
@@ -424,7 +424,15 @@
 }
 
 RegStorage X86Mir2Lir::AllocateByteRegister() {
-  return AllocTypedTemp(false, kCoreReg);
+  RegStorage reg = AllocTypedTemp(false, kCoreReg);
+  if (!Gen64Bit()) {
+    DCHECK_LT(reg.GetRegNum(), rs_rX86_SP.GetRegNum());
+  }
+  return reg;
+}
+
+bool X86Mir2Lir::IsByteRegister(RegStorage reg) {
+  return Gen64Bit() || reg.GetRegNum() < rs_rX86_SP.GetRegNum();
 }
 
 /* Clobber all regs that might be used by an external C call */
diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc
index c72e8cd..b93e3e8 100644
--- a/compiler/dex/quick/x86/utility_x86.cc
+++ b/compiler/dex/quick/x86/utility_x86.cc
@@ -138,6 +138,7 @@
       case kOpLsl: opcode = kX86Sal64RI; break;
       case kOpLsr: opcode = kX86Shr64RI; break;
       case kOpAsr: opcode = kX86Sar64RI; break;
+      case kOpCmp: opcode = byte_imm ? kX86Cmp64RI8 : kX86Cmp64RI; break;
       default:
         LOG(FATAL) << "Bad case in OpRegImm (64-bit) " << op;
     }
@@ -505,7 +506,7 @@
       return NewLIR5(kX86Lea32RA, r_dest.GetReg(),  r5sib_no_base /* base */,
                      r_src.GetReg() /* index */, value /* scale */, 0 /* disp */);
     } else if (op == kOpAdd) {  // lea add special case
-      return NewLIR5(Gen64Bit() ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(),
+      return NewLIR5(r_dest.Is64Bit() ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(),
                      r_src.GetReg() /* base */, rs_rX86_SP.GetReg()/*r4sib_no_index*/ /* index */,
                      0 /* scale */, value /* disp */);
     }