Numerous fixes to enable PromoteRegs, though it's still broken.

- Fixed the ThrowArrayBoundsFromCode launchpad to load the array length
  directly into the necessary arg reg without clobbering the array
  pointer, since that value may be live afterwards.

- genArrayPut now uses a temporary reg for byte stores if the source
  reg is >= 4, since x86 can't express byte access to those registers.

- Fixed the order that core regs are spilled and unspilled.

- Correctly emit instructions when base == rBP and disp == 0.

- Added checks to the compiler to ensure that byte opcodes aren't used
  on registers that can't be byte accessed.

- Fixed generation of a number of ops which use byte opcodes, including
  floating point comparison, int-to-byte, and and-int/lit16.

- Added rBP, rSI, and rDI to spill registers for the x86 jni compiler.

- Various fixes and additions to the x86 disassembler.

Change-Id: I365fe7dec5cc64d181248fd58e90789f100b45e7
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index b4b0f6a..baa4b48 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -898,22 +898,33 @@
         funcOffset = ENTRYPOINT_OFFSET(pThrowNullPointerFromCode);
         break;
       case kThrowArrayBounds:
-#if defined (TARGET_X86)
-        // x86 leaves the array pointer in v2, so load the array length that the handler expects
-        opRegMem(cUnit, kOpMov, v2, v2, Array::LengthOffset().Int32Value());
-#endif
         // Move v1 (array index) to rARG0 and v2 (array length) to rARG1
         if (v2 != rARG0) {
           opRegCopy(cUnit, rARG0, v1);
+#if defined (TARGET_X86)
+          // x86 leaves the array pointer in v2, so load the array length that the handler expects
+          opRegMem(cUnit, kOpMov, rARG1, v2, Array::LengthOffset().Int32Value());
+#else
           opRegCopy(cUnit, rARG1, v2);
+#endif
         } else {
           if (v1 == rARG1) {
             // Swap v1 and v2, using rARG2 as a temp
             opRegCopy(cUnit, rARG2, v1);
+#if defined (TARGET_X86)
+            // x86 leaves the array pointer in v2, so load the array length that the handler expects
+            opRegMem(cUnit, kOpMov, rARG1, v2, Array::LengthOffset().Int32Value());
+#else
             opRegCopy(cUnit, rARG1, v2);
+#endif
             opRegCopy(cUnit, rARG0, rARG2);
           } else {
+#if defined (TARGET_X86)
+            // x86 leaves the array pointer in v2, so load the array length that the handler expects
+            opRegMem(cUnit, kOpMov, rARG1, v2, Array::LengthOffset().Int32Value());
+#else
             opRegCopy(cUnit, rARG1, v2);
+#endif
             opRegCopy(cUnit, rARG0, v1);
           }
         }
@@ -1598,9 +1609,18 @@
   } else {
     rlSrc = loadValue(cUnit, rlSrc, regClass);
   }
-  storeBaseIndexedDisp(cUnit, rlArray.lowReg, rlIndex.lowReg, scale,
-                       dataOffset, rlSrc.lowReg, rlSrc.highReg, size,
-                       INVALID_SREG);
+  // If the src reg can't be byte accessed, move it to a temp first.
+  if ((size == kSignedByte || size == kUnsignedByte) && rlSrc.lowReg >= 4) {
+    int temp = oatAllocTemp(cUnit);
+    opRegCopy(cUnit, temp, rlSrc.lowReg);
+    storeBaseIndexedDisp(cUnit, rlArray.lowReg, rlIndex.lowReg, scale,
+                         dataOffset, temp, INVALID_REG, size,
+                         INVALID_SREG);
+  } else {
+    storeBaseIndexedDisp(cUnit, rlArray.lowReg, rlIndex.lowReg, scale,
+                         dataOffset, rlSrc.lowReg, rlSrc.highReg, size,
+                         INVALID_SREG);
+  }
 #else
   bool needsRangeCheck = (!(optFlags & MIR_IGNORE_RANGE_CHECK));
   int regLen = INVALID_REG;
diff --git a/src/compiler/codegen/x86/ArchFactory.cc b/src/compiler/codegen/x86/ArchFactory.cc
index 1620044..001a93d 100644
--- a/src/compiler/codegen/x86/ArchFactory.cc
+++ b/src/compiler/codegen/x86/ArchFactory.cc
@@ -128,11 +128,11 @@
   }
   // Spill mask not including fake return address register
   uint32_t mask = cUnit->coreSpillMask & ~(1 << rRET);
-  int offset = cUnit->frameSize - 4;
+  int offset = cUnit->frameSize - (4 * cUnit->numCoreSpills);
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
-      offset -= 4;
       storeWordDisp(cUnit, rSP, offset, reg);
+      offset += 4;
     }
   }
 }
@@ -143,11 +143,11 @@
   }
   // Spill mask not including fake return address register
   uint32_t mask = cUnit->coreSpillMask & ~(1 << rRET);
-  int offset = cUnit->frameSize - 4;
+  int offset = cUnit->frameSize - (4 * cUnit->numCoreSpills);
   for (int reg = 0; mask; mask >>= 1, reg++) {
     if (mask & 0x1) {
-      offset -= 4;
       loadWordDisp(cUnit, rSP, offset, reg);
+      offset += 4;
     }
   }
 }
diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc
index 63e4cc3..a245660 100644
--- a/src/compiler/codegen/x86/Assemble.cc
+++ b/src/compiler/codegen/x86/Assemble.cc
@@ -362,15 +362,19 @@
       return computeSize(entry, 0, false);
     case kMem: { // lir operands - 0: base, 1: disp
       int base = lir->operands[0];
-      // SP requires a special extra SIB byte
-      return computeSize(entry, lir->operands[1], false) + (base == rSP ? 1 : 0);
+      int disp = lir->operands[1];
+      // SP requires a special extra SIB byte. BP requires explicit disp,
+      // so add a byte for disp 0 which would normally be omitted.
+      return computeSize(entry, disp, false) + ((base == rSP) || (base == rBP && disp == 0) ? 1 : 0);
     }
     case kArray:  // lir operands - 0: base, 1: index, 2: scale, 3: disp
       return computeSize(entry, lir->operands[3], true);
     case kMemReg: { // lir operands - 0: base, 1: disp, 2: reg
       int base = lir->operands[0];
-      // SP requires a special extra SIB byte
-      return computeSize(entry, lir->operands[1], false) + (base == rSP ? 1 : 0);
+      int disp = lir->operands[1];
+      // SP requires a special extra SIB byte. BP requires explicit disp,
+      // so add a byte for disp 0 which would normally be omitted.
+      return computeSize(entry, disp, false) + ((base == rSP) || (base == rBP && disp == 0) ? 1 : 0);
     }
     case kArrayReg:  // lir operands - 0: base, 1: index, 2: scale, 3: disp, 4: reg
       return computeSize(entry, lir->operands[3], true);
@@ -382,10 +386,17 @@
       return computeSize(entry, 0, false);
     case kRegMem: { // lir operands - 0: reg, 1: base, 2: disp
       int base = lir->operands[1];
-      return computeSize(entry, lir->operands[2], false) + (base == rSP ? 1 : 0);
+      int disp = lir->operands[2];
+      // SP requires a special extra SIB byte. BP requires explicit disp,
+      // so add a byte for disp 0 which would normally be omitted.
+      return computeSize(entry, disp, false) + ((base == rSP) || (base == rBP && disp == 0) ? 1 : 0);
     }
-    case kRegArray:  // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
-      return computeSize(entry, lir->operands[4], true);
+    case kRegArray:  { // lir operands - 0: reg, 1: base, 2: index, 3: scale, 4: disp
+      int base = lir->operands[1];
+      int disp = lir->operands[4];
+      // BP requires explicit disp, so add a byte for disp 0 which would normally be omitted.
+      return computeSize(entry, disp, true) + ((base == rBP && disp == 0) ? 1 : 0);
+    }
     case kRegThread:  // lir operands - 0: reg, 1: disp
       return computeSize(entry, 0x12345678, false);  // displacement size is always 32bit
     case kRegImm: {  // lir operands - 0: reg, 1: immediate
@@ -487,8 +498,9 @@
   return 0;
 }
 
-static uint8_t modrmForDisp(int disp) {
-  if (disp == 0) {
+static uint8_t modrmForDisp(int base, int disp) {
+  // BP requires an explicit disp, so do not omit it in the 0 case
+  if (disp == 0 && base != rBP) {
     return 0;
   } else if (IS_SIMM8(disp)) {
     return 1;
@@ -497,8 +509,9 @@
   }
 }
 
-static void emitDisp(CompilationUnit* cUnit, int disp) {
-  if (disp == 0) {
+static void emitDisp(CompilationUnit* cUnit, int base, int disp) {
+  // BP requires an explicit disp, so do not omit it in the 0 case
+  if (disp == 0 && base != rBP) {
     return;
   } else if (IS_SIMM8(disp)) {
     cUnit->codeBuffer.push_back(disp & 0xFF);
@@ -534,6 +547,10 @@
   if (FPREG(reg)) {
     reg = reg & FP_REG_MASK;
   }
+  if (reg >= 4) {
+    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << (int) reg
+        << " in " << PrettyMethod(cUnit->method_idx, *cUnit->dex_file);
+  }
   DCHECK_LT(reg, 8);
   uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg;
   cUnit->codeBuffer.push_back(modrm);
@@ -555,9 +572,9 @@
   DCHECK_EQ(0, entry->skeleton.extra_opcode2);
   DCHECK_LT(entry->skeleton.modrm_opcode, 8);
   DCHECK_LT(base, 8);
-  uint8_t modrm = (modrmForDisp(disp) << 6) | (entry->skeleton.modrm_opcode << 3) | base;
+  uint8_t modrm = (modrmForDisp(base, disp) << 6) | (entry->skeleton.modrm_opcode << 3) | base;
   cUnit->codeBuffer.push_back(modrm);
-  emitDisp(cUnit, disp);
+  emitDisp(cUnit, base, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
@@ -587,15 +604,19 @@
   if (FPREG(reg)) {
     reg = reg & FP_REG_MASK;
   }
+  if (reg >= 4) {
+    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << (int) reg
+        << " in " << PrettyMethod(cUnit->method_idx, *cUnit->dex_file);
+  }
   DCHECK_LT(reg, 8);
   DCHECK_LT(base, 8);
-  uint8_t modrm = (modrmForDisp(disp) << 6) | (reg << 3) | base;
+  uint8_t modrm = (modrmForDisp(base, disp) << 6) | (reg << 3) | base;
   cUnit->codeBuffer.push_back(modrm);
   if (base == rSP) {
     // Special SIB for SP base
     cUnit->codeBuffer.push_back(0 << 6 | (rSP << 3) | rSP);
   }
-  emitDisp(cUnit, disp);
+  emitDisp(cUnit, base, disp);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
@@ -633,14 +654,14 @@
     reg = reg & FP_REG_MASK;
   }
   DCHECK_LT(reg, 8);
-  uint8_t modrm = (modrmForDisp(disp) << 6) | (reg << 3) | rSP;
+  uint8_t modrm = (modrmForDisp(base, disp) << 6) | (reg << 3) | rSP;
   cUnit->codeBuffer.push_back(modrm);
   DCHECK_LT(scale, 4);
   DCHECK_LT(index, 8);
   DCHECK_LT(base, 8);
   uint8_t sib = (scale << 6) | (index << 3) | base;
   cUnit->codeBuffer.push_back(sib);
-  emitDisp(cUnit, disp);
+  emitDisp(cUnit, base, disp);
   DCHECK_EQ(0, entry->skeleton.modrm_opcode);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
@@ -674,6 +695,10 @@
   if (FPREG(reg)) {
     reg = reg & FP_REG_MASK;
   }
+  if (reg >= 4) {
+    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << (int) reg
+        << " in " << PrettyMethod(cUnit->method_idx, *cUnit->dex_file);
+  }
   DCHECK_LT(reg, 8);
   uint8_t modrm = (0 << 6) | (reg << 3) | rBP;
   cUnit->codeBuffer.push_back(modrm);
@@ -923,6 +948,10 @@
     DCHECK_EQ(0, entry->skeleton.extra_opcode1);
     DCHECK_EQ(0, entry->skeleton.extra_opcode2);
   }
+  if (reg >= 4) {
+    DCHECK(strchr(entry->name, '8') == NULL) << entry->name << " " << (int) reg
+        << " in " << PrettyMethod(cUnit->method_idx, *cUnit->dex_file);
+  }
   DCHECK_LT(reg, 8);
   uint8_t modrm = (3 << 6) | (entry->skeleton.modrm_opcode << 3) | reg;
   cUnit->codeBuffer.push_back(modrm);
@@ -1037,13 +1066,13 @@
     DCHECK_EQ(0, entry->skeleton.extra_opcode1);
     DCHECK_EQ(0, entry->skeleton.extra_opcode2);
   }
-  uint8_t modrm = (modrmForDisp(disp) << 6) | (entry->skeleton.modrm_opcode << 3) | base;
+  uint8_t modrm = (modrmForDisp(base, disp) << 6) | (entry->skeleton.modrm_opcode << 3) | base;
   cUnit->codeBuffer.push_back(modrm);
   if (base == rSP) {
     // Special SIB for SP base
     cUnit->codeBuffer.push_back(0 << 6 | (rSP << 3) | rSP);
   }
-  emitDisp(cUnit, disp);
+  emitDisp(cUnit, base, disp);
   DCHECK_EQ(0, entry->skeleton.ax_opcode);
   DCHECK_EQ(0, entry->skeleton.immediate_bytes);
 }
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index f2488d0..8cd32b4 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -264,7 +264,20 @@
   if (unorderedGt) {
     branch = newLIR2(cUnit, kX86Jcc8, 0, kX86CondPE);
   }
-  newLIR2(cUnit, kX86Set8R, rlResult.lowReg, kX86CondA /* above - unsigned > */);
+  // If the result reg can't be byte accessed, use a jump and move instead of a set.
+  if (rlResult.lowReg >= 4) {
+    LIR* branch2 = NULL;
+    if (unorderedGt) {
+      branch2 = newLIR2(cUnit, kX86Jcc8, 0, kX86CondA);
+      newLIR2(cUnit, kX86Mov32RI, rlResult.lowReg, 0x0);
+    } else {
+      branch2 = newLIR2(cUnit, kX86Jcc8, 0, kX86CondBe);
+      newLIR2(cUnit, kX86Mov32RI, rlResult.lowReg, 0x1);
+    }
+    branch2->target = newLIR0(cUnit, kPseudoTargetLabel);
+  } else {
+    newLIR2(cUnit, kX86Set8R, rlResult.lowReg, kX86CondA /* above - unsigned > */);
+  }
   newLIR2(cUnit, kX86Sbb32RI, rlResult.lowReg, 0);
   if (unorderedGt) {
     branch->target = newLIR0(cUnit, kPseudoTargetLabel);
diff --git a/src/compiler/codegen/x86/X86/Factory.cc b/src/compiler/codegen/x86/X86/Factory.cc
index 9721038..f77a793 100644
--- a/src/compiler/codegen/x86/X86/Factory.cc
+++ b/src/compiler/codegen/x86/X86/Factory.cc
@@ -198,7 +198,16 @@
       case kOpAnd: opcode = kX86And32RR; break;
       case kOpOr:  opcode = kX86Or32RR; break;
       case kOpXor: opcode = kX86Xor32RR; break;
-      case kOp2Byte: opcode = kX86Movsx8RR; break;
+      case kOp2Byte:
+        // Use shifts instead of a byte operand if the source can't be byte accessed.
+        if (rSrc2 >= 4) {
+          newLIR2(cUnit, kX86Mov32RR, rDestSrc1, rSrc2);
+          newLIR2(cUnit, kX86Sal32RI, rDestSrc1, 24);
+          return newLIR2(cUnit, kX86Sar32RI, rDestSrc1, 24);
+        } else {
+          opcode = kX86Movsx8RR;
+        }
+        break;
       case kOp2Short: opcode = kX86Movsx16RR; break;
       case kOp2Char: opcode = kX86Movzx16RR; break;
       case kOpMul: opcode = kX86Imul32RR; break;
@@ -228,7 +237,7 @@
     case kOp2Char: opcode = kX86Movzx16RM; break;
     case kOpMul:
     default:
-      LOG(FATAL) << "Bad case in opRegReg " << op;
+      LOG(FATAL) << "Bad case in opRegMem " << op;
       break;
   }
   return newLIR3(cUnit, opcode, rDest, rBase, offset);
@@ -290,7 +299,7 @@
     X86OpCode opcode = IS_SIMM8(value) ? kX86Imul32RRI8 : kX86Imul32RRI;
     return newLIR3(cUnit, opcode, rDest, rSrc, value);
   } else if (op == kOpAnd) {
-    if (value == 0xFF) {
+    if (value == 0xFF && rDest < 4) {
       return newLIR2(cUnit, kX86Movzx8RR, rDest, rSrc);
     } else if (value == 0xFFFF) {
       return newLIR2(cUnit, kX86Movzx16RR, rDest, rSrc);
diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h
index c229844..5bf4dd9 100644
--- a/src/compiler/codegen/x86/X86LIR.h
+++ b/src/compiler/codegen/x86/X86LIR.h
@@ -199,6 +199,9 @@
   rSI    = r6,
   r7     = 7,
   rDI    = r7,
+#ifndef TARGET_REX_SUPPORT
+  rRET   = 8,  // fake return address register for core spill mask
+#else
   r8     = 8,
   r9     = 9,
   r10    = 10,
@@ -208,6 +211,7 @@
   r14    = 14,
   r15    = 15,
   rRET   = 16,  // fake return address register for core spill mask
+#endif
   fr0  =  0 + FP_REG_OFFSET,
   fr1  =  1 + FP_REG_OFFSET,
   fr2  =  2 + FP_REG_OFFSET,
diff --git a/src/disassembler_x86.cc b/src/disassembler_x86.cc
index 969feb8..d45d641 100644
--- a/src/disassembler_x86.cc
+++ b/src/disassembler_x86.cc
@@ -36,8 +36,8 @@
 }
 
 static const char* gReg8Names[]  = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh" };
-static const char* gReg16Names[] = { "ax", "cx", "dx", "bx", "sp", "bp", "di", "si" };
-static const char* gReg32Names[] = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "edi", "esi" };
+static const char* gReg16Names[] = { "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" };
+static const char* gReg32Names[] = { "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi" };
 
 static void DumpReg0(std::ostream& os, uint8_t /*rex*/, size_t reg,
                      bool byte_operand, uint8_t size_override) {
@@ -147,6 +147,7 @@
   bool ax = false;  // implicit use of ax
   bool cx = false;  // implicit use of cx
   bool reg_in_opcode = false;  // low 3-bits of opcode encode register parameter
+  bool no_ops = false;
   RegFile src_reg_file = GPR;
   RegFile dst_reg_file = GPR;
   switch (*instr) {
@@ -473,6 +474,43 @@
         has_modrm = true;
         store = true;
         break;
+      case 0xAE:
+        if (prefix[0] == 0xF3) {
+          static const char* xAE_opcodes[] = {"rdfsbase", "rdgsbase", "wrfsbase", "wrgsbase", "unknown-AE", "unknown-AE", "unknown-AE", "unknown-AE"};
+          modrm_opcodes = xAE_opcodes;
+          reg_is_opcode = true;
+          has_modrm = true;
+          uint8_t reg_or_opcode = (instr[1] >> 3) & 7;
+          switch (reg_or_opcode) {
+            case 0:
+              prefix[1] = kFs;
+              load = true;
+              break;
+            case 1:
+              prefix[1] = kGs;
+              load = true;
+              break;
+            case 2:
+              prefix[1] = kFs;
+              store = true;
+              break;
+            case 3:
+              prefix[1] = kGs;
+              store = true;
+              break;
+            default:
+              load = true;
+              break;
+          }
+        } else {
+          static const char* xAE_opcodes[] = {"unknown-AE", "unknown-AE", "unknown-AE", "unknown-AE", "unknown-AE", "lfence", "mfence", "sfence"};
+          modrm_opcodes = xAE_opcodes;
+          reg_is_opcode = true;
+          has_modrm = true;
+          load = true;
+          no_ops = true;
+        }
+        break;
       case 0xB6: opcode << "movzxb"; has_modrm = true; load = true; break;
       case 0xB7: opcode << "movzxw"; has_modrm = true; load = true; break;
       default:
@@ -489,11 +527,23 @@
     byte_operand = (*instr & 1) == 0;
     immediate_bytes = *instr == 0x81 ? 4 : 1;
     break;
+  case 0x84: case 0x85:
+    opcode << "test";
+    has_modrm = true;
+    load = true;
+    byte_operand = (*instr & 1) == 0;
+    break;
   case 0x8D:
     opcode << "lea";
     has_modrm = true;
     load = true;
     break;
+  case 0x8F:
+    opcode << "pop";
+    has_modrm = true;
+    reg_is_opcode = true;
+    store = true;
+    break;
   case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7:
     opcode << "mov";
     immediate_bytes = 1;
@@ -595,7 +645,9 @@
       address << "]";
     } else {
       if (mod == 3) {
-        DumpReg(address, rex, rm, byte_operand, prefix[2], load ? src_reg_file : dst_reg_file);
+        if (!no_ops) {
+          DumpReg(address, rex, rm, byte_operand, prefix[2], load ? src_reg_file : dst_reg_file);
+        }
       } else {
         address << "[";
         DumpBaseReg(address, rex, rm);
diff --git a/src/oat/jni/x86/calling_convention_x86.cc b/src/oat/jni/x86/calling_convention_x86.cc
index 1f66d71..1cd849c 100644
--- a/src/oat/jni/x86/calling_convention_x86.cc
+++ b/src/oat/jni/x86/calling_convention_x86.cc
@@ -106,11 +106,21 @@
 
 // JNI calling convention
 
-std::vector<ManagedRegister> X86JniCallingConvention::callee_save_regs_;
+X86JniCallingConvention::X86JniCallingConvention(bool is_static, bool is_synchronized,
+                                                 const char* shorty)
+    : JniCallingConvention(is_static, is_synchronized, shorty) {
+  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(EBP));
+  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(ESI));
+  callee_save_regs_.push_back(X86ManagedRegister::FromCpuRegister(EDI));
+}
+
+uint32_t X86JniCallingConvention::CoreSpillMask() const {
+  return 1 << EBP | 1 << ESI | 1 << EDI | 1 << kNumberOfCpuRegisters;
+}
 
 size_t X86JniCallingConvention::FrameSize() {
-  // Return address, Method* and local reference segment state
-  size_t frame_data_size = 3 * kPointerSize;
+  // Method*, return address and callee save area size, local reference segment state
+  size_t frame_data_size = (3 + CalleeSaveRegisters().size()) * kPointerSize;
   // References plus 2 words for SIRT header
   size_t sirt_size = (ReferenceCount() + 2) * kPointerSize;
   // Plus return value spill area size
diff --git a/src/oat/jni/x86/calling_convention_x86.h b/src/oat/jni/x86/calling_convention_x86.h
index 3bf4f7c..959a37f 100644
--- a/src/oat/jni/x86/calling_convention_x86.h
+++ b/src/oat/jni/x86/calling_convention_x86.h
@@ -45,8 +45,7 @@
 
 class X86JniCallingConvention : public JniCallingConvention {
  public:
-  X86JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty)
-      : JniCallingConvention(is_static, is_synchronized, shorty) {}
+  explicit X86JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty);
   virtual ~X86JniCallingConvention() {}
   // Calling convention
   virtual ManagedRegister ReturnRegister();
@@ -55,13 +54,10 @@
   virtual size_t FrameSize();
   virtual size_t OutArgSize();
   virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const {
-    DCHECK(callee_save_regs_.empty());
     return callee_save_regs_;
   }
   virtual ManagedRegister ReturnScratchRegister() const;
-  virtual uint32_t CoreSpillMask() const {
-    return 0;
-  }
+  virtual uint32_t CoreSpillMask() const;
   virtual uint32_t FpSpillMask() const {
     return 0;
   }
@@ -75,7 +71,8 @@
   virtual size_t NumberOfOutgoingStackArgs();
 
  private:
-  static std::vector<ManagedRegister> callee_save_regs_;
+  // TODO: these values aren't unique and can be shared amongst instances
+  std::vector<ManagedRegister> callee_save_regs_;
 
   DISALLOW_COPY_AND_ASSIGN(X86JniCallingConvention);
 };
diff --git a/src/oat/utils/x86/assembler_x86.cc b/src/oat/utils/x86/assembler_x86.cc
index 28b17f5..b7f0c1f 100644
--- a/src/oat/utils/x86/assembler_x86.cc
+++ b/src/oat/utils/x86/assembler_x86.cc
@@ -1415,10 +1415,12 @@
                               const std::vector<ManagedRegister>& spill_regs,
                               const std::vector<ManagedRegister>& entry_spills) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  CHECK_EQ(0u, spill_regs.size());  // no spilled regs on x86
+  for (int i = spill_regs.size() - 1; i >= 0; --i) {
+    pushl(spill_regs.at(i).AsX86().AsCpuRegister());
+  }
   // return address then method on stack
-  addl(ESP, Immediate(-frame_size + kPointerSize /*method*/ +
-                      kPointerSize /*return address*/));
+  addl(ESP, Immediate(-frame_size + (spill_regs.size() * kPointerSize) +
+                      kPointerSize /*method*/ + kPointerSize /*return address*/));
   pushl(method_reg.AsX86().AsCpuRegister());
   for (size_t i = 0; i < entry_spills.size(); ++i) {
     movl(Address(ESP, frame_size + kPointerSize + (i * kPointerSize)),
@@ -1429,8 +1431,10 @@
 void X86Assembler::RemoveFrame(size_t frame_size,
                             const std::vector<ManagedRegister>& spill_regs) {
   CHECK_ALIGNED(frame_size, kStackAlignment);
-  CHECK_EQ(0u, spill_regs.size());  // no spilled regs on x86
-  addl(ESP, Immediate(frame_size - kPointerSize));
+  addl(ESP, Immediate(frame_size - (spill_regs.size() * kPointerSize) - kPointerSize));
+  for (size_t i = 0; i < spill_regs.size(); ++i) {
+    popl(spill_regs.at(i).AsX86().AsCpuRegister());
+  }
   ret();
 }