Further x86 progress and image creation.

Change-Id: Idafadfc55228541536f25d2c92d40d9e0510b602
diff --git a/src/compiler/codegen/CodegenFactory.cc b/src/compiler/codegen/CodegenFactory.cc
index e1df1a5..1b64af2 100644
--- a/src/compiler/codegen/CodegenFactory.cc
+++ b/src/compiler/codegen/CodegenFactory.cc
@@ -273,14 +273,16 @@
  */
 void markGCCard(CompilationUnit* cUnit, int valReg, int tgtAddrReg)
 {
-#if defined(TARGET_X86)
-    UNIMPLEMENTED(WARNING) << "markGCCard";
-#else
     int regCardBase = oatAllocTemp(cUnit);
     int regCardNo = oatAllocTemp(cUnit);
     LIR* branchOver = opCmpImmBranch(cUnit, kCondEq, valReg, 0, NULL);
+#if !defined(TARGET_X86)
     loadWordDisp(cUnit, rSELF, Thread::CardTableOffset().Int32Value(),
                  regCardBase);
+#else
+    newLIR2(cUnit, kX86Mov32RT, regCardBase,
+            Thread::CardTableOffset().Int32Value());
+#endif
     opRegRegImm(cUnit, kOpLsr, regCardNo, tgtAddrReg, GC_CARD_SHIFT);
     storeBaseIndexed(cUnit, regCardBase, regCardNo, regCardBase, 0,
                      kUnsignedByte);
@@ -288,7 +290,6 @@
     branchOver->target = (LIR*)target;
     oatFreeTemp(cUnit, regCardBase);
     oatFreeTemp(cUnit, regCardNo);
-#endif
 }
 
 /* Utilities to load the current Method* */
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index f63ad4c..b45fbaa6 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -637,6 +637,9 @@
             rBase = oatAllocTemp(cUnit);
             loadWordDisp(cUnit, rlMethod.lowReg,
                          Method::DeclaringClassOffset().Int32Value(), rBase);
+            if (oatIsTemp(cUnit, rlMethod.lowReg)) {
+                oatFreeTemp(cUnit, rlMethod.lowReg);
+            }
         } else {
             // Medium path, static storage base in a different class which
             // requires checks that the other class is initialized.
@@ -1412,6 +1415,9 @@
     storeBaseIndexedDisp(cUnit, NULL, rlArray.lowReg, rlIndex.lowReg, scale,
                          dataOffset, rlSrc.lowReg, INVALID_REG, kWord,
                          INVALID_SREG);
+    if (oatIsTemp(cUnit, rlIndex.lowReg)) {
+        oatFreeTemp(cUnit, rlIndex.lowReg);
+    }
 #else
     int regPtr;
     if (oatIsTemp(cUnit, rlArray.lowReg)) {
@@ -1582,7 +1588,11 @@
         genRegMemCheck(cUnit, kCondUge, rlIndex.lowReg, rlArray.lowReg,
                        lenOffset, mir, kThrowArrayBounds);
     }
-    rlSrc = loadValue(cUnit, rlSrc, regClass);
+    if ((size == kLong) || (size == kDouble)) {
+      rlSrc = loadValueWide(cUnit, rlSrc, regClass);
+    } else {
+      rlSrc = loadValue(cUnit, rlSrc, regClass);
+    }
     storeBaseIndexedDisp(cUnit, NULL, rlArray.lowReg, rlIndex.lowReg, scale, dataOffset,
                          rlSrc.lowReg, rlSrc.highReg, size, INVALID_SREG);
 #else
@@ -2136,6 +2146,10 @@
         case Instruction::ADD_LONG_2ADDR:
 #if defined(TARGET_MIPS)
             return genAddLong(cUnit, mir, rlDest, rlSrc1, rlSrc2);
+#elif defined(TARGET_X86)
+            callOut = true;
+            retReg = rRET0;
+            funcOffset = OFFSETOF_MEMBER(Thread, pLadd);
 #else
             firstOp = kOpAdd;
             secondOp = kOpAdc;
@@ -2145,11 +2159,14 @@
         case Instruction::SUB_LONG_2ADDR:
 #if defined(TARGET_MIPS)
             return genSubLong(cUnit, mir, rlDest, rlSrc1, rlSrc2);
-#else
+#elif defined(TARGET_X86)
+            callOut = true;
+            retReg = rRET0;
+            funcOffset = OFFSETOF_MEMBER(Thread, pLsub);
+#endif
             firstOp = kOpSub;
             secondOp = kOpSbc;
             break;
-#endif
         case Instruction::MUL_LONG:
         case Instruction::MUL_LONG_2ADDR:
             callOut = true;
@@ -2174,16 +2191,31 @@
             break;
         case Instruction::AND_LONG_2ADDR:
         case Instruction::AND_LONG:
+#if defined(TARGET_X86)
+            callOut = true;
+            retReg = rRET0;
+            funcOffset = OFFSETOF_MEMBER(Thread, pLand);
+#endif
             firstOp = kOpAnd;
             secondOp = kOpAnd;
             break;
         case Instruction::OR_LONG:
         case Instruction::OR_LONG_2ADDR:
+#if defined(TARGET_X86)
+            callOut = true;
+            retReg = rRET0;
+            funcOffset = OFFSETOF_MEMBER(Thread, pLor);
+#endif
             firstOp = kOpOr;
             secondOp = kOpOr;
             break;
         case Instruction::XOR_LONG:
         case Instruction::XOR_LONG_2ADDR:
+#if defined(TARGET_X86)
+            callOut = true;
+            retReg = rRET0;
+            funcOffset = OFFSETOF_MEMBER(Thread, pLxor);
+#endif
             firstOp = kOpXor;
             secondOp = kOpXor;
             break;
@@ -2198,30 +2230,27 @@
     } else {
         oatFlushAllRegs(cUnit);   /* Send everything to home location */
         if (checkZero) {
-#if defined(TARGET_X86)
-            UNIMPLEMENTED(FATAL);
-#else
             loadValueDirectWideFixed(cUnit, rlSrc2, rARG2, rARG3);
+#if !defined(TARGET_X86)
             int rTgt = loadHelper(cUnit, funcOffset);
 #endif
-            loadValueDirectWideFixed(cUnit, rlSrc1, rARG0, rARG1);
             int tReg = oatAllocTemp(cUnit);
 #if defined(TARGET_ARM)
             newLIR4(cUnit, kThumb2OrrRRRs, tReg, rARG2, rARG3, 0);
             oatFreeTemp(cUnit, tReg);
             genCheck(cUnit, kCondEq, mir, kThrowDivZero);
 #else
-#if defined(TARGET_X86)
-            UNIMPLEMENTED(FATAL);
-#else
+            // ARM emitted its check above; other targets compare tReg against zero.
             opRegRegReg(cUnit, kOpOr, tReg, rARG2, rARG3);
-#endif
             genImmedCheck(cUnit, kCondEq, tReg, 0, mir, kThrowDivZero);
             oatFreeTemp(cUnit, tReg);
 #endif
+            loadValueDirectWideFixed(cUnit, rlSrc1, rARG0, rARG1);
 #if !defined(TARGET_X86)
             opReg(cUnit, kOpBlx, rTgt);
             oatFreeTemp(cUnit, rTgt);
+#else
+            opThreadMem(cUnit, kOpBlx, funcOffset);
 #endif
         } else {
             callRuntimeHelperRegLocationRegLocation(cUnit, funcOffset,
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index 671eb73..205a65a 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -186,22 +186,23 @@
         case Instruction::NOP:
             break;
 
-        case Instruction::MOVE_EXCEPTION:
-#if defined(TARGET_X86)
-            UNIMPLEMENTED(WARNING) << "Instruction::MOVE_EXCEPTION";
-#else
-            int exOffset;
-            int resetReg;
-            exOffset = Thread::ExceptionOffset().Int32Value();
-            resetReg = oatAllocTemp(cUnit);
+        case Instruction::MOVE_EXCEPTION: {
+            int exOffset = Thread::ExceptionOffset().Int32Value();
             rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+#if defined(TARGET_X86)
+            newLIR2(cUnit, kX86Mov32RT, rlResult.lowReg, exOffset);
+            newLIR2(cUnit, kX86Mov32TI, exOffset, 0);
+#else
+            int resetReg = oatAllocTemp(cUnit);
             loadWordDisp(cUnit, rSELF, exOffset, rlResult.lowReg);
             loadConstant(cUnit, resetReg, 0);
             storeWordDisp(cUnit, rSELF, exOffset, resetReg);
-            storeValue(cUnit, rlDest, rlResult);
+            oatFreeTemp(cUnit, resetReg);
 #endif
+            // Every target must publish the loaded exception to rlDest.
+            storeValue(cUnit, rlDest, rlResult);
             break;
-
+        }
         case Instruction::RETURN_VOID:
             if (!cUnit->attrs & METHOD_IS_LEAF) {
                 genSuspendTest(cUnit, mir);
diff --git a/src/compiler/codegen/x86/ArchFactory.cc b/src/compiler/codegen/x86/ArchFactory.cc
index eec1cbd..bd95afb 100644
--- a/src/compiler/codegen/x86/ArchFactory.cc
+++ b/src/compiler/codegen/x86/ArchFactory.cc
@@ -24,42 +24,6 @@
 
 namespace art {
 
-bool genAddLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
-                RegLocation rlSrc1, RegLocation rlSrc2)
-{
-    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
-    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
-    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-    /*
-     *  [v1 v0] =  [a1 a0] + [a3 a2];
-     *    add v0,a2,a0
-     *    adc v1,a3,a1
-     */
-
-    opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc2.lowReg, rlSrc1.lowReg);
-    opRegRegReg(cUnit, kOpAdc, rlResult.highReg, rlSrc2.highReg, rlSrc1.highReg);
-    storeValueWide(cUnit, rlDest, rlResult);
-    return false;
-}
-
-bool genSubLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
-                RegLocation rlSrc1, RegLocation rlSrc2)
-{
-    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
-    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
-    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-    /*
-     *  [v1 v0] =  [a1 a0] - [a3 a2];
-     *    sub    v0,a0,a2
-     *    sbb    v1,a1,a3
-     */
-
-    opRegRegReg(cUnit, kOpSub, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
-    opRegRegReg(cUnit, kOpSbc, rlResult.highReg, rlSrc1.highReg, rlSrc2.highReg);
-    storeValueWide(cUnit, rlDest, rlResult);
-    return false;
-}
-
 bool genNegLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
                 RegLocation rlSrc)
 {
diff --git a/src/compiler/codegen/x86/Assemble.cc b/src/compiler/codegen/x86/Assemble.cc
index b9dd978..b62b5b4 100644
--- a/src/compiler/codegen/x86/Assemble.cc
+++ b/src/compiler/codegen/x86/Assemble.cc
@@ -279,6 +279,10 @@
   { kX86Set8M, kMemCond,   IS_STORE | IS_TERTIARY_OP, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8M", "!2c [!0r+!1d]" },
   { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP,     { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" },
 
+  // TODO: load/store?
+  // Encode the modrm opcode as an extra opcode byte to avoid computation during assembly.
+  { kX86Mfence, kReg,                 NO_OPERAND,     { 0, 0, 0x0F, 0xAE, 0, 6, 0, 0 }, "Mfence", "" },
+
   EXT_0F_ENCODING_MAP(Imul16,  0x66, 0xAF),
   EXT_0F_ENCODING_MAP(Imul32,  0x00, 0xAF),
   EXT_0F_ENCODING_MAP(Movzx8,  0x00, 0xB6),
@@ -945,12 +949,12 @@
   LIR *lir;
   AssemblerStatus res = kSuccess;  // Assume success
 
+  const bool kVerbosePcFixup = false;
   for (lir = (LIR *) cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) {
     if (lir->opcode < 0) {
       continue;
     }
 
-
     if (lir->flags.isNop) {
       continue;
     }
@@ -970,8 +974,10 @@
           intptr_t target = targetLIR->offset;
           delta = target - pc;
           if (IS_SIMM8(delta) != IS_SIMM8(lir->operands[0])) {
-            LOG(INFO) << "Retry for JCC growth at " << lir->offset
-                << " delta: " << delta << " old delta: " << lir->operands[0];
+            if (kVerbosePcFixup) {
+              LOG(INFO) << "Retry for JCC growth at " << lir->offset
+                  << " delta: " << delta << " old delta: " << lir->operands[0];
+            }
             lir->opcode = kX86Jcc32;
             oatSetupResourceMasks(lir);
             res = kRetryAll;
@@ -994,10 +1000,14 @@
           if (!(cUnit->disableOpt & (1 << kSafeOptimizations)) && lir->operands[0] == 0) {
             // Useless branch
             lir->flags.isNop = true;
-            LOG(INFO) << "Retry for useless branch at " << lir->offset;
+            if (kVerbosePcFixup) {
+              LOG(INFO) << "Retry for useless branch at " << lir->offset;
+            }
             res = kRetryAll;
           } else if (IS_SIMM8(delta) != IS_SIMM8(lir->operands[0])) {
-            LOG(INFO) << "Retry for JMP growth at " << lir->offset;
+            if (kVerbosePcFixup) {
+              LOG(INFO) << "Retry for JMP growth at " << lir->offset;
+            }
             lir->opcode = kX86Jmp32;
             oatSetupResourceMasks(lir);
             res = kRetryAll;
@@ -1028,8 +1038,14 @@
         DCHECK_EQ(0, entry->skeleton.prefix1);
         DCHECK_EQ(0, entry->skeleton.prefix2);
         cUnit->codeBuffer.push_back(entry->skeleton.opcode);
-        DCHECK_EQ(0, entry->skeleton.extra_opcode1);
-        DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+        if (entry->skeleton.extra_opcode1 != 0) {
+          cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode1);
+          if (entry->skeleton.extra_opcode2 != 0) {
+            cUnit->codeBuffer.push_back(entry->skeleton.extra_opcode2);
+          }
+        } else {
+          DCHECK_EQ(0, entry->skeleton.extra_opcode2);
+        }
         DCHECK_EQ(0, entry->skeleton.modrm_opcode);
         DCHECK_EQ(0, entry->skeleton.ax_opcode);
         DCHECK_EQ(0, entry->skeleton.immediate_bytes);
diff --git a/src/compiler/codegen/x86/FP/X86FP.cc b/src/compiler/codegen/x86/FP/X86FP.cc
index c916640..29aaeeb 100644
--- a/src/compiler/codegen/x86/FP/X86FP.cc
+++ b/src/compiler/codegen/x86/FP/X86FP.cc
@@ -204,8 +204,8 @@
     srcReg2 = S2D(rlSrc2.lowReg, rlSrc2.highReg);
   }
   rlDest = oatGetDest(cUnit, mir, 0);
-  RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
-  opRegImm(cUnit, kOpMov, rlResult.lowReg, unorderedGt ? 1 : 0);
+  RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+  loadConstantNoClobber(cUnit, rlResult.lowReg, unorderedGt ? 1 : 0);
   if (single) {
     newLIR2(cUnit, kX86UcomissRR, srcReg1, srcReg2);
   } else {
diff --git a/src/compiler/codegen/x86/X86/Factory.cc b/src/compiler/codegen/x86/X86/Factory.cc
index aef5879..4987c28 100644
--- a/src/compiler/codegen/x86/X86/Factory.cc
+++ b/src/compiler/codegen/x86/X86/Factory.cc
@@ -147,6 +147,7 @@
 LIR *opRegImm(CompilationUnit *cUnit, OpKind op, int rDestSrc1, int value) {
   X86OpCode opcode = kX86Bkpt;
   bool byteImm = IS_SIMM8(value);
+  DCHECK(!FPREG(rDestSrc1));
   switch (op) {
     case kOpLsl: opcode = kX86Sal32RI; break;
     case kOpLsr: opcode = kX86Shr32RI; break;
@@ -159,15 +160,7 @@
     case kOpSub: opcode = byteImm ? kX86Sub32RI8 : kX86Sub32RI; break;
     case kOpXor: opcode = byteImm ? kX86Xor32RI8 : kX86Xor32RI; break;
     case kOpCmp: opcode = byteImm ? kX86Cmp32RI8 : kX86Cmp32RI; break;
-    case kOpMov: {
-      if (value == 0) {  // turn "mov reg, 0" into "xor reg, reg"
-        opcode = kX86Xor32RR;
-        value = rDestSrc1;
-      } else {
-        opcode = kX86Mov32RI;
-      }
-      break;
-    }
+    case kOpMov: return loadConstantNoClobber(cUnit, rDestSrc1, value);
     case kOpMul:
       opcode = byteImm ? kX86Imul32RRI8 : kX86Imul32RRI;
       return newLIR3(cUnit, opcode, rDestSrc1, rDestSrc1, value);
@@ -410,59 +403,6 @@
 #endif
 }
 
-/* store value base base + scaled index. */
-LIR *storeBaseIndexed(CompilationUnit *cUnit, int rBase,
-                                int rIndex, int rSrc, int scale, OpSize size)
-{
-    UNIMPLEMENTED(WARNING) << "storeBaseIndexed";
-    return NULL;
-#if 0
-    LIR *first = NULL;
-    LIR *res;
-    X86OpCode opcode = kX86Nop;
-    int rNewIndex = rIndex;
-    int tReg = oatAllocTemp(cUnit);
-
-    if (FPREG(rSrc)) {
-        DCHECK(SINGLEREG(rSrc));
-        DCHECK((size == kWord) || (size == kSingle));
-        size = kSingle;
-    } else {
-        if (size == kSingle)
-            size = kWord;
-    }
-
-    if (!scale) {
-        first = newLIR3(cUnit, kX86Addu, tReg , rBase, rIndex);
-    } else {
-        first = opRegRegImm(cUnit, kOpLsl, tReg, rIndex, scale);
-        newLIR3(cUnit, kX86Addu, tReg , rBase, tReg);
-    }
-
-    switch (size) {
-        case kSingle:
-            opcode = kX86Fswc1;
-            break;
-        case kWord:
-            opcode = kX86Sw;
-            break;
-        case kUnsignedHalf:
-        case kSignedHalf:
-            opcode = kX86Sh;
-            break;
-        case kUnsignedByte:
-        case kSignedByte:
-            opcode = kX86Sb;
-            break;
-        default:
-            LOG(FATAL) << "Bad case in storeBaseIndexed";
-    }
-    res = newLIR3(cUnit, opcode, rSrc, 0, tReg);
-    oatFreeTemp(cUnit, rNewIndex);
-    return first;
-#endif
-}
-
 LIR *loadMultiple(CompilationUnit *cUnit, int rBase, int rMask)
 {
     UNIMPLEMENTED(WARNING) << "loadMultiple";
@@ -686,6 +626,14 @@
   return store;
 }
 
+/* Store value at base + scaled index. */
+LIR *storeBaseIndexed(CompilationUnit *cUnit, int rBase, int rIndex, int rSrc, int scale,
+                      OpSize size)
+{
+  return storeBaseIndexedDisp(cUnit, NULL, rBase, rIndex, scale, 0,
+                              rSrc, INVALID_REG, size, INVALID_SREG);
+}
+
 LIR *storeBaseDisp(CompilationUnit *cUnit, int rBase, int displacement, int rSrc, OpSize size) {
     return storeBaseIndexedDisp(cUnit, NULL, rBase, INVALID_REG, 0, displacement,
                                 rSrc, INVALID_REG, size, INVALID_SREG);
diff --git a/src/compiler/codegen/x86/X86LIR.h b/src/compiler/codegen/x86/X86LIR.h
index 9b9fc6b..378c24d 100644
--- a/src/compiler/codegen/x86/X86LIR.h
+++ b/src/compiler/codegen/x86/X86LIR.h
@@ -436,6 +436,7 @@
     Binary0fOpCode(kX86Movdxr),     // move into xmm from gpr
     Binary0fOpCode(kX86Movdrx),     // move into reg from xmm
     kX86Set8R, kX86Set8M, kX86Set8A,// set byte depending on condition operand
+    kX86Mfence,                     // memory barrier
     Binary0fOpCode(kX86Imul16),     // 16bit multiply
     Binary0fOpCode(kX86Imul32),     // 32bit multiply
     Binary0fOpCode(kX86Movzx8),     // zero-extend 8-bit value
diff --git a/src/compiler/codegen/x86/x86/ArchVariant.cc b/src/compiler/codegen/x86/x86/ArchVariant.cc
index 944311c..2bb84d7 100644
--- a/src/compiler/codegen/x86/x86/ArchVariant.cc
+++ b/src/compiler/codegen/x86/x86/ArchVariant.cc
@@ -49,13 +49,11 @@
     return res;
 }
 
-void oatGenMemBarrier(CompilationUnit *cUnit, int barrierKind)
+void oatGenMemBarrier(CompilationUnit *cUnit, int /* barrierKind */)
 {
 #if ANDROID_SMP != 0
-    UNIMPLEMENTED(WARNING) << "oatGenMemBarrier";
-#if 0
-    newLIR1(cUnit, kX86Sync, barrierKind);
-#endif
+    // TODO: optimize fences
+    newLIR0(cUnit, kX86Mfence);
 #endif
 }
 
diff --git a/src/runtime_support.h b/src/runtime_support.h
index c74b9c5..00ca5bd 100644
--- a/src/runtime_support.h
+++ b/src/runtime_support.h
@@ -43,10 +43,10 @@
 void ObjectInitFromCode(Object* o);
 extern void LockObjectFromCode(Thread* thread, Object* obj);
 uint32_t TraceMethodUnwindFromCode(Thread* self);
-extern int CmpgDouble(double a, double b);
-extern int CmplDouble(double a, double b);
-extern int CmpgFloat(float a, float b);
-extern int CmplFloat(float a, float b);
+extern int32_t CmpgDouble(double a, double b);
+extern int32_t CmplDouble(double a, double b);
+extern int32_t CmpgFloat(float a, float b);
+extern int32_t CmplFloat(float a, float b);
 extern int64_t D2L(double d);
 extern int64_t F2L(float f);
 
@@ -106,14 +106,14 @@
   extern "C" void art_work_around_app_jni_bugs();
 
   /* Conversions */
-  extern "C" float __aeabi_i2f(int op1);             // INT_TO_FLOAT
-  extern "C" int __aeabi_f2iz(float op1);            // FLOAT_TO_INT
+  extern "C" float __aeabi_i2f(int32_t op1);         // INT_TO_FLOAT
+  extern "C" int32_t __aeabi_f2iz(float op1);        // FLOAT_TO_INT
   extern "C" float __aeabi_d2f(double op1);          // DOUBLE_TO_FLOAT
   extern "C" double __aeabi_f2d(float op1);          // FLOAT_TO_DOUBLE
-  extern "C" double __aeabi_i2d(int op1);            // INT_TO_DOUBLE
-  extern "C" int __aeabi_d2iz(double op1);           // DOUBLE_TO_INT
-  extern "C" float __aeabi_l2f(long op1);            // LONG_TO_FLOAT
-  extern "C" double __aeabi_l2d(long op1);           // LONG_TO_DOUBLE
+  extern "C" double __aeabi_i2d(int32_t op1);        // INT_TO_DOUBLE
+  extern "C" int32_t __aeabi_d2iz(double op1);       // DOUBLE_TO_INT
+  extern "C" float __aeabi_l2f(int64_t op1);         // LONG_TO_FLOAT
+  extern "C" double __aeabi_l2d(int64_t op1);        // LONG_TO_DOUBLE
 
   /* Single-precision FP arithmetics */
   extern "C" float __aeabi_fadd(float a, float b);   // ADD_FLOAT[_2ADDR]
@@ -130,8 +130,8 @@
   extern "C" double fmod(double a, double b);         // REM_DOUBLE[_2ADDR]
 
   /* Integer arithmetics */
-  extern "C" int __aeabi_idivmod(int op1, int op2);  // REM_INT[_2ADDR|_LIT8|_LIT16]
-  extern "C" int __aeabi_idiv(int op1, int op2);     // DIV_INT[_2ADDR|_LIT8|_LIT16]
+  extern "C" int __aeabi_idivmod(int32_t op1, int32_t op2);  // REM_INT[_2ADDR|_LIT8|_LIT16]
+  extern "C" int __aeabi_idiv(int32_t op1, int32_t op2);     // DIV_INT[_2ADDR|_LIT8|_LIT16]
 
   /* Long long arithmetics - REM_LONG[_2ADDR] and DIV_LONG[_2ADDR] */
   extern "C" long long __aeabi_ldivmod(long long op1, long long op2);
@@ -144,16 +144,16 @@
 
 #if defined(__mips__)
   /* Conversions */
-  extern "C" float __floatsisf(int op1);             // INT_TO_FLOAT
-  extern "C" int __fixsfsi(float op1);               // FLOAT_TO_INT
-  extern "C" float __truncdfsf2(double op1);         // DOUBLE_TO_FLOAT
-  extern "C" double __extendsfdf2(float op1);        // FLOAT_TO_DOUBLE
-  extern "C" double __floatsidf(int op1);            // INT_TO_DOUBLE
-  extern "C" int __fixdfsi(double op1);              // DOUBLE_TO_INT
-  extern "C" float __floatdisf(long long op1);       // LONG_TO_FLOAT
-  extern "C" double __floatdidf(long long op1);      // LONG_TO_DOUBLE
-  extern "C" long long __fixsfdi(float op1);         // FLOAT_TO_LONG
-  extern "C" long long __fixdfdi(double op1);        // DOUBLE_TO_LONG
+  extern "C" float __floatsisf(int op1);        // INT_TO_FLOAT
+  extern "C" int32_t __fixsfsi(float op1);      // FLOAT_TO_INT
+  extern "C" float __truncdfsf2(double op1);    // DOUBLE_TO_FLOAT
+  extern "C" double __extendsfdf2(float op1);   // FLOAT_TO_DOUBLE
+  extern "C" double __floatsidf(int op1);       // INT_TO_DOUBLE
+  extern "C" int32_t __fixdfsi(double op1);     // DOUBLE_TO_INT
+  extern "C" float __floatdisf(int64_t op1);    // LONG_TO_FLOAT
+  extern "C" double __floatdidf(int64_t op1);   // LONG_TO_DOUBLE
+  extern "C" int64_t __fixsfdi(float op1);      // FLOAT_TO_LONG
+  extern "C" int64_t __fixdfdi(double op1);     // DOUBLE_TO_LONG
 
   /* Single-precision FP arithmetics */
   extern "C" float __addsf3(float a, float b);   // ADD_FLOAT[_2ADDR]
@@ -170,8 +170,8 @@
   extern "C" double fmod(double a, double b);     // REM_DOUBLE[_2ADDR]
 
   /* Long long arithmetics - REM_LONG[_2ADDR] and DIV_LONG[_2ADDR] */
-  extern "C" long long __divdi3(long long op1, long long op2);
-  extern "C" long long __moddi3(long long op1, long long op2);
+  extern "C" long long __divdi3(int64_t op1, int64_t op2);
+  extern "C" long long __moddi3(int64_t op1, int64_t op2);
 #endif
 
 #endif  // ART_SRC_RUNTIME_SUPPORT_H_
diff --git a/src/thread.cc b/src/thread.cc
index e1a8946..353f94d 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -121,6 +121,11 @@
   pDdiv = __aeabi_ddiv;
   pDmul = __aeabi_dmul;
   pFmod = fmod;
+  pLadd = NULL;
+  pLsub = NULL;
+  pLand = NULL;
+  pLor = NULL;
+  pLxor = NULL;
   pLdivmod = __aeabi_ldivmod;
   pLmul = __aeabi_lmul;
   pAllocArrayFromCode = art_alloc_array_from_code;
@@ -189,6 +194,11 @@
   pDdiv = NULL;
   pDmul = NULL;
   pFmod = NULL;
+  pLadd = NULL;
+  pLsub = NULL;
+  pLand = NULL;
+  pLor = NULL;
+  pLxor = NULL;
   pLdivmod = NULL;
   pLmul = NULL;
   pAllocArrayFromCode = NULL;
diff --git a/src/thread.h b/src/thread.h
index 51385f9..c35d26a 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -586,10 +586,10 @@
   double (*pF2d)(float);
   double (*pI2d)(int);
   int (*pD2iz)(double);
-  float (*pL2f)(long);
-  double (*pL2d)(long);
-  long long (*pF2l)(float);
-  long long (*pD2l)(double);
+  float (*pL2f)(int64_t);
+  double (*pL2d)(int64_t);
+  int64_t (*pF2l)(float);
+  int64_t (*pD2l)(double);
   float (*pFadd)(float, float);
   float (*pFsub)(float, float);
   float (*pFdiv)(float, float);
@@ -602,8 +602,13 @@
   double (*pFmod)(double, double);
   int (*pIdivmod)(int, int);
   int (*pIdiv)(int, int);
-  long long (*pLmul)(long long, long long);
-  long long (*pLdivmod)(long long, long long);
+  int64_t (*pLadd)(int64_t, int64_t);
+  int64_t (*pLsub)(int64_t, int64_t);
+  int64_t (*pLand)(int64_t, int64_t);
+  int64_t (*pLor)(int64_t, int64_t);
+  int64_t (*pLxor)(int64_t, int64_t);
+  int64_t (*pLmul)(int64_t, int64_t);
+  int64_t (*pLdivmod)(int64_t, int64_t);
   void (*pCheckSuspendFromCode)(Thread*);  // Stub that is called when the suspend count is non-zero
   void (*pTestSuspendFromCode)();  // Stub that is periodically called to test the suspend count
   void* (*pAllocObjectFromCode)(uint32_t, void*);