Clean up the codegen for invoking helper callout functions.

All invoked functions are documented in compiler/codegen/arm/CalloutHelper.h
Bug: 2567981

Change-Id: Ia7cd4107272df1b0b5588fbcc0aafcc6d0723d60
diff --git a/vm/compiler/Utility.c b/vm/compiler/Utility.c
index b2654c4..cbafb79 100644
--- a/vm/compiler/Utility.c
+++ b/vm/compiler/Utility.c
@@ -265,7 +265,6 @@
     return true;
 }
 
-/* FIXME */
 void dvmDebugBitVector(char *msg, const BitVector *bv, int length)
 {
     int i;
diff --git a/vm/compiler/codegen/arm/CalloutHelper.h b/vm/compiler/codegen/arm/CalloutHelper.h
new file mode 100644
index 0000000..f6d5f4e
--- /dev/null
+++ b/vm/compiler/codegen/arm/CalloutHelper.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Dalvik.h"
+
+#ifndef _DALVIK_VM_COMPILER_CODEGEN_ARM_CALLOUT_HELPER_H
+#define _DALVIK_VM_COMPILER_CODEGEN_ARM_CALLOUT_HELPER_H
+
+/*
+ * Declare/comment prototypes of all native callout functions invoked by the
+ * JIT'ed code here and use the LOAD_FUNC_ADDR macro to load the address into
+ * a register. In this way we have a centralized place to find out all native
+ * helper functions and we can grep for LOAD_FUNC_ADDR to find out all the
+ * callsites.
+ */
+
+/* Load a statically compiled function address as a constant */
+#define LOAD_FUNC_ADDR(cUnit, reg, addr) loadConstant(cUnit, reg, addr)
+
+/* Conversions */
+float __aeabi_i2f(int op1);             // OP_INT_TO_FLOAT
+int __aeabi_f2iz(float op1);            // OP_FLOAT_TO_INT
+float __aeabi_d2f(double op1);          // OP_DOUBLE_TO_FLOAT
+double __aeabi_f2d(float op1);          // OP_FLOAT_TO_DOUBLE
+double __aeabi_i2d(int op1);            // OP_INT_TO_DOUBLE
+int __aeabi_d2iz(double op1);           // OP_DOUBLE_TO_INT
+float __aeabi_l2f(long op1);            // OP_LONG_TO_FLOAT
+double __aeabi_l2d(long op1);           // OP_LONG_TO_DOUBLE
+s8 dvmJitf2l(float op1);                // OP_FLOAT_TO_LONG
+s8 dvmJitd2l(double op1);               // OP_DOUBLE_TO_LONG
+
+/* Single-precision FP arithmetic */
+float __aeabi_fadd(float a, float b);   // OP_ADD_FLOAT[_2ADDR]
+float __aeabi_fsub(float a, float b);   // OP_SUB_FLOAT[_2ADDR]
+float __aeabi_fdiv(float a, float b);   // OP_DIV_FLOAT[_2ADDR]
+float __aeabi_fmul(float a, float b);   // OP_MUL_FLOAT[_2ADDR]
+float fmodf(float a, float b);          // OP_REM_FLOAT[_2ADDR]
+
+/* Double-precision FP arithmetic */
+double __aeabi_dadd(double a, double b); // OP_ADD_DOUBLE[_2ADDR]
+double __aeabi_dsub(double a, double b); // OP_SUB_DOUBLE[_2ADDR]
+double __aeabi_ddiv(double a, double b); // OP_DIV_DOUBLE[_2ADDR]
+double __aeabi_dmul(double a, double b); // OP_MUL_DOUBLE[_2ADDR]
+double fmod(double a, double b);         // OP_REM_DOUBLE[_2ADDR]
+
+/* Integer arithmetic */
+int __aeabi_idivmod(int op1, int op2);  // OP_REM_INT[_2ADDR|_LIT8|_LIT16]
+int __aeabi_idiv(int op1, int op2);     // OP_DIV_INT[_2ADDR|_LIT8|_LIT16]
+
+/* Long long arithmetic - OP_REM_LONG[_2ADDR] & OP_DIV_LONG[_2ADDR] */
+long long __aeabi_ldivmod(long long op1, long long op2);
+
+/* Originally declared in Sync.h */
+bool dvmUnlockObject(struct Thread* self, struct Object* obj); //OP_MONITOR_EXIT
+
+/* Originally declared in oo/TypeCheck.h */
+bool dvmCanPutArrayElement(const ClassObject* elemClass,   // OP_APUT_OBJECT
+                           const ClassObject* arrayClass);
+int dvmInstanceofNonTrivial(const ClassObject* instance,   // OP_CHECK_CAST &&
+                            const ClassObject* clazz);     // OP_INSTANCE_OF
+
+/* Originally declared in oo/Array.h */
+ArrayObject* dvmAllocArrayByClass(ClassObject* arrayClass, // OP_NEW_ARRAY
+                                  size_t length, int allocFlags);
+
+/* Originally declared in interp/InterpDefs.h */
+bool dvmInterpHandleFillArrayData(ArrayObject* arrayObject,// OP_FILL_ARRAY_DATA
+                                  const u2* arrayData);
+
+/* Switch dispatch offset calculation for OP_PACKED_SWITCH & OP_SPARSE_SWITCH */
+static s8 findPackedSwitchIndex(const u2* switchData, int testVal, int pc);
+static s8 findSparseSwitchIndex(const u2* switchData, int testVal, int pc);
+
+/*
+ * Resolve interface callsites - OP_INVOKE_INTERFACE & OP_INVOKE_INTERFACE_RANGE
+ *
+ * Originally declared in mterp/common/FindInterface.h; it is only mentioned in
+ * a comment here because of its INLINE attribute.
+ *
+ * INLINE Method* dvmFindInterfaceMethodInCache(ClassObject* thisClass,
+ *  u4 methodIdx, const Method* method, DvmDex* methodClassDex)
+ */
+
+/* Originally declared in alloc/Alloc.h */
+Object* dvmAllocObject(ClassObject* clazz, int flags);  // OP_NEW_INSTANCE
+
+/*
+ * Functions declared in gDvmInlineOpsTable[] are used for
+ * OP_EXECUTE_INLINE & OP_EXECUTE_INLINE_RANGE.
+ *
+ *      org_apache_harmony_dalvik_NativeTestTarget_emptyInlineMethod
+ *      javaLangString_charAt
+ *      javaLangString_compareTo
+ *      javaLangString_equals
+ *      javaLangString_indexOf_I
+ *      javaLangString_indexOf_II
+ *      javaLangString_length
+ *      javaLangMath_abs_int
+ *      javaLangMath_abs_long
+ *      javaLangMath_abs_float
+ *      javaLangMath_abs_double
+ *      javaLangMath_min_int
+ *      javaLangMath_max_int
+ *      javaLangMath_sqrt
+ *      javaLangMath_cos
+ *      javaLangMath_sin
+ */
+double sqrt(double x);  // INLINE_MATH_SQRT
+
+/*
+ * The following functions are invoked through the compiler templates (declared
+ * in compiler/template/armv5te/footer.S):
+ *
+ *      __aeabi_cdcmple         // CMPG_DOUBLE
+ *      __aeabi_cfcmple         // CMPG_FLOAT
+ *      dvmLockObject           // MONITOR_ENTER
+ */
+
+#endif /* _DALVIK_VM_COMPILER_CODEGEN_ARM_CALLOUT_HELPER_H */
diff --git a/vm/compiler/codegen/arm/Codegen.h b/vm/compiler/codegen/arm/Codegen.h
index 8a340e5..da65bb5 100644
--- a/vm/compiler/codegen/arm/Codegen.h
+++ b/vm/compiler/codegen/arm/Codegen.h
@@ -23,6 +23,7 @@
  */
 
 #include "compiler/CompilerIR.h"
+#include "CalloutHelper.h"
 
 /*
  * loadConstant() sometimes needs to add a small imm to a pre-existing constant
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index 695f18c..0a59ea4 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -41,7 +41,7 @@
         rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1);
         loadValueDirectWideFixed(cUnit, rlSrc, r0, r1);
     }
-    loadConstant(cUnit, r2, (int)funct);
+    LOAD_FUNC_ADDR(cUnit, r2, (int)funct);
     opReg(cUnit, kOpBlx, r2);
     dvmCompilerClobberCallRegs(cUnit);
     if (tgtSize == 1) {
@@ -66,13 +66,6 @@
     RegLocation rlResult;
     void* funct;
 
-    /* TODO: use a proper include file to define these */
-    float __aeabi_fadd(float a, float b);
-    float __aeabi_fsub(float a, float b);
-    float __aeabi_fdiv(float a, float b);
-    float __aeabi_fmul(float a, float b);
-    float fmodf(float a, float b);
-
     switch (mir->dalvikInsn.opCode) {
         case OP_ADD_FLOAT_2ADDR:
         case OP_ADD_FLOAT:
@@ -104,7 +97,7 @@
     dvmCompilerFlushAllRegs(cUnit);   /* Send everything to home location */
     loadValueDirectFixed(cUnit, rlSrc1, r0);
     loadValueDirectFixed(cUnit, rlSrc2, r1);
-    loadConstant(cUnit, r2, (int)funct);
+    LOAD_FUNC_ADDR(cUnit, r2, (int)funct);
     opReg(cUnit, kOpBlx, r2);
     dvmCompilerClobberCallRegs(cUnit);
     rlResult = dvmCompilerGetReturn(cUnit);
@@ -119,13 +112,6 @@
     RegLocation rlResult;
     void* funct;
 
-    /* TODO: use a proper include file to define these */
-    double __aeabi_dadd(double a, double b);
-    double __aeabi_dsub(double a, double b);
-    double __aeabi_ddiv(double a, double b);
-    double __aeabi_dmul(double a, double b);
-    double fmod(double a, double b);
-
     switch (mir->dalvikInsn.opCode) {
         case OP_ADD_DOUBLE_2ADDR:
         case OP_ADD_DOUBLE:
@@ -155,7 +141,7 @@
             return true;
     }
     dvmCompilerFlushAllRegs(cUnit);   /* Send everything to home location */
-    loadConstant(cUnit, rlr, (int)funct);
+    LOAD_FUNC_ADDR(cUnit, rlr, (int)funct);
     loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
     loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
     opReg(cUnit, kOpBlx, rlr);
@@ -169,17 +155,6 @@
 {
     OpCode opCode = mir->dalvikInsn.opCode;
 
-    float  __aeabi_i2f(  int op1 );
-    int    __aeabi_f2iz( float op1 );
-    float  __aeabi_d2f(  double op1 );
-    double __aeabi_f2d(  float op1 );
-    double __aeabi_i2d(  int op1 );
-    int    __aeabi_d2iz( double op1 );
-    float  __aeabi_l2f(  long op1 );
-    double __aeabi_l2d(  long op1 );
-    s8 dvmJitf2l( float op1 );
-    s8 dvmJitd2l( double op1 );
-
     switch (opCode) {
         case OP_INT_TO_FLOAT:
             return genConversionCall(cUnit, mir, (void*)__aeabi_i2f, 1, 1);
@@ -536,7 +511,7 @@
 
     /* Get object to store */
     loadValueDirectFixed(cUnit, rlSrc, r0);
-    loadConstant(cUnit, r2, (int)dvmCanPutArrayElement);
+    LOAD_FUNC_ADDR(cUnit, r2, (int)dvmCanPutArrayElement);
 
     /* Are we storing null?  If so, avoid check */
     opRegImm(cUnit, kOpCmp, r0, 0);
@@ -616,8 +591,6 @@
     bool callOut = false;
     void *callTgt;
     int retReg = r0;
-    /* TODO - find proper .h file to declare these */
-    long long __aeabi_ldivmod(long long op1, long long op2);
 
     switch (mir->dalvikInsn.opCode) {
         case OP_NOT_LONG:
@@ -675,7 +648,7 @@
             int tReg = dvmCompilerAllocTemp(cUnit);
             rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
             rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
-            loadConstantValue(cUnit, tReg, 0);
+            loadConstantNoClobber(cUnit, tReg, 0);
             opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
                         tReg, rlSrc2.lowReg);
             opRegReg(cUnit, kOpSbc, tReg, rlSrc2.highReg);
@@ -693,7 +666,7 @@
         // Adjust return regs in to handle case of rem returning r2/r3
         dvmCompilerFlushAllRegs(cUnit);   /* Send everything to home location */
         loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
-        loadConstant(cUnit, rlr, (int) callTgt);
+        LOAD_FUNC_ADDR(cUnit, rlr, (int) callTgt);
         loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
         opReg(cUnit, kOpBlx, rlr);
         dvmCompilerClobberCallRegs(cUnit);
@@ -719,10 +692,6 @@
     RegLocation rlResult;
     bool shiftOp = false;
 
-    /* TODO - find proper .h file to declare these */
-    int __aeabi_idivmod(int op1, int op2);
-    int __aeabi_idiv(int op1, int op2);
-
     switch (mir->dalvikInsn.opCode) {
         case OP_NEG_INT:
             op = kOpNeg;
@@ -817,7 +786,7 @@
         RegLocation rlResult;
         dvmCompilerFlushAllRegs(cUnit);   /* Send everything to home location */
         loadValueDirectFixed(cUnit, rlSrc2, r1);
-        loadConstant(cUnit, r2, (int) callTgt);
+        LOAD_FUNC_ADDR(cUnit, r2, (int) callTgt);
         loadValueDirectFixed(cUnit, rlSrc1, r0);
         if (checkZero) {
             genNullCheck(cUnit, rlSrc2.sRegLow, r1, mir->offset, NULL);
@@ -1340,7 +1309,7 @@
         genDispatchToHandler(cUnit, TEMPLATE_MONITOR_ENTER);
 #endif
     } else {
-        loadConstant(cUnit, r2, (int)dvmUnlockObject);
+        LOAD_FUNC_ADDR(cUnit, r2, (int)dvmUnlockObject);
         /* Do the call */
         opReg(cUnit, kOpBlx, r2);
         opRegImm(cUnit, kOpCmp, r0, 0); /* Did we throw? */
@@ -1409,7 +1378,7 @@
         case OP_CONST:
         case OP_CONST_4: {
             rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true);
-            loadConstantValue(cUnit, rlResult.lowReg, mir->dalvikInsn.vB);
+            loadConstantNoClobber(cUnit, rlResult.lowReg, mir->dalvikInsn.vB);
             storeValue(cUnit, rlDest, rlResult);
             break;
         }
@@ -1417,7 +1386,7 @@
             //TUNING: single routine to load constant pair for support doubles
             //TUNING: load 0/-1 separately to avoid load dependency
             rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
-            loadConstantValue(cUnit, rlResult.lowReg, mir->dalvikInsn.vB);
+            loadConstantNoClobber(cUnit, rlResult.lowReg, mir->dalvikInsn.vB);
             opRegRegImm(cUnit, kOpAsr, rlResult.highReg,
                         rlResult.lowReg, 31);
             storeValueWide(cUnit, rlDest, rlResult);
@@ -1442,7 +1411,8 @@
 
     switch (mir->dalvikInsn.opCode) {
         case OP_CONST_HIGH16: {
-            loadConstantValue(cUnit, rlResult.lowReg, mir->dalvikInsn.vB << 16);
+            loadConstantNoClobber(cUnit, rlResult.lowReg,
+                                  mir->dalvikInsn.vB << 16);
             storeValue(cUnit, rlDest, rlResult);
             break;
         }
@@ -1479,7 +1449,7 @@
             assert(strPtr != NULL);
             rlDest = dvmCompilerGetDest(cUnit, mir, 0);
             rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
-            loadConstantValue(cUnit, rlResult.lowReg, (int) strPtr );
+            loadConstantNoClobber(cUnit, rlResult.lowReg, (int) strPtr );
             storeValue(cUnit, rlDest, rlResult);
             break;
         }
@@ -1489,7 +1459,7 @@
             assert(classPtr != NULL);
             rlDest = dvmCompilerGetDest(cUnit, mir, 0);
             rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
-            loadConstantValue(cUnit, rlResult.lowReg, (int) classPtr );
+            loadConstantNoClobber(cUnit, rlResult.lowReg, (int) classPtr );
             storeValue(cUnit, rlDest, rlResult);
             break;
         }
@@ -1586,7 +1556,7 @@
             assert((classPtr->accessFlags & (ACC_INTERFACE|ACC_ABSTRACT)) == 0);
             dvmCompilerFlushAllRegs(cUnit);   /* Everything to home location */
             genExportPC(cUnit, mir);
-            loadConstant(cUnit, r2, (int)dvmAllocObject);
+            LOAD_FUNC_ADDR(cUnit, r2, (int)dvmAllocObject);
             loadConstant(cUnit, r0, (int) classPtr);
             loadConstant(cUnit, r1, ALLOC_DONT_TRACK);
             opReg(cUnit, kOpBlx, r2);
@@ -1644,7 +1614,7 @@
              */
             /* r0 now contains object->clazz */
             loadWordDisp(cUnit, rlSrc.lowReg, offsetof(Object, clazz), r0);
-            loadConstant(cUnit, r2, (int)dvmInstanceofNonTrivial);
+            LOAD_FUNC_ADDR(cUnit, r2, (int)dvmInstanceofNonTrivial);
             opRegReg(cUnit, kOpCmp, r0, r1);
             ArmLIR *branch2 = opCondBranch(cUnit, kArmCondEq);
             opReg(cUnit, kOpBlx, r2);
@@ -1847,14 +1817,14 @@
     if (dalvikOpCode == OP_CONST_WIDE_16) {
         rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1);
         rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
-        loadConstantValue(cUnit, rlResult.lowReg, BBBB);
+        loadConstantNoClobber(cUnit, rlResult.lowReg, BBBB);
         //TUNING: do high separately to avoid load dependency
         opRegRegImm(cUnit, kOpAsr, rlResult.highReg, rlResult.lowReg, 31);
         storeValueWide(cUnit, rlDest, rlResult);
     } else if (dalvikOpCode == OP_CONST_16) {
         rlDest = dvmCompilerGetDest(cUnit, mir, 0);
         rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true);
-        loadConstantValue(cUnit, rlResult.lowReg, BBBB);
+        loadConstantNoClobber(cUnit, rlResult.lowReg, BBBB);
         storeValue(cUnit, rlDest, rlResult);
     } else
         return true;
@@ -1984,9 +1954,6 @@
     int shiftOp = false;
     bool isDiv = false;
 
-    int __aeabi_idivmod(int op1, int op2);
-    int __aeabi_idiv(int op1, int op2);
-
     switch (dalvikOpCode) {
         case OP_RSUB_INT_LIT8:
         case OP_RSUB_INT: {
@@ -2057,10 +2024,10 @@
             dvmCompilerClobber(cUnit, r0);
             if ((dalvikOpCode == OP_DIV_INT_LIT8) ||
                 (dalvikOpCode == OP_DIV_INT_LIT16)) {
-                loadConstant(cUnit, r2, (int)__aeabi_idiv);
+                LOAD_FUNC_ADDR(cUnit, r2, (int)__aeabi_idiv);
                 isDiv = true;
             } else {
-                loadConstant(cUnit, r2, (int)__aeabi_idivmod);
+                LOAD_FUNC_ADDR(cUnit, r2, (int)__aeabi_idivmod);
                 isDiv = false;
             }
             loadConstant(cUnit, r1, lit);
@@ -2116,7 +2083,7 @@
             genExportPC(cUnit, mir);
             loadValueDirectFixed(cUnit, rlSrc, r1);   /* Len */
             loadConstant(cUnit, r0, (int) classPtr );
-            loadConstant(cUnit, r3, (int)dvmAllocArrayByClass);
+            LOAD_FUNC_ADDR(cUnit, r3, (int)dvmAllocArrayByClass);
             /*
              * "len < 0": bail to the interpreter to re-execute the
              * instruction
@@ -2174,7 +2141,7 @@
             /* r1 now contains object->clazz */
             loadWordDisp(cUnit, r0, offsetof(Object, clazz), r1);
             /* r1 now contains object->clazz */
-            loadConstant(cUnit, r3, (int)dvmInstanceofNonTrivial);
+            LOAD_FUNC_ADDR(cUnit, r3, (int)dvmInstanceofNonTrivial);
             loadConstant(cUnit, r0, 1);                /* Assume true */
             opRegReg(cUnit, kOpCmp, r1, r2);
             ArmLIR *branch2 = opCondBranch(cUnit, kArmCondEq);
@@ -2440,7 +2407,7 @@
  * chaining cell for case default [8 bytes]
  * noChain exit
  */
-s8 findPackedSwitchIndex(const u2* switchData, int testVal, int pc)
+static s8 findPackedSwitchIndex(const u2* switchData, int testVal, int pc)
 {
     int size;
     int firstKey;
@@ -2492,7 +2459,7 @@
 }
 
 /* See comments for findPackedSwitchIndex */
-s8 findSparseSwitchIndex(const u2* switchData, int testVal, int pc)
+static s8 findSparseSwitchIndex(const u2* switchData, int testVal, int pc)
 {
     int size;
     const int *keys;
@@ -2557,7 +2524,7 @@
             dvmCompilerFlushAllRegs(cUnit);   /* Everything to home location */
             genExportPC(cUnit, mir);
             loadValueDirectFixed(cUnit, rlSrc, r0);
-            loadConstant(cUnit, r2, (int)dvmInterpHandleFillArrayData);
+            LOAD_FUNC_ADDR(cUnit, r2, (int)dvmInterpHandleFillArrayData);
             loadConstant(cUnit, r1,
                (int) (cUnit->method->insns + mir->offset + mir->dalvikInsn.vB));
             opReg(cUnit, kOpBlx, r2);
@@ -2589,9 +2556,9 @@
             u2 size = switchData[1];
 
             if (dalvikOpCode == OP_PACKED_SWITCH) {
-                loadConstant(cUnit, r4PC, (int)findPackedSwitchIndex);
+                LOAD_FUNC_ADDR(cUnit, r4PC, (int)findPackedSwitchIndex);
             } else {
-                loadConstant(cUnit, r4PC, (int)findSparseSwitchIndex);
+                LOAD_FUNC_ADDR(cUnit, r4PC, (int)findSparseSwitchIndex);
             }
             /* r0 <- Addr of the switch data */
             loadConstant(cUnit, r0,
@@ -2648,7 +2615,6 @@
          * calleeMethod = method->clazz->super->vtable[method->clazz->pDvmDex
          *                ->pResMethods[BBBB]->methodIndex]
          */
-        /* TODO - not excersized in RunPerf.jar */
         case OP_INVOKE_SUPER:
         case OP_INVOKE_SUPER_RANGE: {
             int mIndex = cUnit->method->clazz->pDvmDex->
@@ -2847,8 +2813,8 @@
             /* r3 = pDvmDex */
             loadConstant(cUnit, r3, (int) cUnit->method->clazz->pDvmDex);
 
-            loadConstant(cUnit, r7,
-                         (intptr_t) dvmFindInterfaceMethodInCache);
+            LOAD_FUNC_ADDR(cUnit, r7,
+                           (intptr_t) dvmFindInterfaceMethodInCache);
             opReg(cUnit, kOpBlx, r7);
 
             /* r0 = calleeMethod (returned from dvmFindInterfaceMethodInCache */
@@ -3174,7 +3140,7 @@
             dvmCompilerClobber(cUnit, r7);
             opRegRegImm(cUnit, kOpAdd, r4PC, rGLUE, offset);
             opImm(cUnit, kOpPush, (1<<r4PC) | (1<<r7));
-            loadConstant(cUnit, r4PC, (int)inLineTable[operation].func);
+            LOAD_FUNC_ADDR(cUnit, r4PC, (int)inLineTable[operation].func);
             genExportPC(cUnit, mir);
             for (i=0; i < dInsn->vA; i++) {
                 loadValueDirect(cUnit, dvmCompilerGetSrc(cUnit, mir, i), i);
@@ -3202,10 +3168,10 @@
     //TUNING: We're using core regs here - not optimal when target is a double
     RegLocation rlDest = dvmCompilerGetDestWide(cUnit, mir, 0, 1);
     RegLocation rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
-    loadConstantValue(cUnit, rlResult.lowReg,
-                      mir->dalvikInsn.vB_wide & 0xFFFFFFFFUL);
-    loadConstantValue(cUnit, rlResult.highReg,
-                      (mir->dalvikInsn.vB_wide>>32) & 0xFFFFFFFFUL);
+    loadConstantNoClobber(cUnit, rlResult.lowReg,
+                          mir->dalvikInsn.vB_wide & 0xFFFFFFFFUL);
+    loadConstantNoClobber(cUnit, rlResult.highReg,
+                          (mir->dalvikInsn.vB_wide>>32) & 0xFFFFFFFFUL);
     storeValueWide(cUnit, rlDest, rlResult);
     return false;
 }
diff --git a/vm/compiler/codegen/arm/FP/Thumb2VFP.c b/vm/compiler/codegen/arm/FP/Thumb2VFP.c
index 42d0657..9149646 100644
--- a/vm/compiler/codegen/arm/FP/Thumb2VFP.c
+++ b/vm/compiler/codegen/arm/FP/Thumb2VFP.c
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-#include <math.h>  // for double sqrt(double)
-
 static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir,
                             RegLocation rlDest, RegLocation rlSrc1,
                             RegLocation rlSrc2)
@@ -197,7 +195,7 @@
     newLIR0(cUnit, kThumb2Fmstat);
     branch = newLIR2(cUnit, kThumbBCond, 0, kArmCondEq);
     dvmCompilerClobberCallRegs(cUnit);
-    loadConstant(cUnit, r2, (int)sqrt);
+    LOAD_FUNC_ADDR(cUnit, r2, (int)sqrt);
     newLIR3(cUnit, kThumb2Fmrrd, r0, r1, S2D(rlSrc.lowReg, rlSrc.highReg));
     newLIR1(cUnit, kThumbBlxR, r2);
     newLIR3(cUnit, kThumb2Fmdrr, S2D(rlResult.lowReg, rlResult.highReg),
diff --git a/vm/compiler/codegen/arm/FP/ThumbVFP.c b/vm/compiler/codegen/arm/FP/ThumbVFP.c
index adf5fa1..39db549 100644
--- a/vm/compiler/codegen/arm/FP/ThumbVFP.c
+++ b/vm/compiler/codegen/arm/FP/ThumbVFP.c
@@ -19,7 +19,6 @@
  * variant-specific code.
  */
 
-/* FIXME */
 extern void dvmCompilerFlushRegWideForV5TEVFP(CompilationUnit *cUnit,
                                               int reg1, int reg2);
 extern void dvmCompilerFlushRegForV5TEVFP(CompilationUnit *cUnit, int reg);
diff --git a/vm/compiler/codegen/arm/Thumb/Factory.c b/vm/compiler/codegen/arm/Thumb/Factory.c
index ba64922..4c010c6 100644
--- a/vm/compiler/codegen/arm/Thumb/Factory.c
+++ b/vm/compiler/codegen/arm/Thumb/Factory.c
@@ -25,7 +25,6 @@
 static int coreTemps[] = {r0, r1, r2, r3, r4PC, r7};
 static int corePreserved[] = {};
 
-/* FIXME - circular dependency */
 static void storePair(CompilationUnit *cUnit, int base, int lowReg,
                       int highReg);
 static void loadPair(CompilationUnit *cUnit, int base, int lowReg, int highReg);
@@ -43,8 +42,13 @@
  * Load a immediate using a shortcut if possible; otherwise
  * grab from the per-translation literal pool.  If target is
  * a high register, build constant into a low register and copy.
+ *
+ * No additional register clobbering is performed. Use this version when
+ * 1) rDest is freshly returned from dvmCompilerAllocTemp, or
+ * 2) the codegen is under fixed register usage.
  */
-static ArmLIR *loadConstantValue(CompilationUnit *cUnit, int rDest, int value)
+static ArmLIR *loadConstantNoClobber(CompilationUnit *cUnit, int rDest,
+                                     int value)
 {
     ArmLIR *res;
     int tDest = LOWREG(rDest) ? rDest : dvmCompilerAllocTemp(cUnit);
@@ -113,7 +117,7 @@
         dvmCompilerClobber(cUnit, rDest);
         dvmCompilerMarkInUse(cUnit, rDest);
     }
-    return loadConstantValue(cUnit, rDest, value);
+    return loadConstantNoClobber(cUnit, rDest, value);
 }
 
 static ArmLIR *opNone(CompilationUnit *cUnit, OpKind op)
@@ -436,8 +440,8 @@
                                      int rDestHi, int valLo, int valHi)
 {
     ArmLIR *res;
-    res = loadConstantValue(cUnit, rDestLo, valLo);
-    loadConstantValue(cUnit, rDestHi, valHi);
+    res = loadConstantNoClobber(cUnit, rDestLo, valLo);
+    loadConstantNoClobber(cUnit, rDestHi, valHi);
     return res;
 }
 
diff --git a/vm/compiler/codegen/arm/Thumb2/Factory.c b/vm/compiler/codegen/arm/Thumb2/Factory.c
index eb36193..0141a0f 100644
--- a/vm/compiler/codegen/arm/Thumb2/Factory.c
+++ b/vm/compiler/codegen/arm/Thumb2/Factory.c
@@ -132,8 +132,13 @@
 /*
  * Load a immediate using a shortcut if possible; otherwise
  * grab from the per-translation literal pool.
+ *
+ * No additional register clobbering is performed. Use this version when
+ * 1) rDest is freshly returned from dvmCompilerAllocTemp, or
+ * 2) the codegen is under fixed register usage.
  */
-static ArmLIR *loadConstantValue(CompilationUnit *cUnit, int rDest, int value)
+static ArmLIR *loadConstantNoClobber(CompilationUnit *cUnit, int rDest,
+                                     int value)
 {
     ArmLIR *res;
     int modImm;
@@ -206,7 +211,7 @@
         dvmCompilerClobber(cUnit, rDest);
         dvmCompilerMarkInUse(cUnit, rDest);
     }
-    return loadConstantValue(cUnit, rDest, value);
+    return loadConstantNoClobber(cUnit, rDest, value);
 }
 
 static ArmLIR *opNone(CompilationUnit *cUnit, OpKind op)
@@ -661,8 +666,8 @@
         res = newLIR2(cUnit, kThumb2Vmovd_IMM8, S2D(rDestLo, rDestHi),
                       encodedImm);
     } else {
-        res = loadConstantValue(cUnit, rDestLo, valLo);
-        loadConstantValue(cUnit, rDestHi, valHi);
+        res = loadConstantNoClobber(cUnit, rDestLo, valLo);
+        loadConstantNoClobber(cUnit, rDestHi, valHi);
     }
     return res;
 }
diff --git a/vm/compiler/codegen/arm/Thumb2/Gen.c b/vm/compiler/codegen/arm/Thumb2/Gen.c
index 93120d6..1782bec 100644
--- a/vm/compiler/codegen/arm/Thumb2/Gen.c
+++ b/vm/compiler/codegen/arm/Thumb2/Gen.c
@@ -284,7 +284,7 @@
     // Export PC (part 1)
     loadConstant(cUnit, r3, (int) (cUnit->method->insns + mir->offset));
 
-    loadConstant(cUnit, r7, (int)dvmUnlockObject);
+    LOAD_FUNC_ADDR(cUnit, r7, (int)dvmUnlockObject);
     // Export PC (part 2)
     newLIR3(cUnit, kThumb2StrRRI8Predec, r3, rFP,
             sizeof(StackSaveArea) -