Merge "MIPS switch table support" into dalvik-dev
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index 013fd1d..4c8bd44 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -482,7 +482,7 @@
     int offset;
     const u2* table;            // Original dex table
     int vaddr;                  // Dalvik offset of switch opcode
-    LIR* bxInst;                // Switch indirect branch instruction
+    LIR* anchor;                // Reference instruction for relative offsets
     LIR** targets;              // Array of case targets
 } SwitchTable;
 
diff --git a/src/compiler/codegen/CodegenFactory.cc b/src/compiler/codegen/CodegenFactory.cc
index a42ebc3..e467ea0 100644
--- a/src/compiler/codegen/CodegenFactory.cc
+++ b/src/compiler/codegen/CodegenFactory.cc
@@ -256,7 +256,6 @@
     storeBaseIndexed(cUnit, regCardBase, regCardNo, regCardBase, 0,
                      kUnsignedByte);
     LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
     branchOver->target = (LIR*)target;
     oatFreeTemp(cUnit, regCardBase);
     oatFreeTemp(cUnit, regCardNo);
diff --git a/src/compiler/codegen/CodegenUtil.cc b/src/compiler/codegen/CodegenUtil.cc
index cc61f78..8a38db4 100644
--- a/src/compiler/codegen/CodegenUtil.cc
+++ b/src/compiler/codegen/CodegenUtil.cc
@@ -421,6 +421,10 @@
     insn->opcode = opcode;
     setupResourceMasks(insn);
     insn->dalvikOffset = cUnit->currentDalvikOffset;
+    if (opcode == kPseudoTargetLabel) {
+        // Always make labels scheduling barriers
+        insn->defMask = ENCODE_ALL;
+    }
     oatAppendLIR(cUnit, (LIR*) insn);
     return insn;
 }
@@ -472,7 +476,6 @@
     return insn;
 }
 
-#if defined(TARGET_ARM)
 LIR* newLIR4(CompilationUnit* cUnit, int opcode,
                            int dest, int src1, int src2, int info)
 {
@@ -489,7 +492,6 @@
     oatAppendLIR(cUnit, (LIR*) insn);
     return insn;
 }
-#endif
 
 /*
  * Search the existing constants in the literal pool for an exact or close match
@@ -587,7 +589,17 @@
              &iterator);
         if (tabRec == NULL) break;
         alignBuffer(cUnit->codeBuffer, tabRec->offset);
-        int bxOffset = tabRec->bxInst->offset + 4;
+        /*
+         * For Arm, our reference point is the address of the bx
+         * instruction that does the launch, so we have to subtract
+         * the auto pc-advance.  For other targets the reference point
+         * is a label, so we can use the offset as-is.
+         */
+#if defined(TARGET_ARM)
+        int bxOffset = tabRec->anchor->offset + 4;
+#else
+        int bxOffset = tabRec->anchor->offset;
+#endif
         if (cUnit->printMe) {
             LOG(INFO) << "Switch table for offset 0x" << std::hex << bxOffset;
         }
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index 4d80a69..d0063fa 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -343,7 +343,6 @@
         loadConstant(cUnit, rIdx, dInsn->vA - 1);
         // Generate the copy loop.  Going backwards for convenience
         LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-        target->defMask = ENCODE_ALL;
         // Copy next element
         loadBaseIndexed(cUnit, rSrc, rIdx, rVal, 2, kWord);
         storeBaseIndexed(cUnit, rDst, rIdx, rVal, 2, kWord);
@@ -432,7 +431,6 @@
             opRegCopy(cUnit, rBase, rRET0);
 #endif
             LIR* skipTarget = newLIR0(cUnit, kPseudoTargetLabel);
-            skipTarget->defMask = ENCODE_ALL;
             branchOver->target = (LIR*)skipTarget;
         }
         // rBase now holds static storage base
@@ -539,7 +537,6 @@
             opRegCopy(cUnit, rBase, rRET0);
 #endif
             LIR* skipTarget = newLIR0(cUnit, kPseudoTargetLabel);
-            skipTarget->defMask = ENCODE_ALL;
             branchOver->target = (LIR*)skipTarget;
         }
         // rBase now holds static storage base
@@ -588,7 +585,6 @@
     loadWordDisp(cUnit, rSELF,
                  OFFSETOF_MEMBER(Thread, pDebugMe), rINVOKE_TGT);
     LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = -1;
     branchOver->target = (LIR*)target;
 }
 
@@ -608,6 +604,7 @@
     int numElems = cUnit->suspendLaunchpads.numUsed;
 
     for (int i = 0; i < numElems; i++) {
+        oatResetRegPool(cUnit);
         /* TUNING: move suspend count load into helper */
         LIR* lab = suspendLabel[i];
         LIR* resumeLab = (LIR*)lab->operands[0];
@@ -638,6 +635,7 @@
     int i;
 
     for (i = 0; i < numElems; i++) {
+        oatResetRegPool(cUnit);
         LIR* lab = throwLabel[i];
         cUnit->currentDalvikOffset = lab->operands[1];
         oatAppendLIR(cUnit, (LIR *)lab);
@@ -662,9 +660,6 @@
                         opRegCopy(cUnit, rTmp, v1);
                         opRegCopy(cUnit, rARG1, v2);
                         opRegCopy(cUnit, rARG0, rTmp);
-#if !(defined(TARGET_ARM))
-                        oatFreeTemp(cUnit, rTmp);
-#endif
                     } else {
                         opRegCopy(cUnit, rARG1, v2);
                         opRegCopy(cUnit, rARG0, v1);
@@ -873,7 +868,6 @@
             LIR* branch2 = opUnconditionalBranch(cUnit,0);
             // TUNING: move slow path to end & remove unconditional branch
             LIR* target1 = newLIR0(cUnit, kPseudoTargetLabel);
-            target1->defMask = ENCODE_ALL;
             // Call out to helper, which will return resolved type in rARG0
             int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread,
                                   pInitializeTypeFromCode));
@@ -884,7 +878,6 @@
             storeValue(cUnit, rlDest, rlResult);
             // Rejoin code paths
             LIR* target2 = newLIR0(cUnit, kPseudoTargetLabel);
-            target2->defMask = ENCODE_ALL;
             branch1->target = (LIR*)target1;
             branch2->target = (LIR*)target2;
         } else {
@@ -927,7 +920,6 @@
         opRegCopy(cUnit, rARG0, rARG2);   // .eq
         opReg(cUnit, kOpBlx, rTgt);
         LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-        target->defMask = ENCODE_ALL;
         branch->target = target;
 #endif
         genBarrier(cUnit);
@@ -1013,7 +1005,6 @@
             loadValueDirectFixed(cUnit, rlSrc, rARG0);  /* reload Ref */
             // Rejoin code paths
             LIR* hopTarget = newLIR0(cUnit, kPseudoTargetLabel);
-            hopTarget->defMask = ENCODE_ALL;
             hopBranch->target = (LIR*)hopTarget;
         }
     }
@@ -1044,7 +1035,6 @@
     oatClobberCalleeSave(cUnit);
     /* branch targets here */
     LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
     RegLocation rlResult = oatGetReturn(cUnit);
     storeValue(cUnit, rlDest, rlResult);
     branch1->target = target;
@@ -1093,7 +1083,6 @@
             opRegCopy(cUnit, classReg, rARG0); // Align usage with fast path
             // Rejoin code paths
             LIR* hopTarget = newLIR0(cUnit, kPseudoTargetLabel);
-            hopTarget->defMask = ENCODE_ALL;
             hopBranch->target = (LIR*)hopTarget;
         }
     }
@@ -1118,7 +1107,6 @@
     callRuntimeHelper(cUnit, rTgt);
     /* branch target here */
     LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
     branch1->target = (LIR*)target;
     branch2->target = (LIR*)target;
 }
@@ -1827,14 +1815,22 @@
             break;
         case OP_ADD_LONG:
         case OP_ADD_LONG_2ADDR:
+#if defined(TARGET_MIPS)
+            return genAddLong(cUnit, mir, rlDest, rlSrc1, rlSrc2);
+#else
             firstOp = kOpAdd;
             secondOp = kOpAdc;
             break;
+#endif
         case OP_SUB_LONG:
         case OP_SUB_LONG_2ADDR:
+#if defined(TARGET_MIPS)
+            return genSubLong(cUnit, mir, rlDest, rlSrc1, rlSrc2);
+#else
             firstOp = kOpSub;
             secondOp = kOpSbc;
             break;
+#endif
         case OP_MUL_LONG:
         case OP_MUL_LONG_2ADDR:
             callOut = true;
@@ -1873,27 +1869,7 @@
             secondOp = kOpXor;
             break;
         case OP_NEG_LONG: {
-            rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
-            rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
-            int zReg = oatAllocTemp(cUnit);
-            loadConstantNoClobber(cUnit, zReg, 0);
-            // Check for destructive overlap
-            if (rlResult.lowReg == rlSrc2.highReg) {
-                int tReg = oatAllocTemp(cUnit);
-                opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
-                            zReg, rlSrc2.lowReg);
-                opRegRegReg(cUnit, kOpSbc, rlResult.highReg,
-                            zReg, tReg);
-                oatFreeTemp(cUnit, tReg);
-            } else {
-                opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
-                            zReg, rlSrc2.lowReg);
-                opRegRegReg(cUnit, kOpSbc, rlResult.highReg,
-                            zReg, rlSrc2.highReg);
-            }
-            oatFreeTemp(cUnit, zReg);
-            storeValueWide(cUnit, rlDest, rlResult);
-            return false;
+            return genNegLong(cUnit, mir, rlDest, rlSrc2);
         }
         default:
             LOG(FATAL) << "Invalid long arith op";
@@ -2124,7 +2100,6 @@
     loadConstant(cUnit, rARG2, offset);
     opReg(cUnit, kOpBlx, rSUSPEND);
     LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
     branch->target = (LIR*)target;
 #endif
     oatFreeTemp(cUnit, rARG2);
@@ -2152,7 +2127,6 @@
 #endif
     }
     LIR* retLab = newLIR0(cUnit, kPseudoTargetLabel);
-    retLab->defMask = ENCODE_ALL;
     LIR* target = (LIR*)oatNew(cUnit, sizeof(LIR), true, kAllocLIR);
     target->dalvikOffset = cUnit->currentDalvikOffset;
     target->opcode = kPseudoSuspendTarget;
diff --git a/src/compiler/codegen/arm/ArchFactory.cc b/src/compiler/codegen/arm/ArchFactory.cc
index b47fae1..8a23d5c 100644
--- a/src/compiler/codegen/arm/ArchFactory.cc
+++ b/src/compiler/codegen/arm/ArchFactory.cc
@@ -26,6 +26,32 @@
 
 void genDebuggerUpdate(CompilationUnit* cUnit, int32_t offset);
 
+// Negate a 64-bit value: rlDest = 0 - rlSrc, emitted as a sub/sbc pair.
+// Always returns false.
+bool genNegLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
+                RegLocation rlSrc)
+{
+    rlSrc = loadValueWide(cUnit, rlSrc, kCoreReg);
+    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    int zReg = oatAllocTemp(cUnit);
+    loadConstantNoClobber(cUnit, zReg, 0);
+    // Check for destructive overlap
+    if (rlResult.lowReg == rlSrc.highReg) {
+        // The low-word sub overwrites rlSrc.highReg, so save it first
+        int tReg = oatAllocTemp(cUnit);
+        opRegCopy(cUnit, tReg, rlSrc.highReg);
+        opRegRegReg(cUnit, kOpSub, rlResult.lowReg, zReg, rlSrc.lowReg);
+        opRegRegReg(cUnit, kOpSbc, rlResult.highReg, zReg, tReg);
+        oatFreeTemp(cUnit, tReg);
+    } else {
+        opRegRegReg(cUnit, kOpSub, rlResult.lowReg, zReg, rlSrc.lowReg);
+        opRegRegReg(cUnit, kOpSbc, rlResult.highReg, zReg, rlSrc.highReg);
+    }
+    oatFreeTemp(cUnit, zReg);
+    storeValueWide(cUnit, rlDest, rlResult);
+    return false;
+}
+
 int loadHelper(CompilationUnit* cUnit, int offset)
 {
     loadWordDisp(cUnit, rSELF, offset, rLR);
diff --git a/src/compiler/codegen/arm/Codegen.h b/src/compiler/codegen/arm/Codegen.h
index b985e1b..3b866ce 100644
--- a/src/compiler/codegen/arm/Codegen.h
+++ b/src/compiler/codegen/arm/Codegen.h
@@ -31,6 +31,8 @@
 LIR *opRegReg(CompilationUnit* cUnit, OpKind op, int rDestSrc1, int rSrc2);
 LIR* opCmpImmBranch(CompilationUnit* cUnit, ConditionCode cond, int reg,
                     int checkValue, LIR* target);
+bool genNegLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
+                RegLocation rlSrc);
 
 /* Forward declaraton the portable versions due to circular dependency */
 bool genArithOpFloatPortable(CompilationUnit* cUnit, MIR* mir,
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 838fcaf..cba37b7 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -119,14 +119,13 @@
     loadConstant(cUnit, rIdx, size);
     // Establish loop branch target
     LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
     // Load next key/disp
     newLIR2(cUnit, kThumb2LdmiaWB, rBase, (1 << rKey) | (1 << rDisp));
     opRegReg(cUnit, kOpCmp, rKey, rlSrc.lowReg);
     // Go if match. NOTE: No instruction set switch here - must stay Thumb2
     opIT(cUnit, kArmCondEq, "");
     LIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, rDisp);
-    tabRec->bxInst = switchBranch;
+    tabRec->anchor = switchBranch;
     // Needs to use setflags encoding here
     newLIR3(cUnit, kThumb2SubsRRI12, rIdx, rIdx, 1);
     opCondBranch(cUnit, kCondNe, target);
@@ -173,11 +172,10 @@
 
     // ..and go! NOTE: No instruction set switch here - must stay Thumb2
     LIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, dispReg);
-    tabRec->bxInst = switchBranch;
+    tabRec->anchor = switchBranch;
 
     /* branchOver target here */
     LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
     branchOver->target = (LIR*)target;
 }
 
@@ -287,7 +285,6 @@
     branch = newLIR2(cUnit, kThumb2Cbz, r1, 0);
 
     hopTarget = newLIR0(cUnit, kPseudoTargetLabel);
-    hopTarget->defMask = ENCODE_ALL;
     hopBranch->target = (LIR*)hopTarget;
 
     // Go expensive route - artLockObjectFromCode(self, obj);
@@ -297,7 +294,6 @@
 
     // Resume here
     target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
     branch->target = (LIR*)target;
 }
 
@@ -333,7 +329,6 @@
     branch = opNone(cUnit, kOpUncondBr);
 
     hopTarget = newLIR0(cUnit, kPseudoTargetLabel);
-    hopTarget->defMask = ENCODE_ALL;
     hopBranch->target = (LIR*)hopTarget;
 
     // Go expensive route - UnlockObjectFromCode(obj);
@@ -343,7 +338,6 @@
 
     // Resume here
     target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
     branch->target = (LIR*)target;
 }
 
@@ -383,11 +377,9 @@
     genBarrier(cUnit);
 
     target2 = newLIR0(cUnit, kPseudoTargetLabel);
-    target2->defMask = -1;
     opRegReg(cUnit, kOpNeg, tReg, tReg);
 
     target1 = newLIR0(cUnit, kPseudoTargetLabel);
-    target1->defMask = -1;
 
     RegLocation rlTemp = LOC_C_RETURN; // Just using as template, will change
     rlTemp.lowReg = tReg;
diff --git a/src/compiler/codegen/mips/ArchFactory.cc b/src/compiler/codegen/mips/ArchFactory.cc
index 963427d..9050cf9 100644
--- a/src/compiler/codegen/mips/ArchFactory.cc
+++ b/src/compiler/codegen/mips/ArchFactory.cc
@@ -24,6 +24,77 @@
 
 namespace art {
 
+bool genAddLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
+                RegLocation rlSrc1, RegLocation rlSrc2)
+{
+    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
+    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
+    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    /*
+     *  [v1 v0] =  [a1 a0] + [a3 a2];
+     *    addu v0,a2,a0
+     *    addu t1,a3,a1
+     *    sltu v1,v0,a2      ; carry: 1 iff the low-word add wrapped
+     *    addu v1,v1,t1
+     *  NOTE(review): sltu reads a2 after v0 is written - confirm v0 != a2.
+     */
+    opRegRegReg(cUnit, kOpAdd, rlResult.lowReg, rlSrc2.lowReg, rlSrc1.lowReg);
+    int tReg = oatAllocTemp(cUnit);
+    opRegRegReg(cUnit, kOpAdd, tReg, rlSrc2.highReg, rlSrc1.highReg);
+    newLIR3(cUnit, kMipsSltu, rlResult.highReg, rlResult.lowReg, rlSrc2.lowReg);
+    opRegRegReg(cUnit, kOpAdd, rlResult.highReg, rlResult.highReg, tReg);
+    oatFreeTemp(cUnit, tReg);
+    storeValueWide(cUnit, rlDest, rlResult);
+    return false;
+}
+
+bool genSubLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
+                RegLocation rlSrc1, RegLocation rlSrc2)
+{
+    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
+    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
+    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    /*
+     *  64-bit subtract with an explicit borrow; always returns false.
+     *  [v1 v0] =  [a1 a0] - [a3 a2];
+     *    sltu    t1,a0,a2    ; borrow, taken before v0 may overwrite a0
+     *    subu    v0,a0,a2
+     *    subu    v1,a1,a3
+     *    subu    v1,v1,t1
+     */
+    int tReg = oatAllocTemp(cUnit);
+    newLIR3(cUnit, kMipsSltu, tReg, rlSrc1.lowReg, rlSrc2.lowReg);
+    opRegRegReg(cUnit, kOpSub, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
+    opRegRegReg(cUnit, kOpSub, rlResult.highReg, rlSrc1.highReg, rlSrc2.highReg);
+    opRegRegReg(cUnit, kOpSub, rlResult.highReg, rlResult.highReg, tReg);
+    oatFreeTemp(cUnit, tReg);
+    storeValueWide(cUnit, rlDest, rlResult);
+    return false;
+}
+
+bool genNegLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
+                RegLocation rlSrc)
+{
+    rlSrc = loadValueWide(cUnit, rlSrc, kCoreReg);
+    RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
+    /*
+     *  [v1 v0] =  -[a1 a0]
+     *    negu    v0,a0
+     *    negu    v1,a1
+     *    sltu    t1,r_zero,v0    ; borrow: 1 iff the negated low word != 0
+     *    subu    v1,v1,t1
+     */
+    // NOTE(review): no rlResult.lowReg == rlSrc.highReg overlap check - confirm
+    opRegReg(cUnit, kOpNeg, rlResult.lowReg, rlSrc.lowReg);
+    opRegReg(cUnit, kOpNeg, rlResult.highReg, rlSrc.highReg);
+    int tReg = oatAllocTemp(cUnit);
+    newLIR3(cUnit, kMipsSltu, tReg, r_ZERO, rlResult.lowReg);
+    opRegRegReg(cUnit, kOpSub, rlResult.highReg, rlResult.highReg, tReg);
+    oatFreeTemp(cUnit, tReg);
+    storeValueWide(cUnit, rlDest, rlResult);
+    return false;
+}
+
 void genDebuggerUpdate(CompilationUnit* cUnit, int32_t offset);
 
 /*
diff --git a/src/compiler/codegen/mips/Assemble.cc b/src/compiler/codegen/mips/Assemble.cc
index 0021318..5f215ef 100644
--- a/src/compiler/codegen/mips/Assemble.cc
+++ b/src/compiler/codegen/mips/Assemble.cc
@@ -221,7 +221,7 @@
     ENCODING_MAP(kMipsNop, 0x00000000,
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND,
-                 "nop", "", 4),
+                 "nop", ";", 4),
     ENCODING_MAP(kMipsNor, 0x00000027, /* used for "not" too */
                  kFmtBitBlt, 15, 11, kFmtBitBlt, 25, 21, kFmtBitBlt, 20, 16,
                  kFmtUnused, -1, -1, IS_TERTIARY_OP | REG_DEF0_USE12,
@@ -398,6 +398,22 @@
                  kFmtUnused, -1, -1, IS_BINARY_OP | REG_USE0 | REG_DEF1,
                  "mtc1", "!0r,!1s", 4),
 #endif
+    ENCODING_MAP(kMipsDelta, 0x27e00000,
+                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, 15, 0,
+                 kFmtUnused, -1, -1, IS_QUAD_OP | REG_DEF0 | REG_USE_LR,
+                 "addiu", "!0r,r_ra,0x!1h(!1d)", 4),
+    ENCODING_MAP(kMipsDeltaHi, 0x3C000000,
+                 kFmtBitBlt, 20, 16, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_QUAD_OP | REG_DEF0,
+                 "lui", "!0r,0x!1h(!1d)", 4),
+    ENCODING_MAP(kMipsDeltaLo, 0x34000000,
+                 kFmtBlt5_2, 16, 21, kFmtBitBlt, 15, 0, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, IS_QUAD_OP | REG_DEF0_USE0,
+                 "ori", "!0r,!0r,0x!1h(!1d)", 4),
+    ENCODING_MAP(kMipsCurrPC, 0x0c000000,
+                 kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
+                 kFmtUnused, -1, -1, NO_OPERAND | IS_BRANCH | REG_DEF_LR,
+                 "pc2ra", "; r_ra <- .+8", 4),
     ENCODING_MAP(kMipsUndefined, 0x64000000,
                  kFmtUnused, -1, -1, kFmtUnused, -1, -1, kFmtUnused, -1, -1,
                  kFmtUnused, -1, -1, NO_OPERAND,
@@ -426,7 +442,56 @@
             continue;
         }
 
-        if (lir->opcode == kMipsB || lir->opcode == kMipsBal) {
+// TODO: check for lir->flags.pcRelFixup
+
+        if (lir->opcode == kMipsDelta) {
+            int offset1 = ((LIR*)lir->operands[2])->offset;
+            SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
+            int offset2 = tabRec ? tabRec->offset : lir->target->offset;
+            int delta = offset2 - offset1;
+            if ((delta & 0x7fff) == delta) {
+                // Fits addiu's sign-extended 16-bit immediate
+                lir->operands[1] = delta;
+            } else {
+                // Doesn't fit - must expand to kMipsDelta[Hi|Lo] pair
+                // NOTE(review): lui/ori yields delta alone, without the
+                // r_ra base that addiu adds - confirm this is intended.
+                LIR *newDeltaHi =
+                    (LIR *)oatNew(cUnit, sizeof(LIR), true, kAllocLIR);
+                newDeltaHi->dalvikOffset = lir->dalvikOffset;
+                newDeltaHi->target = lir->target;
+                newDeltaHi->opcode = kMipsDeltaHi;
+                newDeltaHi->operands[0] = lir->operands[0];
+                newDeltaHi->operands[2] = lir->operands[2];
+                newDeltaHi->operands[3] = lir->operands[3];
+                oatSetupResourceMasks(newDeltaHi);
+                oatInsertLIRBefore((LIR*)lir, (LIR*)newDeltaHi);
+                LIR *newDeltaLo =
+                    (LIR *)oatNew(cUnit, sizeof(LIR), true, kAllocLIR);
+                newDeltaLo->dalvikOffset = lir->dalvikOffset;
+                newDeltaLo->target = lir->target;
+                newDeltaLo->opcode = kMipsDeltaLo;
+                newDeltaLo->operands[0] = lir->operands[0];
+                newDeltaLo->operands[2] = lir->operands[2];
+                newDeltaLo->operands[3] = lir->operands[3];
+                oatSetupResourceMasks(newDeltaLo);
+                oatInsertLIRBefore((LIR*)lir, (LIR*)newDeltaLo);
+                lir->flags.isNop = true;
+                res = kRetryAll;
+            }
+        } else if (lir->opcode == kMipsDeltaLo) {
+            int offset1 = ((LIR*)lir->operands[2])->offset;
+            SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
+            int offset2 = tabRec ? tabRec->offset : lir->target->offset;
+            int delta = offset2 - offset1;
+            lir->operands[1] = delta & 0xffff;
+        } else if (lir->opcode == kMipsDeltaHi) {
+            int offset1 = ((LIR*)lir->operands[2])->offset;
+            SwitchTable *tabRec = (SwitchTable*)lir->operands[3];
+            int offset2 = tabRec ? tabRec->offset : lir->target->offset;
+            int delta = offset2 - offset1;
+            lir->operands[1] = (delta >> 16) & 0xffff;
+        } else if (lir->opcode == kMipsB || lir->opcode == kMipsBal) {
             LIR *targetLIR = (LIR *) lir->target;
             intptr_t pc = lir->offset + 4;
             intptr_t target = targetLIR->offset;
@@ -508,6 +573,11 @@
                     }
                     bits |= value;
                     break;
+                case kFmtBlt5_2:
+                    value = (operand & 0x1f);
+                    bits |= (value << encoder->fieldLoc[i].start);
+                    bits |= (value << encoder->fieldLoc[i].end);
+                    break;
                 case kFmtDfp: {
                     DCHECK(DOUBLEREG(operand));
                     DCHECK((operand & 0x1) == 0);
diff --git a/src/compiler/codegen/mips/Codegen.h b/src/compiler/codegen/mips/Codegen.h
index c6f1d98..520d638 100644
--- a/src/compiler/codegen/mips/Codegen.h
+++ b/src/compiler/codegen/mips/Codegen.h
@@ -27,6 +27,12 @@
 namespace art {
 
 #if defined(_CODEGEN_C)
+bool genAddLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
+                RegLocation rlSrc1, RegLocation rlSrc2);
+bool genSubLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
+                RegLocation rlSrc1, RegLocation rlSrc2);
+bool genNegLong(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
+                RegLocation rlSrc);
 LIR *opRegImm(CompilationUnit* cUnit, OpKind op, int rDestSrc1, int value);
 LIR *opRegReg(CompilationUnit* cUnit, OpKind op, int rDestSrc1, int rSrc2);
 LIR* opCmpBranch(CompilationUnit* cUnit, ConditionCode cond, int src1,
diff --git a/src/compiler/codegen/mips/FP/MipsFP.cc b/src/compiler/codegen/mips/FP/MipsFP.cc
index 4501210..6a51f69 100644
--- a/src/compiler/codegen/mips/FP/MipsFP.cc
+++ b/src/compiler/codegen/mips/FP/MipsFP.cc
@@ -61,7 +61,7 @@
 
     return false;
 #else
-    UNIMPLEMENTED(FATAL) << "Need Mips soft float implementation";
+    UNIMPLEMENTED(WARNING) << "Need Mips soft float implementation";
     return false;
 #endif
 }
@@ -112,7 +112,7 @@
     storeValueWide(cUnit, rlDest, rlResult);
     return false;
 #else
-    UNIMPLEMENTED(FATAL) << "Need Mips soft float implementation";
+    UNIMPLEMENTED(WARNING) << "Need Mips soft float implementation";
     return false;
 #endif
 }
@@ -181,7 +181,7 @@
     }
     return false;
 #else
-    UNIMPLEMENTED(FATAL) << "Need Mips soft float implementation";
+    UNIMPLEMENTED(WARNING) << "Need Mips soft float implementation";
     return false;
 #endif
 }
@@ -189,7 +189,7 @@
 static bool genCmpFP(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest,
                      RegLocation rlSrc1, RegLocation rlSrc2)
 {
-    UNIMPLEMENTED(FATAL) << "Need Mips implementation";
+    UNIMPLEMENTED(WARNING) << "Need Mips implementation";
     return false;
 #if 0
     TemplateOpcode templateOpcode;
diff --git a/src/compiler/codegen/mips/Mips32/Factory.cc b/src/compiler/codegen/mips/Mips32/Factory.cc
index 46987d3..105677e 100644
--- a/src/compiler/codegen/mips/Mips32/Factory.cc
+++ b/src/compiler/codegen/mips/Mips32/Factory.cc
@@ -28,7 +28,7 @@
                          r_T0, r_T1, r_T2, r_T3, r_T4, r_T5, r_T6, r_T7,
                          r_S0, r_S1, r_S2, r_S3, r_S4, r_S5, r_S6, r_S7, r_T8,
                          r_T9, r_K0, r_K1, r_GP, r_SP, r_FP, r_RA};
-static int reservedRegs[] = {r_ZERO, r_AT, r_S0, r_S1, r_K0, r_K1, r_GP, r_SP};
+static int reservedRegs[] = {r_ZERO, r_AT, r_S0, r_S1, r_K0, r_K1, r_GP, r_SP, r_RA};
 static int coreTemps[] = {r_V0, r_V1, r_A0, r_A1, r_A2, r_A3, r_T0, r_T1, r_T2,
                           r_T3, r_T4, r_T5, r_T6, r_T7, r_T8, r_T9};
 #ifdef __mips_hard_float
@@ -227,6 +227,10 @@
         case kOpAsr:
             opcode = kMipsSrav;
             break;
+        case kOpAdc:
+        case kOpSbc:
+            LOG(FATAL) << "No carry bit on MIPS";
+            break;
         default:
             LOG(FATAL) << "bad case in opRegRegReg";
             break;
diff --git a/src/compiler/codegen/mips/Mips32/Gen.cc b/src/compiler/codegen/mips/Mips32/Gen.cc
index 155675c..c975889 100644
--- a/src/compiler/codegen/mips/Mips32/Gen.cc
+++ b/src/compiler/codegen/mips/Mips32/Gen.cc
@@ -25,28 +25,38 @@
 namespace art {
 
 /*
- * The sparse table in the literal pool is an array of <key,displacement>
- * pairs.  For each set, we'll load them as a pair using ldmia.
- * This means that the register number of the temp we use for the key
- * must be lower than the reg for the displacement.
+ * The lack of pc-relative loads on Mips presents somewhat of a challenge
+ * for our PIC switch table strategy.  To materialize the current location
+ * we'll do a dummy JAL and reference our tables using r_RA as the
+ * base register.  Note that r_RA will be used both as the base to
+ * locate the switch table data and as the reference base for the switch
+ * target offsets stored in the table.  We'll use a special pseudo-instruction
+ * to represent the jal and trigger the construction of the
+ * switch table offsets (which will happen after final assembly and all
+ * labels are fixed).
  *
  * The test loop will look something like:
  *
- *   adr   rBase, <table>
- *   ldr   rVal, [rSP, vRegOff]
- *   mov   rIdx, #tableSize
- * lp:
- *   ldmia rBase!, {rKey, rDisp}
- *   sub   rIdx, #1
- *   cmp   rVal, rKey
- *   ifeq
- *   add   rPC, rDisp   ; This is the branch from which we compute displacement
- *   cbnz  rIdx, lp
+ *   ori   rEnd, r_ZERO, #tableSize  ; size in bytes
+ *   jal   BaseLabel         ; stores "return address" (BaseLabel) in r_RA
+ *   nop                     ; opportunistically fill
+ * BaseLabel:
+ *   addiu rBase, r_RA, <table> - <BaseLabel>  ; table relative to BaseLabel
+ *   addu  rEnd, rEnd, rBase                   ; end of table
+ *   lw    rVal, [rSP, vRegOff]                ; Test Value
+ * loop:
+ *   beq   rBase, rEnd, done
+ *   lw    rKey, 0(rBase)
+ *   addu  rBase, 8
+ *   bne   rVal, rKey, loop
+ *   lw    rDisp, -4(rBase)
+ *   addu  r_RA, rDisp
+ *   jr    r_RA
+ * done:
+ *
  */
 void genSparseSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
 {
-    UNIMPLEMENTED(FATAL) << "Needs Mips sparse switch";
-#if 0
     const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
     if (cUnit->printMe) {
         dumpSparseSwitchTable(table);
@@ -56,49 +66,75 @@
                          true, kAllocData);
     tabRec->table = table;
     tabRec->vaddr = mir->offset;
-    int size = table[1];
-    tabRec->targets = (LIR* *)oatNew(cUnit, size * sizeof(LIR*), true,
+    int elements = table[1];
+    tabRec->targets = (LIR* *)oatNew(cUnit, elements * sizeof(LIR*), true,
                                      kAllocLIR);
     oatInsertGrowableList(cUnit, &cUnit->switchTables, (intptr_t)tabRec);
 
-    // Get the switch value
-    rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
-    int rBase = oatAllocTemp(cUnit);
-    /* Allocate key and disp temps */
-    int rKey = oatAllocTemp(cUnit);
-    int rDisp = oatAllocTemp(cUnit);
-    // Make sure rKey's register number is less than rDisp's number for ldmia
-    if (rKey > rDisp) {
-        int tmp = rDisp;
-        rDisp = rKey;
-        rKey = tmp;
+    // The table is composed of 8-byte key/disp pairs
+    int byteSize = elements * 8;
+
+    int sizeHi = byteSize >> 16;
+    int sizeLo = byteSize & 0xffff;
+
+    int rEnd = oatAllocTemp(cUnit);
+    if (sizeHi) {
+        newLIR2(cUnit, kMipsLui, rEnd, sizeHi);
     }
-    // Materialize a pointer to the switch table
-    newLIR3(cUnit, kThumb2Adr, rBase, 0, (intptr_t)tabRec);
-    // Set up rIdx
-    int rIdx = oatAllocTemp(cUnit);
-    loadConstant(cUnit, rIdx, size);
-    // Establish loop branch target
-    LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
-    // Load next key/disp
-    newLIR2(cUnit, kThumb2LdmiaWB, rBase, (1 << rKey) | (1 << rDisp));
-    opRegReg(cUnit, kOpCmp, rKey, rlSrc.lowReg);
-    // Go if match. NOTE: No instruction set switch here - must stay Thumb2
-    opIT(cUnit, kArmCondEq, "");
-    LIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, rDisp);
-    tabRec->bxInst = switchBranch;
-    // Needs to use setflags encoding here
-    newLIR3(cUnit, kThumb2SubsRRI12, rIdx, rIdx, 1);
-    LIR* branch = opCondBranch(cUnit, kCondNe, target);
-#endif
+    // Must prevent code motion for the curr pc pair
+    genBarrier(cUnit);  // Scheduling barrier
+    newLIR0(cUnit, kMipsCurrPC);  // Really a jal to .+8
+    // Now, fill the branch delay slot
+    if (sizeHi) {
+        newLIR3(cUnit, kMipsOri, rEnd, rEnd, sizeLo);
+    } else {
+        newLIR3(cUnit, kMipsOri, rEnd, r_ZERO, sizeLo);
+    }
+    genBarrier(cUnit);  // Scheduling barrier
+
+    // Construct BaseLabel and set up table base register
+    LIR* baseLabel = newLIR0(cUnit, kPseudoTargetLabel);
+    // Remember base label so offsets can be computed later
+    tabRec->anchor = baseLabel;
+    int rBase = oatAllocTemp(cUnit);
+    newLIR4(cUnit, kMipsDelta, rBase, 0, (intptr_t)baseLabel, (intptr_t)tabRec);
+    opRegRegReg(cUnit, kOpAdd, rEnd, rEnd, rBase);
+
+    // Grab switch test value
+    rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+
+    // Test loop
+    int rKey = oatAllocTemp(cUnit);
+    LIR* loopLabel = newLIR0(cUnit, kPseudoTargetLabel);
+    LIR* exitBranch = opCmpBranch(cUnit , kCondEq, rBase, rEnd, NULL);
+    loadWordDisp(cUnit, rBase, 0, rKey);
+    opRegImm(cUnit, kOpAdd, rBase, 8);
+    opCmpBranch(cUnit, kCondNe, rlSrc.lowReg, rKey, loopLabel);
+    int rDisp = oatAllocTemp(cUnit);
+    loadWordDisp(cUnit, rBase, -4, rDisp);
+    opRegRegReg(cUnit, kOpAdd, r_RA, r_RA, rDisp);
+    opReg(cUnit, kOpBx, r_RA);
+
+    // Loop exit
+    LIR* exitLabel = newLIR0(cUnit, kPseudoTargetLabel);
+    exitBranch->target = exitLabel;
 }
 
-
+/*
+ * Code pattern will look something like:
+ *
+ *   lw    rVal
+ *   jal   BaseLabel         ; stores "return address" (BaseLabel) in r_RA
+ *   nop                     ; opportunistically fill
+ *   [subiu rVal, bias]      ; Remove bias if lowVal != 0
+ *   bound check -> done
+ *   lw    rDisp, [r_RA, rVal]
+ *   addu  r_RA, rDisp
+ *   jr    r_RA
+ * done:
+ */
 void genPackedSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
 {
-    UNIMPLEMENTED(FATAL) << "Need Mips packed switch";
-#if 0
     const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
     if (cUnit->printMe) {
         dumpPackedSwitchTable(table);
@@ -115,35 +151,59 @@
 
     // Get the switch value
     rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
-    int tableBase = oatAllocTemp(cUnit);
-    // Materialize a pointer to the switch table
-    newLIR3(cUnit, kThumb2Adr, tableBase, 0, (intptr_t)tabRec);
+
+    // Prepare the bias.  If too big, handle 1st stage here
     int lowKey = s4FromSwitchData(&table[2]);
-    int keyReg;
-    // Remove the bias, if necessary
+    bool largeBias = false;
+    int rKey;
     if (lowKey == 0) {
-        keyReg = rlSrc.lowReg;
+        rKey = rlSrc.lowReg;
+    } else if ((lowKey & 0xffff) != lowKey) {
+        rKey = oatAllocTemp(cUnit);
+        loadConstant(cUnit, rKey, lowKey);
+        largeBias = true;
     } else {
-        keyReg = oatAllocTemp(cUnit);
-        opRegRegImm(cUnit, kOpSub, keyReg, rlSrc.lowReg, lowKey);
+        rKey = oatAllocTemp(cUnit);
     }
+
+    // Must prevent code motion for the curr pc pair
+    genBarrier(cUnit);
+    newLIR0(cUnit, kMipsCurrPC);  // Really a jal to .+8
+    // Now, fill the branch delay slot with bias strip
+    if (lowKey == 0) {
+        newLIR0(cUnit, kMipsNop);
+    } else {
+        if (largeBias) {
+            opRegRegReg(cUnit, kOpSub, rKey, rlSrc.lowReg, rKey);
+        } else {
+            opRegRegImm(cUnit, kOpSub, rKey, rlSrc.lowReg, lowKey);
+        }
+    }
+    genBarrier(cUnit);  // Scheduling barrier
+
+    // Construct BaseLabel and set up table base register
+    LIR* baseLabel = newLIR0(cUnit, kPseudoTargetLabel);
+    // Remember base label so offsets can be computed later
+    tabRec->anchor = baseLabel;
+
     // Bounds check - if < 0 or >= size continue following switch
-    opRegImm(cUnit, kOpCmp, keyReg, size-1);
-    LIR* branchOver = opCondBranch(cUnit, kCondHi, NULL);
+    LIR* branchOver = opCmpImmBranch(cUnit, kCondHi, rKey, size-1, NULL);
+
+    // Materialize the table base pointer
+    int rBase = oatAllocTemp(cUnit);
+    newLIR4(cUnit, kMipsDelta, rBase, 0, (intptr_t)baseLabel, (intptr_t)tabRec);
 
     // Load the displacement from the switch table
-    int dispReg = oatAllocTemp(cUnit);
-    loadBaseIndexed(cUnit, tableBase, keyReg, dispReg, 2, kWord);
+    int rDisp = oatAllocTemp(cUnit);
+    loadBaseIndexed(cUnit, rBase, rKey, rDisp, 2, kWord);
 
-    // ..and go! NOTE: No instruction set switch here - must stay Thumb2
-    LIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, dispReg);
-    tabRec->bxInst = switchBranch;
+    // Add to r_RA and go
+    opRegRegReg(cUnit, kOpAdd, r_RA, r_RA, rDisp);
+    opReg(cUnit, kOpBx, r_RA);
 
     /* branchOver target here */
     LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
     branchOver->target = (LIR*)target;
-#endif
 }
 
 /*
@@ -158,8 +218,6 @@
  */
 void genFillArrayData(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
 {
-    UNIMPLEMENTED(FATAL) << "Needs Mips FillArrayData";
-#if 0
     const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
     // Add the table to the list - we'll process it later
     FillArrayData *tabRec = (FillArrayData *)
@@ -174,13 +232,25 @@
 
     // Making a call - use explicit registers
     oatFlushAllRegs(cUnit);   /* Everything to home location */
+    oatLockCallTemps(cUnit);
     loadValueDirectFixed(cUnit, rlSrc, rARG0);
-    loadWordDisp(cUnit, rSELF,
-                 OFFSETOF_MEMBER(Thread, pHandleFillArrayDataFromCode), rLR);
+
+    // Must prevent code motion for the curr pc pair
+    genBarrier(cUnit);
+    newLIR0(cUnit, kMipsCurrPC);  // Really a jal to .+8
+    // Now, fill the branch delay slot with the helper load
+    int rTgt = loadHelper(cUnit, OFFSETOF_MEMBER(Thread,
+                          pHandleFillArrayDataFromCode));
+    genBarrier(cUnit);  // Scheduling barrier
+
+    // Construct BaseLabel and set up table base register
+    LIR* baseLabel = newLIR0(cUnit, kPseudoTargetLabel);
+
     // Materialize a pointer to the fill data image
-    newLIR3(cUnit, kThumb2Adr, r1, 0, (intptr_t)tabRec);
-    callRuntimeHelper(cUnit, rLR);
-#endif
+    newLIR4(cUnit, kMipsDelta, rARG1, 0, (intptr_t)baseLabel, (intptr_t)tabRec);
+
+    // And go...
+    callRuntimeHelper(cUnit, rTgt);  // ( array*, fill_data* )
 }
 
 void genNegFloat(CompilationUnit *cUnit, RegLocation rlDest, RegLocation rlSrc)
@@ -266,7 +336,6 @@
     oatFreeTemp(cUnit, t0);
     oatFreeTemp(cUnit, t1);
     LIR* target = newLIR0(cUnit, kPseudoTargetLabel);
-    target->defMask = ENCODE_ALL;
     branch->target = (LIR*)target;
     storeValue(cUnit, rlDest, rlResult);
 }
@@ -314,6 +383,11 @@
             sltOp = kMipsSlt;
             brOp = kMipsBnez;
             break;
+        case kCondHi:  // Gtu
+            sltOp = kMipsSltu;
+            brOp = kMipsBnez;
+            swapped = true;
+            break;
         default:
             UNIMPLEMENTED(FATAL) << "No support for ConditionCode: "
                                  << (int) cond;
diff --git a/src/compiler/codegen/mips/MipsLIR.h b/src/compiler/codegen/mips/MipsLIR.h
index b2cfdbe..5034623 100644
--- a/src/compiler/codegen/mips/MipsLIR.h
+++ b/src/compiler/codegen/mips/MipsLIR.h
@@ -426,6 +426,10 @@
     kMipsMfc1,    /* mfc1 t,s [01000100000] t[20..16] s[15..11] [00000000000] */
     kMipsMtc1,    /* mtc1 t,s [01000100100] t[20..16] s[15..11] [00000000000] */
 #endif
+    kMipsDelta,   /* Pseudo for ori t, s, <label>-<label> */
+    kMipsDeltaHi, /* Pseudo for lui t, high16(<label>-<label>) */
+    kMipsDeltaLo, /* Pseudo for ori t, s, low16(<label>-<label>) */
+    kMipsCurrPC,  /* jal to .+8 to materialize pc */
     kMipsUndefined,  /* undefined [011001xxxxxxxxxxxxxxxx] */
     kMipsLast
 } MipsOpCode;
@@ -458,6 +462,7 @@
     kMemLoad,
     kMemStore,
     kPCRelFixup,
+    kRegUseLR,
 // FIXME: add NEEDS_FIXUP to instruction attributes
 } MipsOpFeatureFlags;
 
@@ -486,7 +491,8 @@
 #define IS_IT           (1 << kIsIT)
 #define SETS_CCODES     (1 << kSetsCCodes)
 #define USES_CCODES     (1 << kUsesCCodes)
-#define NEEDS_FIXUP      (1 << kPCRelFixup)
+#define NEEDS_FIXUP     (1 << kPCRelFixup)
+#define REG_USE_LR      (1 << kRegUseLR)
 
 /*  attributes, included for compatibility */
 #define REG_DEF_FPCS_LIST0   (0)
@@ -513,6 +519,7 @@
     kFmtBitBlt,        /* Bit string using end/start */
     kFmtDfp,           /* Double FP reg */
     kFmtSfp,           /* Single FP reg */
+    kFmtBlt5_2,        /* Same 5-bit field to 2 locations */
 } MipsEncodingKind;
 
 /* Struct used to define the snippet positions for each Thumb opcode */