Merge "Use the operator<< generator more widely." into ics-mr1-plus-art
diff --git a/build/Android.oattest.mk b/build/Android.oattest.mk
index 0ccf106..3166b75 100644
--- a/build/Android.oattest.mk
+++ b/build/Android.oattest.mk
@@ -67,7 +67,6 @@
 .PHONY: test-art-host-oat-$(1)
 test-art-host-oat-$(1): $(ART_TEST_OUT)/oat-test-dex-$(1).jar $(HOST_CORE_IMG_OUT)
 	mkdir -p /tmp/android-data/test-art-host-oat-$(1)
-	$(DEX2OAT) --runtime-arg -Xms16m --runtime-arg -Xmx16m --dex-file=$(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar --oat-file=$(HOST_OUT_JAVA_LIBRARIES)/oat-test-dex-$(1).jar.oat --instruction-set=X86
 	ANDROID_DATA=/tmp/android-data/test-art-host-oat-$(1) \
 	  ANDROID_ROOT=$(HOST_OUT) \
 	  LD_LIBRARY_PATH=$(HOST_OUT_SHARED_LIBRARIES) \
diff --git a/src/compiler/codegen/GenCommon.cc b/src/compiler/codegen/GenCommon.cc
index 444f5f2..aeacab8 100644
--- a/src/compiler/codegen/GenCommon.cc
+++ b/src/compiler/codegen/GenCommon.cc
@@ -1858,7 +1858,7 @@
     } else {
         RegLocation rlResult;
         oatFlushAllRegs(cUnit);   /* Send everything to home location */
-        loadValueDirectFixed(cUnit, rlSrc2, rRET1);
+        loadValueDirectFixed(cUnit, rlSrc2, rARG1);
 #if !defined(TARGET_X86)
         int rTgt = loadHelper(cUnit, funcOffset);
 #endif
@@ -2197,14 +2197,17 @@
             retReg = rRET0;
-            funcOffset = ENTRYPOINT_OFFSET(pLdivmod);
+            funcOffset = ENTRYPOINT_OFFSET(pLdiv);
             break;
-        /* NOTE - result is in rARG2/rARG3 instead of rRET0/rRET1 */
-        // FIXME: is true, or could be made true, or other targets?
         case Instruction::REM_LONG:
         case Instruction::REM_LONG_2ADDR:
             callOut = true;
             checkZero = true;
             funcOffset = ENTRYPOINT_OFFSET(pLdivmod);
+#if defined(TARGET_ARM)
+            /* NOTE - the remainder is in rARG2/rARG3 instead of rRET0/rRET1 */
             retReg = rARG2;
+#else
+            retReg = rRET0;
+#endif
             break;
         case Instruction::AND_LONG_2ADDR:
         case Instruction::AND_LONG:
diff --git a/src/compiler/codegen/MethodCodegenDriver.cc b/src/compiler/codegen/MethodCodegenDriver.cc
index b28df01..0b8a19d 100644
--- a/src/compiler/codegen/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/MethodCodegenDriver.cc
@@ -395,7 +395,7 @@
             break;
 
         case Instruction::SPARSE_SWITCH:
-            genSparseSwitch(cUnit, mir, rlSrc[0]);
+            genSparseSwitch(cUnit, mir, rlSrc[0], labelList);
             break;
 
         case Instruction::CMPL_FLOAT:
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index f485403..44cae0f 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -369,7 +369,8 @@
  *   add   rPC, rDisp   ; This is the branch from which we compute displacement
  *   cbnz  rIdx, lp
  */
-void genSparseSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
+void genSparseSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc,
+                     LIR* labelList)
 {
     const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
     if (cUnit->printMe) {
diff --git a/src/compiler/codegen/mips/Mips32/Gen.cc b/src/compiler/codegen/mips/Mips32/Gen.cc
index b810f98..ade2fd8 100644
--- a/src/compiler/codegen/mips/Mips32/Gen.cc
+++ b/src/compiler/codegen/mips/Mips32/Gen.cc
@@ -63,7 +63,8 @@
  * done:
  *
  */
-void genSparseSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
+void genSparseSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc,
+                     LIR* labelList)
 {
     const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
     if (cUnit->printMe) {
diff --git a/src/compiler/codegen/x86/X86/Gen.cc b/src/compiler/codegen/x86/X86/Gen.cc
index 31939f2..f2dbc11 100644
--- a/src/compiler/codegen/x86/X86/Gen.cc
+++ b/src/compiler/codegen/x86/X86/Gen.cc
@@ -46,117 +46,42 @@
 }
 
 /*
- * The lack of pc-relative loads on X86 presents somewhat of a challenge
- * for our PIC switch table strategy.  To materialize the current location
- * we'll do a dummy JAL and reference our tables using r_RA as the
- * base register.  Note that r_RA will be used both as the base to
- * locate the switch table data and as the reference base for the switch
- * target offsets stored in the table.  We'll use a special pseudo-instruction
- * to represent the jal and trigger the construction of the
- * switch table offsets (which will happen after final assembly and all
- * labels are fixed).
- *
- * The test loop will look something like:
- *
- *   ori   rEnd, r_ZERO, #tableSize  ; size in bytes
- *   jal   BaseLabel         ; stores "return address" (BaseLabel) in r_RA
- *   nop                     ; opportunistically fill
- * BaseLabel:
- *   addiu rBase, r_RA, <table> - <BaseLabel>  ; table relative to BaseLabel
-     addu  rEnd, rEnd, rBase                   ; end of table
- *   lw    rVal, [rSP, vRegOff]                ; Test Value
- * loop:
- *   beq   rBase, rEnd, done
- *   lw    rKey, 0(rBase)
- *   addu  rBase, 8
- *   bne   rVal, rKey, loop
- *   lw    rDisp, -4(rBase)
- *   addu  r_RA, rDisp
- *   jr    r_RA
- * done:
- *
+ * The sparse table in the literal pool is an array of <key,displacement>
+ * pairs.
  */
-void genSparseSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
-{
-    UNIMPLEMENTED(WARNING) << "genSparseSwitch";
-    newLIR0(cUnit, kX86Bkpt);
-    return;
-#if 0
-    const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
-    if (cUnit->printMe) {
-        dumpSparseSwitchTable(table);
-    }
-    // Add the table to the list - we'll process it later
-    SwitchTable *tabRec = (SwitchTable *)oatNew(cUnit, sizeof(SwitchTable),
-                         true, kAllocData);
-    tabRec->table = table;
-    tabRec->vaddr = mir->offset;
-    int elements = table[1];
-    tabRec->targets = (LIR* *)oatNew(cUnit, elements * sizeof(LIR*), true,
-                                     kAllocLIR);
-    oatInsertGrowableList(cUnit, &cUnit->switchTables, (intptr_t)tabRec);
-
-    // The table is composed of 8-byte key/disp pairs
-    int byteSize = elements * 8;
-
-    int sizeHi = byteSize >> 16;
-    int sizeLo = byteSize & 0xffff;
-
-    int rEnd = oatAllocTemp(cUnit);
-    if (sizeHi) {
-        newLIR2(cUnit, kX86Lui, rEnd, sizeHi);
-    }
-    // Must prevent code motion for the curr pc pair
-    genBarrier(cUnit);  // Scheduling barrier
-    newLIR0(cUnit, kX86CurrPC);  // Really a jal to .+8
-    // Now, fill the branch delay slot
-    if (sizeHi) {
-        newLIR3(cUnit, kX86Ori, rEnd, rEnd, sizeLo);
-    } else {
-        newLIR3(cUnit, kX86Ori, rEnd, r_ZERO, sizeLo);
-    }
-    genBarrier(cUnit);  // Scheduling barrier
-
-    // Construct BaseLabel and set up table base register
-    LIR* baseLabel = newLIR0(cUnit, kPseudoTargetLabel);
-    // Remember base label so offsets can be computed later
-    tabRec->anchor = baseLabel;
-    int rBase = oatAllocTemp(cUnit);
-    newLIR4(cUnit, kX86Delta, rBase, 0, (intptr_t)baseLabel, (intptr_t)tabRec);
-    opRegRegReg(cUnit, kOpAdd, rEnd, rEnd, rBase);
-
-    // Grab switch test value
-    rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
-
-    // Test loop
-    int rKey = oatAllocTemp(cUnit);
-    LIR* loopLabel = newLIR0(cUnit, kPseudoTargetLabel);
-    LIR* exitBranch = opCmpBranch(cUnit , kCondEq, rBase, rEnd, NULL);
-    loadWordDisp(cUnit, rBase, 0, rKey);
-    opRegImm(cUnit, kOpAdd, rBase, 8);
-    opCmpBranch(cUnit, kCondNe, rlSrc.lowReg, rKey, loopLabel);
-    int rDisp = oatAllocTemp(cUnit);
-    loadWordDisp(cUnit, rBase, -4, rDisp);
-    opRegRegReg(cUnit, kOpAdd, r_RA, r_RA, rDisp);
-    opReg(cUnit, kOpBx, r_RA);
-
-    // Loop exit
-    LIR* exitLabel = newLIR0(cUnit, kPseudoTargetLabel);
-    exitBranch->target = exitLabel;
-#endif
+BasicBlock *findBlock(CompilationUnit* cUnit, unsigned int codeOffset,
+                      bool split, bool create, BasicBlock** immedPredBlockP);
+// Emits one compare-and-branch per <key,target> entry; no match falls through.
+void genSparseSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc, LIR* labelList) {
+  const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
+  if (cUnit->printMe) dumpSparseSwitchTable(table);
+  const int numEntries = table[1];
+  const int* caseKeys = reinterpret_cast<const int*>(&table[2]);  // keys come first,
+  const int* caseTargets = caseKeys + numEntries;                 // then the targets
+  rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
+  for (int idx = 0; idx != numEntries; ++idx) {
+    // Each target is an offset that is added to the switch's own mir->offset.
+    BasicBlock* target = findBlock(cUnit, mir->offset + caseTargets[idx],
+                                   false, false, NULL);
+    LIR* caseLabel = &labelList[target->id];
+    opCmpImmBranch(cUnit, kCondEq, rlSrc.lowReg, caseKeys[idx], caseLabel);
+  }
 }
 
 /*
  * Code pattern will look something like:
  *
- *   lw    rVal
- *   jal   BaseLabel         ; stores "return address" (BaseLabel) in r_RA
- *   nop                     ; opportunistically fill
- *   [subiu rVal, bias]      ; Remove bias if lowVal != 0
- *   bound check -> done
- *   lw    rDisp, [r_RA, rVal]
- *   addu  r_RA, rDisp
- *   jr    r_RA
+ * mov  rVal, ..
+ * call 0
+ * pop  rStartOfMethod
+ * sub  rStartOfMethod, ..
+ * mov  rKeyReg, rVal
+ * sub  rKeyReg, lowKey
+ * cmp  rKeyReg, size-1  ; bound check
+ * ja   done
+ * mov  rDisp, [rStartOfMethod + rKeyReg * 4 + tableOffset]
+ * add  rStartOfMethod, rDisp
+ * jmp  rStartOfMethod
  * done:
  */
 void genPackedSwitch(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc) {
diff --git a/src/oat/runtime/arm/oat_support_entrypoints_arm.cc b/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
index bed4fba..69e9c98 100644
--- a/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
+++ b/src/oat/runtime/arm/oat_support_entrypoints_arm.cc
@@ -220,7 +220,8 @@
   points->pF2l = F2L;
   points->pLadd = NULL;
   points->pLand = NULL;
-  points->pLdivmod = __aeabi_ldivmod;
+  points->pLdiv = __aeabi_ldivmod;
+  points->pLdivmod = __aeabi_ldivmod;  // result returned in r2:r3
   points->pLmul = __aeabi_lmul;
   points->pLor = NULL;
   points->pLsub = NULL;
diff --git a/src/oat/runtime/mips/oat_support_entrypoints_mips.cc b/src/oat/runtime/mips/oat_support_entrypoints_mips.cc
index e20332a..62b20f2 100644
--- a/src/oat/runtime/mips/oat_support_entrypoints_mips.cc
+++ b/src/oat/runtime/mips/oat_support_entrypoints_mips.cc
@@ -218,6 +218,7 @@
   points->pF2l = F2L;
   points->pLadd = NULL;
   points->pLand = NULL;
+  points->pLdiv = NULL;
   points->pLdivmod = NULL;
   points->pLmul = NULL;
   points->pLor = NULL;
diff --git a/src/oat/runtime/oat_support_entrypoints.h b/src/oat/runtime/oat_support_entrypoints.h
index 0e59dd8..1a8e675 100644
--- a/src/oat/runtime/oat_support_entrypoints.h
+++ b/src/oat/runtime/oat_support_entrypoints.h
@@ -107,6 +107,7 @@
   int64_t (*pF2l)(float);
   int64_t (*pLadd)(int64_t, int64_t);
   int64_t (*pLand)(int64_t, int64_t);
+  int64_t (*pLdiv)(int64_t, int64_t);
   int64_t (*pLdivmod)(int64_t, int64_t);
   int64_t (*pLmul)(int64_t, int64_t);
   int64_t (*pLor)(int64_t, int64_t);
diff --git a/src/oat/runtime/support_math.cc b/src/oat/runtime/support_math.cc
index adef64a..133b857 100644
--- a/src/oat/runtime/support_math.cc
+++ b/src/oat/runtime/support_math.cc
@@ -94,4 +94,24 @@
   }
 }
 
+// Signed 64-bit division helper for compiled code. A zero divisor is not
+// handled here (NOTE(review): presumably the caller checks for it - confirm).
+extern "C" int64_t artLdivFromCode(int64_t a, int64_t b) {
+  if (b == -1) {
+    // INT64_MIN / -1 overflows, which is undefined behavior in C++; negate in
+    // unsigned arithmetic to get the wrapped result (INT64_MIN) instead.
+    return static_cast<int64_t>(0 - static_cast<uint64_t>(a));
+  }
+  return a / b;
+}
+
+// Signed 64-bit remainder helper for compiled code; a zero divisor is not
+// handled here either.
+extern "C" int64_t artLdivmodFromCode(int64_t a, int64_t b) {
+  if (b == -1) {
+    return 0;  // x % -1 is always 0; sidesteps the INT64_MIN % -1 overflow.
+  }
+  return a % b;
+}
+
 }  // namespace art
diff --git a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
index d2f97eb..dd139ee 100644
--- a/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
+++ b/src/oat/runtime/x86/oat_support_entrypoints_x86.cc
@@ -74,6 +74,8 @@
 extern int64_t F2L(float f);
 extern "C" int32_t art_idiv_from_code(int32_t, int32_t);
 extern "C" int32_t art_idivmod_from_code(int32_t, int32_t);
+extern "C" int64_t art_ldiv_from_code(int64_t, int64_t);
+extern "C" int64_t art_ldivmod_from_code(int64_t, int64_t);
 
 // Intrinsic entrypoints.
 extern "C" int32_t art_memcmp16(void*, void*, int32_t);
@@ -183,7 +185,8 @@
   points->pF2l = F2L;
   points->pLadd = NULL;
   points->pLand = NULL;
-  points->pLdivmod = NULL;
+  points->pLdiv = art_ldiv_from_code;
+  points->pLdivmod = art_ldivmod_from_code;
   points->pLmul = NULL;
   points->pLor = NULL;
   points->pLsub = NULL;
diff --git a/src/oat/runtime/x86/runtime_support_x86.S b/src/oat/runtime/x86/runtime_support_x86.S
index d8bf336..c6a3aad 100644
--- a/src/oat/runtime/x86/runtime_support_x86.S
+++ b/src/oat/runtime/x86/runtime_support_x86.S
@@ -130,8 +130,8 @@
     mov %esp, %ecx
     // Outgoing argument set up
     subl  LITERAL(8), %esp        // alignment padding
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     pushl %ecx                    // pass SP
+    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     call VAR(cxx_name, 1)         // cxx_name(Thread*, SP)
     int3                          // unreached
 END_MACRO
@@ -143,7 +143,7 @@
     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
     mov %esp, %ecx
     // Outgoing argument set up
-    pushl LITERAL(0)              // alignment padding
+    pushl %eax                    // alignment padding
     pushl %ecx                    // pass SP
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     pushl %eax                    // pass arg1
@@ -329,7 +329,7 @@
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     pushl %eax                    // pass arg1
     call SYMBOL(artLockObjectFromCode)    // (Object*, Thread*, SP)
-    addl 16, %esp        // pop arguments
+    addl LITERAL(16), %esp        // pop arguments
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     ret
 
@@ -342,7 +342,7 @@
     pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
     pushl %eax                    // pass arg1
     call SYMBOL(artUnlockObjectFromCode)  // (Object*, Thread*, SP)
-    addl 16, %esp                 // pop arguments
+    addl LITERAL(16), %esp        // pop arguments
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     testl %eax, %eax              // eax == 0 ?
     jnz 1f
@@ -359,9 +359,9 @@
     pushl %ecx                    // pass arg2
     pushl %eax                    // pass arg1
     call SYMBOL(artHandleFillArrayDataFromCode)  // (Array* array, const uint16_t* table, Thread*, SP)
-    addl 16, %esp        // pop arguments
+    addl LITERAL(16), %esp        // pop arguments
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
-    testl %eax, %eax               // eax == 0 ?
+    testl %eax, %eax              // eax == 0 ?
     jnz 1f
     ret
 1:
@@ -372,7 +372,7 @@
     pushl %ecx                    // pass arg2
     pushl %eax                    // pass arg1
     call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b, Thread*, SP)
-    addl 12, %esp                 // pop arguments
+    addl LITERAL(12), %esp        // pop arguments
     ret
 
 DEFINE_FUNCTION art_memcpy
@@ -380,7 +380,7 @@
     pushl %ecx                    // pass arg2
     pushl %eax                    // pass arg1
     call SYMBOL(memcpy)           // (void*, const void*, size_t)
-    addl 12, %esp                 // pop arguments
+    addl LITERAL(12), %esp        // pop arguments
     ret
 
 DEFINE_FUNCTION art_check_cast_from_code
@@ -392,7 +392,7 @@
     pushl %ecx                    // pass arg2
     pushl %eax                    // pass arg1
     call SYMBOL(artCheckCastFromCode)  // (Class* a, Class* b, Thread*, SP)
-    addl 16, %esp                 // pop arguments
+    addl LITERAL(16), %esp        // pop arguments
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     testl %eax, %eax              // eax == 0 ?
     jnz 1f
@@ -411,6 +411,26 @@
     movl %eax, %edx
     ret
 
+DEFINE_FUNCTION art_ldiv_from_code
+    subl LITERAL(12), %esp        // alignment padding (grow the stack; return address stays intact)
+    pushl %ebx                    // pass arg4
+    pushl %edx                    // pass arg3
+    pushl %ecx                    // pass arg2
+    pushl %eax                    // pass arg1
+    call SYMBOL(artLdivFromCode)  // (int64_t a, int64_t b)
+    addl LITERAL(28), %esp        // pop arguments (16) and padding (12)
+    ret
+
+DEFINE_FUNCTION art_ldivmod_from_code
+    subl LITERAL(12), %esp        // alignment padding (grow the stack; return address stays intact)
+    pushl %ebx                    // pass arg4
+    pushl %edx                    // pass arg3
+    pushl %ecx                    // pass arg2
+    pushl %eax                    // pass arg1
+    call SYMBOL(artLdivmodFromCode) // (int64_t a, int64_t b)
+    addl LITERAL(28), %esp        // pop arguments (16) and padding (12)
+    ret
+
 DEFINE_FUNCTION art_can_put_array_element_from_code
     test %eax, %eax               // Null is trivially storable
     jz   1f
@@ -422,7 +442,7 @@
     pushl %ecx                    // pass arg2
     pushl %eax                    // pass arg1
     call SYMBOL(artCanPutArrayElementFromCode)  // (Object* element, Class* array_class, Thread*, SP)
-    addl 16, %esp                 // pop arguments
+    addl LITERAL(16), %esp        // pop arguments
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
     testl %eax, %eax              // eax == 0 ?
     jnz 2f