Inline Sqrt bug fix; add support for fp/gen register copies
diff --git a/vm/InlineNative.c b/vm/InlineNative.c
index fd28708..ec8a1fb 100644
--- a/vm/InlineNative.c
+++ b/vm/InlineNative.c
@@ -633,8 +633,11 @@
  * pointer field.
  *
  * IMPORTANT: you must update DALVIK_VM_BUILD in DalvikVersion.h if you make
- * changes to this table.  Must also be kept in sync with NativeInlineOps
- * enum in InlineNative.h.
+ * changes to this table.
+ *
+ * NOTE: If present, the JIT will also need to know about changes
+ * to this table.  Update the NativeInlineOps enum in InlineNative.h and
+ * the dispatch code in compiler/codegen/<target>/Codegen.c.
  */
 const InlineOperation gDvmInlineOpsTable[] = {
     { org_apache_harmony_dalvik_NativeTestTarget_emptyInlineMethod,
@@ -782,4 +785,3 @@
 #endif
     return (*gDvmInlineOpsTable[opIndex].func)(arg0, arg1, arg2, arg3, pResult);
 }
-
diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h
index 87978d8..be793d6 100644
--- a/vm/compiler/codegen/arm/ArmLIR.h
+++ b/vm/compiler/codegen/arm/ArmLIR.h
@@ -481,14 +481,27 @@
                                    rd[11..8] imm8 */
     THUMB2_IT,            /* it [10111111] firstcond[7-4] mask[3-0] */
     THUMB2_FMSTAT,        /* fmstat [11101110111100011111101000010000] */
-    THUMB2_VCMPED,        /* vcmpe [111011101] D [11011] rd[15-12] [1011]
+    THUMB2_VCMPD,         /* vcmp [111011101] D [11011] rd[15-12] [1011]
                                    E [1] M [0] rm[3-0] */
-    THUMB2_VCMPES,        /* vcmpe [111011101] D [11010] rd[15-12] [1011]
+    THUMB2_VCMPS,         /* vcmp [111011101] D [11010] rd[15-12] [1011]
                                    E [1] M [0] rm[3-0] */
     THUMB2_LDR_PC_REL12,  /* ldr rd,[pc,#imm12] [1111100011011111] rt[15-12]
                                      imm12[11-0] */
     THUMB2_B_COND,        /* b<c> [1110] S cond[25-22] imm6[21-16] [10]
                                   J1 [0] J2 imm11[10..0] */
+    THUMB2_VMOVD_RR,      /* vmov [111011101] D [110000] vd[15-12] [101101]
+                                  M [0] vm[3-0] */
+    THUMB2_VMOVS_RR,      /* vmov [111011101] D [110000] vd[15-12] [101001]
+                                  M [0] vm[3-0] */
+    THUMB2_FMRS,          /* vmov [111011100001] vn[19-16] rt[15-12] [1010]
+                                  N [0010000] */
+    THUMB2_FMSR,          /* vmov [111011100000] vn[19-16] rt[15-12] [1010]
+                                  N [0010000] */
+    THUMB2_FMRRD,         /* vmov [111011000101] rt2[19-16] rt[15-12]
+                                  [101100] M [1] vm[3-0] */
+    THUMB2_FMDRR,         /* vmov [111011000100] rt2[19-16] rt[15-12]
+                                  [101100] M [1] vm[3-0] */
+
     ARM_LAST,
 } ArmOpCode;
 
diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c
index b140457..77cbb4d 100644
--- a/vm/compiler/codegen/arm/Assemble.c
+++ b/vm/compiler/codegen/arm/Assemble.c
@@ -414,7 +414,7 @@
     ENCODING_MAP(THUMB2_VDIVD,        0xee800b00,
                  DFP, 22, 12, DFP, 7, 16, DFP, 5, 0, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
-                 "vdivs", "!0S, !1S, !2S", 2),
+                 "vdivd", "!0S, !1S, !2S", 2),
     ENCODING_MAP(THUMB2_VCVTIF,       0xeeb80ac0,
                  SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
@@ -490,11 +490,11 @@
     ENCODING_MAP(THUMB2_VMOVS,       0xeeb00a40,
                  SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
-                 "vmov.f32 ", "!0s, !1s", 2),
+                 "vmov.f32 ", " !0s, !1s", 2),
     ENCODING_MAP(THUMB2_VMOVD,       0xeeb00b40,
                  DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST,
-                 "vmov.f64 ", "!0s, !1s", 2),
+                 "vmov.f64 ", " !0S, !1S", 2),
     ENCODING_MAP(THUMB2_LDMIA,         0xe8900000,
                  BITBLT, 19, 16, BITBLT, 15, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP | CLOBBER_DEST | CLOBBER_SRC1,
@@ -723,22 +723,47 @@
                  UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1, UNUSED, -1, -1,
                  NO_OPERAND | SETS_CCODES,
                  "fmstat", "", 2),
-    ENCODING_MAP(THUMB2_VCMPED,        0xeeb40bc0,
+    ENCODING_MAP(THUMB2_VCMPD,        0xeeb40b40,
                  DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP,
-                 "vcmpe.f64", "!0S, !1S", 2),
-    ENCODING_MAP(THUMB2_VCMPES,        0xeeb40ac0,
+                 "vcmp.f64", "!0S, !1S", 2),
+    ENCODING_MAP(THUMB2_VCMPS,        0xeeb40a40,
                  SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_BINARY_OP,
-                 "vcmpe.f32", "!0s, !1s", 2),
+                 "vcmp.f32", "!0s, !1s", 2),
     ENCODING_MAP(THUMB2_LDR_PC_REL12,       0xf8df0000,
                  BITBLT, 15, 12, BITBLT, 11, 0, UNUSED, -1, -1, UNUSED, -1, -1,
                  IS_TERTIARY_OP | CLOBBER_DEST,
                  "ldr", "r!0d,[rpc, #!1d", 2),
     ENCODING_MAP(THUMB2_B_COND,        0xf0008000,
-                 BROFFSET, -1, -1, BITBLT, 25, 22, UNUSED, -1, -1, UNUSED, -1, -1,
+                 BROFFSET, -1, -1, BITBLT, 25, 22, UNUSED, -1, -1,
+                 UNUSED, -1, -1,
                  IS_BINARY_OP | IS_BRANCH | USES_CCODES,
                  "b!1c", "!0t", 2),
+    ENCODING_MAP(THUMB2_VMOVD_RR,       0xeeb00b40, /* dd <- dm */
+                 DFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST,
+                 "vmov.f64", "!0S, !1S", 2),
+    ENCODING_MAP(THUMB2_VMOVS_RR,       0xeeb00a40, /* sd <- sm */
+                 SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST,
+                 "vmov.f32", "!0s, !1s", 2),
+    ENCODING_MAP(THUMB2_FMRS,       0xee100a10, /* rt <- sn; N is bit 7 */
+                 BITBLT, 15, 12, SFP, 7, 16, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST,
+                 "fmrs", "r!0d, !1s", 2),
+    ENCODING_MAP(THUMB2_FMSR,       0xee000a10, /* sn <- rt; N is bit 7 */
+                 SFP, 7, 16, BITBLT, 15, 12, UNUSED, -1, -1, UNUSED, -1, -1,
+                 IS_BINARY_OP | CLOBBER_DEST,
+                 "fmsr", "!0s, r!1d", 2),
+    ENCODING_MAP(THUMB2_FMRRD,       0xec500b10, /* rt, rt2 <- dm */
+                 BITBLT, 15, 12, BITBLT, 19, 16, DFP, 5, 0, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST | CLOBBER_SRC1,
+                 "fmrrd", "r!0d, r!1d, !2S", 2),
+    ENCODING_MAP(THUMB2_FMDRR,       0xec400b10, /* dm <- rt, rt2 */
+                 DFP, 5, 0, BITBLT, 15, 12, BITBLT, 19, 16, UNUSED, -1, -1,
+                 IS_TERTIARY_OP | CLOBBER_DEST,
+                 "fmdrr", "!0S, r!1d, r!2d", 2),
 };
 
 
@@ -812,7 +837,8 @@
             } else if (delta > 1020) {
                 return true;
             }
-            lir->operands[1] = (lir->opCode == THUMB2_LDR_PC_REL12) ? delta : delta >> 2;
+            lir->operands[1] = (lir->opCode == THUMB2_LDR_PC_REL12) ?
+                                delta : delta >> 2;
         } else if (lir->opCode == THUMB2_CBNZ || lir->opCode == THUMB2_CBZ) {
             ArmLIR *targetLIR = (ArmLIR *) lir->generic.target;
             intptr_t pc = lir->generic.offset + 4;
@@ -820,8 +846,8 @@
             int delta = target - pc;
             if (delta > 126 || delta < 0) {
                 /*
-                 * TODO: allow multiple kinds of assembler failure to allow us to
-                 * change code patterns when things don't fit.
+                 * TODO: allow multiple kinds of assembler failure to allow
+                 * change of code patterns when things don't fit.
                  */
                 return true;
             } else {
diff --git a/vm/compiler/codegen/arm/Thumb2Util.c b/vm/compiler/codegen/arm/Thumb2Util.c
index 559cf0d..806bd02 100644
--- a/vm/compiler/codegen/arm/Thumb2Util.c
+++ b/vm/compiler/codegen/arm/Thumb2Util.c
@@ -1232,7 +1232,11 @@
     int vDest = inlinedTarget(mir);
     // TUNING: handle case of src already in FP reg
     if (vDest >= 0) {
-        if (vDest == vSrc) {
+        /*
+         * FIXME: disable this case to work around bug until after
+         * new schedule/ralloc mechanisms are done.
+         */
+        if (0 && (vDest == vSrc)) {
             loadValue(cUnit, vSrc+1, ophi);
             opRegRegImm(cUnit, OP_AND, ophi, ophi, 0x7fffffff, signMask);
             storeValue(cUnit, ophi, vDest + 1, signMask);
diff --git a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
index 732172a..41a79de 100644
--- a/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv5te-vfp/ArchVariant.c
@@ -116,11 +116,12 @@
 {
     int offset = offsetof(InterpState, retval);
     OpCode opCode = mir->dalvikInsn.opCode;
-    int vSrc = mir->dalvikInsn.vA;
+    int vSrc = mir->dalvikInsn.arg[0];
     loadValueAddress(cUnit, vSrc, r2);
     genDispatchToHandler(cUnit, TEMPLATE_SQRT_DOUBLE_VFP);
     newLIR3(cUnit, THUMB_STR_RRI5, r0, rGLUE, offset >> 2);
     newLIR3(cUnit, THUMB_STR_RRI5, r1, rGLUE, (offset >> 2) + 1);
+    resetRegisterScoreboard(cUnit);
     return false;
 }
 
diff --git a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
index 39df8c4..65e0ec0 100644
--- a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
+++ b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <math.h>  // for double sqrt(double)
+
 
 /*
  * This file is included by Codegen-armv5te-vfp.c, and implements architecture
@@ -116,14 +118,26 @@
 static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir)
 {
     int offset = offsetof(InterpState, retval);
-    int vSrc = mir->dalvikInsn.vA;
+    int vSrc = mir->dalvikInsn.arg[0];
     int vDest = inlinedTarget(mir);
+    ArmLIR *branch;     // conditional branch around the C sqrt() call
+    ArmLIR *target;     // store instruction that the branch lands on
+    /* Emit vsqrt; if its result is NaN, redo the sqrt by calling C sqrt(). */
     loadDouble(cUnit, vSrc, dr1);
     newLIR2(cUnit, THUMB2_VSQRTD, dr0, dr1);
+    newLIR2(cUnit, THUMB2_VCMPD, dr0, dr0);     // EQ unless dr0 is NaN
+    newLIR0(cUnit, THUMB2_FMSTAT);              // copy FP flags for b<cond>
+    branch = newLIR2(cUnit, THUMB_B_COND, 0, ARM_COND_EQ);  // not NaN: skip
+    loadConstant(cUnit, r2, (int)sqrt);         // r2 <- address of sqrt()
+    newLIR3(cUnit, THUMB2_FMRRD, r0, r1, dr1);  // r0/r1 <- source double
+    newLIR1(cUnit, THUMB_BLX_R, r2);            // call through r2
+    newLIR3(cUnit, THUMB2_FMDRR, dr0, r0, r1);  // dr0 <- r0/r1 after call
     if (vDest >= 0)
-        storeDouble(cUnit, dr0, vDest, rNone);
+        target = storeDouble(cUnit, dr0, vDest, rNone);
     else
-        newLIR3(cUnit, THUMB2_VSTRD, dr0, rGLUE, offset >> 2);
+        target = newLIR3(cUnit, THUMB2_VSTRD, dr0, rGLUE, offset >> 2);
+    branch->generic.target = (LIR *)target;     // both paths join at store
+    resetRegisterScoreboard(cUnit);
     return true;
 }
 
@@ -304,13 +318,13 @@
         loadDouble(cUnit, vSrc2, dr1);
         // Hard-coded use of r7 as temp.  Revisit
         loadConstant(cUnit,r7, defaultResult);
-        newLIR2(cUnit, THUMB2_VCMPED, dr0, dr1);
+        newLIR2(cUnit, THUMB2_VCMPD, dr0, dr1);
     } else {
         loadFloat(cUnit, vSrc1, fr0);
         loadFloat(cUnit, vSrc2, fr2);
         // Hard-coded use of r7 as temp.  Revisit
         loadConstant(cUnit,r7, defaultResult);
-        newLIR2(cUnit, THUMB2_VCMPES, fr0, fr2);
+        newLIR2(cUnit, THUMB2_VCMPS, fr0, fr2);
     }
     newLIR0(cUnit, THUMB2_FMSTAT);
     genIT(cUnit, (defaultResult == -1) ? ARM_COND_GT : ARM_COND_MI, "");
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S
index 3801f49..1b143a9 100644
--- a/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S
@@ -25,7 +25,7 @@
     /* op vAA, vBB, vCC */
     fldd    d0, [r0]                    @ d0<- vBB
     fldd    d1, [r1]                    @ d1<- vCC
-    fcmped  d0, d1                      @ compare (vBB, vCC)
+    fcmpd  d0, d1                       @ compare (vBB, vCC)
     mov     r0, #1                      @ r0<- 1 (default)
     fmstat                              @ export status flags
     mvnmi   r0, #0                      @ (less than) r0<- -1
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S
index 1faafa1..0510ef6 100644
--- a/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S
@@ -24,7 +24,7 @@
     /* op vAA, vBB, vCC */
     flds    s0, [r0]                    @ d0<- vBB
     flds    s1, [r1]                    @ d1<- vCC
-    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    fcmps  s0, s1                      @ compare (vBB, vCC)
     mov     r0, #1                      @ r0<- 1 (default)
     fmstat                              @ export status flags
     mvnmi   r0, #0                      @ (less than) r0<- -1
diff --git a/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S b/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S
index 014f160..bdb42d6 100644
--- a/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S
+++ b/vm/compiler/template/armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S
@@ -24,7 +24,7 @@
     /* op vAA, vBB, vCC */
     flds    s0, [r0]                    @ d0<- vBB
     flds    s1, [r1]                    @ d1<- vCC
-    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    fcmps  s0, s1                      @ compare (vBB, vCC)
     mvn     r0, #0                      @ r0<- -1 (default)
     fmstat                              @ export status flags
     movgt   r0, #1                      @ (greater than) r0<- 1
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
index fcea772..7b1d6aa 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv5te-vfp.S
@@ -876,7 +876,7 @@
     /* op vAA, vBB, vCC */
     fldd    d0, [r0]                    @ d0<- vBB
     fldd    d1, [r1]                    @ d1<- vCC
-    fcmped  d0, d1                      @ compare (vBB, vCC)
+    fcmpd  d0, d1                       @ compare (vBB, vCC)
     mov     r0, #1                      @ r0<- 1 (default)
     fmstat                              @ export status flags
     mvnmi   r0, #0                      @ (less than) r0<- -1
@@ -945,7 +945,7 @@
     /* op vAA, vBB, vCC */
     flds    s0, [r0]                    @ d0<- vBB
     flds    s1, [r1]                    @ d1<- vCC
-    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    fcmps  s0, s1                      @ compare (vBB, vCC)
     mov     r0, #1                      @ r0<- 1 (default)
     fmstat                              @ export status flags
     mvnmi   r0, #0                      @ (less than) r0<- -1
@@ -979,7 +979,7 @@
     /* op vAA, vBB, vCC */
     flds    s0, [r0]                    @ d0<- vBB
     flds    s1, [r1]                    @ d1<- vCC
-    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    fcmps  s0, s1                      @ compare (vBB, vCC)
     mvn     r0, #0                      @ r0<- -1 (default)
     fmstat                              @ export status flags
     movgt   r0, #1                      @ (greater than) r0<- 1
diff --git a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
index c5b1a52..854871b 100644
--- a/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
+++ b/vm/compiler/template/out/CompilerTemplateAsm-armv7-a.S
@@ -876,7 +876,7 @@
     /* op vAA, vBB, vCC */
     fldd    d0, [r0]                    @ d0<- vBB
     fldd    d1, [r1]                    @ d1<- vCC
-    fcmped  d0, d1                      @ compare (vBB, vCC)
+    fcmpd  d0, d1                       @ compare (vBB, vCC)
     mov     r0, #1                      @ r0<- 1 (default)
     fmstat                              @ export status flags
     mvnmi   r0, #0                      @ (less than) r0<- -1
@@ -945,7 +945,7 @@
     /* op vAA, vBB, vCC */
     flds    s0, [r0]                    @ d0<- vBB
     flds    s1, [r1]                    @ d1<- vCC
-    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    fcmps  s0, s1                      @ compare (vBB, vCC)
     mov     r0, #1                      @ r0<- 1 (default)
     fmstat                              @ export status flags
     mvnmi   r0, #0                      @ (less than) r0<- -1
@@ -979,7 +979,7 @@
     /* op vAA, vBB, vCC */
     flds    s0, [r0]                    @ d0<- vBB
     flds    s1, [r1]                    @ d1<- vCC
-    fcmpes  s0, s1                      @ compare (vBB, vCC)
+    fcmps  s0, s1                      @ compare (vBB, vCC)
     mvn     r0, #0                      @ r0<- -1 (default)
     fmstat                              @ export status flags
     movgt   r0, #1                      @ (greater than) r0<- 1