JIT: Fix for 2813841, use core regs for sub-word data

In an attempt to avoid unnecessary register copies, the JIT allows
data items to live in either floating point or core registers until
an instruction is used which requires one or the other.  The bug here
was that sub-word data was allowed to live in floating point registers
at the point of a load or store.  This cl forces the use of core registers
in those cases.

Change-Id: Iaee57545c6a62990186a5d0ab5bb22728d75dd60
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index e9f00dd..c84128e 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -289,11 +289,11 @@
 {
     int regPtr;
     RegLocation rlResult;
-    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    RegisterClass regClass = dvmCompilerRegClassBySize(size);
     RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0);
     RegLocation rlDest = dvmCompilerGetDest(cUnit, mir, 0);
     rlObj = loadValue(cUnit, rlObj, kCoreReg);
-    rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true);
+    rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true);
     genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
                  NULL);/* null object? */
 
@@ -312,12 +312,11 @@
 static void genIPut(CompilationUnit *cUnit, MIR *mir, OpSize size,
                     int fieldOffset)
 {
-    DecodedInstruction *dInsn = &mir->dalvikInsn;
+    RegisterClass regClass = dvmCompilerRegClassBySize(size);
     RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
     RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 1);
     rlObj = loadValue(cUnit, rlObj, kCoreReg);
-    rlSrc = loadValue(cUnit, rlSrc, kAnyReg);
-    int regPtr;
+    rlSrc = loadValue(cUnit, rlSrc, regClass);
     genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
                  NULL);/* null object? */
 
@@ -334,6 +333,7 @@
                         RegLocation rlArray, RegLocation rlIndex,
                         RegLocation rlDest, int scale)
 {
+    RegisterClass regClass = dvmCompilerRegClassBySize(size);
     int lenOffset = offsetof(ArrayObject, length);
     int dataOffset = offsetof(ArrayObject, contents);
     RegLocation rlResult;
@@ -373,7 +373,7 @@
         } else {
             opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
         }
-        rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true);
+        rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true);
 
         HEAP_ACCESS_SHADOW(true);
         loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
@@ -382,7 +382,7 @@
         dvmCompilerFreeTemp(cUnit, regPtr);
         storeValueWide(cUnit, rlDest, rlResult);
     } else {
-        rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true);
+        rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true);
 
         HEAP_ACCESS_SHADOW(true);
         loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
@@ -402,6 +402,7 @@
                         RegLocation rlArray, RegLocation rlIndex,
                         RegLocation rlSrc, int scale)
 {
+    RegisterClass regClass = dvmCompilerRegClassBySize(size);
     int lenOffset = offsetof(ArrayObject, length);
     int dataOffset = offsetof(ArrayObject, contents);
 
@@ -450,7 +451,7 @@
         } else {
             opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
         }
-        rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg);
+        rlSrc = loadValueWide(cUnit, rlSrc, regClass);
 
         HEAP_ACCESS_SHADOW(true);
         storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
@@ -458,7 +459,7 @@
 
         dvmCompilerFreeTemp(cUnit, regPtr);
     } else {
-        rlSrc = loadValue(cUnit, rlSrc, kAnyReg);
+        rlSrc = loadValue(cUnit, rlSrc, regClass);
 
         HEAP_ACCESS_SHADOW(true);
         storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
diff --git a/vm/compiler/codegen/arm/Ralloc.h b/vm/compiler/codegen/arm/Ralloc.h
index 6c7dfaa..cc3e605 100644
--- a/vm/compiler/codegen/arm/Ralloc.h
+++ b/vm/compiler/codegen/arm/Ralloc.h
@@ -27,6 +27,20 @@
 #include "compiler/Dataflow.h"
 #include "compiler/codegen/arm/ArmLIR.h"
 
+/*
+ * Return most flexible allowed register class based on size.
+ * Bug: 2813841
+ * Must use a core register for data types narrower than word (due
+ * to possible unaligned load/store.
+ */
+static inline RegisterClass dvmCompilerRegClassBySize(OpSize size)
+{
+    return (size == kUnsignedHalf ||
+            size == kSignedHalf ||
+            size == kUnsignedByte ||
+            size == kSignedByte ) ? kCoreReg : kAnyReg;
+}
+
 static inline int dvmCompilerS2VReg(CompilationUnit *cUnit, int sReg)
 {
     assert(sReg != INVALID_SREG);
diff --git a/vm/compiler/codegen/arm/Thumb2/Factory.c b/vm/compiler/codegen/arm/Thumb2/Factory.c
index 360b2c1..0141a0f 100644
--- a/vm/compiler/codegen/arm/Thumb2/Factory.c
+++ b/vm/compiler/codegen/arm/Thumb2/Factory.c
@@ -751,15 +751,9 @@
 
     if (FPREG(rSrc)) {
         assert(SINGLEREG(rSrc));
-        if ((size != kWord) && (size != kSingle)) {
-           /* Move float value into core register */
-           int tReg = dvmCompilerAllocTemp(cUnit);
-           dvmCompilerRegCopy(cUnit, tReg, rSrc);
-           rSrc = tReg;
-        } else {
-            opCode = kThumb2Vstrs;
-            size = kSingle;
-        }
+        assert((size == kWord) || (size == kSingle));
+        opCode = kThumb2Vstrs;
+        size = kSingle;
     } else {
         if (size == kSingle)
             size = kWord;
diff --git a/vm/compiler/codegen/arm/Thumb2/Ralloc.c b/vm/compiler/codegen/arm/Thumb2/Ralloc.c
index bfd7f3f..6adfd62 100644
--- a/vm/compiler/codegen/arm/Thumb2/Ralloc.c
+++ b/vm/compiler/codegen/arm/Thumb2/Ralloc.c
@@ -22,6 +22,9 @@
  *
  */
 
+/* Stress mode for testing: if defined will reverse corereg/floatreg hint */
+//#define REGCLASS_STRESS_MODE
+
 /*
  * Alloc a pair of core registers, or a double.  Low reg in low byte,
  * high reg in next byte.
@@ -32,6 +35,11 @@
     int highReg;
     int lowReg;
     int res = 0;
+
+#if defined(REGCLASS_STRESS_MODE)
+    fpHint = !fpHint;
+#endif
+
     if (((regClass == kAnyReg) && fpHint) || (regClass == kFPReg)) {
         lowReg = dvmCompilerAllocTempDouble(cUnit);
         highReg = lowReg + 1;
@@ -46,6 +54,9 @@
 int dvmCompilerAllocTypedTemp(CompilationUnit *cUnit, bool fpHint,
                                      int regClass)
 {
+#if defined(REGCLASS_STRESS_MODE)
+    fpHint = !fpHint;
+#endif
     if (((regClass == kAnyReg) && fpHint) || (regClass == kFPReg))
         return dvmCompilerAllocTempFloat(cUnit);
     return dvmCompilerAllocTemp(cUnit);