JIT: Fix for 2813841, use core regs for sub-word data

In an attempt to avoid unnecessary register copies, the JIT allows
data items to live in either floating point or core registers until
an instruction is used which requires one or the other.  The bug here
was that sub-word data was allowed to live in floating point registers
at the point of a load or store.  This cl forces the use of core registers
in those cases.

Change-Id: I60c2a0d1df9a299f6c5130371f44f2be9c348ded
diff --git a/vm/compiler/codegen/arm/CodegenDriver.c b/vm/compiler/codegen/arm/CodegenDriver.c
index 3e5126a..03fc2af 100644
--- a/vm/compiler/codegen/arm/CodegenDriver.c
+++ b/vm/compiler/codegen/arm/CodegenDriver.c
@@ -285,10 +285,11 @@
                     int fieldOffset)
 {
     RegLocation rlResult;
+    RegisterClass regClass = dvmCompilerRegClassBySize(size);
     RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 0);
     RegLocation rlDest = dvmCompilerGetDest(cUnit, mir, 0);
     rlObj = loadValue(cUnit, rlObj, kCoreReg);
-    rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true);
+    rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true);
     genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
                  NULL);/* null object? */
 
@@ -307,10 +308,11 @@
 static void genIPut(CompilationUnit *cUnit, MIR *mir, OpSize size,
                     int fieldOffset)
 {
+    RegisterClass regClass = dvmCompilerRegClassBySize(size);
     RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
     RegLocation rlObj = dvmCompilerGetSrc(cUnit, mir, 1);
     rlObj = loadValue(cUnit, rlObj, kCoreReg);
-    rlSrc = loadValue(cUnit, rlSrc, kAnyReg);
+    rlSrc = loadValue(cUnit, rlSrc, regClass);
     genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
                  NULL);/* null object? */
 
@@ -327,6 +329,7 @@
                         RegLocation rlArray, RegLocation rlIndex,
                         RegLocation rlDest, int scale)
 {
+    RegisterClass regClass = dvmCompilerRegClassBySize(size);
     int lenOffset = offsetof(ArrayObject, length);
     int dataOffset = offsetof(ArrayObject, contents);
     RegLocation rlResult;
@@ -366,7 +369,7 @@
         } else {
             opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
         }
-        rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true);
+        rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true);
 
         HEAP_ACCESS_SHADOW(true);
         loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
@@ -375,7 +378,7 @@
         dvmCompilerFreeTemp(cUnit, regPtr);
         storeValueWide(cUnit, rlDest, rlResult);
     } else {
-        rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kAnyReg, true);
+        rlResult = dvmCompilerEvalLoc(cUnit, rlDest, regClass, true);
 
         HEAP_ACCESS_SHADOW(true);
         loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
@@ -395,6 +398,7 @@
                         RegLocation rlArray, RegLocation rlIndex,
                         RegLocation rlSrc, int scale)
 {
+    RegisterClass regClass = dvmCompilerRegClassBySize(size);
     int lenOffset = offsetof(ArrayObject, length);
     int dataOffset = offsetof(ArrayObject, contents);
 
@@ -443,7 +447,7 @@
         } else {
             opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
         }
-        rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg);
+        rlSrc = loadValueWide(cUnit, rlSrc, regClass);
 
         HEAP_ACCESS_SHADOW(true);
         storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
@@ -451,7 +455,7 @@
 
         dvmCompilerFreeTemp(cUnit, regPtr);
     } else {
-        rlSrc = loadValue(cUnit, rlSrc, kAnyReg);
+        rlSrc = loadValue(cUnit, rlSrc, regClass);
 
         HEAP_ACCESS_SHADOW(true);
         storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
diff --git a/vm/compiler/codegen/arm/Ralloc.h b/vm/compiler/codegen/arm/Ralloc.h
index 6c7dfaa..cc3e605 100644
--- a/vm/compiler/codegen/arm/Ralloc.h
+++ b/vm/compiler/codegen/arm/Ralloc.h
@@ -27,6 +27,20 @@
 #include "compiler/Dataflow.h"
 #include "compiler/codegen/arm/ArmLIR.h"
 
+/*
+ * Return most flexible allowed register class based on size.
+ * Bug: 2813841
+ * Must use a core register for data types narrower than word (due
+ * to possible unaligned load/store.
+ */
+static inline RegisterClass dvmCompilerRegClassBySize(OpSize size)
+{
+    return (size == kUnsignedHalf ||
+            size == kSignedHalf ||
+            size == kUnsignedByte ||
+            size == kSignedByte ) ? kCoreReg : kAnyReg;
+}
+
 static inline int dvmCompilerS2VReg(CompilationUnit *cUnit, int sReg)
 {
     assert(sReg != INVALID_SREG);
diff --git a/vm/compiler/codegen/arm/Thumb2/Factory.c b/vm/compiler/codegen/arm/Thumb2/Factory.c
index 3f1755e..c7b52fd 100644
--- a/vm/compiler/codegen/arm/Thumb2/Factory.c
+++ b/vm/compiler/codegen/arm/Thumb2/Factory.c
@@ -748,15 +748,9 @@
 
     if (FPREG(rSrc)) {
         assert(SINGLEREG(rSrc));
-        if ((size != kWord) && (size != kSingle)) {
-           /* Move float value into core register */
-           int tReg = dvmCompilerAllocTemp(cUnit);
-           dvmCompilerRegCopy(cUnit, tReg, rSrc);
-           rSrc = tReg;
-        } else {
-            opCode = kThumb2Vstrs;
-            size = kSingle;
-        }
+        assert((size == kWord) || (size == kSingle));
+        opCode = kThumb2Vstrs;
+        size = kSingle;
     } else {
         if (size == kSingle)
             size = kWord;
diff --git a/vm/compiler/codegen/arm/Thumb2/Ralloc.c b/vm/compiler/codegen/arm/Thumb2/Ralloc.c
index bfd7f3f..6adfd62 100644
--- a/vm/compiler/codegen/arm/Thumb2/Ralloc.c
+++ b/vm/compiler/codegen/arm/Thumb2/Ralloc.c
@@ -22,6 +22,9 @@
  *
  */
 
+/* Stress mode for testing: if defined will reverse corereg/floatreg hint */
+//#define REGCLASS_STRESS_MODE
+
 /*
  * Alloc a pair of core registers, or a double.  Low reg in low byte,
  * high reg in next byte.
@@ -32,6 +35,11 @@
     int highReg;
     int lowReg;
     int res = 0;
+
+#if defined(REGCLASS_STRESS_MODE)
+    fpHint = !fpHint;
+#endif
+
     if (((regClass == kAnyReg) && fpHint) || (regClass == kFPReg)) {
         lowReg = dvmCompilerAllocTempDouble(cUnit);
         highReg = lowReg + 1;
@@ -46,6 +54,9 @@
 int dvmCompilerAllocTypedTemp(CompilationUnit *cUnit, bool fpHint,
                                      int regClass)
 {
+#if defined(REGCLASS_STRESS_MODE)
+    fpHint = !fpHint;
+#endif
     if (((regClass == kAnyReg) && fpHint) || (regClass == kFPReg))
         return dvmCompilerAllocTempFloat(cUnit);
     return dvmCompilerAllocTemp(cUnit);