Add support for 64-bit shifts, fix const-wide

Long shift opcodes now dispatch to out-of-line ARM assembly helpers
through Thread function pointers, and const-wide now loads the full
64-bit literal instead of hardcoding the high word to zero.

Change-Id: I4823056d83652ecc7d3e391e905d480d73fab718
diff --git a/build/Android.common.mk b/build/Android.common.mk
index f13fdd0..9e78a96 100644
--- a/build/Android.common.mk
+++ b/build/Android.common.mk
@@ -71,7 +71,8 @@
 	src/thread.cc \
 	src/utf.cc \
 	src/utils.cc \
-	src/zip_archive.cc
+	src/zip_archive.cc \
+	src/runtime_support.S
 
 LIBART_TARGET_SRC_FILES := \
 	$(LIBART_COMMON_SRC_FILES) \
diff --git a/src/compiler/codegen/arm/MethodCodegenDriver.cc b/src/compiler/codegen/arm/MethodCodegenDriver.cc
index fd1b8a7..483d7a7 100644
--- a/src/compiler/codegen/arm/MethodCodegenDriver.cc
+++ b/src/compiler/codegen/arm/MethodCodegenDriver.cc
@@ -897,7 +897,8 @@
         case OP_CONST_WIDE:
             rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
             loadConstantValueWide(cUnit, rlResult.lowReg, rlResult.highReg,
-                                  0, mir->dalvikInsn.vB);
+                          mir->dalvikInsn.vB_wide & 0xffffffff,
+                          (mir->dalvikInsn.vB_wide >> 32) & 0xffffffff);
             storeValueWide(cUnit, rlDest, rlResult);
             break;
 
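For reference, a minimal C++ sketch (illustrative only; SplitWideLiteral is a
hypothetical helper, not part of this change) of the low/high split that the
fixed call above performs on the 64-bit vB_wide literal:

    #include <cstdint>

    // Split a 64-bit Dex literal into the 32-bit halves that the wide
    // constant loader takes: low word first, then high word.
    static inline void SplitWideLiteral(uint64_t vB_wide,
                                        uint32_t* lo, uint32_t* hi) {
      *lo = static_cast<uint32_t>(vB_wide & 0xffffffff);
      *hi = static_cast<uint32_t>((vB_wide >> 32) & 0xffffffff);
    }
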
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index d8ecc84..4a2bdb3 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -1324,34 +1324,32 @@
                            RegLocation rlDest, RegLocation rlSrc1,
                            RegLocation rlShift)
 {
-    /*
-     * Don't mess with the regsiters here as there is a particular calling
-     * convention to the out-of-line handler.
-     */
-    RegLocation rlResult;
+    int funcOffset;
 
-    loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
-    loadValueDirect(cUnit, rlShift, r2);
     switch( mir->dalvikInsn.opcode) {
         case OP_SHL_LONG:
         case OP_SHL_LONG_2ADDR:
-            UNIMPLEMENTED(FATAL) << "Need SHL_LONG helper";
-            //genDispatchToHandler(cUnit, TEMPLATE_SHL_LONG);
+            funcOffset = OFFSETOF_MEMBER(Thread, pShlLong);
             break;
         case OP_SHR_LONG:
         case OP_SHR_LONG_2ADDR:
-            UNIMPLEMENTED(FATAL) << "Need SHR_LONG helper";
-            //genDispatchToHandler(cUnit, TEMPLATE_SHR_LONG);
+            funcOffset = OFFSETOF_MEMBER(Thread, pShrLong);
             break;
         case OP_USHR_LONG:
         case OP_USHR_LONG_2ADDR:
-            UNIMPLEMENTED(FATAL) << "Need USHR_LONG helper";
-            //genDispatchToHandler(cUnit, TEMPLATE_USHR_LONG);
+            funcOffset = OFFSETOF_MEMBER(Thread, pUshrLong);
             break;
         default:
+            LOG(FATAL) << "Unexpected case";
             return true;
     }
-    rlResult = oatGetReturnWide(cUnit);
+    oatFlushAllRegs(cUnit);   /* Send everything to home location */
+    loadWordDisp(cUnit, rSELF, funcOffset, rLR);
+    loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
+    loadValueDirect(cUnit, rlShift, r2);
+    opReg(cUnit, kOpBlx, rLR);
+    oatClobberCallRegs(cUnit);
+    RegLocation rlResult = oatGetReturnWide(cUnit);
     storeValueWide(cUnit, rlDest, rlResult);
     return false;
 }
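
The new sequence follows the out-of-line call pattern used above: flush live
values to their home locations, load the helper's address out of the Thread
block into rLR, put the wide operand in r0/r1 and the shift count in r2,
branch-and-link, then treat the caller-save registers as clobbered. A rough
C++ analogue of what the emitted code does at run time (ThreadSketch and
DoShlLong are hypothetical names, used only for illustration):

    #include <cstdint>

    // Hypothetical stand-in for the per-thread function-pointer table;
    // the real fields are Thread::pShlLong / pShrLong / pUshrLong.
    struct ThreadSketch {
      uint64_t (*pShlLong)(uint64_t value, uint32_t shift);
    };

    // The generated Thumb2 code amounts to this indirect call: the 64-bit
    // operand travels in r0/r1, the shift count in r2, the result in r0/r1.
    uint64_t DoShlLong(ThreadSketch* self, uint64_t value, uint32_t shift) {
      return self->pShlLong(value, shift);
    }
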
diff --git a/src/compiler_test.cc b/src/compiler_test.cc
index 004a4a8..7b7f294 100644
--- a/src/compiler_test.cc
+++ b/src/compiler_test.cc
@@ -81,7 +81,6 @@
 }
 #endif
 
-#if 0 // Fails, needs 64-bit shift helper functions
 TEST_F(CompilerTest, ShiftTest2) {
   scoped_ptr<DexFile> dex_file(OpenDexFileBase64(kIntMathDex,
                                "kIntMathDex"));
@@ -101,7 +100,6 @@
 
   ASSERT_EQ(0, result);
 }
-#endif
 
 TEST_F(CompilerTest, UnsignedShiftTest) {
   scoped_ptr<DexFile> dex_file(OpenDexFileBase64(kIntMathDex,
diff --git a/src/runtime_support.S b/src/runtime_support.S
new file mode 100644
index 0000000..46c2dd3
--- /dev/null
+++ b/src/runtime_support.S
@@ -0,0 +1,72 @@
+#if defined(__arm__)
+
+    .balign 4
+    .global art_shl_long
+art_shl_long:
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     * On entry:
+     *   r0: low word
+     *   r1: high word
+     *   r2: shift count
+     */
+    /* shl-long vAA, vBB, vCC */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r1, r1, asl r2              @  r1<- r1 << r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
+    mov     r0, r0, asl r2              @  r0<- r0 << r2
+    bx      lr
+
+    .balign 4
+    .global art_shr_long
+art_shr_long:
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     * On entry:
+     *   r0: low word
+     *   r1: high word
+     *   r2: shift count
+     */
+    /* shr-long vAA, vBB, vCC */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
+    mov     r1, r1, asr r2              @  r1<- r1 >> r2
+    bx      lr
+
+    .balign 4
+    .global art_ushr_long
+art_ushr_long:
+    /*
+     * Long integer shift.  This is different from the generic 32/64-bit
+     * binary operations because vAA/vBB are 64-bit but vCC (the shift
+     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
+     * 6 bits.
+     * On entry:
+     *   r0: low word
+     *   r1: high word
+     *   r2: shift count
+     */
+    /* ushr-long vAA, vBB, vCC */
+    and     r2, r2, #63                 @ r2<- r2 & 0x3f
+    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
+    rsb     r3, r2, #32                 @  r3<- 32 - r2
+    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
+    subs    ip, r2, #32                 @  ip<- r2 - 32
+    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
+    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
+    bx      lr
+
+#endif
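
For comparison, a hedged C++ reference of the semantics these routines
implement: the shift count is masked to its low six bits, and the 64-bit
value is then shifted left, arithmetically right, or logically right. The
Ref* names below are illustrative only, not part of the runtime:

    #include <cstdint>

    uint64_t RefShlLong(uint64_t v, uint32_t shift) {
      return v << (shift & 63);                      // shl-long
    }
    uint64_t RefShrLong(uint64_t v, uint32_t shift) {
      return static_cast<uint64_t>(
          static_cast<int64_t>(v) >> (shift & 63));  // shr-long, sign-extending
    }
    uint64_t RefUshrLong(uint64_t v, uint32_t shift) {
      return v >> (shift & 63);                      // ushr-long, zero-extending
    }
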
diff --git a/src/runtime_support.h b/src/runtime_support.h
new file mode 100644
index 0000000..02b176b
--- /dev/null
+++ b/src/runtime_support.h
@@ -0,0 +1,12 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+
+#ifndef ART_SRC_RUNTIME_SUPPORT_H_
+#define ART_SRC_RUNTIME_SUPPORT_H_
+
+#if defined(__arm__)
+  extern "C" uint64_t art_shl_long(uint64_t, uint32_t);
+  extern "C" uint64_t art_shr_long(uint64_t, uint32_t);
+  extern "C" uint64_t art_ushr_long(uint64_t, uint32_t);
+#endif
+
+#endif  // ART_SRC_RUNTIME_SUPPORT_H_
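
A minimal usage sketch, assuming an ARM build where runtime_support.S is
linked in; CheckLongShifts is a hypothetical test hook, not part of this
change:

    #include <cassert>
    #include <cstdint>
    #include "runtime_support.h"

    #if defined(__arm__)
    void CheckLongShifts() {
      assert(art_shl_long(1, 33) == (1ULL << 33));               // count >= 32 path
      assert(art_shl_long(1, 64) == 1ULL);                       // count masked to 6 bits
      assert(art_shr_long(0x8000000000000000ULL, 63) == ~0ULL);  // sign-extending
      assert(art_ushr_long(0x8000000000000000ULL, 63) == 1ULL);  // zero-extending
    }
    #endif
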
diff --git a/src/thread.cc b/src/thread.cc
index 409db2a..f37dcba 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -13,12 +13,18 @@
 #include "object.h"
 #include "runtime.h"
 #include "utils.h"
+#include "runtime_support.h"
 
 namespace art {
 
 pthread_key_t Thread::pthread_key_self_;
 
 void Thread::InitFunctionPointers() {
+#if defined(__arm__)
+  pShlLong = art_shl_long;
+  pShrLong = art_shr_long;
+  pUshrLong = art_ushr_long;
+#endif
   pArtAllocArrayByClass = Array::Alloc;
   pMemcpy = memcpy;
 #if 0
diff --git a/src/thread.h b/src/thread.h
index bb97d4e..68b6cbf 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -170,6 +170,9 @@
 
   // Runtime support function pointers
   void* (*pMemcpy)(void*, const void*, size_t);
+  uint64_t (*pShlLong)(uint64_t, uint32_t);
+  uint64_t (*pShrLong)(uint64_t, uint32_t);
+  uint64_t (*pUshrLong)(uint64_t, uint32_t);
   float (*pI2f)(int);
   int (*pF2iz)(float);
   float (*pD2f)(double);