Merge "ART: Move __memcmp16 from Bionic to ART"
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 49cf71b..0b7272c 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -180,7 +180,7 @@
   EXPECT_EQ(80U, sizeof(OatHeader));
   EXPECT_EQ(8U, sizeof(OatMethodOffsets));
   EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(79 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
+  EXPECT_EQ(78 * GetInstructionSetPointerSize(kRuntimeISA), sizeof(QuickEntryPoints));
 }
 
 TEST_F(OatTest, OatHeaderIsValid) {
diff --git a/runtime/Android.mk b/runtime/Android.mk
index 8d532c7..52b5f43 100644
--- a/runtime/Android.mk
+++ b/runtime/Android.mk
@@ -158,6 +158,7 @@
 
 LIBART_COMMON_SRC_FILES += \
 	arch/context.cc \
+	arch/memcmp16.cc \
 	arch/arm/registers_arm.cc \
 	arch/arm64/registers_arm64.cc \
 	arch/x86/registers_x86.cc \
@@ -209,6 +210,7 @@
 	arch/arm/context_arm.cc.arm \
 	arch/arm/entrypoints_init_arm.cc \
 	arch/arm/jni_entrypoints_arm.S \
+	arch/arm/memcmp16_arm.S \
 	arch/arm/portable_entrypoints_arm.S \
 	arch/arm/quick_entrypoints_arm.S \
 	arch/arm/arm_sdiv.S \
@@ -254,6 +256,7 @@
 	arch/mips/context_mips.cc \
 	arch/mips/entrypoints_init_mips.cc \
 	arch/mips/jni_entrypoints_mips.S \
+	arch/mips/memcmp16.S \
 	arch/mips/portable_entrypoints_mips.S \
 	arch/mips/quick_entrypoints_mips.S \
 	arch/mips/thread_mips.cc \
diff --git a/runtime/arch/arm/asm_support_arm.S b/runtime/arch/arm/asm_support_arm.S
index c4f68af..e1b0ce7 100644
--- a/runtime/arch/arm/asm_support_arm.S
+++ b/runtime/arch/arm/asm_support_arm.S
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_S_
-#define ART_RUNTIME_ARCH_X86_ASM_SUPPORT_X86_S_
+#ifndef ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_S_
+#define ART_RUNTIME_ARCH_ARM_ASM_SUPPORT_ARM_S_
 
 #include "asm_support_arm.h"
 
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 340a83e..ebceb63 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -102,7 +102,6 @@
 extern "C" uint64_t art_quick_ushr_long(uint64_t, uint32_t);
 
 // Intrinsic entrypoints.
-extern "C" int32_t __memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 
@@ -213,7 +212,6 @@
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pMemcmp16 = __memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = memcpy;
 
diff --git a/runtime/arch/arm/memcmp16_arm.S b/runtime/arch/arm/memcmp16_arm.S
new file mode 100644
index 0000000..3762194
--- /dev/null
+++ b/runtime/arch/arm/memcmp16_arm.S
@@ -0,0 +1,227 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
+#define ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
+
+#include "asm_support_arm.S"
+
+/*
+ * Optimized memcmp16() for ARM9.
+ * This would not be optimal on XScale or ARM11, where more prefetching
+ * and use of pld will be needed.
+ * The 2 major optimzations here are
+ * (1) The main loop compares 16 bytes at a time
+ * (2) The loads are scheduled in a way they won't stall
+ */
+
+ARM_ENTRY __memcmp16
+        pld         [r0, #0]
+        pld         [r1, #0]
+
+        /* take of the case where length is nul or the buffers are the same */
+        cmp         r0, r1
+        cmpne       r2, #0
+        moveq       r0, #0
+        bxeq        lr
+
+        /* since r0 hold the result, move the first source
+         * pointer somewhere else
+         */
+
+        mov         r3, r0
+
+         /* make sure we have at least 12 words, this simplify things below
+          * and avoid some overhead for small blocks
+          */
+
+        cmp         r2, #12
+        bpl         0f
+
+        /* small blocks (less then 12 words) */
+        pld         [r0, #32]
+        pld         [r1, #32]
+
+1:      ldrh        r0, [r3], #2
+        ldrh        ip, [r1], #2
+        subs        r0, r0, ip
+        bxne        lr
+        subs        r2, r2, #1
+        bne         1b
+        bx          lr
+
+
+        /* save registers */
+0:      stmfd       sp!, {r4, lr}
+        .cfi_def_cfa_offset 8
+        .cfi_rel_offset r4, 0
+        .cfi_rel_offset lr, 4
+
+        /* align first pointer to word boundary */
+        tst         r3, #2
+        beq         0f
+
+        ldrh        r0, [r3], #2
+        ldrh        ip, [r1], #2
+        sub         r2, r2, #1
+        subs        r0, r0, ip
+        /* restore registers and return */
+        ldmnefd     sp!, {r4, lr}
+        bxne        lr
+
+
+0:      /* here the first pointer is aligned, and we have at least 3 words
+         * to process.
+         */
+
+        /* see if the pointers are congruent */
+        eor         r0, r3, r1
+        ands        r0, r0, #2
+        bne         5f
+
+        /* congruent case, 16 half-words per iteration
+         * We need to make sure there are at least 16+2 words left
+         * because we effectively read ahead one long word, and we could
+         * read past the buffer (and segfault) if we're not careful.
+         */
+
+        ldr         ip, [r1]
+        subs        r2, r2, #(16 + 2)
+        bmi         1f
+
+0:
+        pld         [r3, #64]
+        pld         [r1, #64]
+        ldr         r0, [r3], #4
+        ldr         lr, [r1, #4]!
+        eors        r0, r0, ip
+        ldreq       r0, [r3], #4
+        ldreq       ip, [r1, #4]!
+        eoreqs      r0, r0, lr
+        ldreq       r0, [r3], #4
+        ldreq       lr, [r1, #4]!
+        eoreqs      r0, r0, ip
+        ldreq       r0, [r3], #4
+        ldreq       ip, [r1, #4]!
+        eoreqs      r0, r0, lr
+        ldreq       r0, [r3], #4
+        ldreq       lr, [r1, #4]!
+        eoreqs      r0, r0, ip
+        ldreq       r0, [r3], #4
+        ldreq       ip, [r1, #4]!
+        eoreqs      r0, r0, lr
+        ldreq       r0, [r3], #4
+        ldreq       lr, [r1, #4]!
+        eoreqs      r0, r0, ip
+        ldreq       r0, [r3], #4
+        ldreq       ip, [r1, #4]!
+        eoreqs      r0, r0, lr
+        bne         2f
+        subs        r2, r2, #16
+        bhs         0b
+
+        /* do we have at least 2 words left? */
+1:      adds        r2, r2, #(16 - 2 + 2)
+        bmi         4f
+
+        /* finish off 2 words at a time */
+3:      ldr         r0, [r3], #4
+        ldr         ip, [r1], #4
+        eors        r0, r0, ip
+        bne         2f
+        subs        r2, r2, #2
+        bhs         3b
+
+        /* are we done? */
+4:      adds        r2, r2, #2
+        bne         8f
+        /* restore registers and return */
+        mov         r0, #0
+        ldmfd       sp!, {r4, lr}
+        bx          lr
+
+2:      /* the last 2 words are different, restart them */
+        ldrh        r0, [r3, #-4]
+        ldrh        ip, [r1, #-4]
+        subs        r0, r0, ip
+        ldreqh      r0, [r3, #-2]
+        ldreqh      ip, [r1, #-2]
+        subeqs      r0, r0, ip
+        /* restore registers and return */
+        ldmfd       sp!, {r4, lr}
+        bx          lr
+
+        /* process the last few words */
+8:      ldrh        r0, [r3], #2
+        ldrh        ip, [r1], #2
+        subs        r0, r0, ip
+        bne         9f
+        subs        r2, r2, #1
+        bne         8b
+
+9:      /* restore registers and return */
+        ldmfd       sp!, {r4, lr}
+        bx          lr
+
+
+5:      /*************** non-congruent case ***************/
+
+        /* align the unaligned pointer */
+        bic         r1, r1, #3
+        ldr         lr, [r1], #4
+        sub         r2, r2, #8
+
+6:
+        pld         [r3, #64]
+        pld         [r1, #64]
+        mov         ip, lr, lsr #16
+        ldr         lr, [r1], #4
+        ldr         r0, [r3], #4
+        orr         ip, ip, lr, lsl #16
+        eors        r0, r0, ip
+        moveq       ip, lr, lsr #16
+        ldreq       lr, [r1], #4
+        ldreq       r0, [r3], #4
+        orreq       ip, ip, lr, lsl #16
+        eoreqs      r0, r0, ip
+        moveq       ip, lr, lsr #16
+        ldreq       lr, [r1], #4
+        ldreq       r0, [r3], #4
+        orreq       ip, ip, lr, lsl #16
+        eoreqs      r0, r0, ip
+        moveq       ip, lr, lsr #16
+        ldreq       lr, [r1], #4
+        ldreq       r0, [r3], #4
+        orreq       ip, ip, lr, lsl #16
+        eoreqs      r0, r0, ip
+        bne         7f
+        subs        r2, r2, #8
+        bhs         6b
+        sub         r1, r1, #2
+        /* are we done? */
+        adds        r2, r2, #8
+        moveq       r0, #0
+        beq         9b
+        /* finish off the remaining bytes */
+        b           8b
+
+7:      /* fix up the 2 pointers and fallthrough... */
+        sub         r1, r1, #2
+        b           2b
+END __memcmp16
+
+
+#endif  // ART_RUNTIME_ARCH_ARM_MEMCMP16_ARM_S_
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 46e819e..84ee778 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -85,7 +85,6 @@
 extern "C" double fmod(double a, double b);         // REM_DOUBLE[_2ADDR]
 
 // Intrinsic entrypoints.
-extern "C" int32_t __memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 
@@ -199,7 +198,6 @@
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pMemcmp16 = __memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = memcpy;
 
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 9a877f6..6031e25 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1662,7 +1662,7 @@
      *    x1:   comp object pointer
      *
      */
-    .extern __memcmp16
+    .extern memcmp16_generic_static
 ENTRY art_quick_string_compareto
     mov    x2, x0         // x0 is return, use x2 for first input.
     sub    x0, x2, x1     // Same string object?
@@ -1758,7 +1758,7 @@
 
     mov x0, x2
     uxtw x2, w3
-    bl __memcmp16
+    bl memcmp16_generic_static
 
     ldr x1, [sp], #16            // Restore old x0 = length diff
 
diff --git a/runtime/arch/memcmp16.cc b/runtime/arch/memcmp16.cc
new file mode 100644
index 0000000..7928085
--- /dev/null
+++ b/runtime/arch/memcmp16.cc
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "memcmp16.h"
+
+// This linked against by assembly stubs, only.
+#pragma GCC diagnostic ignored "-Wunused-function"
+
+int32_t memcmp16_generic_static(const uint16_t* s0, const uint16_t* s1, size_t count) {
+  for (size_t i = 0; i < count; i++) {
+    if (s0[i] != s1[i]) {
+      return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
+    }
+  }
+  return 0;
+}
+
+#pragma GCC diagnostic warning "-Wunused-function"
diff --git a/runtime/arch/memcmp16.h b/runtime/arch/memcmp16.h
new file mode 100644
index 0000000..ad58588
--- /dev/null
+++ b/runtime/arch/memcmp16.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_MEMCMP16_H_
+#define ART_RUNTIME_ARCH_MEMCMP16_H_
+
+#include <cstddef>
+#include <cstdint>
+
+// memcmp16 support.
+//
+// This can either be optimized assembly code, in which case we expect a function __memcmp16,
+// or generic C support.
+//
+// In case of the generic support we declare two versions: one in this header file meant to be
+// inlined, and a static version that assembly stubs can link against.
+//
+// In both cases, MemCmp16 is declared.
+
+#if defined(__arm__) || defined(__mips)
+
+extern "C" uint32_t __memcmp16(const uint16_t* s0, const uint16_t* s1, size_t count);
+#define MemCmp16 __memcmp16
+
+#else
+
+// This is the generic inlined version.
+static inline int32_t MemCmp16(const uint16_t* s0, const uint16_t* s1, size_t count) {
+  for (size_t i = 0; i < count; i++) {
+    if (s0[i] != s1[i]) {
+      return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
+    }
+  }
+  return 0;
+}
+
+extern "C" int32_t memcmp16_generic_static(const uint16_t* s0, const uint16_t* s1, size_t count);
+#endif
+
+#endif  // ART_RUNTIME_ARCH_MEMCMP16_H_
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 500a2eb..08caa80 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -103,7 +103,6 @@
 extern "C" uint64_t art_quick_ushr_long(uint64_t, uint32_t);
 
 // Intrinsic entrypoints.
-extern "C" int32_t __memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_indexof(void*, uint32_t, uint32_t, uint32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 
@@ -216,7 +215,6 @@
 
   // Intrinsics
   qpoints->pIndexOf = art_quick_indexof;
-  qpoints->pMemcmp16 = __memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = memcpy;
 
diff --git a/runtime/arch/mips/memcmp16_mips.S b/runtime/arch/mips/memcmp16_mips.S
new file mode 100644
index 0000000..571ad75
--- /dev/null
+++ b/runtime/arch/mips/memcmp16_mips.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ARCH_MIPS_MEMCMP16_MIPS_S_
+#define ART_RUNTIME_ARCH_MIPS_MEMCMP16_MIPS_S_
+
+#include "asm_support_mips.h"
+
+// u4 __memcmp16(const u2*, const u2*, size_t);
+ENTRY(__memcmp16)
+  li  t0,0
+  li  t1,0
+  beqz  a2,done   /* 0 length string */
+  beq a0,a1,done    /* strings are identical */
+
+  /* Unoptimised... */
+1:  lhu t0,0(a0)
+  lhu t1,0(a1)
+  addu  a1,2
+  bne t0,t1,done
+  addu  a0,2
+  subu  a2,1
+  bnez  a2,1b
+
+done:
+  subu  v0,t0,t1
+  j ra
+END(__memcmp16)
+
+#endif  // ART_RUNTIME_ARCH_MIPS_MEMCMP16_MIPS_S_
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index c53fa1e..c30dca1 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -81,7 +81,6 @@
 extern "C" uint64_t art_quick_lushr(uint64_t, uint32_t);
 
 // Intrinsic entrypoints.
-extern "C" int32_t art_quick_memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 extern "C" void* art_quick_memcpy(void*, const void*, size_t);
 
@@ -194,7 +193,6 @@
 
   // Intrinsics
   // qpoints->pIndexOf = nullptr;  // Not needed on x86
-  qpoints->pMemcmp16 = art_quick_memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = art_quick_memcpy;
 
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index aeda072..2612417 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -80,7 +80,6 @@
 extern "C" uint64_t art_quick_lushr(uint64_t, uint32_t);
 
 // Intrinsic entrypoints.
-extern "C" int32_t art_quick_memcmp16(void*, void*, int32_t);
 extern "C" int32_t art_quick_string_compareto(void*, void*);
 extern "C" void* art_quick_memcpy(void*, const void*, size_t);
 
@@ -193,7 +192,6 @@
 
   // Intrinsics
   // qpoints->pIndexOf = NULL;  // Not needed on x86.
-  qpoints->pMemcmp16 = art_quick_memcmp16;
   qpoints->pStringCompareTo = art_quick_string_compareto;
   qpoints->pMemcpy = art_quick_memcpy;
 
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 7bd1582..469d373 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -115,7 +115,6 @@
 
   // Intrinsics
   int32_t (*pIndexOf)(void*, uint32_t, uint32_t, uint32_t);
-  int32_t (*pMemcmp16)(void*, void*, int32_t);
   int32_t (*pStringCompareTo)(void*, void*);
   void* (*pMemcpy)(void*, const void*, size_t);
 
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index f33befb..0dd33cf 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -233,8 +233,7 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pShlLong, pShrLong, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pShrLong, pUshrLong, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pUshrLong, pIndexOf, kPointerSize);
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pIndexOf, pMemcmp16, kPointerSize);
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pMemcmp16, pStringCompareTo, kPointerSize);
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pIndexOf, pStringCompareTo, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pStringCompareTo, pMemcpy, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pMemcpy, pQuickImtConflictTrampoline, kPointerSize);
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pQuickImtConflictTrampoline, pQuickResolutionTrampoline,
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 1d79106..5c57dce 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -16,6 +16,7 @@
 
 #include "string-inl.h"
 
+#include "arch/memcmp16.h"
 #include "array.h"
 #include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
@@ -206,21 +207,6 @@
   return result;
 }
 
-#ifdef HAVE__MEMCMP16
-// "count" is in 16-bit units.
-extern "C" uint32_t __memcmp16(const uint16_t* s0, const uint16_t* s1, size_t count);
-#define MemCmp16 __memcmp16
-#else
-static uint32_t MemCmp16(const uint16_t* s0, const uint16_t* s1, size_t count) {
-  for (size_t i = 0; i < count; i++) {
-    if (s0[i] != s1[i]) {
-      return static_cast<int32_t>(s0[i]) - static_cast<int32_t>(s1[i]);
-    }
-  }
-  return 0;
-}
-#endif
-
 int32_t String::CompareTo(String* rhs) {
   // Quick test for comparison of a string with itself.
   String* lhs = this;
@@ -233,13 +219,13 @@
   // *without* sign extension before it subtracts them (which makes some
   // sense since "char" is unsigned).  So what we get is the result of
   // 0x000000e9 - 0x0000ffff, which is 0xffff00ea.
-  int lhsCount = lhs->GetLength();
-  int rhsCount = rhs->GetLength();
-  int countDiff = lhsCount - rhsCount;
-  int minCount = (countDiff < 0) ? lhsCount : rhsCount;
+  int32_t lhsCount = lhs->GetLength();
+  int32_t rhsCount = rhs->GetLength();
+  int32_t countDiff = lhsCount - rhsCount;
+  int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
   const uint16_t* lhsChars = lhs->GetCharArray()->GetData() + lhs->GetOffset();
   const uint16_t* rhsChars = rhs->GetCharArray()->GetData() + rhs->GetOffset();
-  int otherRes = MemCmp16(lhsChars, rhsChars, minCount);
+  int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount);
   if (otherRes != 0) {
     return otherRes;
   }
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 96834b8..ecd1983 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -22,7 +22,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '3', '3', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '3', '4', '\0' };
 
 OatHeader::OatHeader() {
   memset(this, 0, sizeof(*this));
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 22f0e80..7f7b542 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -1862,7 +1862,6 @@
   QUICK_ENTRY_POINT_INFO(pShrLong)
   QUICK_ENTRY_POINT_INFO(pUshrLong)
   QUICK_ENTRY_POINT_INFO(pIndexOf)
-  QUICK_ENTRY_POINT_INFO(pMemcmp16)
   QUICK_ENTRY_POINT_INFO(pStringCompareTo)
   QUICK_ENTRY_POINT_INFO(pMemcpy)
   QUICK_ENTRY_POINT_INFO(pQuickImtConflictTrampoline)