Merge "Remove forced alignment code."
diff --git a/NativeCode.mk b/NativeCode.mk
index 1449e30..910527c 100644
--- a/NativeCode.mk
+++ b/NativeCode.mk
@@ -106,6 +106,37 @@
 
 endif # LIBCORE_SKIP_TESTS
 
+# Set of gtest unit tests.
+include $(CLEAR_VARS)
+LOCAL_CFLAGS += $(core_cflags)
+LOCAL_CPPFLAGS += $(core_cppflags)
+LOCAL_SRC_FILES += \
+  luni/src/test/native/libcore_io_Memory_test.cpp \
+
+LOCAL_C_INCLUDES += libcore/include
+LOCAL_MODULE_TAGS := debug
+LOCAL_MODULE := libjavacore-unit-tests
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/NativeCode.mk
+LOCAL_CXX_STL := libc++
+include $(BUILD_NATIVE_TEST)
+
+# Set of benchmarks for libjavacore functions.
+include $(CLEAR_VARS)
+LOCAL_CFLAGS += $(core_cflags)
+LOCAL_CPPFLAGS += $(core_cppflags)
+LOCAL_SRC_FILES += \
+  luni/src/benchmark/native/libcore_io_Memory_bench.cpp \
+
+LOCAL_C_INCLUDES += libcore/include bionic/benchmarks
+LOCAL_MODULE_TAGS := debug
+LOCAL_MODULE := libjavacore-benchmarks
+LOCAL_ADDITIONAL_DEPENDENCIES := $(LOCAL_PATH)/NativeCode.mk
+LOCAL_CXX_STL := libc++
+LOCAL_MULTILIB := both
+LOCAL_MODULE_STEM_32 := $(LOCAL_MODULE)32
+LOCAL_MODULE_STEM_64 := $(LOCAL_MODULE)64
+include $(BUILD_NATIVE_BENCHMARK)
+
 
 #
 # Build for the host.
diff --git a/luni/src/benchmark/native/libcore_io_Memory_bench.cpp b/luni/src/benchmark/native/libcore_io_Memory_bench.cpp
new file mode 100644
index 0000000..0819c27
--- /dev/null
+++ b/luni/src/benchmark/native/libcore_io_Memory_bench.cpp
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// The functions we want to benchmark are static, so include the source code.
+#include "luni/src/main/native/libcore_io_Memory.cpp"
+
+#include <benchmark/Benchmark.h>
+
+template<typename T, size_t ALIGN>
+void swap_bench(testing::Benchmark* bench, void (*swap_func)(T*, const T*, size_t),
+                int iters, size_t num_elements) {
+  T* src;
+  T* dst;
+  T* src_elems;
+  T* dst_elems;
+
+  if (ALIGN) {
+    src_elems = new T[num_elements + 1];
+    dst_elems = new T[num_elements + 1];
+
+    src = reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(src_elems) + ALIGN);
+    dst = reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(dst_elems) + ALIGN);
+  } else {
+    src_elems = new T[num_elements];
+    dst_elems = new T[num_elements];
+
+    src = src_elems;
+    dst = dst_elems;
+  }
+
+  memset(dst, 0, sizeof(T) * num_elements);
+  memset(src, 0x12, sizeof(T) * num_elements);
+
+  bench->StartBenchmarkTiming();
+
+  for (int i = 0; i < iters; i++) {
+    swap_func(src, dst, num_elements);
+  }
+
+  bench->StopBenchmarkTiming();
+
+  delete[] src_elems;
+  delete[] dst_elems;
+}
+
+#define AT_COMMON_VALUES \
+    Arg(10)->Arg(100)->Arg(1000)->Arg(1024*10)->Arg(1024*100)
+
+BENCHMARK_WITH_ARG(BM_libcore_swapShorts_aligned, int)->AT_COMMON_VALUES;
+void BM_libcore_swapShorts_aligned::Run(int iters, int num_shorts) {
+  swap_bench<jshort, 0>(this, swapShorts, iters, num_shorts);
+}
+
+BENCHMARK_WITH_ARG(BM_libcore_swapInts_aligned, int)->AT_COMMON_VALUES;
+void BM_libcore_swapInts_aligned::Run(int iters, int num_ints) {
+  swap_bench<jint, 0>(this, swapInts, iters, num_ints);
+}
+
+BENCHMARK_WITH_ARG(BM_libcore_swapLongs_aligned, int)->AT_COMMON_VALUES;
+void BM_libcore_swapLongs_aligned::Run(int iters, int num_longs) {
+  swap_bench<jlong, 0>(this, swapLongs, iters, num_longs);
+}
+
+BENCHMARK_WITH_ARG(BM_libcore_swapShorts_unaligned1, int)->AT_COMMON_VALUES;
+void BM_libcore_swapShorts_unaligned1::Run(int iters, int num_shorts) {
+  swap_bench<jshort, 1>(this, swapShorts, iters, num_shorts);
+}
+
+BENCHMARK_WITH_ARG(BM_libcore_swapInts_unaligned1, int)->AT_COMMON_VALUES;
+void BM_libcore_swapInts_unaligned1::Run(int iters, int num_ints) {
+  swap_bench<jint, 1>(this, swapInts, iters, num_ints);
+}
+
+BENCHMARK_WITH_ARG(BM_libcore_swapLongs_unaligned1, int)->AT_COMMON_VALUES;
+void BM_libcore_swapLongs_unaligned1::Run(int iters, int num_longs) {
+  swap_bench<jlong, 1>(this, swapLongs, iters, num_longs);
+}
+
+BENCHMARK_WITH_ARG(BM_libcore_swapShorts_unaligned2, int)->AT_COMMON_VALUES;
+void BM_libcore_swapShorts_unaligned2::Run(int iters, int num_shorts) {
+  swap_bench<jshort, 2>(this, swapShorts, iters, num_shorts);
+}
+
+BENCHMARK_WITH_ARG(BM_libcore_swapInts_unaligned2, int)->AT_COMMON_VALUES;
+void BM_libcore_swapInts_unaligned2::Run(int iters, int num_ints) {
+  swap_bench<jint, 2>(this, swapInts, iters, num_ints);
+}
+
+BENCHMARK_WITH_ARG(BM_libcore_swapLongs_unaligned2, int)->AT_COMMON_VALUES;
+void BM_libcore_swapLongs_unaligned2::Run(int iters, int num_longs) {
+  swap_bench<jlong, 2>(this, swapLongs, iters, num_longs);
+}
diff --git a/luni/src/main/native/libcore_io_Memory.cpp b/luni/src/main/native/libcore_io_Memory.cpp
index 70bd9e4..5122a6c 100644
--- a/luni/src/main/native/libcore_io_Memory.cpp
+++ b/luni/src/main/native/libcore_io_Memory.cpp
@@ -27,25 +27,6 @@
 #include <string.h>
 #include <sys/mman.h>
 
-#if defined(__arm__)
-// 32-bit ARM has load/store alignment restrictions for longs.
-#define LONG_ALIGNMENT_MASK 0x3
-#define INT_ALIGNMENT_MASK 0x0
-#define SHORT_ALIGNMENT_MASK 0x0
-#elif defined(__mips__)
-// MIPS has load/store alignment restrictions for longs, ints and shorts.
-#define LONG_ALIGNMENT_MASK 0x7
-#define INT_ALIGNMENT_MASK 0x3
-#define SHORT_ALIGNMENT_MASK 0x1
-#elif defined(__aarch64__) || defined(__i386__) || defined(__x86_64__)
-// These architectures can load anything at any alignment.
-#define LONG_ALIGNMENT_MASK 0x0
-#define INT_ALIGNMENT_MASK 0x0
-#define SHORT_ALIGNMENT_MASK 0x0
-#else
-#error unknown load/store alignment restrictions for this architecture
-#endif
-
 // Use packed structures for access to unaligned data on targets with alignment restrictions.
 // The compiler will generate appropriate code to access these structures without
 // generating alignment exceptions.
@@ -81,63 +62,31 @@
     // Do 32-bit swaps as long as possible...
     jint* dst = reinterpret_cast<jint*>(dstShorts);
     const jint* src = reinterpret_cast<const jint*>(srcShorts);
-
-    if ((reinterpret_cast<uintptr_t>(dst) & INT_ALIGNMENT_MASK) == 0 &&
-        (reinterpret_cast<uintptr_t>(src) & INT_ALIGNMENT_MASK) == 0) {
-        for (size_t i = 0; i < count / 2; ++i) {
-            jint v = *src++;
-            *dst++ = bswap_2x16(v);
-        }
-        // ...with one last 16-bit swap if necessary.
-        if ((count % 2) != 0) {
-            jshort v = *reinterpret_cast<const jshort*>(src);
-            *reinterpret_cast<jshort*>(dst) = bswap_16(v);
-        }
-    } else {
-        for (size_t i = 0; i < count / 2; ++i) {
-            jint v = get_unaligned<jint>(src++);
-            put_unaligned<jint>(dst++, bswap_2x16(v));
-        }
-        if ((count % 2) != 0) {
-          jshort v = get_unaligned<jshort>(reinterpret_cast<const jshort*>(src));
-          put_unaligned<jshort>(reinterpret_cast<jshort*>(dst), bswap_16(v));
-        }
+    for (size_t i = 0; i < count / 2; ++i) {
+        jint v = get_unaligned<jint>(src++);
+        put_unaligned<jint>(dst++, bswap_2x16(v));
+    }
+    if ((count % 2) != 0) {
+      jshort v = get_unaligned<jshort>(reinterpret_cast<const jshort*>(src));
+      put_unaligned<jshort>(reinterpret_cast<jshort*>(dst), bswap_16(v));
     }
 }
 
 static inline void swapInts(jint* dstInts, const jint* srcInts, size_t count) {
-    if ((reinterpret_cast<uintptr_t>(dstInts) & INT_ALIGNMENT_MASK) == 0 &&
-        (reinterpret_cast<uintptr_t>(srcInts) & INT_ALIGNMENT_MASK) == 0) {
-        for (size_t i = 0; i < count; ++i) {
-            jint v = *srcInts++;
-            *dstInts++ = bswap_32(v);
-        }
-    } else {
-        for (size_t i = 0; i < count; ++i) {
-            jint v = get_unaligned<int>(srcInts++);
-            put_unaligned<jint>(dstInts++, bswap_32(v));
-        }
+    for (size_t i = 0; i < count; ++i) {
+        jint v = get_unaligned<int>(srcInts++);
+        put_unaligned<jint>(dstInts++, bswap_32(v));
     }
 }
 
 static inline void swapLongs(jlong* dstLongs, const jlong* srcLongs, size_t count) {
     jint* dst = reinterpret_cast<jint*>(dstLongs);
     const jint* src = reinterpret_cast<const jint*>(srcLongs);
-    if ((reinterpret_cast<uintptr_t>(dstLongs) & INT_ALIGNMENT_MASK) == 0 &&
-        (reinterpret_cast<uintptr_t>(srcLongs) & INT_ALIGNMENT_MASK) == 0) {
-        for (size_t i = 0; i < count; ++i) {
-          jint v1 = *src++;
-          jint v2 = *src++;
-          *dst++ = bswap_32(v2);
-          *dst++ = bswap_32(v1);
-        }
-    } else {
-        for (size_t i = 0; i < count; ++i) {
-            jint v1 = get_unaligned<jint>(src++);
-            jint v2 = get_unaligned<jint>(src++);
-            put_unaligned<jint>(dst++, bswap_32(v2));
-            put_unaligned<jint>(dst++, bswap_32(v1));
-        }
+    for (size_t i = 0; i < count; ++i) {
+        jint v1 = get_unaligned<jint>(src++);
+        jint v2 = get_unaligned<jint>(src++);
+        put_unaligned<jint>(dst++, bswap_32(v2));
+        put_unaligned<jint>(dst++, bswap_32(v1));
     }
 }
 
@@ -259,39 +208,27 @@
 }
 
 static jshort Memory_peekShortNative(JNIEnv*, jclass, jlong srcAddress) {
-    return *cast<const jshort*>(srcAddress);
+    return get_unaligned<jshort>(cast<const jshort*>(srcAddress));
 }
 
 static void Memory_pokeShortNative(JNIEnv*, jclass, jlong dstAddress, jshort value) {
-    *cast<jshort*>(dstAddress) = value;
+    put_unaligned<jshort>(cast<jshort*>(dstAddress), value);
 }
 
 static jint Memory_peekIntNative(JNIEnv*, jclass, jlong srcAddress) {
-    return *cast<const jint*>(srcAddress);
+    return get_unaligned<jint>(cast<const jint*>(srcAddress));
 }
 
 static void Memory_pokeIntNative(JNIEnv*, jclass, jlong dstAddress, jint value) {
-    *cast<jint*>(dstAddress) = value;
+    put_unaligned<jint>(cast<jint*>(dstAddress), value);
 }
 
 static jlong Memory_peekLongNative(JNIEnv*, jclass, jlong srcAddress) {
-    jlong result;
-    const jlong* src = cast<const jlong*>(srcAddress);
-    if ((srcAddress & LONG_ALIGNMENT_MASK) == 0) {
-        result = *src;
-    } else {
-        result = get_unaligned<jlong>(src);
-    }
-    return result;
+    return get_unaligned<jlong>(cast<const jlong*>(srcAddress));
 }
 
 static void Memory_pokeLongNative(JNIEnv*, jclass, jlong dstAddress, jlong value) {
-    jlong* dst = cast<jlong*>(dstAddress);
-    if ((dstAddress & LONG_ALIGNMENT_MASK) == 0) {
-        *dst = value;
-    } else {
-        put_unaligned<jlong>(dst, value);
-    }
+    put_unaligned<jlong>(cast<jlong*>(dstAddress), value);
 }
 
 static void unsafeBulkCopy(jbyte* dst, const jbyte* src, jint byteCount,
diff --git a/luni/src/test/native/libcore_io_Memory_test.cpp b/luni/src/test/native/libcore_io_Memory_test.cpp
new file mode 100644
index 0000000..2d95155
--- /dev/null
+++ b/luni/src/test/native/libcore_io_Memory_test.cpp
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "luni/src/main/native/libcore_io_Memory.cpp"
+
+#include <stdlib.h>
+
+#include <functional>
+
+#include <gtest/gtest.h>
+
+#define ALIGNMENT 8
+
+template<typename T, size_t NUM_ELEMENTS>
+void swap_align_test(void (*swap_func)(T*, const T*, size_t),
+                     std::function<void (T*, T*, uint64_t)> init_func) {
+  uint8_t* dst = nullptr;
+  uint8_t* src = nullptr;
+  ASSERT_EQ(0, posix_memalign(reinterpret_cast<void**>(&dst), ALIGNMENT,
+                              sizeof(T) * NUM_ELEMENTS + ALIGNMENT));
+  ASSERT_EQ(0, posix_memalign(reinterpret_cast<void**>(&src), ALIGNMENT,
+                              sizeof(T) * NUM_ELEMENTS + ALIGNMENT));
+
+  T src_buf[NUM_ELEMENTS];
+  T dst_buf[NUM_ELEMENTS];
+  for (uint64_t i = 0; i < NUM_ELEMENTS; i++) {
+    init_func(&src_buf[i], &dst_buf[i], i);
+  }
+
+  // Vary a few alignments.
+  for (size_t dst_align = 0; dst_align < ALIGNMENT; dst_align++) {
+    T* dst_aligned = reinterpret_cast<T*>(&dst[dst_align]);
+    for (size_t src_align = 0; src_align < ALIGNMENT; src_align++) {
+      T* src_aligned = reinterpret_cast<T*>(&src[src_align]);
+      memset(dst_aligned, 0, sizeof(T) * NUM_ELEMENTS);
+      memcpy(src_aligned, src_buf, sizeof(T) * NUM_ELEMENTS);
+      swap_func(dst_aligned, src_aligned, NUM_ELEMENTS);
+      ASSERT_EQ(0, memcmp(dst_buf, dst_aligned, sizeof(T) * NUM_ELEMENTS))
+          << "Failed at dst align " << dst_align << " src align " << src_align;
+    }
+  }
+  free(dst);
+  free(src);
+}
+
+TEST(libcore, swapShorts_align_test) {
+  // Use an odd number to guarantee that the last 16-bit swap code
+  // is executed.
+  swap_align_test<jshort, 9> (swapShorts, [] (jshort* src, jshort* dst, uint64_t i) {
+    *src = ((2*i) << 8) | (2*(i+1));
+    *dst = (2*i) | ((2*(i+1)) << 8);
+  });
+}
+
+TEST(libcore, swapInts_align_test) {
+  swap_align_test<jint, 10> (swapInts, [] (jint* src, jint* dst, uint64_t i) {
+    *src = ((4*i) << 24) | ((4*(i+1)) << 16) | ((4*(i+2)) << 8) | (4*(i+3));
+    *dst = (4*i) | ((4*(i+1)) << 8) | ((4*(i+2)) << 16) | ((4*(i+3)) << 24);
+  });
+}
+
+TEST(libcore, swapLongs_align_test) {
+  swap_align_test<jlong, 10> (swapLongs, [] (jlong* src, jlong* dst, uint64_t i) {
+    *src = ((8*i) << 56) | ((8*(i+1)) << 48) | ((8*(i+2)) << 40) | ((8*(i+3)) << 32) |
+        ((8*(i+4)) << 24) | ((8*(i+5)) << 16) | ((8*(i+6)) << 8) | (8*(i+7));
+    *dst = (8*i) | ((8*(i+1)) << 8) | ((8*(i+2)) << 16) | ((8*(i+3)) << 24) |
+        ((8*(i+4)) << 32) | ((8*(i+5)) << 40) | ((8*(i+6)) << 48) | ((8*(i+7)) << 56);
+  });
+}
+
+template<typename T>
+void memory_peek_test(T (*peek_func)(JNIEnv*, jclass, jlong), T value) {
+  T* src = nullptr;
+  ASSERT_EQ(0, posix_memalign(reinterpret_cast<void**>(&src), ALIGNMENT,
+                              sizeof(T) + ALIGNMENT));
+  for (size_t i = 0; i < ALIGNMENT; i++) {
+    jlong src_aligned = reinterpret_cast<jlong>(src) + i;
+    memcpy(reinterpret_cast<void*>(src_aligned), &value, sizeof(T));
+    T result = peek_func(nullptr, nullptr, src_aligned);
+    ASSERT_EQ(value, result);
+  }
+  free(src);
+}
+
+TEST(libcore, Memory_peekShortNative_align_check) {
+  memory_peek_test<jshort>(Memory_peekShortNative, 0x0102);
+}
+
+TEST(libcore, Memory_peekIntNative_align_check) {
+  memory_peek_test<jint>(Memory_peekIntNative, 0x01020304);
+}
+
+TEST(libcore, Memory_peekLongNative_align_check) {
+  memory_peek_test<jlong>(Memory_peekLongNative, 0x01020405060708ULL);
+}
+
+template<typename T>
+void memory_poke_test(void (*poke_func)(JNIEnv*, jclass, jlong, T), T value) {
+  T* dst = nullptr;
+  ASSERT_EQ(0, posix_memalign(reinterpret_cast<void**>(&dst), ALIGNMENT,
+                              sizeof(T) + ALIGNMENT));
+  for(size_t i = 0; i < ALIGNMENT; i++) {
+    memset(dst, 0, sizeof(T) + ALIGNMENT);
+    jlong dst_aligned = reinterpret_cast<jlong>(dst) + i;
+    poke_func(nullptr, nullptr, dst_aligned, value);
+    ASSERT_EQ(0, memcmp(reinterpret_cast<void*>(dst_aligned), &value, sizeof(T)));
+  }
+  free(dst);
+}
+
+TEST(libcore, Memory_pokeShortNative_align_check) {
+  memory_poke_test<jshort>(Memory_pokeShortNative, 0x0102);
+}
+
+TEST(libcore, Memory_pokeIntNative_align_check) {
+  memory_poke_test<jint>(Memory_pokeIntNative, 0x01020304);
+}
+
+TEST(libcore, Memory_pokeLongNative_align_check) {
+  memory_poke_test<jlong>(Memory_pokeLongNative, 0x0102030405060708ULL);
+}