ART: Add CRC32.updateBytes intrinsic for ARM64

Use crc32 instructions for
java.util.zip.CRC32.updateBytes(int,byte[],int,int).

The intrinsic is used if the number of bytes to process is less than or equal
to kCRC32UpdateBytesThreshold. If it exceeds kCRC32UpdateBytesThreshold, the
function provided by the core library is used.

Note that CRC32 is an optional feature in ARMv8, so this intrinsic
is only enabled for devices supporting the CRC32 instructions.

The CL is based on code from tim.zhang@linaro.org.

Performance improvements - speedup:
array size | Cortex-A53 | Cortex-A57
------------------------------------
128        | 14x        | 20x
256        | 10x        | 14x
512        | 8x         | 11x
1024       | 7x         | 9x
2048       | 6x         | 8x
4096       | 5x         | 7x
8192       | 5x         | 7x
16384      | 5x         | 7x
32768      | 5x         | 7x
65536      | 5x         | 7x

Test: m test-art-target-gtest
Test: m test-art-host-gtest
Test: art/test/testrunner/testrunner.py --target --optimizing --interpreter
Test: art/test/testrunner/testrunner.py --target --jit
Test: art/test/testrunner/testrunner.py --host --optimizing --interpreter
Test: art/test/testrunner/testrunner.py --host --jit
Test: 580-crc32

Change-Id: I0054cea41b5fc3e712e18b0afc7e3eacbf41feb6
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 6d04b0e..1688ea7 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2950,6 +2950,151 @@
   __ Mvn(out, out);
 }
 
+// The largest length (in bytes) handled by the CRC32.updateBytes intrinsic.
+// For longer inputs the library provided implementation is used instead.
+static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;
+
+void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
+  if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
+    return;  // CRC32 instructions are not available: do not set up the intrinsic.
+  }
+
+  LocationSummary* locations
+      = new (allocator_) LocationSummary(invoke,
+                                         LocationSummary::kCallOnSlowPath,
+                                         kIntrinsified);
+
+  locations->SetInAt(0, Location::RequiresRegister());  // crc
+  locations->SetInAt(1, Location::RequiresRegister());  // b
+  locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));  // off
+  locations->SetInAt(3, Location::RequiresRegister());  // len
+  locations->AddTemp(Location::RequiresRegister());  // Pointer to the data being processed.
+  locations->SetOut(Location::RequiresRegister());  // The updated crc.
+}
+
+// Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len).
+//
+// Note: If len exceeds kCRC32UpdateBytesThreshold the slow path is taken instead.
+void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
+
+  auto masm = GetVIXLAssembler();
+  auto locations = invoke->GetLocations();
+
+  auto slow_path =
+    new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  Register length = WRegisterFrom(locations->InAt(3));
+  __ Cmp(length, kCRC32UpdateBytesThreshold);
+  __ B(slow_path->GetEntryLabel(), hi);  // Unsigned compare: negative len goes here too.
+
+  const uint32_t array_data_offset =
+      mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
+  Register ptr = XRegisterFrom(locations->GetTemp(0));  // Address of the next byte to process.
+  Register array = XRegisterFrom(locations->InAt(1));
+  auto offset = locations->InAt(2);
+  if (offset.IsConstant()) {
+    int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
+    __ Add(ptr, array, array_data_offset + offset_value);
+  } else {
+    __ Add(ptr, array, array_data_offset);
+    __ Add(ptr, ptr, XRegisterFrom(offset));  // NOTE(review): assumes upper 32 bits are 0.
+  }
+
+  // The algorithm of CRC32 of bytes is:
+  //   crc = ~crc
+  //   process the first few bytes so that the data pointer becomes 8-byte aligned
+  //   while array has 8 bytes do:
+  //     crc = crc32_of_8bytes(crc, 8_bytes(array))
+  //   if array has 4 bytes:
+  //     crc = crc32_of_4bytes(crc, 4_bytes(array))
+  //   if array has 2 bytes:
+  //     crc = crc32_of_2bytes(crc, 2_bytes(array))
+  //   if array has a byte:
+  //     crc = crc32_of_byte(crc, 1_byte(array))
+  //   crc = ~crc
+
+  vixl::aarch64::Label loop, done;
+  vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte;
+  vixl::aarch64::Label aligned2, aligned4, aligned8;
+
+  // Use VIXL scratch registers as the VIXL macro assembler won't use them in
+  // instructions below.
+  UseScratchRegisterScope temps(masm);
+  Register len = temps.AcquireW();
+  Register array_elem = temps.AcquireW();
+
+  Register out = WRegisterFrom(locations->Out());
+  __ Mvn(out, WRegisterFrom(locations->InAt(0)));  // crc = ~crc
+  __ Mov(len, length);  // Work on a copy: the length input register is left untouched.
+
+  __ Tbz(ptr, 0, &aligned2);  // Bit 0 clear: ptr is already 2-byte aligned.
+  __ Subs(len, len, 1);
+  __ B(&done, lo);  // len was 0: nothing to process.
+  __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex));
+  __ Crc32b(out, out, array_elem);
+
+  __ Bind(&aligned2);
+  __ Tbz(ptr, 1, &aligned4);  // Bit 1 clear: ptr is already 4-byte aligned.
+  __ Subs(len, len, 2);
+  __ B(&process_1byte, lo);  // len was < 2: bit 0 of len tells whether a byte is left.
+  __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
+  __ Crc32h(out, out, array_elem);
+
+  __ Bind(&aligned4);
+  __ Tbz(ptr, 2, &aligned8);  // Bit 2 clear: ptr is already 8-byte aligned.
+  __ Subs(len, len, 4);
+  __ B(&process_2bytes, lo);  // len was < 4: bits 1 and 0 of len describe what is left.
+  __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
+  __ Crc32w(out, out, array_elem);
+
+  __ Bind(&aligned8);
+  __ Subs(len, len, 8);
+  // If len < 8 go to process data by 4 bytes, 2 bytes and a byte.
+  __ B(&process_4bytes, lo);
+
+  // The main loop processing data by 8 bytes.
+  __ Bind(&loop);
+  __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex));
+  __ Subs(len, len, 8);
+  __ Crc32x(out, out, array_elem.X());
+  // If at least 8 more bytes remain, process the next 8 bytes.
+  __ B(&loop, hs);
+
+  // Process the data which is less than 8 bytes.
+  // Here len is negative, in the range [-8, -1]: it is the number of remaining
+  // bytes minus the size (8, 4 or 2) of the chunk that could not be processed.
+  // Subtracting a power of two keeps the bits below it, so the low bits of len
+  // still tell how many bytes are left.
+  // The checking order is from bit 2 to bit 0:
+  //  bit 2 is set: at least 4 bytes available
+  //  bit 1 is set: at least 2 bytes available
+  //  bit 0 is set: at least a byte available
+  __ Bind(&process_4bytes);
+  // Goto process_2bytes if less than four bytes available
+  __ Tbz(len, 2, &process_2bytes);
+  __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
+  __ Crc32w(out, out, array_elem);
+
+  __ Bind(&process_2bytes);
+  // Goto process_1byte if less than two bytes available
+  __ Tbz(len, 1, &process_1byte);
+  __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
+  __ Crc32h(out, out, array_elem);
+
+  __ Bind(&process_1byte);
+  // Goto done if no bytes available
+  __ Tbz(len, 0, &done);
+  __ Ldrb(array_elem, MemOperand(ptr));  // Last byte: ptr is not advanced any more.
+  __ Crc32b(out, out, array_elem);
+
+  __ Bind(&done);
+  __ Mvn(out, out);  // crc = ~crc
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
 
 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 4d45a99..88f1457 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -3060,6 +3060,7 @@
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes)
 
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 21fb7d7..08ba0a0 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2697,6 +2697,7 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopy)
 
 UNIMPLEMENTED_INTRINSIC(MIPS, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateBytes)
 
 UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 4b86f5d..59d3ba2 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -2347,6 +2347,7 @@
 UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateBytes)
 
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index a73f4e8..1d94950 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -3071,6 +3071,7 @@
 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes)
 
 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 88c766f..4f0b61d 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2738,6 +2738,7 @@
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes)
 
 UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
diff --git a/runtime/hidden_api.h b/runtime/hidden_api.h
index 614154c..9daf825 100644
--- a/runtime/hidden_api.h
+++ b/runtime/hidden_api.h
@@ -235,6 +235,7 @@
       case Intrinsics::kUnsafeStoreFence:
       case Intrinsics::kUnsafeFullFence:
       case Intrinsics::kCRC32Update:
+      case Intrinsics::kCRC32UpdateBytes:
       case Intrinsics::kStringNewStringFromBytes:
       case Intrinsics::kStringNewStringFromChars:
       case Intrinsics::kStringNewStringFromString:
diff --git a/runtime/image.cc b/runtime/image.cc
index ae3d8e3..fb581f9 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -29,7 +29,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '7', '1', '\0' };  // Add image blocks.
+const uint8_t ImageHeader::kImageVersion[] = { '0', '7', '2', '\0' };  // CRC32UpdateBytes intrinsic
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc
index 24a026a..16e118c 100644
--- a/runtime/interpreter/interpreter_intrinsics.cc
+++ b/runtime/interpreter/interpreter_intrinsics.cc
@@ -559,6 +559,7 @@
     UNIMPLEMENTED_CASE(IntegerValueOf /* (I)Ljava/lang/Integer; */)
     UNIMPLEMENTED_CASE(ThreadInterrupted /* ()Z */)
     UNIMPLEMENTED_CASE(CRC32Update /* (II)I */)
+    UNIMPLEMENTED_CASE(CRC32UpdateBytes /* (I[BII)I */)
     INTRINSIC_CASE(VarHandleFullFence)
     INTRINSIC_CASE(VarHandleAcquireFence)
     INTRINSIC_CASE(VarHandleReleaseFence)
diff --git a/runtime/intrinsics_list.h b/runtime/intrinsics_list.h
index 093dd7f..82ea476 100644
--- a/runtime/intrinsics_list.h
+++ b/runtime/intrinsics_list.h
@@ -220,6 +220,7 @@
   V(VarHandleStoreStoreFence, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/lang/invoke/VarHandle;", "storeStoreFence", "()V") \
   V(ReachabilityFence, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kNoThrow, "Ljava/lang/ref/Reference;", "reachabilityFence", "(Ljava/lang/Object;)V") \
   V(CRC32Update, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/util/zip/CRC32;", "update", "(II)I") \
+  V(CRC32UpdateBytes, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/util/zip/CRC32;", "updateBytes", "(I[BII)I") \
   SIGNATURE_POLYMORPHIC_INTRINSICS_LIST(V)
 
 #endif  // ART_RUNTIME_INTRINSICS_LIST_H_
diff --git a/test/580-crc32/src/Main.java b/test/580-crc32/src/Main.java
index 7fc1273..6199e9b 100644
--- a/test/580-crc32/src/Main.java
+++ b/test/580-crc32/src/Main.java
@@ -15,29 +15,29 @@
  */
 
 import java.util.zip.CRC32;
+import java.util.Random;
 
 /**
- * The ART compiler can use intrinsics for the java.util.zip.CRC32 method:
- *    private native static int update(int crc, int b)
+ * The ART compiler can use intrinsics for the java.util.zip.CRC32 methods:
+ *   private native static int update(int crc, int b)
+ *   private native static int updateBytes(int crc, byte[] b, int off, int len)
  *
- * As the method is private it is not possible to check the use of intrinsics
- * for it directly.
+ * As the methods are private it is not possible to check the use of intrinsics
+ * for them directly.
  * The tests check that correct checksums are produced.
  */
 public class Main {
-  private static CRC32 crc32 = new CRC32();
-
   public Main() {
   }
 
-  public static long TestInt(int value) {
-    crc32.reset();
+  public static long CRC32Byte(int value) {
+    CRC32 crc32 = new CRC32();
     crc32.update(value);
     return crc32.getValue();
   }
 
-  public static long TestInt(int... values) {
-    crc32.reset();
+  public static long CRC32BytesUsingUpdateInt(int... values) {
+    CRC32 crc32 = new CRC32();
     for (int value : values) {
       crc32.update(value);
     }
@@ -50,82 +50,301 @@
     }
   }
 
-  public static void main(String args[]) {
+  private static void assertEqual(boolean expected, boolean actual) {
+    if (expected != actual) {  // Fail the test by throwing an Error on any mismatch.
+      throw new Error("Expected: " + expected + ", found: " + actual);
+    }
+  }
+
+  private static void TestCRC32Update() {
     // public void update(int b)
     //
     // Tests for checksums of the byte 0x0
-    assertEqual(0xD202EF8DL, TestInt(0x0));
-    assertEqual(0xD202EF8DL, TestInt(0x0100));
-    assertEqual(0xD202EF8DL, TestInt(0x010000));
-    assertEqual(0xD202EF8DL, TestInt(0x01000000));
-    assertEqual(0xD202EF8DL, TestInt(0xff00));
-    assertEqual(0xD202EF8DL, TestInt(0xffff00));
-    assertEqual(0xD202EF8DL, TestInt(0xffffff00));
-    assertEqual(0xD202EF8DL, TestInt(0x1200));
-    assertEqual(0xD202EF8DL, TestInt(0x123400));
-    assertEqual(0xD202EF8DL, TestInt(0x12345600));
-    assertEqual(0xD202EF8DL, TestInt(Integer.MIN_VALUE));
+    // Check that only the low eight bits of the argument are used.
+    assertEqual(0xD202EF8DL, CRC32Byte(0x0));
+    assertEqual(0xD202EF8DL, CRC32Byte(0x0100));
+    assertEqual(0xD202EF8DL, CRC32Byte(0x010000));
+    assertEqual(0xD202EF8DL, CRC32Byte(0x01000000));
+    assertEqual(0xD202EF8DL, CRC32Byte(0xff00));
+    assertEqual(0xD202EF8DL, CRC32Byte(0xffff00));
+    assertEqual(0xD202EF8DL, CRC32Byte(0xffffff00));
+    assertEqual(0xD202EF8DL, CRC32Byte(0x1200));
+    assertEqual(0xD202EF8DL, CRC32Byte(0x123400));
+    assertEqual(0xD202EF8DL, CRC32Byte(0x12345600));
+    assertEqual(0xD202EF8DL, CRC32Byte(Integer.MIN_VALUE));
 
     // Tests for checksums of the byte 0x1
-    assertEqual(0xA505DF1BL, TestInt(0x1));
-    assertEqual(0xA505DF1BL, TestInt(0x0101));
-    assertEqual(0xA505DF1BL, TestInt(0x010001));
-    assertEqual(0xA505DF1BL, TestInt(0x01000001));
-    assertEqual(0xA505DF1BL, TestInt(0xff01));
-    assertEqual(0xA505DF1BL, TestInt(0xffff01));
-    assertEqual(0xA505DF1BL, TestInt(0xffffff01));
-    assertEqual(0xA505DF1BL, TestInt(0x1201));
-    assertEqual(0xA505DF1BL, TestInt(0x123401));
-    assertEqual(0xA505DF1BL, TestInt(0x12345601));
+    // Check that only the low eight bits of the argument are used.
+    assertEqual(0xA505DF1BL, CRC32Byte(0x1));
+    assertEqual(0xA505DF1BL, CRC32Byte(0x0101));
+    assertEqual(0xA505DF1BL, CRC32Byte(0x010001));
+    assertEqual(0xA505DF1BL, CRC32Byte(0x01000001));
+    assertEqual(0xA505DF1BL, CRC32Byte(0xff01));
+    assertEqual(0xA505DF1BL, CRC32Byte(0xffff01));
+    assertEqual(0xA505DF1BL, CRC32Byte(0xffffff01));
+    assertEqual(0xA505DF1BL, CRC32Byte(0x1201));
+    assertEqual(0xA505DF1BL, CRC32Byte(0x123401));
+    assertEqual(0xA505DF1BL, CRC32Byte(0x12345601));
 
     // Tests for checksums of the byte 0x0f
-    assertEqual(0x42BDF21CL, TestInt(0x0f));
-    assertEqual(0x42BDF21CL, TestInt(0x010f));
-    assertEqual(0x42BDF21CL, TestInt(0x01000f));
-    assertEqual(0x42BDF21CL, TestInt(0x0100000f));
-    assertEqual(0x42BDF21CL, TestInt(0xff0f));
-    assertEqual(0x42BDF21CL, TestInt(0xffff0f));
-    assertEqual(0x42BDF21CL, TestInt(0xffffff0f));
-    assertEqual(0x42BDF21CL, TestInt(0x120f));
-    assertEqual(0x42BDF21CL, TestInt(0x12340f));
-    assertEqual(0x42BDF21CL, TestInt(0x1234560f));
+    // Check that only the low eight bits of the argument are used.
+    assertEqual(0x42BDF21CL, CRC32Byte(0x0f));
+    assertEqual(0x42BDF21CL, CRC32Byte(0x010f));
+    assertEqual(0x42BDF21CL, CRC32Byte(0x01000f));
+    assertEqual(0x42BDF21CL, CRC32Byte(0x0100000f));
+    assertEqual(0x42BDF21CL, CRC32Byte(0xff0f));
+    assertEqual(0x42BDF21CL, CRC32Byte(0xffff0f));
+    assertEqual(0x42BDF21CL, CRC32Byte(0xffffff0f));
+    assertEqual(0x42BDF21CL, CRC32Byte(0x120f));
+    assertEqual(0x42BDF21CL, CRC32Byte(0x12340f));
+    assertEqual(0x42BDF21CL, CRC32Byte(0x1234560f));
 
     // Tests for checksums of the byte 0xff
-    assertEqual(0xFF000000L, TestInt(0x00ff));
-    assertEqual(0xFF000000L, TestInt(0x01ff));
-    assertEqual(0xFF000000L, TestInt(0x0100ff));
-    assertEqual(0xFF000000L, TestInt(0x010000ff));
-    assertEqual(0xFF000000L, TestInt(0x0000ffff));
-    assertEqual(0xFF000000L, TestInt(0x00ffffff));
-    assertEqual(0xFF000000L, TestInt(0xffffffff));
-    assertEqual(0xFF000000L, TestInt(0x12ff));
-    assertEqual(0xFF000000L, TestInt(0x1234ff));
-    assertEqual(0xFF000000L, TestInt(0x123456ff));
-    assertEqual(0xFF000000L, TestInt(Integer.MAX_VALUE));
+    // Check that only the low eight bits of the argument are used.
+    assertEqual(0xFF000000L, CRC32Byte(0x00ff));
+    assertEqual(0xFF000000L, CRC32Byte(0x01ff));
+    assertEqual(0xFF000000L, CRC32Byte(0x0100ff));
+    assertEqual(0xFF000000L, CRC32Byte(0x010000ff));
+    assertEqual(0xFF000000L, CRC32Byte(0x0000ffff));
+    assertEqual(0xFF000000L, CRC32Byte(0x00ffffff));
+    assertEqual(0xFF000000L, CRC32Byte(0xffffffff));
+    assertEqual(0xFF000000L, CRC32Byte(0x12ff));
+    assertEqual(0xFF000000L, CRC32Byte(0x1234ff));
+    assertEqual(0xFF000000L, CRC32Byte(0x123456ff));
+    assertEqual(0xFF000000L, CRC32Byte(Integer.MAX_VALUE));
 
     // Tests for sequences
-    assertEqual(0xFF41D912L, TestInt(0, 0, 0));
-    assertEqual(0xFF41D912L, TestInt(0x0100, 0x010000, 0x01000000));
-    assertEqual(0xFF41D912L, TestInt(0xff00, 0xffff00, 0xffffff00));
-    assertEqual(0xFF41D912L, TestInt(0x1200, 0x123400, 0x12345600));
+    // Check that only the low eight bits of the values are used.
+    assertEqual(0xFF41D912L, CRC32BytesUsingUpdateInt(0, 0, 0));
+    assertEqual(0xFF41D912L,
+                CRC32BytesUsingUpdateInt(0x0100, 0x010000, 0x01000000));
+    assertEqual(0xFF41D912L,
+                CRC32BytesUsingUpdateInt(0xff00, 0xffff00, 0xffffff00));
+    assertEqual(0xFF41D912L,
+                CRC32BytesUsingUpdateInt(0x1200, 0x123400, 0x12345600));
 
-    assertEqual(0x909FB2F2L, TestInt(1, 1, 1));
-    assertEqual(0x909FB2F2L, TestInt(0x0101, 0x010001, 0x01000001));
-    assertEqual(0x909FB2F2L, TestInt(0xff01, 0xffff01, 0xffffff01));
-    assertEqual(0x909FB2F2L, TestInt(0x1201, 0x123401, 0x12345601));
+    assertEqual(0x909FB2F2L, CRC32BytesUsingUpdateInt(1, 1, 1));
+    assertEqual(0x909FB2F2L,
+                CRC32BytesUsingUpdateInt(0x0101, 0x010001, 0x01000001));
+    assertEqual(0x909FB2F2L,
+                CRC32BytesUsingUpdateInt(0xff01, 0xffff01, 0xffffff01));
+    assertEqual(0x909FB2F2L,
+                CRC32BytesUsingUpdateInt(0x1201, 0x123401, 0x12345601));
 
-    assertEqual(0xE33A9F71L, TestInt(0x0f, 0x0f, 0x0f));
-    assertEqual(0xE33A9F71L, TestInt(0x010f, 0x01000f, 0x0100000f));
-    assertEqual(0xE33A9F71L, TestInt(0xff0f, 0xffff0f, 0xffffff0f));
-    assertEqual(0xE33A9F71L, TestInt(0x120f, 0x12340f, 0x1234560f));
+    assertEqual(0xE33A9F71L, CRC32BytesUsingUpdateInt(0x0f, 0x0f, 0x0f));
+    assertEqual(0xE33A9F71L,
+                CRC32BytesUsingUpdateInt(0x010f, 0x01000f, 0x0100000f));
+    assertEqual(0xE33A9F71L,
+                CRC32BytesUsingUpdateInt(0xff0f, 0xffff0f, 0xffffff0f));
+    assertEqual(0xE33A9F71L,
+                CRC32BytesUsingUpdateInt(0x120f, 0x12340f, 0x1234560f));
 
-    assertEqual(0xFFFFFF00L, TestInt(0x0ff, 0x0ff, 0x0ff));
-    assertEqual(0xFFFFFF00L, TestInt(0x01ff, 0x0100ff, 0x010000ff));
-    assertEqual(0xFFFFFF00L, TestInt(0x00ffff, 0x00ffffff, 0xffffffff));
-    assertEqual(0xFFFFFF00L, TestInt(0x12ff, 0x1234ff, 0x123456ff));
+    assertEqual(0xFFFFFF00L, CRC32BytesUsingUpdateInt(0x0ff, 0x0ff, 0x0ff));
+    assertEqual(0xFFFFFF00L,
+                CRC32BytesUsingUpdateInt(0x01ff, 0x0100ff, 0x010000ff));
+    assertEqual(0xFFFFFF00L,
+                CRC32BytesUsingUpdateInt(0x00ffff, 0x00ffffff, 0xffffffff));
+    assertEqual(0xFFFFFF00L,
+                CRC32BytesUsingUpdateInt(0x12ff, 0x1234ff, 0x123456ff));
 
-    assertEqual(0xB6CC4292L, TestInt(0x01, 0x02));
+    assertEqual(0xB6CC4292L, CRC32BytesUsingUpdateInt(0x01, 0x02));
 
-    assertEqual(0xB2DE047CL, TestInt(0x0, -1, Integer.MIN_VALUE, Integer.MAX_VALUE));
+    assertEqual(0xB2DE047CL,
+                CRC32BytesUsingUpdateInt(0x0, -1, Integer.MIN_VALUE, Integer.MAX_VALUE));
+  }
+
+  private static long CRC32ByteArray(byte[] bytes, int off, int len) {
+    CRC32 crc32 = new CRC32();  // A fresh checksum for every call.
+    crc32.update(bytes, off, len);  // Checksum len bytes of the array starting at off.
+    return crc32.getValue();
+  }
+
+  // This is used to test that correct code is generated for constant offsets.
+  // In this case (the whole array is processed) the offset is 0.
+  private static long CRC32ByteArray(byte[] bytes) {
+    CRC32 crc32 = new CRC32();
+    crc32.update(bytes);
+    return crc32.getValue();
+  }
+
+  private static long CRC32ByteAndByteArray(int value, byte[] bytes) {
+    CRC32 crc32 = new CRC32();
+    crc32.update(value);  // First a single byte ...
+    crc32.update(bytes);  // ... then an array, continuing the same checksum.
+    return crc32.getValue();
+  }
+
+  private static long CRC32ByteArrayAndByte(byte[] bytes, int value) {
+    CRC32 crc32 = new CRC32();
+    crc32.update(bytes);  // First an array ...
+    crc32.update(value);  // ... then a single byte, continuing the same checksum.
+    return crc32.getValue();
+  }
+
+  private static boolean CRC32ByteArrayThrowsAIOOBE(byte[] bytes, int off, int len) {
+    try {
+      CRC32 crc32 = new CRC32();
+      crc32.update(bytes, off, len);
+    } catch (ArrayIndexOutOfBoundsException ex) {
+      return true;  // The expected outcome for an invalid off/len pair.
+    }
+    return false;  // No exception was thrown.
+  }
+
+  private static boolean CRC32ByteArrayThrowsNPE() {
+    try {
+      CRC32 crc32 = new CRC32();
+      crc32.update(null, 0, 0);  // A null array is expected to throw even when len is 0.
+      return false;
+    } catch (NullPointerException e) {}
+
+    try {
+      CRC32 crc32 = new CRC32();
+      crc32.update(null, 1, 2);
+      return false;
+    } catch (NullPointerException e) {}
+
+    try {
+      CRC32 crc32 = new CRC32();
+      crc32.update((byte[])null);  // The cast selects the byte[] overload of update.
+      return false;
+    } catch (NullPointerException e) {}
+
+    return true;  // All three calls threw a NullPointerException.
+  }
+
+  private static long CRC32BytesUsingUpdateInt(byte[] bytes, int off, int len) {
+    CRC32 crc32 = new CRC32();
+    while (len-- > 0) {
+      crc32.update(bytes[off++]);  // Byte-wise reference; avoids update(byte[], int, int).
+    }
+    return crc32.getValue();
+  }
+
+  private static void TestCRC32UpdateBytes() {
+    assertEqual(0L, CRC32ByteArray(new byte[] {}));  // Empty input: the checksum stays 0.
+    assertEqual(0L, CRC32ByteArray(new byte[] {}, 0, 0));
+    assertEqual(0L, CRC32ByteArray(new byte[] {0}, 0, 0));
+    assertEqual(0L, CRC32ByteArray(new byte[] {0}, 1, 0));
+    assertEqual(0L, CRC32ByteArray(new byte[] {0, 0}, 1, 0));
+
+    assertEqual(true, CRC32ByteArrayThrowsNPE());
+    assertEqual(true, CRC32ByteArrayThrowsAIOOBE(new byte[] {}, -1, 0));
+    assertEqual(true, CRC32ByteArrayThrowsAIOOBE(new byte[] {0}, -1, 1));
+    assertEqual(true, CRC32ByteArrayThrowsAIOOBE(new byte[] {0}, 0, -1));
+    assertEqual(true, CRC32ByteArrayThrowsAIOOBE(new byte[] {}, 0, -1));
+    assertEqual(true, CRC32ByteArrayThrowsAIOOBE(new byte[] {}, 1, 0));
+    assertEqual(true, CRC32ByteArrayThrowsAIOOBE(new byte[] {}, -1, 1));
+    assertEqual(true, CRC32ByteArrayThrowsAIOOBE(new byte[] {}, 1, -1));
+    assertEqual(true, CRC32ByteArrayThrowsAIOOBE(new byte[] {}, 0, 1));
+    assertEqual(true, CRC32ByteArrayThrowsAIOOBE(new byte[] {}, 0, 10));
+    assertEqual(true, CRC32ByteArrayThrowsAIOOBE(new byte[] {0}, 0, 10));
+    assertEqual(true, CRC32ByteArrayThrowsAIOOBE(new byte[] {}, 10, 10));
+    assertEqual(true, CRC32ByteArrayThrowsAIOOBE(new byte[] {0, 0, 0, 0}, 2, 3));
+    assertEqual(true, CRC32ByteArrayThrowsAIOOBE(new byte[] {0, 0, 0, 0}, 3, 2));
+
+    assertEqual(CRC32Byte(0), CRC32ByteArray(new byte[] {0}));
+    assertEqual(CRC32Byte(0), CRC32ByteArray(new byte[] {0}, 0, 1));
+    assertEqual(CRC32Byte(1), CRC32ByteArray(new byte[] {1}));
+    assertEqual(CRC32Byte(1), CRC32ByteArray(new byte[] {1}, 0, 1));
+    assertEqual(CRC32Byte(0x0f), CRC32ByteArray(new byte[] {0x0f}));
+    assertEqual(CRC32Byte(0x0f), CRC32ByteArray(new byte[] {0x0f}, 0, 1));
+    assertEqual(CRC32Byte(0xff), CRC32ByteArray(new byte[] {-1}));
+    assertEqual(CRC32Byte(0xff), CRC32ByteArray(new byte[] {-1}, 0, 1));
+    assertEqual(CRC32BytesUsingUpdateInt(0, 0, 0),
+                CRC32ByteArray(new byte[] {0, 0, 0}));
+    assertEqual(CRC32BytesUsingUpdateInt(0, 0, 0),
+                CRC32ByteArray(new byte[] {0, 0, 0}, 0, 3));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 1, 1),
+                CRC32ByteArray(new byte[] {1, 1, 1}));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 1, 1),
+                CRC32ByteArray(new byte[] {1, 1, 1}, 0, 3));
+    assertEqual(CRC32BytesUsingUpdateInt(0x0f, 0x0f, 0x0f),
+                CRC32ByteArray(new byte[] {0x0f, 0x0f, 0x0f}));
+    assertEqual(CRC32BytesUsingUpdateInt(0x0f, 0x0f, 0x0f),
+                CRC32ByteArray(new byte[] {0x0f, 0x0f, 0x0f}, 0, 3));
+    assertEqual(CRC32BytesUsingUpdateInt(0xff, 0xff, 0xff),
+                CRC32ByteArray(new byte[] {-1, -1, -1}));
+    assertEqual(CRC32BytesUsingUpdateInt(0xff, 0xff, 0xff),
+                CRC32ByteArray(new byte[] {-1, -1, -1}, 0, 3));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 2),
+                CRC32ByteArray(new byte[] {1, 2}));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 2),
+                CRC32ByteArray(new byte[] {1, 2}, 0, 2));
+    assertEqual(
+        CRC32BytesUsingUpdateInt(0, -1, Byte.MIN_VALUE, Byte.MAX_VALUE),
+        CRC32ByteArray(new byte[] {0, -1, Byte.MIN_VALUE, Byte.MAX_VALUE}));
+    assertEqual(
+        CRC32BytesUsingUpdateInt(0, -1, Byte.MIN_VALUE, Byte.MAX_VALUE),
+        CRC32ByteArray(new byte[] {0, -1, Byte.MIN_VALUE, Byte.MAX_VALUE}, 0, 4));
+
+    assertEqual(CRC32BytesUsingUpdateInt(0, 0, 0),
+                CRC32ByteAndByteArray(0, new byte[] {0, 0}));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 1, 1),
+                CRC32ByteAndByteArray(1, new byte[] {1, 1}));
+    assertEqual(CRC32BytesUsingUpdateInt(0x0f, 0x0f, 0x0f),
+                CRC32ByteAndByteArray(0x0f, new byte[] {0x0f, 0x0f}));
+    assertEqual(CRC32BytesUsingUpdateInt(0xff, 0xff, 0xff),
+                CRC32ByteAndByteArray(-1, new byte[] {-1, -1}));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 2, 3),
+                CRC32ByteAndByteArray(1, new byte[] {2, 3}));
+    assertEqual(
+        CRC32BytesUsingUpdateInt(0, -1, Byte.MIN_VALUE, Byte.MAX_VALUE),
+        CRC32ByteAndByteArray(0, new byte[] {-1, Byte.MIN_VALUE, Byte.MAX_VALUE}));
+
+    assertEqual(CRC32BytesUsingUpdateInt(0, 0, 0),
+                CRC32ByteArrayAndByte(new byte[] {0, 0}, 0));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 1, 1),
+                CRC32ByteArrayAndByte(new byte[] {1, 1}, 1));
+    assertEqual(CRC32BytesUsingUpdateInt(0x0f, 0x0f, 0x0f),
+                CRC32ByteArrayAndByte(new byte[] {0x0f, 0x0f}, 0x0f));
+    assertEqual(CRC32BytesUsingUpdateInt(0xff, 0xff, 0xff),
+                CRC32ByteArrayAndByte(new byte[] {-1, -1}, -1));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 2, 3),
+                CRC32ByteArrayAndByte(new byte[] {1, 2}, 3));
+    assertEqual(
+        CRC32BytesUsingUpdateInt(0, -1, Byte.MIN_VALUE, Byte.MAX_VALUE),
+        CRC32ByteArrayAndByte(new byte[] {0, -1, Byte.MIN_VALUE}, Byte.MAX_VALUE));
+
+    byte[] bytes = new byte[128 * 1024];
+    Random rnd = new Random(0);  // Fixed seed: the test data is the same on every run.
+    rnd.nextBytes(bytes);
+
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, 0, bytes.length),
+                CRC32ByteArray(bytes));
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, 0, 8 * 1024),
+                CRC32ByteArray(bytes, 0, 8 * 1024));
+
+    int off = rnd.nextInt(bytes.length / 2);
+    for (int len = 0; len <= 16; ++len) {
+      assertEqual(CRC32BytesUsingUpdateInt(bytes, off, len),
+                  CRC32ByteArray(bytes, off, len));
+    }
+
+    // Check there are no issues with unaligned accesses.
+    for (int o = 1; o < 8; ++o) {
+      for (int l = 0; l <= 16; ++l) {
+        assertEqual(CRC32BytesUsingUpdateInt(bytes, o, l),
+                    CRC32ByteArray(bytes, o, l));
+      }
+    }
+
+    int len = bytes.length / 2;  // 64 * 1024, the value of the ARM64 intrinsic threshold.
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, 0, len - 1),
+                CRC32ByteArray(bytes, 0, len - 1));
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, 0, len),
+                CRC32ByteArray(bytes, 0, len));
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, 0, len + 1),
+                CRC32ByteArray(bytes, 0, len + 1));
+
+    len = rnd.nextInt(bytes.length + 1);
+    off = rnd.nextInt(bytes.length - len + 1);  // +1: the bound must be > 0 if len == length.
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, off, len),
+                CRC32ByteArray(bytes, off, len));
+  }
+
+  public static void main(String args[]) {
+    TestCRC32Update();
+    TestCRC32UpdateBytes();
   }
 }