ART: Add CRC32.updateByteBuffer intrinsic for ARM64

Use crc32 instructions for
java.util.zip.CRC32.updateByteBuffer(int, long, int, int).

Note that CRC32 is an optional feature in ARMv8, this intrinsic
is only enabled for devices supporting the CRC32 instructions.

The performance of the intrinsic is the same as the performance of the
CRC32.updateBytes intrinsic. However the intrinsic does not have a
restriction on the size of the byte buffer. For big input data
the intrinsic will be faster than CRC32.updateBytes.

Test: m test-art-target-gtest
Test: m test-art-host-gtest
Test: art/test.py --target --optimizing
Test: art/test.py --host --optimizing
Test: 580-crc32

Change-Id: I6b6160b56e829731402bd5fc56bcac59664f634d
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 0b17c9d..7fb69b7 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2954,58 +2954,20 @@
   __ Mvn(out, tmp);
 }
 
-// The threshold for sizes of arrays to use the library provided implementation
-// of CRC32.updateBytes instead of the intrinsic.
-static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;
-
-void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
-  if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
-    return;
-  }
-
-  LocationSummary* locations
-      = new (allocator_) LocationSummary(invoke,
-                                         LocationSummary::kCallOnSlowPath,
-                                         kIntrinsified);
-
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
-  locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
-  locations->SetInAt(3, Location::RequiresRegister());
-  locations->AddTemp(Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister());
-}
-
-// Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len)
+// Generate code using CRC32 instructions which calculates
+// a CRC32 value of a byte.
 //
-// Note: The intrinsic is not used if len exceeds a threshold.
-void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
-  DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
-
-  auto masm = GetVIXLAssembler();
-  auto locations = invoke->GetLocations();
-
-  auto slow_path =
-    new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  Register length = WRegisterFrom(locations->InAt(3));
-  __ Cmp(length, kCRC32UpdateBytesThreshold);
-  __ B(slow_path->GetEntryLabel(), hi);
-
-  const uint32_t array_data_offset =
-      mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
-  Register ptr = XRegisterFrom(locations->GetTemp(0));
-  Register array = XRegisterFrom(locations->InAt(1));
-  auto offset = locations->InAt(2);
-  if (offset.IsConstant()) {
-    int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
-    __ Add(ptr, array, array_data_offset + offset_value);
-  } else {
-    __ Add(ptr, array, array_data_offset);
-    __ Add(ptr, ptr, XRegisterFrom(offset));
-  }
-
+// Parameters:
+//   masm   - VIXL macro assembler
+//   crc    - a register holding an initial CRC value
+//   ptr    - a register holding a memory address of bytes
+//   length - a register holding a number of bytes to process
+//   out    - a register to put a result of calculation
+static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm,
+                                                        const Register& crc,
+                                                        const Register& ptr,
+                                                        const Register& length,
+                                                        const Register& out) {
   // The algorithm of CRC32 of bytes is:
   //   crc = ~crc
   //   process a few first bytes to make the array 8-byte aligned
@@ -3029,8 +2991,7 @@
   Register len = temps.AcquireW();
   Register array_elem = temps.AcquireW();
 
-  Register out = WRegisterFrom(locations->Out());
-  __ Mvn(out, WRegisterFrom(locations->InAt(0)));
+  __ Mvn(out, crc);
   __ Mov(len, length);
 
   __ Tbz(ptr, 0, &aligned2);
@@ -3095,10 +3056,111 @@
 
   __ Bind(&done);
   __ Mvn(out, out);
+}
+
+// The threshold for sizes of arrays to use the library provided implementation
+// of CRC32.updateBytes instead of the intrinsic.
+static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;
+
+void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
+  if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
+    return;
+  }
+
+  LocationSummary* locations =
+      new (allocator_) LocationSummary(invoke,
+                                       LocationSummary::kCallOnSlowPath,
+                                       kIntrinsified);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+// Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len)
+//
+// Note: The intrinsic is not used if len exceeds a threshold.
+void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
+
+  auto masm = GetVIXLAssembler();
+  auto locations = invoke->GetLocations();
+
+  auto slow_path =
+    new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  Register length = WRegisterFrom(locations->InAt(3));
+  __ Cmp(length, kCRC32UpdateBytesThreshold);
+  __ B(slow_path->GetEntryLabel(), hi);
+
+  const uint32_t array_data_offset =
+      mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
+  Register ptr = XRegisterFrom(locations->GetTemp(0));
+  Register array = XRegisterFrom(locations->InAt(1));
+  auto offset = locations->InAt(2);
+  if (offset.IsConstant()) {
+    int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
+    __ Add(ptr, array, array_data_offset + offset_value);
+  } else {
+    __ Add(ptr, array, array_data_offset);
+    __ Add(ptr, ptr, XRegisterFrom(offset));
+  }
+
+  Register crc = WRegisterFrom(locations->InAt(0));
+  Register out = WRegisterFrom(locations->Out());
+
+  GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
 
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
+  if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
+    return;
+  }
+
+  LocationSummary* locations =
+      new (allocator_) LocationSummary(invoke,
+                                       LocationSummary::kNoCall,
+                                       kIntrinsified);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+// Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len)
+//
+// There is no need to generate code checking if addr is 0.
+// The method updateByteBuffer is a private method of java.util.zip.CRC32.
+// This guarantees no calls outside of the CRC32 class.
+// An address of DirectBuffer is always passed to the call of updateByteBuffer.
+// It might be an implementation of an empty DirectBuffer which can use a zero
+// address but it must have the length to be zero. The current generated code
+// correctly works with the zero length.
+void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
+
+  auto masm = GetVIXLAssembler();
+  auto locations = invoke->GetLocations();
+
+  Register addr = XRegisterFrom(locations->InAt(1));
+  Register ptr = XRegisterFrom(locations->GetTemp(0));
+  __ Add(ptr, addr, XRegisterFrom(locations->InAt(2)));
+
+  Register crc = WRegisterFrom(locations->InAt(0));
+  Register length = WRegisterFrom(locations->InAt(3));
+  Register out = WRegisterFrom(locations->Out());
+  GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
 
 UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 88f1457..95752fc 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -3061,6 +3061,7 @@
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer)
 
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 08ba0a0..8092a1c 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2698,6 +2698,7 @@
 
 UNIMPLEMENTED_INTRINSIC(MIPS, CRC32Update)
 UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateBytes)
+UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateByteBuffer)
 
 UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 59d3ba2..f5577c3 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -2348,6 +2348,7 @@
 UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32Update)
 UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateBytes)
+UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateByteBuffer)
 
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 1d94950..5ad9469 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -3072,6 +3072,7 @@
 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
 UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes)
+UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer)
 
 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 4f0b61d..62ccd49 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -2739,6 +2739,7 @@
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update)
 UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes)
+UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateByteBuffer)
 
 UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
 UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
diff --git a/runtime/hidden_api.h b/runtime/hidden_api.h
index 13bead2..a0eeae2 100644
--- a/runtime/hidden_api.h
+++ b/runtime/hidden_api.h
@@ -236,6 +236,7 @@
       case Intrinsics::kUnsafeFullFence:
       case Intrinsics::kCRC32Update:
       case Intrinsics::kCRC32UpdateBytes:
+      case Intrinsics::kCRC32UpdateByteBuffer:
       case Intrinsics::kStringNewStringFromBytes:
       case Intrinsics::kStringNewStringFromChars:
       case Intrinsics::kStringNewStringFromString:
diff --git a/runtime/image.cc b/runtime/image.cc
index fe1d88f..b6bb0b1 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -29,7 +29,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '7', '3', '\0' };  // Image reservation.
+const uint8_t ImageHeader::kImageVersion[] = { '0', '7', '4', '\0' };  // CRC32UpdateBB intrinsic
 
 ImageHeader::ImageHeader(uint32_t image_reservation_size,
                          uint32_t component_count,
diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc
index 16e118c..2127f1d 100644
--- a/runtime/interpreter/interpreter_intrinsics.cc
+++ b/runtime/interpreter/interpreter_intrinsics.cc
@@ -560,6 +560,7 @@
     UNIMPLEMENTED_CASE(ThreadInterrupted /* ()Z */)
     UNIMPLEMENTED_CASE(CRC32Update /* (II)I */)
     UNIMPLEMENTED_CASE(CRC32UpdateBytes /* (I[BII)I */)
+    UNIMPLEMENTED_CASE(CRC32UpdateByteBuffer /* (IJII)I */)
     INTRINSIC_CASE(VarHandleFullFence)
     INTRINSIC_CASE(VarHandleAcquireFence)
     INTRINSIC_CASE(VarHandleReleaseFence)
diff --git a/runtime/intrinsics_list.h b/runtime/intrinsics_list.h
index db43b24..57e81a7 100644
--- a/runtime/intrinsics_list.h
+++ b/runtime/intrinsics_list.h
@@ -221,6 +221,7 @@
   V(ReachabilityFence, kStatic, kNeedsEnvironmentOrCache, kWriteSideEffects, kNoThrow, "Ljava/lang/ref/Reference;", "reachabilityFence", "(Ljava/lang/Object;)V") \
   V(CRC32Update, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/util/zip/CRC32;", "update", "(II)I") \
   V(CRC32UpdateBytes, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/util/zip/CRC32;", "updateBytes", "(I[BII)I") \
+  V(CRC32UpdateByteBuffer, kStatic, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/util/zip/CRC32;", "updateByteBuffer", "(IJII)I") \
   SIGNATURE_POLYMORPHIC_INTRINSICS_LIST(V)
 
 #endif  // ART_RUNTIME_INTRINSICS_LIST_H_
diff --git a/test/580-crc32/src/Main.java b/test/580-crc32/src/Main.java
index 6199e9b..dfc0b3c 100644
--- a/test/580-crc32/src/Main.java
+++ b/test/580-crc32/src/Main.java
@@ -16,6 +16,7 @@
 
 import java.util.zip.CRC32;
 import java.util.Random;
+import java.nio.ByteBuffer;
 
 /**
  * The ART compiler can use intrinsics for the java.util.zip.CRC32 methods:
@@ -343,8 +344,193 @@
                 CRC32ByteArray(bytes, off, len));
   }
 
+  private static long CRC32ByteBuffer(byte[] bytes, int off, int len) {
+    ByteBuffer buf = ByteBuffer.wrap(bytes, 0, off + len);
+    buf.position(off);
+    CRC32 crc32 = new CRC32();
+    crc32.update(buf);
+    return crc32.getValue();
+  }
+
+  private static void TestCRC32UpdateByteBuffer() {
+    assertEqual(0L, CRC32ByteBuffer(new byte[] {}, 0, 0));
+    assertEqual(0L, CRC32ByteBuffer(new byte[] {0}, 0, 0));
+    assertEqual(0L, CRC32ByteBuffer(new byte[] {0}, 1, 0));
+    assertEqual(0L, CRC32ByteBuffer(new byte[] {0, 0}, 1, 0));
+
+    assertEqual(CRC32Byte(0), CRC32ByteBuffer(new byte[] {0}, 0, 1));
+    assertEqual(CRC32Byte(1), CRC32ByteBuffer(new byte[] {1}, 0, 1));
+    assertEqual(CRC32Byte(0x0f), CRC32ByteBuffer(new byte[] {0x0f}, 0, 1));
+    assertEqual(CRC32Byte(0xff), CRC32ByteBuffer(new byte[] {-1}, 0, 1));
+    assertEqual(CRC32BytesUsingUpdateInt(0, 0, 0),
+                CRC32ByteBuffer(new byte[] {0, 0, 0}, 0, 3));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 1, 1),
+                CRC32ByteBuffer(new byte[] {1, 1, 1}, 0, 3));
+    assertEqual(CRC32BytesUsingUpdateInt(0x0f, 0x0f, 0x0f),
+                CRC32ByteBuffer(new byte[] {0x0f, 0x0f, 0x0f}, 0, 3));
+    assertEqual(CRC32BytesUsingUpdateInt(0xff, 0xff, 0xff),
+                CRC32ByteBuffer(new byte[] {-1, -1, -1}, 0, 3));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 2),
+                CRC32ByteBuffer(new byte[] {1, 2}, 0, 2));
+    assertEqual(
+        CRC32BytesUsingUpdateInt(0, -1, Byte.MIN_VALUE, Byte.MAX_VALUE),
+        CRC32ByteBuffer(new byte[] {0, -1, Byte.MIN_VALUE, Byte.MAX_VALUE}, 0, 4));
+
+    byte[] bytes = new byte[128 * 1024];
+    Random rnd = new Random(0);
+    rnd.nextBytes(bytes);
+
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, 0, 8 * 1024),
+                CRC32ByteBuffer(bytes, 0, 8 * 1024));
+
+    int off = rnd.nextInt(bytes.length / 2);
+    for (int len = 0; len <= 16; ++len) {
+      assertEqual(CRC32BytesUsingUpdateInt(bytes, off, len),
+                  CRC32ByteBuffer(bytes, off, len));
+    }
+
+    // Check there are no issues with unaligned accesses.
+    for (int o = 1; o < 8; ++o) {
+      for (int l = 0; l <= 16; ++l) {
+        assertEqual(CRC32BytesUsingUpdateInt(bytes, o, l),
+                    CRC32ByteBuffer(bytes, o, l));
+      }
+    }
+
+    int len = bytes.length / 2;
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, 0, len - 1),
+                CRC32ByteBuffer(bytes, 0, len - 1));
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, 0, len),
+                CRC32ByteBuffer(bytes, 0, len));
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, 0, len + 1),
+                CRC32ByteBuffer(bytes, 0, len + 1));
+
+    len = rnd.nextInt(bytes.length + 1);
+    off = rnd.nextInt(bytes.length - len);
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, off, len),
+                CRC32ByteBuffer(bytes, off, len));
+  }
+
+  private static long CRC32DirectByteBuffer(byte[] bytes, int off, int len) {
+    final int total_len = off + len;
+    ByteBuffer buf = ByteBuffer.allocateDirect(total_len).put(bytes, 0, total_len);
+    buf.position(off);
+    CRC32 crc32 = new CRC32();
+    crc32.update(buf);
+    return crc32.getValue();
+  }
+
+  private static long CRC32ByteAndDirectByteBuffer(int value, byte[] bytes) {
+    ByteBuffer buf = ByteBuffer.allocateDirect(bytes.length).put(bytes);
+    buf.position(0);
+    CRC32 crc32 = new CRC32();
+    crc32.update(value);
+    crc32.update(buf);
+    return crc32.getValue();
+  }
+
+  private static long CRC32DirectByteBufferAndByte(byte[] bytes, int value) {
+    ByteBuffer buf = ByteBuffer.allocateDirect(bytes.length).put(bytes);
+    buf.position(0);
+    CRC32 crc32 = new CRC32();
+    crc32.update(buf);
+    crc32.update(value);
+    return crc32.getValue();
+  }
+
+  private static void TestCRC32UpdateDirectByteBuffer() {
+    assertEqual(0L, CRC32DirectByteBuffer(new byte[] {}, 0, 0));
+    assertEqual(0L, CRC32DirectByteBuffer(new byte[] {0}, 0, 0));
+    assertEqual(0L, CRC32DirectByteBuffer(new byte[] {0}, 1, 0));
+    assertEqual(0L, CRC32DirectByteBuffer(new byte[] {0, 0}, 1, 0));
+
+    assertEqual(CRC32Byte(0), CRC32DirectByteBuffer(new byte[] {0}, 0, 1));
+    assertEqual(CRC32Byte(1), CRC32DirectByteBuffer(new byte[] {1}, 0, 1));
+    assertEqual(CRC32Byte(0x0f), CRC32DirectByteBuffer(new byte[] {0x0f}, 0, 1));
+    assertEqual(CRC32Byte(0xff), CRC32DirectByteBuffer(new byte[] {-1}, 0, 1));
+    assertEqual(CRC32BytesUsingUpdateInt(0, 0, 0),
+                CRC32DirectByteBuffer(new byte[] {0, 0, 0}, 0, 3));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 1, 1),
+                CRC32DirectByteBuffer(new byte[] {1, 1, 1}, 0, 3));
+    assertEqual(CRC32BytesUsingUpdateInt(0x0f, 0x0f, 0x0f),
+                CRC32DirectByteBuffer(new byte[] {0x0f, 0x0f, 0x0f}, 0, 3));
+    assertEqual(CRC32BytesUsingUpdateInt(0xff, 0xff, 0xff),
+                CRC32DirectByteBuffer(new byte[] {-1, -1, -1}, 0, 3));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 2),
+                CRC32DirectByteBuffer(new byte[] {1, 2}, 0, 2));
+    assertEqual(
+        CRC32BytesUsingUpdateInt(0, -1, Byte.MIN_VALUE, Byte.MAX_VALUE),
+        CRC32DirectByteBuffer(new byte[] {0, -1, Byte.MIN_VALUE, Byte.MAX_VALUE}, 0, 4));
+
+    assertEqual(CRC32BytesUsingUpdateInt(0, 0, 0),
+                CRC32ByteAndDirectByteBuffer(0, new byte[] {0, 0}));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 1, 1),
+                CRC32ByteAndDirectByteBuffer(1, new byte[] {1, 1}));
+    assertEqual(CRC32BytesUsingUpdateInt(0x0f, 0x0f, 0x0f),
+                CRC32ByteAndDirectByteBuffer(0x0f, new byte[] {0x0f, 0x0f}));
+    assertEqual(CRC32BytesUsingUpdateInt(0xff, 0xff, 0xff),
+                CRC32ByteAndDirectByteBuffer(-1, new byte[] {-1, -1}));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 2, 3),
+                CRC32ByteAndDirectByteBuffer(1, new byte[] {2, 3}));
+    assertEqual(
+        CRC32BytesUsingUpdateInt(0, -1, Byte.MIN_VALUE, Byte.MAX_VALUE),
+        CRC32ByteAndDirectByteBuffer(0, new byte[] {-1, Byte.MIN_VALUE, Byte.MAX_VALUE}));
+
+    assertEqual(CRC32BytesUsingUpdateInt(0, 0, 0),
+                CRC32DirectByteBufferAndByte(new byte[] {0, 0}, 0));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 1, 1),
+                CRC32DirectByteBufferAndByte(new byte[] {1, 1}, 1));
+    assertEqual(CRC32BytesUsingUpdateInt(0x0f, 0x0f, 0x0f),
+                CRC32DirectByteBufferAndByte(new byte[] {0x0f, 0x0f}, 0x0f));
+    assertEqual(CRC32BytesUsingUpdateInt(0xff, 0xff, 0xff),
+                CRC32DirectByteBufferAndByte(new byte[] {-1, -1}, -1));
+    assertEqual(CRC32BytesUsingUpdateInt(1, 2, 3),
+                CRC32DirectByteBufferAndByte(new byte[] {1, 2}, 3));
+    assertEqual(
+        CRC32BytesUsingUpdateInt(0, -1, Byte.MIN_VALUE, Byte.MAX_VALUE),
+        CRC32DirectByteBufferAndByte(new byte[] {0, -1, Byte.MIN_VALUE}, Byte.MAX_VALUE));
+
+    byte[] bytes = new byte[128 * 1024];
+    Random rnd = new Random(0);
+    rnd.nextBytes(bytes);
+
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, 0, bytes.length),
+                CRC32DirectByteBuffer(bytes, 0, bytes.length));
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, 0, 8 * 1024),
+                CRC32DirectByteBuffer(bytes, 0, 8 * 1024));
+
+    int off = rnd.nextInt(bytes.length / 2);
+    for (int len = 0; len <= 16; ++len) {
+      assertEqual(CRC32BytesUsingUpdateInt(bytes, off, len),
+                  CRC32DirectByteBuffer(bytes, off, len));
+    }
+
+    // Check there are no issues with unaligned accesses.
+    for (int o = 1; o < 8; ++o) {
+      for (int l = 0; l <= 16; ++l) {
+        assertEqual(CRC32BytesUsingUpdateInt(bytes, o, l),
+                    CRC32DirectByteBuffer(bytes, o, l));
+      }
+    }
+
+    int len = bytes.length / 2;
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, 0, len - 1),
+                CRC32DirectByteBuffer(bytes, 0, len - 1));
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, 0, len),
+                CRC32DirectByteBuffer(bytes, 0, len));
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, 0, len + 1),
+                CRC32DirectByteBuffer(bytes, 0, len + 1));
+
+    len = rnd.nextInt(bytes.length + 1);
+    off = rnd.nextInt(bytes.length - len);
+    assertEqual(CRC32BytesUsingUpdateInt(bytes, off, len),
+                CRC32DirectByteBuffer(bytes, off, len));
+  }
+
   public static void main(String args[]) {
     TestCRC32Update();
     TestCRC32UpdateBytes();
+    TestCRC32UpdateByteBuffer();
+    TestCRC32UpdateDirectByteBuffer();
   }
 }