Implement System.arraycopy intrinsic on x86.

Also remove wrong comments when doing the raw copying.

test:run-test, 537-checker-arraycopy, 610-arraycopy

Change-Id: I2495bc03cde8ccad03c93f7722dd29bf85138525
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index bfe4956..579fb9d 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -1365,7 +1365,6 @@
                           Register input,
                           Location length,
                           SlowPathCode* slow_path,
-                          Register input_len,
                           Register temp,
                           bool length_is_input_length = false) {
   // Where is the length in the Array?
@@ -1386,8 +1385,8 @@
       }
     } else {
       // Check that length(input) >= pos.
-      __ LoadFromOffset(kLoadWord, input_len, input, length_offset);
-      __ subs(temp, input_len, ShifterOperand(pos_const));
+      __ LoadFromOffset(kLoadWord, temp, input, length_offset);
+      __ subs(temp, temp, ShifterOperand(pos_const));
       __ b(slow_path->GetEntryLabel(), LT);
 
       // Check that (length(input) - pos) >= length.
@@ -1518,7 +1517,6 @@
                 length,
                 slow_path,
                 temp1,
-                temp2,
                 optimizations.GetCountIsSourceLength());
 
   // Validity checks: dest.
@@ -1528,7 +1526,6 @@
                 length,
                 slow_path,
                 temp1,
-                temp2,
                 optimizations.GetCountIsDestinationLength());
 
   if (!optimizations.GetDoesNotNeedTypeCheck()) {
@@ -1606,7 +1603,7 @@
 
   // Compute base source address, base destination address, and end source address.
 
-  uint32_t element_size = sizeof(int32_t);
+  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
   uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
   if (src_pos.IsConstant()) {
     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
@@ -1632,8 +1629,7 @@
   }
 
   // Iterate over the arrays and do a raw copy of the objects. We don't need to
-  // poison/unpoison, nor do any read barrier as the next uses of the destination
-  // array will do it.
+  // poison/unpoison.
   Label loop, done;
   __ cmp(temp1, ShifterOperand(temp3));
   __ b(&done, EQ);
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 549407e..30fa650 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -1840,7 +1840,6 @@
                                          const Register& input,
                                          const Location& length,
                                          SlowPathCodeARM64* slow_path,
-                                         const Register& input_len,
                                          const Register& temp,
                                          bool length_is_input_length = false) {
   const int32_t length_offset = mirror::Array::LengthOffset().Int32Value();
@@ -1855,8 +1854,8 @@
       }
     } else {
       // Check that length(input) >= pos.
-      __ Ldr(input_len, MemOperand(input, length_offset));
-      __ Subs(temp, input_len, pos_const);
+      __ Ldr(temp, MemOperand(input, length_offset));
+      __ Subs(temp, temp, pos_const);
       __ B(slow_path->GetEntryLabel(), lt);
 
       // Check that (length(input) - pos) >= length.
@@ -1967,7 +1966,6 @@
                                length,
                                slow_path,
                                src_curr_addr,
-                               dst_curr_addr,
                                false);
 
   CheckSystemArrayCopyPosition(masm,
@@ -1976,7 +1974,6 @@
                                length,
                                slow_path,
                                src_curr_addr,
-                               dst_curr_addr,
                                false);
 
   src_curr_addr = src_curr_addr.X();
@@ -2164,7 +2161,6 @@
                                length,
                                slow_path,
                                temp1,
-                               temp2,
                                optimizations.GetCountIsSourceLength());
 
   // Validity checks: dest.
@@ -2174,7 +2170,6 @@
                                length,
                                slow_path,
                                temp1,
-                               temp2,
                                optimizations.GetCountIsDestinationLength());
   {
     // We use a block to end the scratch scope before the write barrier, thus
@@ -2270,8 +2265,7 @@
                                 src_stop_addr);
 
     // Iterate over the arrays and do a raw copy of the objects. We don't need to
-    // poison/unpoison, nor do any read barrier as the next uses of the destination
-    // array will do it.
+    // poison/unpoison.
     vixl::Label loop, done;
     const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
     __ Bind(&loop);
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 031cd13..812bdf5 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1070,30 +1070,45 @@
 static void CheckPosition(X86Assembler* assembler,
                           Location pos,
                           Register input,
-                          Register length,
+                          Location length,
                           SlowPathCode* slow_path,
-                          Register input_len,
-                          Register temp) {
-  // Where is the length in the String?
+                          Register temp,
+                          bool length_is_input_length = false) {
+  // Where is the length in the Array?
   const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
 
   if (pos.IsConstant()) {
     int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
     if (pos_const == 0) {
-      // Check that length(input) >= length.
-      __ cmpl(Address(input, length_offset), length);
-      __ j(kLess, slow_path->GetEntryLabel());
+      if (!length_is_input_length) {
+        // Check that length(input) >= length.
+        if (length.IsConstant()) {
+          __ cmpl(Address(input, length_offset),
+                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+        } else {
+          __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
+        }
+        __ j(kLess, slow_path->GetEntryLabel());
+      }
     } else {
       // Check that length(input) >= pos.
-      __ movl(input_len, Address(input, length_offset));
-      __ cmpl(input_len, Immediate(pos_const));
+      __ movl(temp, Address(input, length_offset));
+      __ subl(temp, Immediate(pos_const));
       __ j(kLess, slow_path->GetEntryLabel());
 
       // Check that (length(input) - pos) >= length.
-      __ leal(temp, Address(input_len, -pos_const));
-      __ cmpl(temp, length);
+      if (length.IsConstant()) {
+        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+      } else {
+        __ cmpl(temp, length.AsRegister<Register>());
+      }
       __ j(kLess, slow_path->GetEntryLabel());
     }
+  } else if (length_is_input_length) {
+    // The only way the copy can succeed is if pos is zero.
+    Register pos_reg = pos.AsRegister<Register>();
+    __ testl(pos_reg, pos_reg);
+    __ j(kNotEqual, slow_path->GetEntryLabel());
   } else {
     // Check that pos >= 0.
     Register pos_reg = pos.AsRegister<Register>();
@@ -1107,7 +1122,11 @@
     // Check that (length(input) - pos) >= length.
     __ movl(temp, Address(input, length_offset));
     __ subl(temp, pos_reg);
-    __ cmpl(temp, length);
+    if (length.IsConstant()) {
+      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
+    } else {
+      __ cmpl(temp, length.AsRegister<Register>());
+    }
     __ j(kLess, slow_path->GetEntryLabel());
   }
 }
@@ -1159,11 +1178,11 @@
     __ movl(count, length.AsRegister<Register>());
   }
 
-  // Validity checks: source.
-  CheckPosition(assembler, srcPos, src, count, slow_path, src_base, dest_base);
+  // Validity checks: source. Use src_base as a temporary register.
+  CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);
 
-  // Validity checks: dest.
-  CheckPosition(assembler, destPos, dest, count, slow_path, src_base, dest_base);
+  // Validity checks: dest. Use src_base as a temporary register.
+  CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);
 
   // Okay, everything checks out.  Finally time to do the copy.
   // Check assumption that sizeof(Char) is 2 (used in scaling below).
@@ -2646,8 +2665,274 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
+  return instruction->InputAt(input0) == instruction->InputAt(input1);
+}
+
+void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
+  // TODO(rpl): Implement read barriers in the SystemArrayCopy
+  // intrinsic and re-enable it (b/29516905).
+  if (kEmitCompilerReadBarrier) {
+    return;
+  }
+
+  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
+  if (invoke->GetLocations() != nullptr) {
+    // Need a byte register for marking.
+    invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
+
+    static constexpr size_t kSrc = 0;
+    static constexpr size_t kSrcPos = 1;
+    static constexpr size_t kDest = 2;
+    static constexpr size_t kDestPos = 3;
+    static constexpr size_t kLength = 4;
+
+    if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
+        !invoke->InputAt(kDestPos)->IsIntConstant() &&
+        !invoke->InputAt(kLength)->IsIntConstant()) {
+      if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
+          !IsSameInput(invoke, kSrcPos, kLength) &&
+          !IsSameInput(invoke, kDestPos, kLength) &&
+          !IsSameInput(invoke, kSrc, kDest)) {
+        // Not enough registers, make the length also take a stack slot.
+        invoke->GetLocations()->SetInAt(kLength, Location::Any());
+      }
+    }
+  }
+}
+
+void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
+  // TODO(rpl): Implement read barriers in the SystemArrayCopy
+  // intrinsic and re-enable it (b/29516905).
+  DCHECK(!kEmitCompilerReadBarrier);
+
+  X86Assembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+
+  Register src = locations->InAt(0).AsRegister<Register>();
+  Location src_pos = locations->InAt(1);
+  Register dest = locations->InAt(2).AsRegister<Register>();
+  Location dest_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+  Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+  Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+
+  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
+  codegen_->AddSlowPath(slow_path);
+
+  NearLabel conditions_on_positions_validated;
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  // If source and destination are the same, we go to slow path if we need to do
+  // forward copying.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      if (optimizations.GetDestinationIsSource()) {
+        // Checked when building locations.
+        DCHECK_GE(src_pos_constant, dest_pos_constant);
+      } else if (src_pos_constant < dest_pos_constant) {
+        __ cmpl(src, dest);
+        __ j(kEqual, slow_path->GetEntryLabel());
+      }
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ cmpl(src, dest);
+        __ j(kNotEqual, &conditions_on_positions_validated);
+      }
+      __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
+      __ j(kGreater, slow_path->GetEntryLabel());
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ cmpl(src, dest);
+      __ j(kNotEqual, &conditions_on_positions_validated);
+    }
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+      __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
+      __ j(kLess, slow_path->GetEntryLabel());
+    } else {
+      __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
+      __ j(kLess, slow_path->GetEntryLabel());
+    }
+  }
+
+  __ Bind(&conditions_on_positions_validated);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ testl(src, src);
+    __ j(kEqual, slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ testl(dest, dest);
+    __ j(kEqual, slow_path->GetEntryLabel());
+  }
+
+  Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+  if (length.IsStackSlot()) {
+    __ movl(temp3, Address(ESP, length.GetStackIndex()));
+    length = Location::RegisterLocation(temp3);
+  }
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
+    __ j(kLess, slow_path->GetEntryLabel());
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler,
+                src_pos,
+                src,
+                length,
+                slow_path,
+                temp1,
+                optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckPosition(assembler,
+                dest_pos,
+                dest,
+                length,
+                slow_path,
+                temp1,
+                optimizations.GetCountIsDestinationLength());
+
+  if (!optimizations.GetDoesNotNeedTypeCheck()) {
+    // Check whether all elements of the source array are assignable to the component
+    // type of the destination array. We do two checks: the classes are the same,
+    // or the destination is Object[]. If none of these checks succeed, we go to the
+    // slow path.
+    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+      // /* HeapReference<Class> */ temp1 = temp1->klass_
+      __ movl(temp1, Address(src, class_offset));
+      __ MaybeUnpoisonHeapReference(temp1);
+      // Bail out if the source is not a non primitive array.
+      // /* HeapReference<Class> */ temp1 = temp1->component_type_
+      __ movl(temp1, Address(temp1, component_offset));
+      __ testl(temp1, temp1);
+      __ j(kEqual, slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(temp1);
+      __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+    }
+
+    if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+      // /* HeapReference<Class> */ temp1 = temp1->klass_
+      __ movl(temp1, Address(dest, class_offset));
+      __ MaybeUnpoisonHeapReference(temp1);
+      // Bail out if the destination is not a non primitive array.
+      // /* HeapReference<Class> */ temp2 = temp1->component_type_
+      __ movl(temp2, Address(temp1, component_offset));
+      __ testl(temp2, temp2);
+      __ j(kEqual, slow_path->GetEntryLabel());
+      __ MaybeUnpoisonHeapReference(temp2);
+      __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+      // Re-poison the heap reference to make the compare instruction below
+      // compare two poisoned references.
+      __ PoisonHeapReference(temp1);
+    } else {
+      // /* HeapReference<Class> */ temp1 = temp1->klass_
+      __ movl(temp1, Address(dest, class_offset));
+    }
+
+    // Note: if poisoning is on, we are here comparing two poisoned references.
+    __ cmpl(temp1, Address(src, class_offset));
+
+    if (optimizations.GetDestinationIsTypedObjectArray()) {
+      NearLabel do_copy;
+      __ j(kEqual, &do_copy);
+      __ MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ temp1 = temp1->component_type_
+      __ movl(temp1, Address(temp1, component_offset));
+      __ MaybeUnpoisonHeapReference(temp1);
+      __ cmpl(Address(temp1, super_offset), Immediate(0));
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+      __ Bind(&do_copy);
+    } else {
+      __ j(kNotEqual, slow_path->GetEntryLabel());
+    }
+  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+    // Bail out if the source is not a non primitive array.
+    // /* HeapReference<Class> */ temp1 = src->klass_
+    __ movl(temp1, Address(src, class_offset));
+    __ MaybeUnpoisonHeapReference(temp1);
+    // /* HeapReference<Class> */ temp1 = temp1->component_type_
+    __ movl(temp1, Address(temp1, component_offset));
+    __ testl(temp1, temp1);
+    __ j(kEqual, slow_path->GetEntryLabel());
+    __ MaybeUnpoisonHeapReference(temp1);
+    __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
+    __ j(kNotEqual, slow_path->GetEntryLabel());
+  }
+
+  // Compute base source address, base destination address, and end source address.
+  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+  DCHECK_EQ(element_size, 4);
+  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+  if (src_pos.IsConstant()) {
+    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(temp1, Address(src, element_size * constant + offset));
+  } else {
+    __ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+  }
+
+  if (dest_pos.IsConstant()) {
+    int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(temp2, Address(dest, element_size * constant + offset));
+  } else {
+    __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
+  }
+
+  if (length.IsConstant()) {
+    int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(temp3, Address(temp1, element_size * constant));
+  } else {
+    __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+  }
+
+  // Iterate over the arrays and do a raw copy of the objects. We don't need to
+  // poison/unpoison.
+  NearLabel loop, done;
+  __ cmpl(temp1, temp3);
+  __ j(kEqual, &done);
+  __ Bind(&loop);
+  __ pushl(Address(temp1, 0));
+  __ cfi().AdjustCFAOffset(4);
+  __ popl(Address(temp2, 0));
+  __ cfi().AdjustCFAOffset(-4);
+  __ addl(temp1, Immediate(element_size));
+  __ addl(temp2, Immediate(element_size));
+  __ cmpl(temp1, temp3);
+  __ j(kNotEqual, &loop);
+  __ Bind(&done);
+
+  // We only need one card marking on the destination array.
+  codegen_->MarkGCCard(temp1,
+                       temp2,
+                       dest,
+                       Register(kNoRegister),
+                       /* value_can_be_null */ false);
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
-UNIMPLEMENTED_INTRINSIC(X86, SystemArrayCopy)
 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 5bd4964..891aaf5 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -922,7 +922,6 @@
                           CpuRegister input,
                           Location length,
                           SlowPathCode* slow_path,
-                          CpuRegister input_len,
                           CpuRegister temp,
                           bool length_is_input_length = false) {
   // Where is the length in the Array?
@@ -943,12 +942,11 @@
       }
     } else {
       // Check that length(input) >= pos.
-      __ movl(input_len, Address(input, length_offset));
-      __ cmpl(input_len, Immediate(pos_const));
+      __ movl(temp, Address(input, length_offset));
+      __ subl(temp, Immediate(pos_const));
       __ j(kLess, slow_path->GetEntryLabel());
 
       // Check that (length(input) - pos) >= length.
-      __ leal(temp, Address(input_len, -pos_const));
       if (length.IsConstant()) {
         __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
       } else {
@@ -1023,11 +1021,11 @@
     __ j(kLess, slow_path->GetEntryLabel());
   }
 
-  // Validity checks: source.
-  CheckPosition(assembler, src_pos, src, length, slow_path, src_base, dest_base);
+  // Validity checks: source. Use src_base as a temporary register.
+  CheckPosition(assembler, src_pos, src, length, slow_path, src_base);
 
-  // Validity checks: dest.
-  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base, dest_base);
+  // Validity checks: dest. Use src_base as a temporary register.
+  CheckPosition(assembler, dest_pos, dest, length, slow_path, src_base);
 
   // We need the count in RCX.
   if (length.IsConstant()) {
@@ -1169,7 +1167,6 @@
                 length,
                 slow_path,
                 temp1,
-                temp2,
                 optimizations.GetCountIsSourceLength());
 
   // Validity checks: dest.
@@ -1179,7 +1176,6 @@
                 length,
                 slow_path,
                 temp1,
-                temp2,
                 optimizations.GetCountIsDestinationLength());
 
   if (!optimizations.GetDoesNotNeedTypeCheck()) {
@@ -1258,7 +1254,7 @@
 
   // Compute base source address, base destination address, and end source address.
 
-  uint32_t element_size = sizeof(int32_t);
+  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
   uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
   if (src_pos.IsConstant()) {
     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
@@ -1282,8 +1278,7 @@
   }
 
   // Iterate over the arrays and do a raw copy of the objects. We don't need to
-  // poison/unpoison, nor do any read barrier as the next uses of the destination
-  // array will do it.
+  // poison/unpoison.
   NearLabel loop, done;
   __ cmpl(temp1, temp3);
   __ j(kEqual, &done);
diff --git a/test/537-checker-arraycopy/src/Main.java b/test/537-checker-arraycopy/src/Main.java
index 30ccc56..7c124ca 100644
--- a/test/537-checker-arraycopy/src/Main.java
+++ b/test/537-checker-arraycopy/src/Main.java
@@ -50,7 +50,7 @@
   }
 
   /// CHECK-START-X86_64: void Main.arraycopy() disassembly (after)
-  /// CHECK:          InvokeStaticOrDirect
+  /// CHECK:          InvokeStaticOrDirect intrinsic:SystemArrayCopy
   /// CHECK-NOT:      test
   /// CHECK-NOT:      call
   /// CHECK:          ReturnVoid
@@ -65,7 +65,36 @@
     System.arraycopy(obj, 1, obj, 0, 1);
   }
 
+  // Test case for having enough registers on x86 for the arraycopy intrinsic.
+  /// CHECK-START-X86: void Main.arraycopy(java.lang.Object[], int) disassembly (after)
+  /// CHECK:          InvokeStaticOrDirect intrinsic:SystemArrayCopy
+  /// CHECK-NOT:      mov {{[a-z]+}}, [esp + {{[0-9]+}}]
+  /// CHECK:          ReturnVoid
   public static void arraycopy(Object[] obj, int pos) {
     System.arraycopy(obj, pos, obj, 0, obj.length);
   }
+
+  // Test case for having enough registers on x86 for the arraycopy intrinsic
+  // when an input is passed twice.
+  /// CHECK-START-X86: int Main.arraycopy2(java.lang.Object[], int) disassembly (after)
+  /// CHECK:          InvokeStaticOrDirect intrinsic:SystemArrayCopy
+  /// CHECK-NOT:      mov {{[a-z]+}}, [esp + {{[0-9]+}}]
+  /// CHECK:          Return
+  public static int arraycopy2(Object[] obj, int pos) {
+    System.arraycopy(obj, pos, obj, pos - 1, obj.length);
+    return pos;
+  }
+
+  // Test case for not having enough registers on x86. The arraycopy intrinsic
+  // will ask for length to be in stack and load it.
+  /// CHECK-START-X86: int Main.arraycopy3(java.lang.Object[], java.lang.Object[], int, int, int) disassembly (after)
+  /// CHECK:          InvokeStaticOrDirect intrinsic:SystemArrayCopy
+  /// CHECK:          mov {{[a-z]+}}, [esp + {{[0-9]+}}]
+  /// CHECK:          Return
+  public static int arraycopy3(Object[] obj1, Object[] obj2, int input1, int input3, int input4) {
+    System.arraycopy(obj1, input1, obj2, input3, input4);
+    System.out.println(obj1);
+    System.out.println(obj2);
+    return input1 + input3 + input4;
+  }
 }