Fix ARM64 SystemArrayCopy intrinsic with large constant dest position.

Make sure we do not deplete the whole VIXL scratch register pool, so
that VIXL can still use IP0 as a temporary when emitting
macro-instructions.

Test: art/test/testrunner/testrunner.py --optimizing --target --64
Bug: 37256530
Change-Id: I5da22e552297fad87db5763e2dab60ae6a7a43af
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 423fd3c..8485c32 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -2755,9 +2755,17 @@
         // Make sure `tmp` is not IP0, as it is clobbered by
         // ReadBarrierMarkRegX entry points in
         // ReadBarrierSystemArrayCopySlowPathARM64.
+        DCHECK(temps.IsAvailable(ip0));
         temps.Exclude(ip0);
         Register tmp = temps.AcquireW();
         DCHECK_NE(LocationFrom(tmp).reg(), IP0);
+        // Put IP0 back in the pool so that VIXL has at least one
+        // scratch register available to emit macro-instructions (note
+        // that IP1 is already used for `tmp`). Indeed, some
+        // macro-instructions used in GenSystemArrayCopyAddresses
+        // (invoked below) may require a scratch register (for
+        // instance, to emit a load with a large constant offset).
+        temps.Include(ip0);
 
         // /* int32_t */ monitor = src->monitor_
         __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
diff --git a/test/646-checker-arraycopy-large-cst-pos/expected.txt b/test/646-checker-arraycopy-large-cst-pos/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/646-checker-arraycopy-large-cst-pos/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/646-checker-arraycopy-large-cst-pos/info.txt b/test/646-checker-arraycopy-large-cst-pos/info.txt
new file mode 100644
index 0000000..9ac21db
--- /dev/null
+++ b/test/646-checker-arraycopy-large-cst-pos/info.txt
@@ -0,0 +1,4 @@
+Regression test for an issue with a depleted VIXL scratch register
+pool during the emission of a SystemArrayCopy intrinsic with a large
+constant destination position, on ARM64, with read barriers
+(b/37256530).
diff --git a/test/646-checker-arraycopy-large-cst-pos/src/Main.java b/test/646-checker-arraycopy-large-cst-pos/src/Main.java
new file mode 100644
index 0000000..3144fc1
--- /dev/null
+++ b/test/646-checker-arraycopy-large-cst-pos/src/Main.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  // Only prints the expected output; test() is not called and exists as the CHECK target.
+  public static void main(String[] args) {
+    System.out.println("passed");
+  }
+
+  /// CHECK-START-ARM64: void Main.test() disassembly (after)
+  /// CHECK: InvokeStaticOrDirect method_name:java.lang.System.arraycopy intrinsic:SystemArrayCopy
+  /// CHECK-NOT:    blr
+  /// CHECK: ReturnVoid
+
+  static void test() {
+    Object[] src = new Object[1024];
+    Object[] dst = new Object[2048];
+    // The copy length (64) must stay below kSystemArrayCopyThreshold
+    // (128) for System.arraycopy to be intrinsified. The destination
+    // position (1024) is the large constant under test (b/37256530).
+    System.arraycopy(src, 0, dst, 1024, 64);
+  }
+
+}