Revert "Revert "Matcher: Avoid excessive String copies.""

This reverts commit 53e4e15bb0fc2f789274998f3ce62c4fc021fbe8.

Fixes a memory leak introduced in 53e4e15bb0f. Also :
- switches to using ScopedStringChars to simplify the code further.
- better ICU error checking.

Bug: 36366255
Bug: 36818684

Test: cts -m CtsLibcoreTestCases
Test: ScannerTest on host/x86

(cherry picked from commit 9084fb6f7c4da5d42c6b58b523d71e8ad65dd754)

Change-Id: Iad7c5b07647542bcaa2e6621107c0cd206249bae
(cherry picked from commit 285e53bf2b7c81134ae8bdb4044df216610bed17)
diff --git a/benchmarks/src/benchmarks/regression/StringReplaceAllBenchmark.java b/benchmarks/src/benchmarks/regression/StringReplaceAllBenchmark.java
new file mode 100644
index 0000000..73e7979
--- /dev/null
+++ b/benchmarks/src/benchmarks/regression/StringReplaceAllBenchmark.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package benchmarks.regression;
+
+import com.google.caliper.Param;
+
+public class StringReplaceAllBenchmark {
+    // NOTE: These estimates of MOVEABLE / NON_MOVEABLE are based on a knowledge of
+    // ART implementation details. They make a difference here because JNI calls related
+    // to strings took different paths depending on whether the String in question was
+    // moveable or not.
+    enum StringLengths {
+        EMPTY(""),
+        MOVEABLE_16(makeString(16)),
+        MOVEABLE_256(makeString(256)),
+        MOVEABLE_1024(makeString(1024)),
+        NON_MOVEABLE(makeString(64 * 1024)),
+        BOOT_IMAGE(java.util.jar.JarFile.MANIFEST_NAME);
+
+        private final String value;
+
+        StringLengths(String s) {
+            this.value = s;
+        }
+    }
+
+    private static final String makeString(int length) {
+        final String sequence8 = "abcdefghijklmnop";
+        final int numAppends = (length / 16) - 1;
+        StringBuilder stringBuilder = new StringBuilder(length);
+
+        // (n-1) occurences of "abcdefghijklmnop"
+        for (int i = 0; i < numAppends; ++i) {
+            stringBuilder.append(sequence8);
+        }
+
+        // and one final occurence of qrstuvwx.
+        stringBuilder.append("qrstuvwx");
+
+        return stringBuilder.toString();
+    }
+
+    @Param private StringLengths s;
+
+    public void timeReplaceAllTrivialPatternNonExistent(int reps) {
+        for (int i = 0; i < reps; ++i) {
+            s.value.replaceAll("fish", "0");
+        }
+    }
+
+    public void timeReplaceTrivialPatternAllRepeated(int reps) {
+        for (int i = 0; i < reps; ++i) {
+            s.value.replaceAll("jklm", "0");
+        }
+    }
+
+    public void timeReplaceAllTrivialPatternSingleOccurence(int reps) {
+        for (int i = 0; i < reps; ++i) {
+            s.value.replaceAll("qrst", "0");
+        }
+    }
+}
diff --git a/luni/src/main/native/java_util_regex_Matcher.cpp b/luni/src/main/native/java_util_regex_Matcher.cpp
index 2461afc..2271ba8 100644
--- a/luni/src/main/native/java_util_regex_Matcher.cpp
+++ b/luni/src/main/native/java_util_regex_Matcher.cpp
@@ -16,75 +16,114 @@
 
 #define LOG_TAG "Matcher"
 
+#include <memory>
 #include <stdlib.h>
 
+#include <android-base/logging.h>
+
 #include "IcuUtilities.h"
 #include "JNIHelp.h"
 #include "JniConstants.h"
 #include "JniException.h"
-#include "ScopedPrimitiveArray.h"
 #include "ScopedJavaUnicodeString.h"
+#include "ScopedPrimitiveArray.h"
+#include "ScopedStringChars.h"
 #include "jni.h"
 #include "unicode/parseerr.h"
 #include "unicode/regex.h"
 
 // ICU documentation: http://icu-project.org/apiref/icu4c/classRegexMatcher.html
 
-static icu::RegexMatcher* toRegexMatcher(jlong address) {
-    return reinterpret_cast<icu::RegexMatcher*>(static_cast<uintptr_t>(address));
-}
-
 /**
- * We use ICU4C's RegexMatcher class, but our input is on the Java heap and potentially moving
- * around between calls. This wrapper class ensures that our RegexMatcher is always pointing at
- * the current location of the char[]. Earlier versions of Android simply copied the data to the
- * native heap, but that's wasteful and hides allocations from the garbage collector.
+ * Encapsulates an instance of ICU4C's RegexMatcher class along with a copy of
+ * the input it's currently operating on in the native heap.
+ *
+ * Rationale: We choose to make a copy here because it turns out to be a lot
+ * cheaper when a moving GC and/or string compression is enabled. This is
+ * because env->GetStringChars() always copies in this scenario. This becomes
+ * especially bad when the String in question is long and/or contains a large
+ * number of matches.
+ *
+ * Drawbacks: The native allocation associated with this class is no longer
+ * fixed size, so we're effectively lying to the NativeAllocationRegistry about
+ * the size of the object(s) we're allocating on the native heap. The peak
+ * memory usage doesn't change though, given that GetStringChars would have
+ * made an allocation of precisely the same size.
  */
-class MatcherAccessor {
+class MatcherState {
 public:
-    MatcherAccessor(JNIEnv* env, jlong address, jstring javaInput, bool reset) {
-        init(env, address);
+    MatcherState(icu::RegexMatcher* matcher) :
+        mMatcher(matcher),
+        mUChars(nullptr),
+        mUText(nullptr),
+        mStatus(U_ZERO_ERROR) {
+    }
 
-        mJavaInput = javaInput;
-        mChars = env->GetStringChars(mJavaInput, NULL);
-        if (mChars == NULL) {
-            return;
+    bool updateInput(JNIEnv* env, jstring input) {
+        // First, close the UText struct, since we're about to allocate a new one.
+        if (mUText != nullptr) {
+            utext_close(mUText);
+            mUText = nullptr;
         }
 
-        mUText = utext_openUChars(NULL, mChars, env->GetStringLength(mJavaInput), &mStatus);
-        if (mUText == NULL) {
-            return;
+        // Then delete the UChar* associated with the UText struct..
+        mUChars.reset(nullptr);
+
+        // TODO: We should investigate whether we can avoid an additional copy
+        // in the native heap when is_copy == JNI_TRUE. The problem with doing
+        // that is that we might call ReleaseStringChars with a different
+        // JNIEnv* on a different downcall. This is currently safe as
+        // implemented in ART, but is unlikely to be portable and the spec stays
+        // silent on the matter.
+        ScopedStringChars inputChars(env, input);
+        if (inputChars.get() == nullptr) {
+            // There will be an exception pending if we get here.
+            return false;
         }
 
-        if (reset) {
-            mMatcher->reset(mUText);
-        } else {
-            mMatcher->refreshInputText(mUText, mStatus);
+        // Make a copy of |input| on the native heap. This copy will be live
+        // until the next call to updateInput or close.
+        mUChars.reset(new (std::nothrow) UChar[inputChars.size()]);
+        if (mUChars.get() == nullptr) {
+            env->ThrowNew(env->FindClass("Ljava/lang/OutOfMemoryError;"), "Out of memory");
+            return false;
+        }
+
+        static_assert(sizeof(UChar) == sizeof(jchar), "sizeof(Uchar) != sizeof(jchar)");
+        memcpy(mUChars.get(), inputChars.get(), inputChars.size() * sizeof(jchar));
+
+        // Reset any errors that might have occurred on previous patches.
+        mStatus = U_ZERO_ERROR;
+        mUText = utext_openUChars(nullptr, mUChars.get(), inputChars.size(), &mStatus);
+        if (mUText == nullptr) {
+            CHECK(maybeThrowIcuException(env, "utext_openUChars", mStatus));
+            return false;
+        }
+
+        // It is an error for ICU to have returned a non-null mUText but to
+        // still have indicated an error.
+        CHECK(U_SUCCESS(mStatus));
+
+        mMatcher->reset(mUText);
+        return true;
+    }
+
+    ~MatcherState() {
+        if (mUText != nullptr) {
+            utext_close(mUText);
         }
     }
 
-    MatcherAccessor(JNIEnv* env, jlong address) {
-        init(env, address);
-    }
-
-    ~MatcherAccessor() {
-        utext_close(mUText);
-        if (mJavaInput) {
-            mEnv->ReleaseStringChars(mJavaInput, mChars);
-        }
-        maybeThrowIcuException(mEnv, "utext_close", mStatus);
-    }
-
-    icu::RegexMatcher* operator->() {
-        return mMatcher;
+    icu::RegexMatcher* matcher() {
+        return mMatcher.get();
     }
 
     UErrorCode& status() {
         return mStatus;
     }
 
-    void updateOffsets(jintArray javaOffsets) {
-        ScopedIntArrayRW offsets(mEnv, javaOffsets);
+    void updateOffsets(JNIEnv* env, jintArray javaOffsets) {
+        ScopedIntArrayRW offsets(env, javaOffsets);
         if (offsets.get() == NULL) {
             return;
         }
@@ -96,29 +135,23 @@
     }
 
 private:
-    void init(JNIEnv* env, jlong address) {
-        mEnv = env;
-        mJavaInput = NULL;
-        mMatcher = toRegexMatcher(address);
-        mChars = NULL;
-        mStatus = U_ZERO_ERROR;
-        mUText = NULL;
-    }
-
-    JNIEnv* mEnv;
-    jstring mJavaInput;
-    icu::RegexMatcher* mMatcher;
-    const jchar* mChars;
-    UErrorCode mStatus;
+    std::unique_ptr<icu::RegexMatcher> mMatcher;
+    std::unique_ptr<UChar[]> mUChars;
     UText* mUText;
+    UErrorCode mStatus;
 
     // Disallow copy and assignment.
-    MatcherAccessor(const MatcherAccessor&);
-    void operator=(const MatcherAccessor&);
+    MatcherState(const MatcherState&);
+    void operator=(const MatcherState&);
 };
 
+static inline MatcherState* toMatcherState(jlong address) {
+    return reinterpret_cast<MatcherState*>(static_cast<uintptr_t>(address));
+}
+
 static void Matcher_free(void* address) {
-    delete reinterpret_cast<icu::RegexMatcher*>(address);
+    MatcherState* state = reinterpret_cast<MatcherState*>(address);
+    delete state;
 }
 
 static jlong Matcher_getNativeFinalizer(JNIEnv*, jclass) {
@@ -131,51 +164,48 @@
     return 200;  // Very rough guess based on a quick look at the implementation.
 }
 
-static jint Matcher_findImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jint startIndex, jintArray offsets) {
-    MatcherAccessor matcher(env, addr, javaText, false);
-    UBool result = matcher->find(startIndex, matcher.status());
+static jint Matcher_findImpl(JNIEnv* env, jclass, jlong addr, jint startIndex, jintArray offsets) {
+    MatcherState* state = toMatcherState(addr);
+    UBool result = state->matcher()->find(startIndex, state->status());
     if (result) {
-        matcher.updateOffsets(offsets);
+        state->updateOffsets(env, offsets);
     }
     return result;
 }
 
-static jint Matcher_findNextImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jintArray offsets) {
-    MatcherAccessor matcher(env, addr, javaText, false);
-    if (matcher.status() != U_ZERO_ERROR) {
-        return -1;
-    }
-    UBool result = matcher->find();
+static jint Matcher_findNextImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
+    MatcherState* state = toMatcherState(addr);
+    UBool result = state->matcher()->find();
     if (result) {
-        matcher.updateOffsets(offsets);
+        state->updateOffsets(env, offsets);
     }
     return result;
 }
 
-static jint Matcher_groupCountImpl(JNIEnv* env, jclass, jlong addr) {
-    MatcherAccessor matcher(env, addr);
-    return matcher->groupCount();
+static jint Matcher_groupCountImpl(JNIEnv*, jclass, jlong addr) {
+    MatcherState* state = toMatcherState(addr);
+    return state->matcher()->groupCount();
 }
 
-static jint Matcher_hitEndImpl(JNIEnv* env, jclass, jlong addr) {
-    MatcherAccessor matcher(env, addr);
-    return matcher->hitEnd();
+static jint Matcher_hitEndImpl(JNIEnv*, jclass, jlong addr) {
+    MatcherState* state = toMatcherState(addr);
+    return state->matcher()->hitEnd();
 }
 
-static jint Matcher_lookingAtImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jintArray offsets) {
-    MatcherAccessor matcher(env, addr, javaText, false);
-    UBool result = matcher->lookingAt(matcher.status());
+static jint Matcher_lookingAtImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
+    MatcherState* state = toMatcherState(addr);
+    UBool result = state->matcher()->lookingAt(state->status());
     if (result) {
-        matcher.updateOffsets(offsets);
+        state->updateOffsets(env, offsets);
     }
     return result;
 }
 
-static jint Matcher_matchesImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jintArray offsets) {
-    MatcherAccessor matcher(env, addr, javaText, false);
-    UBool result = matcher->matches(matcher.status());
+static jint Matcher_matchesImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
+    MatcherState* state = toMatcherState(addr);
+    UBool result = state->matcher()->matches(state->status());
     if (result) {
-        matcher.updateOffsets(offsets);
+        state->updateOffsets(env, offsets);
     }
     return result;
 }
@@ -184,28 +214,33 @@
     icu::RegexPattern* pattern = reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(patternAddr));
     UErrorCode status = U_ZERO_ERROR;
     icu::RegexMatcher* result = pattern->matcher(status);
-    maybeThrowIcuException(env, "RegexPattern::matcher", status);
-    return reinterpret_cast<uintptr_t>(result);
+    if (maybeThrowIcuException(env, "RegexPattern::matcher", status)) {
+        return 0;
+    }
+
+    return reinterpret_cast<uintptr_t>(new MatcherState(result));
 }
 
-static jint Matcher_requireEndImpl(JNIEnv* env, jclass, jlong addr) {
-    MatcherAccessor matcher(env, addr);
-    return matcher->requireEnd();
+static jint Matcher_requireEndImpl(JNIEnv*, jclass, jlong addr) {
+    MatcherState* state = toMatcherState(addr);
+    return state->matcher()->requireEnd();
 }
 
 static void Matcher_setInputImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jint start, jint end) {
-    MatcherAccessor matcher(env, addr, javaText, true);
-    matcher->region(start, end, matcher.status());
+    MatcherState* state = toMatcherState(addr);
+    if (state->updateInput(env, javaText)) {
+        state->matcher()->region(start, end, state->status());
+    }
 }
 
-static void Matcher_useAnchoringBoundsImpl(JNIEnv* env, jclass, jlong addr, jboolean value) {
-    MatcherAccessor matcher(env, addr);
-    matcher->useAnchoringBounds(value);
+static void Matcher_useAnchoringBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) {
+    MatcherState* state = toMatcherState(addr);
+    state->matcher()->useAnchoringBounds(value);
 }
 
-static void Matcher_useTransparentBoundsImpl(JNIEnv* env, jclass, jlong addr, jboolean value) {
-    MatcherAccessor matcher(env, addr);
-    matcher->useTransparentBounds(value);
+static void Matcher_useTransparentBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) {
+    MatcherState* state = toMatcherState(addr);
+    state->matcher()->useTransparentBounds(value);
 }
 
 static jint Matcher_getMatchedGroupIndex0(JNIEnv* env, jclass, jlong patternAddr, jstring javaGroupName) {
@@ -227,13 +262,13 @@
 
 static JNINativeMethod gMethods[] = {
     NATIVE_METHOD(Matcher, getMatchedGroupIndex0, "(JLjava/lang/String;)I"),
-    NATIVE_METHOD(Matcher, findImpl, "(JLjava/lang/String;I[I)Z"),
-    NATIVE_METHOD(Matcher, findNextImpl, "(JLjava/lang/String;[I)Z"),
+    NATIVE_METHOD(Matcher, findImpl, "(JI[I)Z"),
+    NATIVE_METHOD(Matcher, findNextImpl, "(J[I)Z"),
     NATIVE_METHOD(Matcher, getNativeFinalizer, "()J"),
     NATIVE_METHOD(Matcher, groupCountImpl, "(J)I"),
     NATIVE_METHOD(Matcher, hitEndImpl, "(J)Z"),
-    NATIVE_METHOD(Matcher, lookingAtImpl, "(JLjava/lang/String;[I)Z"),
-    NATIVE_METHOD(Matcher, matchesImpl, "(JLjava/lang/String;[I)Z"),
+    NATIVE_METHOD(Matcher, lookingAtImpl, "(J[I)Z"),
+    NATIVE_METHOD(Matcher, matchesImpl, "(J[I)Z"),
     NATIVE_METHOD(Matcher, nativeSize, "()I"),
     NATIVE_METHOD(Matcher, openImpl, "(J)J"),
     NATIVE_METHOD(Matcher, requireEndImpl, "(J)Z"),
diff --git a/ojluni/src/main/java/java/util/regex/Matcher.java b/ojluni/src/main/java/java/util/regex/Matcher.java
index 3d79ba7..8e5b728 100644
--- a/ojluni/src/main/java/java/util/regex/Matcher.java
+++ b/ojluni/src/main/java/java/util/regex/Matcher.java
@@ -444,7 +444,7 @@
      */
     public boolean matches() {
         synchronized (this) {
-            matchFound = matchesImpl(address, input, matchOffsets);
+            matchFound = matchesImpl(address, matchOffsets);
         }
         return matchFound;
     }
@@ -466,7 +466,7 @@
      */
     public boolean find() {
         synchronized (this) {
-            matchFound = findNextImpl(address, input, matchOffsets);
+            matchFound = findNextImpl(address, matchOffsets);
         }
         return matchFound;
     }
@@ -495,7 +495,7 @@
         }
 
         synchronized (this) {
-            matchFound = findImpl(address, input, start, matchOffsets);
+            matchFound = findImpl(address, start, matchOffsets);
         }
         return matchFound;
     }
@@ -516,7 +516,7 @@
      */
     public boolean lookingAt() {
         synchronized (this) {
-            matchFound = lookingAtImpl(address, input, matchOffsets);
+            matchFound = lookingAtImpl(address, matchOffsets);
         }
         return matchFound;
     }
@@ -1181,13 +1181,13 @@
     }
 
     private static native int getMatchedGroupIndex0(long patternAddr, String name);
-    private static native boolean findImpl(long addr, String s, int startIndex, int[] offsets);
-    private static native boolean findNextImpl(long addr, String s, int[] offsets);
+    private static native boolean findImpl(long addr, int startIndex, int[] offsets);
+    private static native boolean findNextImpl(long addr, int[] offsets);
     private static native long getNativeFinalizer();
     private static native int groupCountImpl(long addr);
     private static native boolean hitEndImpl(long addr);
-    private static native boolean lookingAtImpl(long addr, String s, int[] offsets);
-    private static native boolean matchesImpl(long addr, String s, int[] offsets);
+    private static native boolean lookingAtImpl(long addr, int[] offsets);
+    private static native boolean matchesImpl(long addr, int[] offsets);
     private static native int nativeSize();
     private static native long openImpl(long patternAddr);
     private static native boolean requireEndImpl(long addr);