Revert "Revert "Matcher: Avoid excessive String copies.""
This reverts commit 53e4e15bb0fc2f789274998f3ce62c4fc021fbe8.
Fixes a memory leak introduced in 53e4e15bb0f. Also :
- switches to using ScopedStringChars to simplify the code further.
- better ICU error checking.
Bug: 36366255
Bug: 36818684
Test: cts -m CtsLibcoreTestCases
Test: ScannerTest on host/x86
(cherry picked from commit 9084fb6f7c4da5d42c6b58b523d71e8ad65dd754)
Change-Id: Iad7c5b07647542bcaa2e6621107c0cd206249bae
(cherry picked from commit 285e53bf2b7c81134ae8bdb4044df216610bed17)
diff --git a/benchmarks/src/benchmarks/regression/StringReplaceAllBenchmark.java b/benchmarks/src/benchmarks/regression/StringReplaceAllBenchmark.java
new file mode 100644
index 0000000..73e7979
--- /dev/null
+++ b/benchmarks/src/benchmarks/regression/StringReplaceAllBenchmark.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License
+ */
+
+package benchmarks.regression;
+
+import com.google.caliper.Param;
+
+public class StringReplaceAllBenchmark {
+ // NOTE: These estimates of MOVEABLE / NON_MOVEABLE are based on a knowledge of
+ // ART implementation details. They make a difference here because JNI calls related
+ // to strings took different paths depending on whether the String in question was
+ // moveable or not.
+ enum StringLengths {
+ EMPTY(""),
+ MOVEABLE_16(makeString(16)),
+ MOVEABLE_256(makeString(256)),
+ MOVEABLE_1024(makeString(1024)),
+ NON_MOVEABLE(makeString(64 * 1024)),
+ BOOT_IMAGE(java.util.jar.JarFile.MANIFEST_NAME);
+
+ private final String value;
+
+ StringLengths(String s) {
+ this.value = s;
+ }
+ }
+
+ private static final String makeString(int length) {
+ final String sequence8 = "abcdefghijklmnop";
+ final int numAppends = (length / 16) - 1;
+ StringBuilder stringBuilder = new StringBuilder(length);
+
+ // (n-1) occurences of "abcdefghijklmnop"
+ for (int i = 0; i < numAppends; ++i) {
+ stringBuilder.append(sequence8);
+ }
+
+ // and one final occurence of qrstuvwx.
+ stringBuilder.append("qrstuvwx");
+
+ return stringBuilder.toString();
+ }
+
+ @Param private StringLengths s;
+
+ public void timeReplaceAllTrivialPatternNonExistent(int reps) {
+ for (int i = 0; i < reps; ++i) {
+ s.value.replaceAll("fish", "0");
+ }
+ }
+
+ public void timeReplaceTrivialPatternAllRepeated(int reps) {
+ for (int i = 0; i < reps; ++i) {
+ s.value.replaceAll("jklm", "0");
+ }
+ }
+
+ public void timeReplaceAllTrivialPatternSingleOccurence(int reps) {
+ for (int i = 0; i < reps; ++i) {
+ s.value.replaceAll("qrst", "0");
+ }
+ }
+}
diff --git a/luni/src/main/native/java_util_regex_Matcher.cpp b/luni/src/main/native/java_util_regex_Matcher.cpp
index 2461afc..2271ba8 100644
--- a/luni/src/main/native/java_util_regex_Matcher.cpp
+++ b/luni/src/main/native/java_util_regex_Matcher.cpp
@@ -16,75 +16,114 @@
#define LOG_TAG "Matcher"
+#include <memory>
#include <stdlib.h>
+#include <android-base/logging.h>
+
#include "IcuUtilities.h"
#include "JNIHelp.h"
#include "JniConstants.h"
#include "JniException.h"
-#include "ScopedPrimitiveArray.h"
#include "ScopedJavaUnicodeString.h"
+#include "ScopedPrimitiveArray.h"
+#include "ScopedStringChars.h"
#include "jni.h"
#include "unicode/parseerr.h"
#include "unicode/regex.h"
// ICU documentation: http://icu-project.org/apiref/icu4c/classRegexMatcher.html
-static icu::RegexMatcher* toRegexMatcher(jlong address) {
- return reinterpret_cast<icu::RegexMatcher*>(static_cast<uintptr_t>(address));
-}
-
/**
- * We use ICU4C's RegexMatcher class, but our input is on the Java heap and potentially moving
- * around between calls. This wrapper class ensures that our RegexMatcher is always pointing at
- * the current location of the char[]. Earlier versions of Android simply copied the data to the
- * native heap, but that's wasteful and hides allocations from the garbage collector.
+ * Encapsulates an instance of ICU4C's RegexMatcher class along with a copy of
+ * the input it's currently operating on in the native heap.
+ *
+ * Rationale: We choose to make a copy here because it turns out to be a lot
+ * cheaper when a moving GC and/or string compression is enabled. This is
+ * because env->GetStringChars() always copies in this scenario. This becomes
+ * especially bad when the String in question is long and/or contains a large
+ * number of matches.
+ *
+ * Drawbacks: The native allocation associated with this class is no longer
+ * fixed size, so we're effectively lying to the NativeAllocationRegistry about
+ * the size of the object(s) we're allocating on the native heap. The peak
+ * memory usage doesn't change though, given that GetStringChars would have
+ * made an allocation of precisely the same size.
*/
-class MatcherAccessor {
+class MatcherState {
public:
- MatcherAccessor(JNIEnv* env, jlong address, jstring javaInput, bool reset) {
- init(env, address);
+ MatcherState(icu::RegexMatcher* matcher) :
+ mMatcher(matcher),
+ mUChars(nullptr),
+ mUText(nullptr),
+ mStatus(U_ZERO_ERROR) {
+ }
- mJavaInput = javaInput;
- mChars = env->GetStringChars(mJavaInput, NULL);
- if (mChars == NULL) {
- return;
+ bool updateInput(JNIEnv* env, jstring input) {
+ // First, close the UText struct, since we're about to allocate a new one.
+ if (mUText != nullptr) {
+ utext_close(mUText);
+ mUText = nullptr;
}
- mUText = utext_openUChars(NULL, mChars, env->GetStringLength(mJavaInput), &mStatus);
- if (mUText == NULL) {
- return;
+ // Then delete the UChar* associated with the UText struct..
+ mUChars.reset(nullptr);
+
+ // TODO: We should investigate whether we can avoid an additional copy
+ // in the native heap when is_copy == JNI_TRUE. The problem with doing
+ // that is that we might call ReleaseStringChars with a different
+ // JNIEnv* on a different downcall. This is currently safe as
+ // implemented in ART, but is unlikely to be portable and the spec stays
+ // silent on the matter.
+ ScopedStringChars inputChars(env, input);
+ if (inputChars.get() == nullptr) {
+ // There will be an exception pending if we get here.
+ return false;
}
- if (reset) {
- mMatcher->reset(mUText);
- } else {
- mMatcher->refreshInputText(mUText, mStatus);
+ // Make a copy of |input| on the native heap. This copy will be live
+ // until the next call to updateInput or close.
+ mUChars.reset(new (std::nothrow) UChar[inputChars.size()]);
+ if (mUChars.get() == nullptr) {
+ env->ThrowNew(env->FindClass("Ljava/lang/OutOfMemoryError;"), "Out of memory");
+ return false;
+ }
+
+ static_assert(sizeof(UChar) == sizeof(jchar), "sizeof(Uchar) != sizeof(jchar)");
+ memcpy(mUChars.get(), inputChars.get(), inputChars.size() * sizeof(jchar));
+
+ // Reset any errors that might have occurred on previous patches.
+ mStatus = U_ZERO_ERROR;
+ mUText = utext_openUChars(nullptr, mUChars.get(), inputChars.size(), &mStatus);
+ if (mUText == nullptr) {
+ CHECK(maybeThrowIcuException(env, "utext_openUChars", mStatus));
+ return false;
+ }
+
+ // It is an error for ICU to have returned a non-null mUText but to
+ // still have indicated an error.
+ CHECK(U_SUCCESS(mStatus));
+
+ mMatcher->reset(mUText);
+ return true;
+ }
+
+ ~MatcherState() {
+ if (mUText != nullptr) {
+ utext_close(mUText);
}
}
- MatcherAccessor(JNIEnv* env, jlong address) {
- init(env, address);
- }
-
- ~MatcherAccessor() {
- utext_close(mUText);
- if (mJavaInput) {
- mEnv->ReleaseStringChars(mJavaInput, mChars);
- }
- maybeThrowIcuException(mEnv, "utext_close", mStatus);
- }
-
- icu::RegexMatcher* operator->() {
- return mMatcher;
+ icu::RegexMatcher* matcher() {
+ return mMatcher.get();
}
UErrorCode& status() {
return mStatus;
}
- void updateOffsets(jintArray javaOffsets) {
- ScopedIntArrayRW offsets(mEnv, javaOffsets);
+ void updateOffsets(JNIEnv* env, jintArray javaOffsets) {
+ ScopedIntArrayRW offsets(env, javaOffsets);
if (offsets.get() == NULL) {
return;
}
@@ -96,29 +135,23 @@
}
private:
- void init(JNIEnv* env, jlong address) {
- mEnv = env;
- mJavaInput = NULL;
- mMatcher = toRegexMatcher(address);
- mChars = NULL;
- mStatus = U_ZERO_ERROR;
- mUText = NULL;
- }
-
- JNIEnv* mEnv;
- jstring mJavaInput;
- icu::RegexMatcher* mMatcher;
- const jchar* mChars;
- UErrorCode mStatus;
+ std::unique_ptr<icu::RegexMatcher> mMatcher;
+ std::unique_ptr<UChar[]> mUChars;
UText* mUText;
+ UErrorCode mStatus;
// Disallow copy and assignment.
- MatcherAccessor(const MatcherAccessor&);
- void operator=(const MatcherAccessor&);
+ MatcherState(const MatcherState&);
+ void operator=(const MatcherState&);
};
+static inline MatcherState* toMatcherState(jlong address) {
+ return reinterpret_cast<MatcherState*>(static_cast<uintptr_t>(address));
+}
+
static void Matcher_free(void* address) {
- delete reinterpret_cast<icu::RegexMatcher*>(address);
+ MatcherState* state = reinterpret_cast<MatcherState*>(address);
+ delete state;
}
static jlong Matcher_getNativeFinalizer(JNIEnv*, jclass) {
@@ -131,51 +164,48 @@
return 200; // Very rough guess based on a quick look at the implementation.
}
-static jint Matcher_findImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jint startIndex, jintArray offsets) {
- MatcherAccessor matcher(env, addr, javaText, false);
- UBool result = matcher->find(startIndex, matcher.status());
+static jint Matcher_findImpl(JNIEnv* env, jclass, jlong addr, jint startIndex, jintArray offsets) {
+ MatcherState* state = toMatcherState(addr);
+ UBool result = state->matcher()->find(startIndex, state->status());
if (result) {
- matcher.updateOffsets(offsets);
+ state->updateOffsets(env, offsets);
}
return result;
}
-static jint Matcher_findNextImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jintArray offsets) {
- MatcherAccessor matcher(env, addr, javaText, false);
- if (matcher.status() != U_ZERO_ERROR) {
- return -1;
- }
- UBool result = matcher->find();
+static jint Matcher_findNextImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
+ MatcherState* state = toMatcherState(addr);
+ UBool result = state->matcher()->find();
if (result) {
- matcher.updateOffsets(offsets);
+ state->updateOffsets(env, offsets);
}
return result;
}
-static jint Matcher_groupCountImpl(JNIEnv* env, jclass, jlong addr) {
- MatcherAccessor matcher(env, addr);
- return matcher->groupCount();
+static jint Matcher_groupCountImpl(JNIEnv*, jclass, jlong addr) {
+ MatcherState* state = toMatcherState(addr);
+ return state->matcher()->groupCount();
}
-static jint Matcher_hitEndImpl(JNIEnv* env, jclass, jlong addr) {
- MatcherAccessor matcher(env, addr);
- return matcher->hitEnd();
+static jint Matcher_hitEndImpl(JNIEnv*, jclass, jlong addr) {
+ MatcherState* state = toMatcherState(addr);
+ return state->matcher()->hitEnd();
}
-static jint Matcher_lookingAtImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jintArray offsets) {
- MatcherAccessor matcher(env, addr, javaText, false);
- UBool result = matcher->lookingAt(matcher.status());
+static jint Matcher_lookingAtImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
+ MatcherState* state = toMatcherState(addr);
+ UBool result = state->matcher()->lookingAt(state->status());
if (result) {
- matcher.updateOffsets(offsets);
+ state->updateOffsets(env, offsets);
}
return result;
}
-static jint Matcher_matchesImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jintArray offsets) {
- MatcherAccessor matcher(env, addr, javaText, false);
- UBool result = matcher->matches(matcher.status());
+static jint Matcher_matchesImpl(JNIEnv* env, jclass, jlong addr, jintArray offsets) {
+ MatcherState* state = toMatcherState(addr);
+ UBool result = state->matcher()->matches(state->status());
if (result) {
- matcher.updateOffsets(offsets);
+ state->updateOffsets(env, offsets);
}
return result;
}
@@ -184,28 +214,33 @@
icu::RegexPattern* pattern = reinterpret_cast<icu::RegexPattern*>(static_cast<uintptr_t>(patternAddr));
UErrorCode status = U_ZERO_ERROR;
icu::RegexMatcher* result = pattern->matcher(status);
- maybeThrowIcuException(env, "RegexPattern::matcher", status);
- return reinterpret_cast<uintptr_t>(result);
+ if (maybeThrowIcuException(env, "RegexPattern::matcher", status)) {
+ return 0;
+ }
+
+ return reinterpret_cast<uintptr_t>(new MatcherState(result));
}
-static jint Matcher_requireEndImpl(JNIEnv* env, jclass, jlong addr) {
- MatcherAccessor matcher(env, addr);
- return matcher->requireEnd();
+static jint Matcher_requireEndImpl(JNIEnv*, jclass, jlong addr) {
+ MatcherState* state = toMatcherState(addr);
+ return state->matcher()->requireEnd();
}
static void Matcher_setInputImpl(JNIEnv* env, jclass, jlong addr, jstring javaText, jint start, jint end) {
- MatcherAccessor matcher(env, addr, javaText, true);
- matcher->region(start, end, matcher.status());
+ MatcherState* state = toMatcherState(addr);
+ if (state->updateInput(env, javaText)) {
+ state->matcher()->region(start, end, state->status());
+ }
}
-static void Matcher_useAnchoringBoundsImpl(JNIEnv* env, jclass, jlong addr, jboolean value) {
- MatcherAccessor matcher(env, addr);
- matcher->useAnchoringBounds(value);
+static void Matcher_useAnchoringBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) {
+ MatcherState* state = toMatcherState(addr);
+ state->matcher()->useAnchoringBounds(value);
}
-static void Matcher_useTransparentBoundsImpl(JNIEnv* env, jclass, jlong addr, jboolean value) {
- MatcherAccessor matcher(env, addr);
- matcher->useTransparentBounds(value);
+static void Matcher_useTransparentBoundsImpl(JNIEnv*, jclass, jlong addr, jboolean value) {
+ MatcherState* state = toMatcherState(addr);
+ state->matcher()->useTransparentBounds(value);
}
static jint Matcher_getMatchedGroupIndex0(JNIEnv* env, jclass, jlong patternAddr, jstring javaGroupName) {
@@ -227,13 +262,13 @@
static JNINativeMethod gMethods[] = {
NATIVE_METHOD(Matcher, getMatchedGroupIndex0, "(JLjava/lang/String;)I"),
- NATIVE_METHOD(Matcher, findImpl, "(JLjava/lang/String;I[I)Z"),
- NATIVE_METHOD(Matcher, findNextImpl, "(JLjava/lang/String;[I)Z"),
+ NATIVE_METHOD(Matcher, findImpl, "(JI[I)Z"),
+ NATIVE_METHOD(Matcher, findNextImpl, "(J[I)Z"),
NATIVE_METHOD(Matcher, getNativeFinalizer, "()J"),
NATIVE_METHOD(Matcher, groupCountImpl, "(J)I"),
NATIVE_METHOD(Matcher, hitEndImpl, "(J)Z"),
- NATIVE_METHOD(Matcher, lookingAtImpl, "(JLjava/lang/String;[I)Z"),
- NATIVE_METHOD(Matcher, matchesImpl, "(JLjava/lang/String;[I)Z"),
+ NATIVE_METHOD(Matcher, lookingAtImpl, "(J[I)Z"),
+ NATIVE_METHOD(Matcher, matchesImpl, "(J[I)Z"),
NATIVE_METHOD(Matcher, nativeSize, "()I"),
NATIVE_METHOD(Matcher, openImpl, "(J)J"),
NATIVE_METHOD(Matcher, requireEndImpl, "(J)Z"),
diff --git a/ojluni/src/main/java/java/util/regex/Matcher.java b/ojluni/src/main/java/java/util/regex/Matcher.java
index 3d79ba7..8e5b728 100644
--- a/ojluni/src/main/java/java/util/regex/Matcher.java
+++ b/ojluni/src/main/java/java/util/regex/Matcher.java
@@ -444,7 +444,7 @@
*/
public boolean matches() {
synchronized (this) {
- matchFound = matchesImpl(address, input, matchOffsets);
+ matchFound = matchesImpl(address, matchOffsets);
}
return matchFound;
}
@@ -466,7 +466,7 @@
*/
public boolean find() {
synchronized (this) {
- matchFound = findNextImpl(address, input, matchOffsets);
+ matchFound = findNextImpl(address, matchOffsets);
}
return matchFound;
}
@@ -495,7 +495,7 @@
}
synchronized (this) {
- matchFound = findImpl(address, input, start, matchOffsets);
+ matchFound = findImpl(address, start, matchOffsets);
}
return matchFound;
}
@@ -516,7 +516,7 @@
*/
public boolean lookingAt() {
synchronized (this) {
- matchFound = lookingAtImpl(address, input, matchOffsets);
+ matchFound = lookingAtImpl(address, matchOffsets);
}
return matchFound;
}
@@ -1181,13 +1181,13 @@
}
private static native int getMatchedGroupIndex0(long patternAddr, String name);
- private static native boolean findImpl(long addr, String s, int startIndex, int[] offsets);
- private static native boolean findNextImpl(long addr, String s, int[] offsets);
+ private static native boolean findImpl(long addr, int startIndex, int[] offsets);
+ private static native boolean findNextImpl(long addr, int[] offsets);
private static native long getNativeFinalizer();
private static native int groupCountImpl(long addr);
private static native boolean hitEndImpl(long addr);
- private static native boolean lookingAtImpl(long addr, String s, int[] offsets);
- private static native boolean matchesImpl(long addr, String s, int[] offsets);
+ private static native boolean lookingAtImpl(long addr, int[] offsets);
+ private static native boolean matchesImpl(long addr, int[] offsets);
private static native int nativeSize();
private static native long openImpl(long patternAddr);
private static native boolean requireEndImpl(long addr);