Add inline version of String.indexOf().
This provides an inline-native version of String.indexOf(int) and
String.indexOf(int, int), i.e. the functions that work like strchr().
This has a fairly solid impact on specific benchmarks, and might give a
boost to an app somewhere.
Added some indexOf tests to 020-string.
Added hard-coded field offsets for String. These are verified during
startup. Improves some of our String micro-benchmarks by ~10%.
diff --git a/tests/020-string/expected.txt b/tests/020-string/expected.txt
index 67b7159..081fea3 100644
--- a/tests/020-string/expected.txt
+++ b/tests/020-string/expected.txt
@@ -3,3 +3,5 @@
Compare result is 32
Compare unicode: -65302
Got expected exception
+subStr is 'uick brown fox jumps over the lazy '
+Indexes are: 0:-1:0:43:33:-1:18:13:13:-1:18:18:-1:13:-1:-1:-1
diff --git a/tests/020-string/src/Main.java b/tests/020-string/src/Main.java
index 4a57fc6..bb8ce1f 100644
--- a/tests/020-string/src/Main.java
+++ b/tests/020-string/src/Main.java
@@ -19,6 +19,11 @@
*/
public class Main {
public static void main(String args[]) {
+ basicTest();
+ indexTest();
+ }
+
+ public static void basicTest() {
String baseStr = "*** This is a very nice string!!!";
String testStr;
int i;
@@ -49,4 +54,31 @@
System.out.println("Got expected exception");
}
}
+
+ public static void indexTest() {
+ String baseStr = "The quick brown fox jumps over the lazy dog!";
+ String subStr;
+
+ subStr = baseStr.substring(5, baseStr.length() - 4);
+ System.out.println("subStr is '" + subStr + "'");
+
+ System.out.println("Indexes are: " +
+ baseStr.indexOf('T') + ":" +
+ subStr.indexOf('T') + ":" +
+ subStr.indexOf('u') + ":" +
+ baseStr.indexOf('!') + ":" +
+ subStr.indexOf('y') + ":" +
+ subStr.indexOf('d') + ":" +
+ baseStr.indexOf('x') + ":" +
+ subStr.indexOf('x', 0) + ":" +
+ subStr.indexOf('x', -1) + ":" +
+ subStr.indexOf('x', 200) + ":" +
+ baseStr.indexOf('x', 17) + ":" +
+ baseStr.indexOf('x', 18) + ":" +
+ baseStr.indexOf('x', 19) + ":" +
+ subStr.indexOf('x', 13) + ":" +
+ subStr.indexOf('x', 14) + ":" +
+ subStr.indexOf('&') + ":" +
+ baseStr.indexOf(0x12341234));
+ }
}
diff --git a/vm/DalvikVersion.h b/vm/DalvikVersion.h
index efbb393..dfc95ce 100644
--- a/vm/DalvikVersion.h
+++ b/vm/DalvikVersion.h
@@ -32,6 +32,6 @@
* way classes load changes, e.g. field ordering or vtable layout. Changing
* this guarantees that the optimized form of the DEX file is regenerated.
*/
-#define DALVIK_VM_BUILD 17
+#define DALVIK_VM_BUILD 18
#endif /*_DALVIK_VERSION*/
diff --git a/vm/InlineNative.c b/vm/InlineNative.c
index f829360..fd28708 100644
--- a/vm/InlineNative.c
+++ b/vm/InlineNative.c
@@ -129,14 +129,14 @@
return false;
//LOGI("String.charAt this=0x%08x index=%d\n", arg0, arg1);
- count = dvmGetFieldInt((Object*) arg0, gDvm.offJavaLangString_count);
+ count = dvmGetFieldInt((Object*) arg0, STRING_FIELDOFF_COUNT);
if ((s4) arg1 < 0 || (s4) arg1 >= count) {
dvmThrowException("Ljava/lang/StringIndexOutOfBoundsException;", NULL);
return false;
} else {
- offset = dvmGetFieldInt((Object*) arg0, gDvm.offJavaLangString_offset);
+ offset = dvmGetFieldInt((Object*) arg0, STRING_FIELDOFF_OFFSET);
chars = (ArrayObject*)
- dvmGetFieldObject((Object*) arg0, gDvm.offJavaLangString_value);
+ dvmGetFieldObject((Object*) arg0, STRING_FIELDOFF_VALUE);
pResult->i = ((const u2*) chars->contents)[arg1 + offset];
return true;
@@ -157,17 +157,17 @@
int thisOffset, compOffset, thisCount, compCount;
thisCount =
- dvmGetFieldInt((Object*) thisStrObj, gDvm.offJavaLangString_count);
+ dvmGetFieldInt((Object*) thisStrObj, STRING_FIELDOFF_COUNT);
compCount =
- dvmGetFieldInt((Object*) compStrObj, gDvm.offJavaLangString_count);
+ dvmGetFieldInt((Object*) compStrObj, STRING_FIELDOFF_COUNT);
thisOffset =
- dvmGetFieldInt((Object*) thisStrObj, gDvm.offJavaLangString_offset);
+ dvmGetFieldInt((Object*) thisStrObj, STRING_FIELDOFF_OFFSET);
compOffset =
- dvmGetFieldInt((Object*) compStrObj, gDvm.offJavaLangString_offset);
+ dvmGetFieldInt((Object*) compStrObj, STRING_FIELDOFF_OFFSET);
thisArray = (ArrayObject*)
- dvmGetFieldObject((Object*) thisStrObj, gDvm.offJavaLangString_value);
+ dvmGetFieldObject((Object*) thisStrObj, STRING_FIELDOFF_VALUE);
compArray = (ArrayObject*)
- dvmGetFieldObject((Object*) compStrObj, gDvm.offJavaLangString_value);
+ dvmGetFieldObject((Object*) compStrObj, STRING_FIELDOFF_VALUE);
thisStr = dvmCreateCstrFromString(thisStrObj);
compStr = dvmCreateCstrFromString(compStrObj);
@@ -221,16 +221,16 @@
const u2* compChars;
int i, minCount, countDiff;
- thisCount = dvmGetFieldInt((Object*) arg0, gDvm.offJavaLangString_count);
- compCount = dvmGetFieldInt((Object*) arg1, gDvm.offJavaLangString_count);
+ thisCount = dvmGetFieldInt((Object*) arg0, STRING_FIELDOFF_COUNT);
+ compCount = dvmGetFieldInt((Object*) arg1, STRING_FIELDOFF_COUNT);
countDiff = thisCount - compCount;
minCount = (countDiff < 0) ? thisCount : compCount;
- thisOffset = dvmGetFieldInt((Object*) arg0, gDvm.offJavaLangString_offset);
- compOffset = dvmGetFieldInt((Object*) arg1, gDvm.offJavaLangString_offset);
+ thisOffset = dvmGetFieldInt((Object*) arg0, STRING_FIELDOFF_OFFSET);
+ compOffset = dvmGetFieldInt((Object*) arg1, STRING_FIELDOFF_OFFSET);
thisArray = (ArrayObject*)
- dvmGetFieldObject((Object*) arg0, gDvm.offJavaLangString_value);
+ dvmGetFieldObject((Object*) arg0, STRING_FIELDOFF_VALUE);
compArray = (ArrayObject*)
- dvmGetFieldObject((Object*) arg1, gDvm.offJavaLangString_value);
+ dvmGetFieldObject((Object*) arg1, STRING_FIELDOFF_VALUE);
thisChars = ((const u2*) thisArray->contents) + thisOffset;
compChars = ((const u2*) compArray->contents) + compOffset;
@@ -321,19 +321,19 @@
int i;
/* quick length check */
- thisCount = dvmGetFieldInt((Object*) arg0, gDvm.offJavaLangString_count);
- compCount = dvmGetFieldInt((Object*) arg1, gDvm.offJavaLangString_count);
+ thisCount = dvmGetFieldInt((Object*) arg0, STRING_FIELDOFF_COUNT);
+ compCount = dvmGetFieldInt((Object*) arg1, STRING_FIELDOFF_COUNT);
if (thisCount != compCount) {
pResult->i = false;
return true;
}
- thisOffset = dvmGetFieldInt((Object*) arg0, gDvm.offJavaLangString_offset);
- compOffset = dvmGetFieldInt((Object*) arg1, gDvm.offJavaLangString_offset);
+ thisOffset = dvmGetFieldInt((Object*) arg0, STRING_FIELDOFF_OFFSET);
+ compOffset = dvmGetFieldInt((Object*) arg1, STRING_FIELDOFF_OFFSET);
thisArray = (ArrayObject*)
- dvmGetFieldObject((Object*) arg0, gDvm.offJavaLangString_value);
+ dvmGetFieldObject((Object*) arg0, STRING_FIELDOFF_VALUE);
compArray = (ArrayObject*)
- dvmGetFieldObject((Object*) arg1, gDvm.offJavaLangString_value);
+ dvmGetFieldObject((Object*) arg1, STRING_FIELDOFF_VALUE);
thisChars = ((const u2*) thisArray->contents) + thisOffset;
compChars = ((const u2*) compArray->contents) + compOffset;
@@ -383,7 +383,90 @@
if (!dvmValidateObject((Object*) arg0))
return false;
- pResult->i = dvmGetFieldInt((Object*) arg0, gDvm.offJavaLangString_count);
+ pResult->i = dvmGetFieldInt((Object*) arg0, STRING_FIELDOFF_COUNT);
+ return true;
+}
+
+/*
+ * Determine the index of the first character matching "ch". The string
+ * to search is described by "chars", "offset", and "count".
+ *
+ * The "ch" parameter is allowed to be > 0xffff. Our Java-language
+ * implementation does not currently handle this, so neither do we.
+ *
+ * Out-of-range "start" is fine: negative means 0, >= count finds nothing.
+ *
+ * Returns -1 if no match is found.
+ */
+static inline int indexOfCommon(Object* strObj, int ch, int start)
+{
+ //if ((ch & 0xffff) != ch) /* 32-bit code point */
+ // return -1;
+
+ /* pull out the basic elements */
+ ArrayObject* charArray =
+ (ArrayObject*) dvmGetFieldObject(strObj, STRING_FIELDOFF_VALUE);
+ const u2* chars = (const u2*) charArray->contents;
+ int offset = dvmGetFieldInt(strObj, STRING_FIELDOFF_OFFSET);
+ int count = dvmGetFieldInt(strObj, STRING_FIELDOFF_COUNT);
+ //LOGI("String.indexOf(0x%08x, 0x%04x, %d) off=%d count=%d\n",
+ // (u4) strObj, ch, start, offset, count);
+
+ /* factor out the offset */
+ chars += offset;
+
+ if (start < 0)
+ start = 0;
+
+#if 0
+ /* 16-bit loop, simple */
+ while (start < count) {
+ if (chars[start] == ch)
+ return start;
+ start++;
+ }
+#else
+ /* 16-bit loop, slightly better on ARM */
+ const u2* ptr = chars + start;
+ const u2* endPtr = chars + count;
+ while (ptr < endPtr) {
+ if (*ptr++ == ch)
+ return (ptr-1) - chars;
+ }
+#endif
+
+ return -1;
+}
+
+/*
+ * public int indexOf(int c)
+ *
+ * Scan forward through the string for a matching character.
+ */
+static bool javaLangString_indexOf_I(u4 arg0, u4 arg1, u4 arg2, u4 arg3,
+ JValue* pResult)
+{
+ /* null reference check on "this" */
+ if (!dvmValidateObject((Object*) arg0))
+ return false;
+
+ pResult->i = indexOfCommon((Object*) arg0, arg1, 0);
+ return true;
+}
+
+/*
+ * public int indexOf(int c, int start)
+ *
+ * Scan forward through the string for a matching character.
+ */
+static bool javaLangString_indexOf_II(u4 arg0, u4 arg1, u4 arg2, u4 arg3,
+ JValue* pResult)
+{
+ /* null reference check on "this" */
+ if (!dvmValidateObject((Object*) arg0))
+ return false;
+
+ pResult->i = indexOfCommon((Object*) arg0, arg1, arg2);
return true;
}
@@ -564,6 +647,10 @@
"Ljava/lang/String;", "compareTo", "(Ljava/lang/String;)I" },
{ javaLangString_equals,
"Ljava/lang/String;", "equals", "(Ljava/lang/Object;)Z" },
+ { javaLangString_indexOf_I,
+ "Ljava/lang/String;", "indexOf", "(I)I" },
+ { javaLangString_indexOf_II,
+ "Ljava/lang/String;", "indexOf", "(II)I" },
{ javaLangString_length,
"Ljava/lang/String;", "length", "()I" },
@@ -587,7 +674,6 @@
"Ljava/lang/Math;", "sin", "(D)D" },
};
-
/*
* Allocate some tables.
*/
@@ -696,3 +782,4 @@
#endif
return (*gDvmInlineOpsTable[opIndex].func)(arg0, arg1, arg2, arg3, pResult);
}
+
diff --git a/vm/UtfString.c b/vm/UtfString.c
index dfb76bc..8e20a0f 100644
--- a/vm/UtfString.c
+++ b/vm/UtfString.c
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/*
* UTF-8 and Unicode string manipulation, plus java/lang/String convenience
* functions.
@@ -69,6 +70,30 @@
return false;
}
+ bool badValue = false;
+ if (gDvm.offJavaLangString_value != STRING_FIELDOFF_VALUE) {
+ LOGE("InlineNative: String.value offset = %d, expected %d\n",
+ gDvm.offJavaLangString_value, STRING_FIELDOFF_VALUE);
+ badValue = true;
+ }
+ if (gDvm.offJavaLangString_count != STRING_FIELDOFF_COUNT) {
+ LOGE("InlineNative: String.count offset = %d, expected %d\n",
+ gDvm.offJavaLangString_count, STRING_FIELDOFF_COUNT);
+ badValue = true;
+ }
+ if (gDvm.offJavaLangString_offset != STRING_FIELDOFF_OFFSET) {
+ LOGE("InlineNative: String.offset offset = %d, expected %d\n",
+ gDvm.offJavaLangString_offset, STRING_FIELDOFF_OFFSET);
+ badValue = true;
+ }
+ if (gDvm.offJavaLangString_hashCode != STRING_FIELDOFF_HASHCODE) {
+ LOGE("InlineNative: String.hashCode offset = %d, expected %d\n",
+ gDvm.offJavaLangString_hashCode, STRING_FIELDOFF_HASHCODE);
+ badValue = true;
+ }
+ if (badValue)
+ return false;
+
gDvm.javaLangStringReady = 1;
return true;
@@ -213,11 +238,11 @@
}
u4 dvmComputeStringHash(StringObject* strObj) {
ArrayObject* chars = (ArrayObject*) dvmGetFieldObject((Object*) strObj,
- gDvm.offJavaLangString_value);
+ STRING_FIELDOFF_VALUE);
int offset, len;
- len = dvmGetFieldInt((Object*) strObj, gDvm.offJavaLangString_count);
- offset = dvmGetFieldInt((Object*) strObj, gDvm.offJavaLangString_offset);
+ len = dvmGetFieldInt((Object*) strObj, STRING_FIELDOFF_COUNT);
+ offset = dvmGetFieldInt((Object*) strObj, STRING_FIELDOFF_OFFSET);
return dvmComputeUtf16Hash((u2*) chars->contents + offset, len);
}
@@ -285,11 +310,11 @@
dvmConvertUtf8ToUtf16((u2*)chars->contents, utf8Str);
hashCode = dvmComputeUtf16Hash((u2*) chars->contents, utf16Length);
- dvmSetFieldObject((Object*)newObj, gDvm.offJavaLangString_value,
+ dvmSetFieldObject((Object*)newObj, STRING_FIELDOFF_VALUE,
(Object*)chars);
dvmReleaseTrackedAllocIFN((Object*) chars, NULL, allocFlags);
- dvmSetFieldInt((Object*)newObj, gDvm.offJavaLangString_count, utf16Length);
- dvmSetFieldInt((Object*)newObj, gDvm.offJavaLangString_hashCode, hashCode);
+ dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_COUNT, utf16Length);
+ dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_HASHCODE, hashCode);
/* leave offset set to zero */
/* debugging stuff */
@@ -339,11 +364,11 @@
memcpy(chars->contents, unichars, len * sizeof(u2));
hashCode = dvmComputeUtf16Hash((u2*) chars->contents, len);
- dvmSetFieldObject((Object*)newObj, gDvm.offJavaLangString_value,
+ dvmSetFieldObject((Object*)newObj, STRING_FIELDOFF_VALUE,
(Object*)chars);
dvmReleaseTrackedAlloc((Object*) chars, NULL);
- dvmSetFieldInt((Object*)newObj, gDvm.offJavaLangString_count, len);
- dvmSetFieldInt((Object*)newObj, gDvm.offJavaLangString_hashCode, hashCode);
+ dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_COUNT, len);
+ dvmSetFieldInt((Object*)newObj, STRING_FIELDOFF_HASHCODE, hashCode);
/* leave offset set to zero */
/* debugging stuff */
@@ -371,10 +396,10 @@
if (jstr == NULL)
return NULL;
- len = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_count);
- offset = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_offset);
+ len = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT);
+ offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET);
chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr,
- gDvm.offJavaLangString_value);
+ STRING_FIELDOFF_VALUE);
data = (const u2*) chars->contents + offset;
assert(offset + len <= (int) chars->length);
@@ -416,10 +441,10 @@
if (jstr == NULL)
return 0; // should we throw something? assert?
- len = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_count);
- offset = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_offset);
+ len = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT);
+ offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET);
chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr,
- gDvm.offJavaLangString_value);
+ STRING_FIELDOFF_VALUE);
data = (const u2*) chars->contents + offset;
assert(offset + len <= (int) chars->length);
@@ -431,7 +456,7 @@
*/
int dvmStringLen(StringObject* jstr)
{
- return dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_count);
+ return dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_COUNT);
}
/*
@@ -440,7 +465,7 @@
ArrayObject* dvmStringCharArray(StringObject* jstr)
{
return (ArrayObject*) dvmGetFieldObject((Object*) jstr,
- gDvm.offJavaLangString_value);
+ STRING_FIELDOFF_VALUE);
}
/*
@@ -451,9 +476,9 @@
ArrayObject* chars;
int offset;
- offset = dvmGetFieldInt((Object*) jstr, gDvm.offJavaLangString_offset);
+ offset = dvmGetFieldInt((Object*) jstr, STRING_FIELDOFF_OFFSET);
chars = (ArrayObject*) dvmGetFieldObject((Object*) jstr,
- gDvm.offJavaLangString_value);
+ STRING_FIELDOFF_VALUE);
return (const u2*) chars->contents + offset;
}
@@ -476,17 +501,17 @@
assert(gDvm.javaLangStringReady > 0);
/* get offset and length into char array; all values are in 16-bit units */
- len1 = dvmGetFieldInt((Object*) strObj1, gDvm.offJavaLangString_count);
- offset1 = dvmGetFieldInt((Object*) strObj1, gDvm.offJavaLangString_offset);
- len2 = dvmGetFieldInt((Object*) strObj2, gDvm.offJavaLangString_count);
- offset2 = dvmGetFieldInt((Object*) strObj2, gDvm.offJavaLangString_offset);
+ len1 = dvmGetFieldInt((Object*) strObj1, STRING_FIELDOFF_COUNT);
+ offset1 = dvmGetFieldInt((Object*) strObj1, STRING_FIELDOFF_OFFSET);
+ len2 = dvmGetFieldInt((Object*) strObj2, STRING_FIELDOFF_COUNT);
+ offset2 = dvmGetFieldInt((Object*) strObj2, STRING_FIELDOFF_OFFSET);
if (len1 != len2)
return len1 - len2;
chars1 = (ArrayObject*) dvmGetFieldObject((Object*) strObj1,
- gDvm.offJavaLangString_value);
+ STRING_FIELDOFF_VALUE);
chars2 = (ArrayObject*) dvmGetFieldObject((Object*) strObj2,
- gDvm.offJavaLangString_value);
+ STRING_FIELDOFF_VALUE);
/* damage here actually indicates a broken java/lang/String */
assert(offset1 + len1 <= (int) chars1->length);
diff --git a/vm/UtfString.h b/vm/UtfString.h
index ca500a7..8f0f972 100644
--- a/vm/UtfString.h
+++ b/vm/UtfString.h
@@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
/*
* UTF-8 and Unicode string manipulation functions, plus convenience
* functions for working with java/lang/String.
@@ -21,6 +22,30 @@
#define _DALVIK_STRING
/*
+ * (This is private to UtfString.c, but we cheat a bit and also use it
+ * for InlineNative.c. Not really worth creating a separate header.)
+ *
+ * We can avoid poking around in gDvm by hard-coding the expected values of
+ * the String field offsets. This will be annoying if String is in flux
+ * or the VM field layout is changing, so we use defines here to make it
+ * easy to switch back to the gDvm version.
+ *
+ * The values are checked for correctness during startup.
+ */
+//#define USE_GLOBAL_STRING_DEFS
+#ifdef USE_GLOBAL_STRING_DEFS
+# define STRING_FIELDOFF_VALUE gDvm.offJavaLangString_value
+# define STRING_FIELDOFF_OFFSET gDvm.offJavaLangString_offset
+# define STRING_FIELDOFF_COUNT gDvm.offJavaLangString_count
+# define STRING_FIELDOFF_HASHCODE gDvm.offJavaLangString_hashCode
+#else
+# define STRING_FIELDOFF_VALUE 8
+# define STRING_FIELDOFF_HASHCODE 12
+# define STRING_FIELDOFF_OFFSET 16
+# define STRING_FIELDOFF_COUNT 20
+#endif
+
+/*
* Hash function for modified UTF-8 strings.
*/
u4 dvmComputeUtf8Hash(const char* str);