// blob: a4cb52c7e54e0c7b9d2d98c15ed33962b7ddf191
/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package java.util.regex;
import java.util.ArrayList;
import java.util.List;
import libcore.util.EmptyArray;
/**
* Used to make {@code String.split} fast (and to help {@code Pattern.split} too).
* @hide
*/
public class Splitter {
    // The RI allows regular expressions beginning with ] or }, but that's probably a bug.
    private static final String METACHARACTERS = "\\?*+[](){}^$.|";

    /** Not instantiable: this class only hosts static helpers. */
    private Splitter() {
    }

    /**
     * Returns a result equivalent to {@code s.split(separator, limit)} if it's able
     * to compute it more cheaply than ICU, or null if the caller should fall back to
     * using ICU.
     *
     * <p>The cheap path is taken only when {@code re} is either a single character that is
     * neither a regex metacharacter nor a UTF-16 surrogate, or a backslash followed by a
     * single metacharacter. In both cases the separator is one literal {@code char}.
     *
     * @param re the regular expression to split around
     * @param input the string to split
     * @param limit if positive, the maximum number of parts returned; if zero, no maximum
     *        is applied and trailing empty parts are removed; if negative, no maximum is
     *        applied and trailing empty parts are kept
     * @return the parts of {@code input}, or null if {@code re} is not a simple literal
     *         separator and the caller must use the regex engine instead
     */
    public static String[] fastSplit(String re, String input, int limit) {
        // Can we do it cheaply?
        int len = re.length();
        if (len == 0) {
            return null;
        }
        char ch = re.charAt(0);
        if (len == 1 && METACHARACTERS.indexOf(ch) == -1) {
            // We're looking for a single non-metacharacter. Easy, unless it is a lone
            // surrogate: indexOf compares UTF-16 code units and would match one half of a
            // surrogate pair, splitting a supplementary character in two. Defer to the
            // regex engine in that case.
            if (ch >= Character.MIN_SURROGATE && ch <= Character.MAX_SURROGATE) {
                return null;
            }
        } else if (len == 2 && ch == '\\') {
            // We're looking for a quoted character.
            // Quoted metacharacters are effectively single non-metacharacters.
            ch = re.charAt(1);
            if (METACHARACTERS.indexOf(ch) == -1) {
                return null;
            }
        } else {
            return null;
        }

        // We can do this cheaply...

        // Unlike Perl, which considers the result of splitting the empty string to be the empty
        // array, Java returns an array containing the empty string.
        if (input.isEmpty()) {
            return new String[] { "" };
        }

        // Count separators, stopping early once a positive limit would be reached.
        int separatorCount = 0;
        int begin = 0;
        int end;
        while (separatorCount + 1 != limit && (end = input.indexOf(ch, begin)) != -1) {
            ++separatorCount;
            begin = end + 1;
        }

        int lastPartEnd = input.length();
        if (limit == 0 && begin == lastPartEnd) {
            // Last part is empty for limit == 0, remove all trailing empty matches.
            if (separatorCount == lastPartEnd) {
                // Input contains only separators.
                return EmptyArray.STRING;
            }
            // Find the beginning of trailing separators. This cannot run off the front of
            // the string: the check above guarantees at least one non-separator character.
            do {
                --begin;
            } while (input.charAt(begin - 1) == ch);
            // Reduce separatorCount and fix lastPartEnd.
            separatorCount -= input.length() - begin;
            lastPartEnd = begin;
        }

        // Collect the result parts.
        String[] result = new String[separatorCount + 1];
        begin = 0;
        for (int i = 0; i != separatorCount; ++i) {
            end = input.indexOf(ch, begin);
            result[i] = input.substring(begin, end);
            begin = end + 1;
        }
        // Add last part.
        result[separatorCount] = input.substring(begin, lastPartEnd);
        return result;
    }

    /**
     * Splits {@code input} around matches of {@code pattern}, using {@link #fastSplit} when
     * {@code re} is simple enough and the regex matcher otherwise.
     *
     * @param pattern the compiled pattern used when the fast path does not apply
     * @param re the regular expression text handed to {@link #fastSplit}; presumably the
     *        source of {@code pattern} (not verified here)
     * @param input the string to split
     * @param limit if positive, the maximum number of parts returned; if zero, trailing
     *        empty parts are removed; if negative, all parts are returned
     * @return the parts of {@code input}; never null
     */
    public static String[] split(Pattern pattern, String re, String input, int limit) {
        String[] fastResult = fastSplit(re, input, limit);
        if (fastResult != null) {
            return fastResult;
        }

        // Unlike Perl, which considers the result of splitting the empty string to be the empty
        // array, Java returns an array containing the empty string.
        if (input.isEmpty()) {
            return new String[] { "" };
        }

        // Collect text preceding each occurrence of the separator, while there's enough space.
        List<String> list = new ArrayList<String>();
        Matcher matcher = new Matcher(pattern, input);
        int begin = 0;
        while (list.size() + 1 != limit && matcher.find()) {
            list.add(input.substring(begin, matcher.start()));
            begin = matcher.end();
        }
        return finishSplit(list, input, begin, limit);
    }

    /**
     * Completes a split by appending whatever follows the last match and converting the
     * collected parts to an array.
     *
     * @param list the parts collected so far; modified in place
     * @param input the string being split
     * @param begin index in {@code input} just past the last separator match
     * @param limit the caller's limit; zero means trailing empty parts are dropped
     * @return the final array of parts
     */
    private static String[] finishSplit(List<String> list, String input, int begin, int limit) {
        // Add trailing text.
        if (begin < input.length()) {
            list.add(input.substring(begin));
        } else if (limit != 0) {
            list.add("");
        } else {
            // Remove all trailing empty matches in the limit == 0 case.
            int i = list.size() - 1;
            while (i >= 0 && list.get(i).isEmpty()) {
                list.remove(i);
                i--;
            }
        }
        // Convert to an array.
        return list.toArray(new String[list.size()]);
    }
}