blob: 5d4934d7f6d6a4e851fd1807ec00c385acdd82c6 [file] [log] [blame]
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package xmlkit; // -*- mode: java; indent-tabs-mode: nil -*-
import java.util.*;
/**
 * A List of Strings each representing a word or token.
 * This object itself is a CharSequence whose characters consist
 * of all the tokens, joined by the separator string (a single
 * blank unless another separator is supplied).
 * <p>
 * A list may be {@linkplain #freeze() frozen}, after which the mutators
 * overridden here throw {@link UnsupportedOperationException}.  The check
 * is best-effort: bulk operations introduced in later Java versions
 * ({@code sort}, {@code replaceAll}, {@code removeIf}) and writes through
 * {@code subList} views are not intercepted.
 *
 * @author jrose
 */
public class TokenList extends ArrayList<String> implements CharSequence {

    /** String placed between adjacent tokens in the character view. */
    protected String separator;
    /** Set by {@link #freeze()}; once true the guarded mutators throw. */
    protected boolean frozen;

    /** Creates an empty list with a single blank as separator. */
    public TokenList() {
        this.separator = " ";
    }

    /** Creates a blank-separated list holding {@code toString()} of each element. */
    public TokenList(Collection<? extends Object> tokens) {
        super(tokens.size());
        this.separator = " ";
        addTokens(tokens);
    }

    /** Creates a list with the given separator holding {@code toString()} of each element. */
    public TokenList(Collection<? extends Object> tokens, String separator) {
        super(tokens.size());
        this.separator = separator;
        addTokens(tokens);
    }

    /** Creates a blank-separated list from every element of the array. */
    public TokenList(Object[] tokens) {
        super(tokens.length);
        this.separator = " ";
        addTokens(tokens, 0, tokens.length);
    }

    /** Creates a blank-separated list from {@code tokens[beg..end)}. */
    public TokenList(Object[] tokens, int beg, int end) {
        super(end - beg);  // capacity
        this.separator = " ";
        addTokens(tokens, beg, end);
    }

    /** Creates a list with the given separator from {@code tokens[beg..end)}. */
    public TokenList(Object[] tokens, int beg, int end, String separator) {
        super(end - beg);  // capacity
        this.separator = separator;
        addTokens(tokens, beg, end);
    }

    /** Splits {@code tokenStr} on blanks; empty tokens are dropped. */
    public TokenList(String tokenStr) {
        this(tokenStr, " ", false);
    }

    /** Splits {@code tokenStr} on {@code separator}; empty tokens are kept. */
    public TokenList(String tokenStr, String separator) {
        this(tokenStr, separator, true);
    }

    /**
     * Splits {@code tokenStr} on {@code separator}.
     *
     * @param allowNulls whether empty tokens (adjacent, leading or trailing
     *                   separators) are stored as {@code ""} or dropped
     */
    public TokenList(String tokenStr, String separator, boolean allowNulls) {
        super(tokenStr.length() / 5);  // initial capacity guess
        this.separator = separator;
        addTokens(tokenStr, allowNulls);
    }

    /** A shared, frozen, empty list. */
    public static final TokenList EMPTY;

    static {
        TokenList tl = new TokenList(new Object[0]);
        tl.freeze();
        EMPTY = tl;
    }

    /**
     * Marks this list as frozen.  Each element is re-stored through
     * {@code toString()}, the backing array is trimmed, and from then on
     * the guarded mutators throw.  Calling it again has no effect.
     */
    public void freeze() {
        if (!frozen) {
            for (ListIterator<String> i = listIterator(); i.hasNext();) {
                i.set(i.next().toString());
            }
            trimToSize();
            frozen = true;
        }
    }

    /** Returns true once {@link #freeze()} has been called. */
    public boolean isFrozen() {
        return frozen;
    }

    /** Throws {@link UnsupportedOperationException} if this list is frozen. */
    void checkNotFrozen() {
        if (isFrozen()) {
            throw new UnsupportedOperationException("cannot modify frozen TokenList");
        }
    }

    /** Returns the string placed between tokens. */
    public String getSeparator() {
        return separator;
    }

    /**
     * Replaces the separator.
     *
     * @throws UnsupportedOperationException if this list is frozen
     */
    public void setSeparator(String separator) {
        checkNotFrozen();
        this.separator = separator;
        // The cached offsets embed the old separator length and modCount
        // does not change here, so the cache must be dropped explicitly.
        this.lengths = null;
    }

    /// All normal List mutators must check the frozen bit:

    @Override
    public String set(int index, String o) {
        checkNotFrozen();
        String old = super.set(index, o);
        // set() is not a structural modification, so modCount is unchanged;
        // drop the cached offsets by hand or length()/charAt() go stale.
        this.lengths = null;
        return old;
    }

    @Override
    public boolean add(String o) {
        checkNotFrozen();
        return super.add(o);
    }

    @Override
    public void add(int index, String o) {
        checkNotFrozen();
        super.add(index, o);
    }

    @Override
    public boolean addAll(Collection<? extends String> c) {
        checkNotFrozen();
        return super.addAll(c);
    }

    @Override
    public boolean addAll(int index, Collection<? extends String> c) {
        checkNotFrozen();
        return super.addAll(index, c);
    }

    @Override
    public boolean remove(Object o) {
        checkNotFrozen();
        return super.remove(o);
    }

    @Override
    public String remove(int index) {
        checkNotFrozen();
        return super.remove(index);
    }

    @Override
    public boolean removeAll(Collection<?> c) {
        checkNotFrozen();
        return super.removeAll(c);
    }

    @Override
    public boolean retainAll(Collection<?> c) {
        checkNotFrozen();
        return super.retainAll(c);
    }

    @Override
    protected void removeRange(int fromIndex, int toIndex) {
        checkNotFrozen();
        super.removeRange(fromIndex, toIndex);
    }

    @Override
    public void clear() {
        checkNotFrozen();
        super.clear();
    }

    /**
     * Add a collection of tokens to the list, applying toString to each.
     *
     * @return true if at least one token was added
     */
    public boolean addTokens(Collection<? extends Object> tokens) {
        // Note that if this sequence is empty, no tokens are added.
        // This is different from adding a null string, which is
        // a single token.
        boolean added = false;
        for (Object token : tokens) {
            add(token.toString());
            added = true;
        }
        return added;
    }

    /**
     * Adds {@code toString()} of each element of {@code tokens[beg..end)}.
     *
     * @return true if at least one token was added
     */
    public boolean addTokens(Object[] tokens, int beg, int end) {
        boolean added = false;
        for (int i = beg; i < end; i++) {
            add(tokens[i].toString());
            added = true;
        }
        return added;
    }

    /** Splits {@code tokenStr} on the current separator, dropping empty tokens. */
    public boolean addTokens(String tokenStr) {
        return addTokens(tokenStr, false);
    }

    /**
     * Splits {@code tokenStr} on the current separator and appends the pieces.
     * If the separator is the empty string there is nothing to split on, so
     * a non-empty {@code tokenStr} is appended as one token.
     *
     * @param allowNulls whether empty pieces are appended as {@code ""}
     *                   (true) or skipped (false)
     * @return true if at least one token was added
     */
    public boolean addTokens(String tokenStr, boolean allowNulls) {
        boolean added = false;
        int pos = 0, limit = tokenStr.length(), sep = limit;
        if (separator.length() == 0) {
            // indexOf("", pos) always equals pos and the scan below would
            // never advance; treat the whole input as a single token.
            if (limit > 0) {
                add(tokenStr);
                added = true;
            }
            return added;
        }
        while (pos < limit) {
            sep = tokenStr.indexOf(separator, pos);
            if (sep < 0) {
                sep = limit;
            }
            if (sep == pos) {
                // Empty piece: separator found right at the scan position.
                if (allowNulls) {
                    add("");
                    added = true;
                }
                pos += separator.length();
            } else {
                add(tokenStr.substring(pos, sep));
                added = true;
                pos = sep + separator.length();
            }
        }
        if (allowNulls && sep < limit) {
            // Input was something like "tok1 tok2 ".
            add("");
            added = true;
        }
        return added;
    }

    /** Appends {@code token.toString()} as a single token. */
    public boolean addToken(Object token) {
        return add(token.toString());
    }

    /** Format the token string, using quotes and escapes.
     *  Quotes must contain an odd number of 3 or more elements,
     *  a sequence of begin/end quote pairs, plus a superquote.
     *  For each token, the first begin/end pair is used for
     *  which the end quote does not occur in the token.
     *  If the token contains all end quotes, the last pair
     *  is used, with all occurrences of the end quote replaced
     *  by the superquote.  If an end quote is the empty string,
     *  the separator is used instead.
     *  <p>
     *  NOTE: not implemented yet; this currently returns the empty
     *  string regardless of its arguments.
     */
    public String format(String separator, String[] quotes) {
        return ""; //@@
    }

    /** Cached offset table built by {@link #getLengths()}, or null. */
    protected int[] lengths;
    protected static final int MODC = 0, HINT = 1, BEG0 = 2, END0 = 3;
    // Layout of lengths:
    //   { modCount, hint, -sepLength==beg[0], end[0]==beg[1], ..., length }
    // Note that each beg[i]..end[i] span includes a leading separator,
    // which is not part of the corresponding token.

    /** Returns token {@code i} viewed as a CharSequence. */
    protected final CharSequence getCS(int i) {
        return (CharSequence) get(i);
    }

    /**
     * Produce (and cache) a table of indexes for each token.
     * The table is rebuilt when it is missing or when its recorded
     * modCount differs from the list's current modCount.
     */
    protected int[] getLengths() {
        int[] table = this.lengths;
        if (table == null || table[MODC] != modCount) {
            int sepLength = separator.length();
            int size = this.size();
            // An empty list still needs one END slot to hold the total (0).
            table = new int[END0 + size + (size == 0 ? 1 : 0)];
            table[MODC] = modCount;
            int end = -sepLength;  // cancels leading separator
            table[BEG0] = end;
            for (int i = 0; i < size; i++) {
                end += sepLength;  // count leading separator
                end += getCS(i).length();
                table[END0 + i] = end;
            }
            this.lengths = table;
        }
        return table;
    }

    /** Returns the number of characters in the joined token string. */
    @Override
    public int length() {
        int[] table = getLengths();
        return table[table.length - 1];
    }

    /**
     * Which token does the given index belong to?
     * Returns -1 for a negative index and {@code size()} for an index
     * that lies in no token span.  The last answer is remembered in the
     * table as a hint so that nearby lookups start there.
     */
    protected int which(int i) {
        if (i < 0) {
            return -1;
        }
        int[] table = getLengths();
        for (int hint = table[HINT];; hint = 0) {
            for (int wh = hint; wh < table.length - END0; wh++) {
                int beg = table[BEG0 + wh];
                int end = table[END0 + wh];
                if (i >= beg && i < end) {
                    table[HINT] = wh;
                    return wh;
                }
            }
            if (hint == 0) {
                return size();  // end of the line
            }
            // Not found at or after the hint: retry once from the start.
        }
    }

    /**
     * Returns the character at index {@code i} of the joined token string.
     *
     * @throws StringIndexOutOfBoundsException if {@code i} is negative or
     *         not less than {@link #length()}
     */
    @Override
    public char charAt(int i) {
        int[] table = getLengths();
        if (i < 0 || i >= table[table.length - 1]) {
            // Without this check an index just past the end would fall
            // into the "leading separator" arithmetic below.
            throw new StringIndexOutOfBoundsException(i);
        }
        int wh = which(i);
        int beg = table[BEG0 + wh];
        int j = i - beg;
        int sepLength = separator.length();
        if (j < sepLength) {
            return separator.charAt(j);
        }
        return getCS(wh).charAt(j - sepLength);
    }

    /**
     * Returns the characters {@code [beg, end)} of the joined token string.
     * The result is {@code ""}, a slice of the separator, a slice of one
     * token, or a new TokenList with the same separator.
     * <p>
     * For compatibility with earlier behavior, {@code beg == end} yields
     * {@code ""} without any range check.
     *
     * @throws StringIndexOutOfBoundsException if {@code beg} is negative,
     *         {@code beg > end}, or {@code end > length()}
     */
    @Override
    public CharSequence subSequence(int beg, int end) {
        if (beg == end) {
            return "";
        }
        int[] table = getLengths();
        int total = table[table.length - 1];
        if (beg < 0 || beg > end || end > total) {
            throw new StringIndexOutOfBoundsException(
                    "begin " + beg + ", end " + end + ", length " + total);
        }
        int begWh = which(beg);
        int endWh = which(end);
        // An end that sits exactly on a span boundary belongs to the
        // preceding token, not to the one that starts there.
        if (endWh == size() || end == table[BEG0 + endWh]) {
            --endWh;
        }
        int begBase = table[BEG0 + begWh];
        int endBase = table[BEG0 + endWh];
        int sepLength = separator.length();
        // begFrag: number of trailing separator chars included at the start.
        int begFrag = 0;
        if ((beg - begBase) < sepLength) {
            begFrag = sepLength - (beg - begBase);
            beg += begFrag;
        }
        // endFrag: number of leading separator chars included at the end.
        int endFrag = 0;
        if ((end - endBase) < sepLength) {
            endFrag = (end - endBase);
            end = endBase;
            endBase = table[BEG0 + --endWh];
        }
        if (begFrag > 0 && (end + endFrag) - begBase <= sepLength) {
            // Special case:  Slice the separator.
            beg -= begFrag;
            end += endFrag;
            return separator.substring(beg - begBase, end - begBase);
        }
        if (begWh == endWh && (begFrag + endFrag) == 0) {
            // Special case:  Slice a single token.
            return getCS(begWh).subSequence(beg - begBase - sepLength,
                    end - endBase - sepLength);
        }
        // One slot per covered token plus one for a possible leading null token.
        Object[] subTokens = new Object[1 + (endWh - begWh) + 1];
        int fillp = 0;
        if (begFrag == sepLength) {
            // Insert a leading null token to force an initial separator.
            subTokens[fillp++] = "";
            begFrag = 0;
        }
        for (int wh = begWh; wh <= endWh; wh++) {
            CharSequence cs = getCS(wh);
            if (wh == begWh || wh == endWh) {
                // Slice it.
                int csBeg = (wh == begWh) ? (beg - begBase) - sepLength : 0;
                int csEnd = (wh == endWh) ? (end - endBase) - sepLength : cs.length();
                cs = cs.subSequence(csBeg, csEnd);
                if (begFrag > 0 && wh == begWh) {
                    cs = separator.substring(sepLength - begFrag) + cs;
                }
                if (endFrag > 0 && wh == endWh) {
                    cs = cs.toString() + separator.substring(0, endFrag);
                }
            }
            subTokens[fillp++] = cs;
        }
        return new TokenList(subTokens, 0, fillp, separator);
    }

    /** Returns the concatenation of all tokens,
     *  with intervening separator characters.
     */
    @Override
    public String toString() {
        StringBuilder buf = new StringBuilder(length());
        int size = this.size();
        for (int i = 0; i < size; i++) {
            if (i > 0) {
                buf.append(separator);
            }
            buf.append(get(i));
        }
        return buf.toString();
    }

    /*---- TESTING CODE ----
    public static void main(String[] av) {
    if (av.length == 0) av = new String[]{"one", "2", "", "four"};
    TokenList ts = new TokenList();
    final String SEP = ", ";
    ts.setSeparator(SEP);
    for (int i = -1; i < av.length; i++) {
    if (i >= 0) ts.addToken(av[i]);
    {
    TokenList tsCopy = new TokenList(ts.toString(), SEP);
    if (!tsCopy.equals(ts)) {
    tsCopy.setSeparator(")(");
    System.out.println("!= ("+tsCopy+")");
    }
    }
    {
    TokenList tsBar = new TokenList(ts, "|");
    tsBar.add(0, "[");
    tsBar.add("]");
    System.out.println(tsBar);
    }
    if (false) {
    int[] ls = ts.getLengths();
    System.out.println("ts: "+ts);
    System.out.print("ls: {");
    for (int j = 0; j < ls.length; j++) System.out.print(" "+ls[j]);
    System.out.println(" }");
    }
    assert0(ts.size() == i+1);
    assert0(i < 0 || ts.get(i) == av[i]);
    String tss = ts.toString();
    int tslen = tss.length();
    assert0(ts.length() == tss.length());
    for (int n = 0; n < tslen; n++) {
    assert0(ts.charAt(n) == tss.charAt(n));
    }
    for (int j = 0; j < tslen; j++) {
    for (int k = tslen; k >= j; k--) {
    CharSequence sub = ts.subSequence(j, k);
    //System.out.println("|"+sub+"|");
    assert0(sub.toString().equals(tss.substring(j, k)));
    }
    }
    }
    }
    static void assert0(boolean z) {
    if (!z) throw new RuntimeException("assert failed");
    }
    // ---- TESTING CODE ----*/
}