blob: 1fa1718b257906b99e3b3d686e82f7fcf3db2762 [file] [log] [blame]
/*
* Copyright (C) 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.streamhtmlparser.util;
import com.google.common.base.Preconditions;
import java.util.Arrays;
/**
* Implements a circular (ring) buffer of characters with specialized
* application logic in order to determine the context of some
* Javascript content that is being parsed.
*
* This is a specialized class - of no use to external code -
* which aims to be 100% compatible with the corresponding logic
* in the C-version of the HtmlParser, specifically
* <code>jsparser.c</code>. In particular:
* <ul>
* <li> The API is odd, using negative indexes to access content in
* the buffer. Changing the API would mean changing the test
* cases and have more difficulty determining whether we are
* remaining compatible with the C-version. It is left as an
* exercise for once the code is very stable and proven.
* <li> Repeated whitespace is folded into just one character to
* use the space available efficiently.
* <li> The buffer size is fixed. There is currently no need to
* make it variable so we avoid the need for constructors.
* </ul>
*/
public class JavascriptTokenBuffer {
/**
* Size of the ring buffer used to lookup the last token in the javascript
* stream. The size is somewhat arbitrary but must be larger than
* the biggest token we want to lookup plus three: Two delimiters plus
* an empty ring buffer slot.
*/
private static final int BUFFER_SIZE = 18;
/** Storage implementing the circular buffer. */
private final char[] buffer;
/** Index of the first item in our circular buffer. */
private int startIndex;
/** Index of the last item in our circular buffer. */
private int endIndex;
/**
* Constructs an empty javascript token buffer. The size is fixed,
* see {@link #BUFFER_SIZE}.
*/
public JavascriptTokenBuffer() {
buffer = new char[BUFFER_SIZE];
startIndex = 0;
endIndex = 0;
}
/**
* Constructs a javascript token buffer that is identical to
* the one given. In particular, it has the same size and contents.
*
* @param aJavascriptTokenBuffer the {@code JavascriptTokenBuffer} to copy
*/
public JavascriptTokenBuffer(JavascriptTokenBuffer aJavascriptTokenBuffer) {
buffer = Arrays.copyOf(aJavascriptTokenBuffer.buffer,
aJavascriptTokenBuffer.buffer.length);
startIndex = aJavascriptTokenBuffer.startIndex;
endIndex = aJavascriptTokenBuffer.endIndex;
}
/**
* A simple wrapper over <code>appendChar</code>, it appends a string
* to the buffer. Sequences of whitespace and newlines
* are folded into one character to save space. Null strings are
* not allowed.
*
* @param input the {@code String} to append, cannot be {@code null}
*/
// TODO: Move to testing since not used in code.
public void appendString(String input) {
if (input == null) {
throw new NullPointerException("input == null is not allowed");
}
for (int i = 0; i < input.length(); i++) {
appendChar(input.charAt(i));
}
}
/**
* Appends a character to the buffer. We fold sequences of whitespace and
* newlines into one to save space.
*
* @param input the {@code char} to append
*/
public void appendChar(char input) {
if (HtmlUtils.isJavascriptWhitespace(input) &&
HtmlUtils.isJavascriptWhitespace(getChar(-1))) {
return;
}
buffer[endIndex] = input;
endIndex = (endIndex + 1) % buffer.length;
if (endIndex == startIndex) {
startIndex = (endIndex + 1) % buffer.length;
}
}
/**
* Returns the last character in the buffer and removes it from the buffer
* or the NUL character '\0' if the buffer is empty.
*
* @return last character in the buffer or '\0' if the buffer is empty
*/
public char popChar() {
if (startIndex == endIndex) {
return '\0';
}
endIndex--;
if (endIndex < 0) {
endIndex += buffer.length;
}
return buffer[endIndex];
}
/**
* Returns the character at a given index in the buffer or nul ('\0')
* if the index is outside the range of the buffer. Such could happen
* if the buffer is not filled enough or the index is larger than the
* size of the buffer.
*
* <p>Position must be negative where -1 is the index of the last
* character in the buffer.
*
* @param position The index into the buffer
*
* @return character at the requested index
*/
public char getChar(int position) {
assert(position < 0); // Developer error if it triggers.
int absolutePosition = getAbsolutePosition(position);
if (absolutePosition < 0) {
return '\0';
}
return buffer[absolutePosition];
}
/**
* Sets the given {@code input} at the given {@code position} of the buffer.
* Returns {@code true} if we succeeded or {@code false} if we
* failed (i.e. the write was beyond the buffer boundary).
*
* <p>Index positions are negative where -1 is the index of the
* last character in the buffer.
*
* @param position The index at which to set the character
* @param input The character to set in the buffer
* @return {@code true} if we succeeded, {@code false} otherwise
*/
public boolean setChar(int position, char input) {
assert(position < 0); // Developer error if it triggers.
int absolutePosition = getAbsolutePosition(position);
if (absolutePosition < 0) {
return false;
}
buffer[absolutePosition] = input;
return true;
}
/**
* Returns the last javascript identifier/keyword in the buffer.
*
* @return the last identifier or {@code null} if none was found
*/
public String getLastIdentifier() {
int end = -1;
if (HtmlUtils.isJavascriptWhitespace(getChar(-1))) {
end--;
}
int position;
for (position = end; HtmlUtils.isJavascriptIdentifier(getChar(position));
position--) {
}
if ((position + 1) >= end) {
return null;
}
return slice(position + 1, end);
}
/**
* Returns a slice of the buffer delimited by the given indices.
*
* The start and end indexes represent the start and end of the
* slice to copy. If the start argument extends beyond the beginning
* of the buffer, the slice will only contain characters
* starting from the beginning of the buffer.
*
* @param start The index of the first character the copy
* @param end the index of the last character to copy
*
* @return {@code String} between the given indices
*/
public String slice(int start, int end) {
// Developer error if any of the asserts below fail.
Preconditions.checkArgument(start <= end);
Preconditions.checkArgument(start < 0);
Preconditions.checkArgument(end < 0);
StringBuffer output = new StringBuffer();
for (int position = start; position <= end; position++) {
char c = getChar(position);
if (c != '\0') {
output.append(c);
}
}
return new String(output);
}
/**
* Returns the position relative to the start of the buffer or -1
* if the position is past the size of the buffer.
*
* @param position the index to be translated
* @return the position relative to the start of the buffer
*/
private int getAbsolutePosition(int position) {
assert (position < 0); // Developer error if it triggers.
if (position <= -buffer.length) {
return -1;
}
int len = endIndex - startIndex;
if (len < 0) {
len += buffer.length;
}
if (position < -len) {
return -1;
}
int absolutePosition = (position + endIndex) % buffer.length;
if (absolutePosition < 0) {
absolutePosition += buffer.length;
}
return absolutePosition;
}
}