/*
 * Copyright (C) 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| package com.google.streamhtmlparser.util; |
| |
| import com.google.common.base.Preconditions; |
| |
| import java.util.Arrays; |
| |
| /** |
| * Implements a circular (ring) buffer of characters with specialized |
| * application logic in order to determine the context of some |
| * Javascript content that is being parsed. |
| * |
| * This is a specialized class - of no use to external code - |
| * which aims to be 100% compatible with the corresponding logic |
| * in the C-version of the HtmlParser, specifically |
| * <code>jsparser.c</code>. In particular: |
| * <ul> |
| * <li> The API is odd, using negative indexes to access content in |
| * the buffer. Changing the API would mean changing the test |
| * cases and have more difficulty determining whether we are |
| * remaining compatible with the C-version. It is left as an |
| * exercise for once the code is very stable and proven. |
| * <li> Repeated whitespace is folded into just one character to |
| * use the space available efficiently. |
| * <li> The buffer size is fixed. There is currently no need to |
| * make it variable so we avoid the need for constructors. |
| * </ul> |
| */ |
| public class JavascriptTokenBuffer { |
| |
| /** |
| * Size of the ring buffer used to lookup the last token in the javascript |
| * stream. The size is somewhat arbitrary but must be larger than |
| * the biggest token we want to lookup plus three: Two delimiters plus |
| * an empty ring buffer slot. |
| */ |
| private static final int BUFFER_SIZE = 18; |
| |
| /** Storage implementing the circular buffer. */ |
| private final char[] buffer; |
| |
| /** Index of the first item in our circular buffer. */ |
| private int startIndex; |
| |
| /** Index of the last item in our circular buffer. */ |
| private int endIndex; |
| |
| /** |
| * Constructs an empty javascript token buffer. The size is fixed, |
| * see {@link #BUFFER_SIZE}. |
| */ |
| public JavascriptTokenBuffer() { |
| buffer = new char[BUFFER_SIZE]; |
| startIndex = 0; |
| endIndex = 0; |
| } |
| |
| /** |
| * Constructs a javascript token buffer that is identical to |
| * the one given. In particular, it has the same size and contents. |
| * |
| * @param aJavascriptTokenBuffer the {@code JavascriptTokenBuffer} to copy |
| */ |
| public JavascriptTokenBuffer(JavascriptTokenBuffer aJavascriptTokenBuffer) { |
| buffer = Arrays.copyOf(aJavascriptTokenBuffer.buffer, |
| aJavascriptTokenBuffer.buffer.length); |
| startIndex = aJavascriptTokenBuffer.startIndex; |
| endIndex = aJavascriptTokenBuffer.endIndex; |
| } |
| |
| /** |
| * A simple wrapper over <code>appendChar</code>, it appends a string |
| * to the buffer. Sequences of whitespace and newlines |
| * are folded into one character to save space. Null strings are |
| * not allowed. |
| * |
| * @param input the {@code String} to append, cannot be {@code null} |
| */ |
| // TODO: Move to testing since not used in code. |
| public void appendString(String input) { |
| if (input == null) { |
| throw new NullPointerException("input == null is not allowed"); |
| } |
| for (int i = 0; i < input.length(); i++) { |
| appendChar(input.charAt(i)); |
| } |
| } |
| |
| /** |
| * Appends a character to the buffer. We fold sequences of whitespace and |
| * newlines into one to save space. |
| * |
| * @param input the {@code char} to append |
| */ |
| public void appendChar(char input) { |
| if (HtmlUtils.isJavascriptWhitespace(input) && |
| HtmlUtils.isJavascriptWhitespace(getChar(-1))) { |
| return; |
| } |
| buffer[endIndex] = input; |
| endIndex = (endIndex + 1) % buffer.length; |
| if (endIndex == startIndex) { |
| startIndex = (endIndex + 1) % buffer.length; |
| } |
| } |
| |
| /** |
| * Returns the last character in the buffer and removes it from the buffer |
| * or the NUL character '\0' if the buffer is empty. |
| * |
| * @return last character in the buffer or '\0' if the buffer is empty |
| */ |
| public char popChar() { |
| if (startIndex == endIndex) { |
| return '\0'; |
| } |
| endIndex--; |
| if (endIndex < 0) { |
| endIndex += buffer.length; |
| } |
| return buffer[endIndex]; |
| } |
| |
| /** |
| * Returns the character at a given index in the buffer or nul ('\0') |
| * if the index is outside the range of the buffer. Such could happen |
| * if the buffer is not filled enough or the index is larger than the |
| * size of the buffer. |
| * |
| * <p>Position must be negative where -1 is the index of the last |
| * character in the buffer. |
| * |
| * @param position The index into the buffer |
| * |
| * @return character at the requested index |
| */ |
| public char getChar(int position) { |
| assert(position < 0); // Developer error if it triggers. |
| |
| int absolutePosition = getAbsolutePosition(position); |
| if (absolutePosition < 0) { |
| return '\0'; |
| } |
| |
| return buffer[absolutePosition]; |
| } |
| |
| /** |
| * Sets the given {@code input} at the given {@code position} of the buffer. |
| * Returns {@code true} if we succeeded or {@code false} if we |
| * failed (i.e. the write was beyond the buffer boundary). |
| * |
| * <p>Index positions are negative where -1 is the index of the |
| * last character in the buffer. |
| * |
| * @param position The index at which to set the character |
| * @param input The character to set in the buffer |
| * @return {@code true} if we succeeded, {@code false} otherwise |
| */ |
| public boolean setChar(int position, char input) { |
| assert(position < 0); // Developer error if it triggers. |
| |
| int absolutePosition = getAbsolutePosition(position); |
| if (absolutePosition < 0) { |
| return false; |
| } |
| |
| buffer[absolutePosition] = input; |
| return true; |
| } |
| |
| |
| /** |
| * Returns the last javascript identifier/keyword in the buffer. |
| * |
| * @return the last identifier or {@code null} if none was found |
| */ |
| public String getLastIdentifier() { |
| int end = -1; |
| |
| if (HtmlUtils.isJavascriptWhitespace(getChar(-1))) { |
| end--; |
| } |
| int position; |
| for (position = end; HtmlUtils.isJavascriptIdentifier(getChar(position)); |
| position--) { |
| } |
| if ((position + 1) >= end) { |
| return null; |
| } |
| return slice(position + 1, end); |
| } |
| |
| /** |
| * Returns a slice of the buffer delimited by the given indices. |
| * |
| * The start and end indexes represent the start and end of the |
| * slice to copy. If the start argument extends beyond the beginning |
| * of the buffer, the slice will only contain characters |
| * starting from the beginning of the buffer. |
| * |
| * @param start The index of the first character the copy |
| * @param end the index of the last character to copy |
| * |
| * @return {@code String} between the given indices |
| */ |
| public String slice(int start, int end) { |
| // Developer error if any of the asserts below fail. |
| Preconditions.checkArgument(start <= end); |
| Preconditions.checkArgument(start < 0); |
| Preconditions.checkArgument(end < 0); |
| |
| StringBuffer output = new StringBuffer(); |
| for (int position = start; position <= end; position++) { |
| char c = getChar(position); |
| if (c != '\0') { |
| output.append(c); |
| } |
| } |
| return new String(output); |
| } |
| |
| /** |
| * Returns the position relative to the start of the buffer or -1 |
| * if the position is past the size of the buffer. |
| * |
| * @param position the index to be translated |
| * @return the position relative to the start of the buffer |
| */ |
| private int getAbsolutePosition(int position) { |
| assert (position < 0); // Developer error if it triggers. |
| if (position <= -buffer.length) { |
| return -1; |
| } |
| int len = endIndex - startIndex; |
| if (len < 0) { |
| len += buffer.length; |
| } |
| if (position < -len) { |
| return -1; |
| } |
| int absolutePosition = (position + endIndex) % buffer.length; |
| if (absolutePosition < 0) { |
| absolutePosition += buffer.length; |
| } |
| return absolutePosition; |
| } |
| } |