// blob: a2479f17f4e7d6296d4b0f062bbbaac48a04e855 [file] [log] [blame]
/*
Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public License
along with this library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#ifndef SegmentedString_h
#define SegmentedString_h
#include "platform/PlatformExport.h"
#include "wtf/Deque.h"
#include "wtf/text/StringBuilder.h"
#include "wtf/text/TextPosition.h"
#include "wtf/text/WTFString.h"
namespace blink {
class SegmentedString;
class PLATFORM_EXPORT SegmentedSubstring {
public:
SegmentedSubstring()
: m_length(0)
, m_doNotExcludeLineNumbers(true)
, m_is8Bit(false)
{
m_data.string16Ptr = 0;
}
SegmentedSubstring(const String& str)
: m_length(str.length())
, m_doNotExcludeLineNumbers(true)
, m_string(str)
{
if (m_length) {
if (m_string.is8Bit()) {
m_is8Bit = true;
m_data.string8Ptr = m_string.characters8();
} else {
m_is8Bit = false;
m_data.string16Ptr = m_string.characters16();
}
} else {
m_is8Bit = false;
}
}
void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;}
bool is8Bit() { return m_is8Bit; }
bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }
void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
int numberOfCharactersConsumed() const { return m_string.length() - m_length; }
void appendTo(StringBuilder& builder) const
{
int offset = m_string.length() - m_length;
if (!offset) {
if (m_length)
builder.append(m_string);
} else {
builder.append(m_string.substring(offset, m_length));
}
}
UChar getCurrentChar8()
{
return *m_data.string8Ptr;
}
UChar getCurrentChar16()
{
return m_data.string16Ptr ? *m_data.string16Ptr : 0;
}
UChar incrementAndGetCurrentChar8()
{
ASSERT(m_data.string8Ptr);
return *++m_data.string8Ptr;
}
UChar incrementAndGetCurrentChar16()
{
ASSERT(m_data.string16Ptr);
return *++m_data.string16Ptr;
}
String currentSubString(unsigned length)
{
int offset = m_string.length() - m_length;
return m_string.substring(offset, length);
}
ALWAYS_INLINE UChar getCurrentChar()
{
ASSERT(m_length);
if (is8Bit())
return getCurrentChar8();
return getCurrentChar16();
}
ALWAYS_INLINE UChar incrementAndGetCurrentChar()
{
ASSERT(m_length);
if (is8Bit())
return incrementAndGetCurrentChar8();
return incrementAndGetCurrentChar16();
}
public:
union {
const LChar* string8Ptr;
const UChar* string16Ptr;
} m_data;
int m_length;
private:
bool m_doNotExcludeLineNumbers;
bool m_is8Bit;
String m_string;
};
// A character stream that is read from the front one character at a time.
// The state declared here consists of the segment currently being read
// (m_currentString), a deque of further segments (m_substrings, managed by
// code outside this header), up to two pushed-back characters, a cached copy
// of the current character and the counters behind currentLine() /
// currentColumn() / numberOfCharactersConsumed().
class PLATFORM_EXPORT SegmentedString {
public:
// Creates an empty, open stream. Both advance function pointers start out
// pointing at advanceEmpty.
SegmentedString()
: m_pushedChar1(0)
, m_pushedChar2(0)
, m_currentChar(0)
, m_numberOfCharactersConsumedPriorToCurrentString(0)
, m_numberOfCharactersConsumedPriorToCurrentLine(0)
, m_currentLine(0)
, m_closed(false)
, m_empty(true)
, m_fastPathFlags(NoFastPath)
, m_advanceFunc(&SegmentedString::advanceEmpty)
, m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty)
{
}
// Creates a stream whose current segment is |str|. The first character is
// cached when |str| is non-empty, and the advance function pointers are
// chosen by updateAdvanceFunctionPointers().
SegmentedString(const String& str)
: m_pushedChar1(0)
, m_pushedChar2(0)
, m_currentString(str)
, m_currentChar(0)
, m_numberOfCharactersConsumedPriorToCurrentString(0)
, m_numberOfCharactersConsumedPriorToCurrentLine(0)
, m_currentLine(0)
, m_closed(false)
, m_empty(!str.length())
, m_fastPathFlags(NoFastPath)
{
if (m_currentString.m_length)
m_currentChar = m_currentString.getCurrentChar();
updateAdvanceFunctionPointers();
}
void clear();
void close();
void append(const SegmentedString&);
void prepend(const SegmentedString&);
// Reports the line-number flag of the current segment only.
bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
void setExcludeLineNumbers();
// Pushes |c| back in front of the stream. The first pushed character becomes
// the current character and updateSlowCaseFunctionPointers() is called; a
// second one is stored in m_pushedChar2. A third push is not supported
// (asserted). Note that a |c| of 0 leaves m_pushedChar1 unset, in which case
// the current character is re-read from m_currentString.
void push(UChar c)
{
if (!m_pushedChar1) {
m_pushedChar1 = c;
m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar();
updateSlowCaseFunctionPointers();
} else {
ASSERT(!m_pushedChar2);
m_pushedChar2 = c;
}
}
bool isEmpty() const { return m_empty; }
unsigned length() const;
bool isClosed() const { return m_closed; }
// Outcome of lookAhead() / lookAheadIgnoringCase().
enum LookAheadResult {
DidNotMatch,
DidMatch,
NotEnoughCharacters,
};
// Check whether the upcoming characters start with |string|, case-sensitively
// or not. Both forward to lookAheadInline().
LookAheadResult lookAhead(const String& string) { return lookAheadInline(string, true); }
LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline(string, false); }
// Moves to the next character. When the Use8BitAdvance flag is set the step
// is done inline: the remaining length is decremented, the 8-bit pointer is
// stepped and the new character is cached. Once exactly one character is
// left in the segment, updateSlowCaseFunctionPointers() is called. Without
// the flag the call is dispatched through m_advanceFunc.
void advance()
{
if (m_fastPathFlags & Use8BitAdvance) {
ASSERT(!m_pushedChar1);
bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
m_currentChar = m_currentString.incrementAndGetCurrentChar8();
if (!haveOneCharacterLeft)
return;
updateSlowCaseFunctionPointers();
return;
}
(this->*m_advanceFunc)();
}
// Like advance(), but also maintains the line counters. On the inline path a
// new line is recorded only if the character being left behind is '\n' and
// the Use8BitAdvanceAndUpdateLineNumbers flag is set. The conditions are
// combined with bitwise & and | rather than && and ||, so both operands are
// always evaluated (presumably to keep this hot path free of extra branches).
inline void advanceAndUpdateLineNumber()
{
if (m_fastPathFlags & Use8BitAdvance) {
ASSERT(!m_pushedChar1);
bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers);
bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
m_currentChar = m_currentString.incrementAndGetCurrentChar8();
if (!(haveNewLine | haveOneCharacterLeft))
return;
if (haveNewLine) {
++m_currentLine;
// m_length has already been decremented, so this count includes the
// newline that was just consumed.
m_numberOfCharactersConsumedPriorToCurrentLine = m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed();
}
if (haveOneCharacterLeft)
updateSlowCaseFunctionPointers();
return;
}
(this->*m_advanceAndUpdateLineNumberFunc)();
}
// Asserts that the current character equals |expectedCharacter|, then
// advances.
void advanceAndASSERT(UChar expectedCharacter)
{
ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter);
advance();
}
// Same as advanceAndASSERT(), but the comparison is done on case-folded
// characters.
void advanceAndASSERTIgnoringCase(UChar expectedCharacter)
{
ASSERT_UNUSED(expectedCharacter, WTF::Unicode::foldCase(currentChar()) == WTF::Unicode::foldCase(expectedCharacter));
advance();
}
// Advances without touching the line counters; the current character must
// not be '\n' (asserted).
void advancePastNonNewline()
{
ASSERT(currentChar() != '\n');
advance();
}
// Advances past a '\n' (asserted) and updates the line counters. The inline
// path is taken only when no character is pushed back and more than one
// character remains in the current segment; everything else goes through
// advanceAndUpdateLineNumberSlowCase().
void advancePastNewlineAndUpdateLineNumber()
{
ASSERT(currentChar() == '\n');
if (!m_pushedChar1 && m_currentString.m_length > 1) {
// 1 if the current segment counts towards line numbers, 0 otherwise.
int newLineFlag = m_currentString.doNotExcludeLineNumbers();
m_currentLine += newLineFlag;
if (newLineFlag)
// The newline has not been consumed yet at this point, hence the + 1.
m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
decrementAndCheckLength();
m_currentChar = m_currentString.incrementAndGetCurrentChar();
return;
}
advanceAndUpdateLineNumberSlowCase();
}
// Writes the consumed characters into consumedCharacters, which must
// have space for at least |count| characters.
void advance(unsigned count, UChar* consumedCharacters);
// True while a pushed-back character is pending (m_pushedChar1 is non-zero).
bool escaped() const { return m_pushedChar1; }
// Characters consumed so far: those counted before the current segment plus
// those consumed within it, minus the characters currently pushed back.
int numberOfCharactersConsumed() const
{
int numberOfPushedCharacters = 0;
if (m_pushedChar1) {
++numberOfPushedCharacters;
if (m_pushedChar2)
++numberOfPushedCharacters;
}
return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters;
}
String toString() const;
// Returns the cached current character.
UChar currentChar() const { return m_currentChar; }
// This method is moderately slow compared to currentLine().
OrdinalNumber currentColumn() const;
OrdinalNumber currentLine() const;
// Sets the values of the line/column variables. The column is specified
// indirectly through the parameter columnAftreProlog, which is the column
// value we should get after a prolog (the first prologLength characters)
// has been consumed.
void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength);
private:
// Bits stored in m_fastPathFlags and tested by the inline advance paths.
enum FastPathFlags {
NoFastPath = 0,
Use8BitAdvanceAndUpdateLineNumbers = 1 << 0,
Use8BitAdvance = 1 << 1,
};
void append(const SegmentedSubstring&);
void prepend(const SegmentedSubstring&);
// Targets for m_advanceFunc / m_advanceAndUpdateLineNumberFunc; defined
// outside this header.
void advance8();
void advance16();
void advanceAndUpdateLineNumber8();
void advanceAndUpdateLineNumber16();
void advanceSlowCase();
void advanceAndUpdateLineNumberSlowCase();
void advanceEmpty();
void advanceSubstring();
void updateSlowCaseFunctionPointers();
// Consumes one character of the current segment's length. Requires more than
// one remaining character (asserted); when exactly one is left afterwards,
// updateSlowCaseFunctionPointers() is called.
void decrementAndCheckLength()
{
ASSERT(m_currentString.m_length > 1);
if (--m_currentString.m_length == 1)
updateSlowCaseFunctionPointers();
}
// Chooses the advance functions and fast-path flags for the current state.
// With more than one character left and nothing pushed back, the 8-bit or
// 16-bit functions are selected according to the current segment; when the
// segment excludes line numbers, the plain advance function is also used for
// the line-number-updating pointer. Only the 8-bit case sets fast-path flags.
void updateAdvanceFunctionPointers()
{
if ((m_currentString.m_length > 1) && !m_pushedChar1) {
if (m_currentString.is8Bit()) {
m_advanceFunc = &SegmentedString::advance8;
m_fastPathFlags = Use8BitAdvance;
if (m_currentString.doNotExcludeLineNumbers()) {
m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8;
m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers;
} else {
m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8;
}
return;
}
m_advanceFunc = &SegmentedString::advance16;
m_fastPathFlags = NoFastPath;
if (m_currentString.doNotExcludeLineNumbers())
m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16;
else
m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16;
return;
}
// NOTE(review): this branch does not return, so the call to
// updateSlowCaseFunctionPointers() below still runs afterwards. Whether that
// overwrites the assignments made here depends on its definition, which is
// not visible in this header - confirm that this is intended.
if (!m_currentString.m_length && !isComposite()) {
m_advanceFunc = &SegmentedString::advanceEmpty;
m_fastPathFlags = NoFastPath;
m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
}
updateSlowCaseFunctionPointers();
}
// Shared implementation of lookAhead() / lookAheadIgnoringCase(). When
// nothing is pushed back and |string| is no longer than what remains of the
// current segment, the comparison is made directly against that segment and
// the result is DidMatch or DidNotMatch. Otherwise lookAheadSlowCase() is
// used.
inline LookAheadResult lookAheadInline(const String& string, bool caseSensitive)
{
if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) {
String currentSubstring = m_currentString.currentSubString(string.length());
if (currentSubstring.startsWith(string, caseSensitive))
return DidMatch;
return DidNotMatch;
}
return lookAheadSlowCase(string, caseSensitive);
}
// Slow path of lookAheadInline(). Returns NotEnoughCharacters when |string|
// is longer than length(). Otherwise it consumes string.length() characters
// into a freshly created String, compares that against |string| and then
// prepends the consumed characters back onto this stream.
LookAheadResult lookAheadSlowCase(const String& string, bool caseSensitive)
{
unsigned count = string.length();
if (count > length())
return NotEnoughCharacters;
UChar* consumedCharacters;
String consumedString = String::createUninitialized(count, consumedCharacters);
advance(count, consumedCharacters);
LookAheadResult result = DidNotMatch;
if (consumedString.startsWith(string, caseSensitive))
result = DidMatch;
prepend(SegmentedString(consumedString));
return result;
}
// True when m_substrings holds at least one segment.
bool isComposite() const { return !m_substrings.isEmpty(); }
// Pushed-back characters; 0 means "none". m_pushedChar1 is filled first.
UChar m_pushedChar1;
UChar m_pushedChar2;
// The segment currently being read.
SegmentedSubstring m_currentString;
// Cached value returned by currentChar().
UChar m_currentChar;
int m_numberOfCharactersConsumedPriorToCurrentString;
int m_numberOfCharactersConsumedPriorToCurrentLine;
int m_currentLine;
Deque<SegmentedSubstring> m_substrings;
bool m_closed;
bool m_empty;
// Combination of FastPathFlags bits.
unsigned char m_fastPathFlags;
// Out-of-line advance implementations used when the inline fast path in
// advance() / advanceAndUpdateLineNumber() does not apply.
void (SegmentedString::*m_advanceFunc)();
void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)();
};
}
#endif