// blob: a3e151b7e0c651d8b9d27490181289576a33a558 [file] [log] [blame]
/*
* Copyright (C) 2008 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef Parser_h
#define Parser_h
#include <wtf/Platform.h>
#if ENABLE(WREC)
#include "Escapes.h"
#include "Quantifier.h"
#include "UString.h"
#include "WRECGenerator.h"
#include <wtf/ASCIICType.h>
namespace JSC { namespace WREC {
struct CharacterClass;
// Parses a regular-expression pattern held as an array of UChar code units.
//
// The parser keeps a cursor (m_index) into the pattern, a count of recorded
// subpatterns, and the first error reported through setError(). It also owns
// the Generator, which is constructed with a reference back to this parser.
// The parse*() members and the consumeQuantifier()/consumeEscape()/
// consumeParenthesesType() family are only declared here; their definitions
// live elsewhere.
//
// NOTE(review): m_data aliases pattern.data() without copying, so presumably
// the UString handed to the constructor must outlive the Parser -- confirm
// against callers.
class Parser {
    typedef Generator::JumpList JumpList;
    typedef Generator::ParenthesesType ParenthesesType;

    // Declare the nested class before befriending it. Without this forward
    // declaration, "friend class SavedState" would introduce (and befriend)
    // an unrelated class at namespace scope instead of Parser::SavedState.
    class SavedState;
    friend class SavedState;

public:
    // Binds the parser to |pattern| and records the matching flags.
    // Members are initialized in declaration order, so m_generator receives
    // *this before the remaining members have been set up.
    Parser(const UString& pattern, bool ignoreCase, bool multiline)
        : m_generator(*this)
        , m_data(pattern.data())
        , m_size(pattern.size())
        , m_ignoreCase(ignoreCase)
        , m_multiline(multiline)
    {
        reset();
    }

    Generator& generator() { return m_generator; }

    bool ignoreCase() const { return m_ignoreCase; }
    bool multiline() const { return m_multiline; }

    // Subpattern bookkeeping: recordSubpattern() bumps the counter that
    // numSubpatterns() reports; reset() returns it to zero.
    void recordSubpattern() { ++m_numSubpatterns; }
    unsigned numSubpatterns() const { return m_numSubpatterns; }

    // The first error recorded since the last reset(), or 0 if there is none.
    const char* error() const { return m_error; }

    // Same as error(), except that ParenthesesNotSupported is reported as
    // "no error" (0).
    const char* syntaxError() const { return m_error == ParenthesesNotSupported ? 0 : m_error; }

    // Parses the whole pattern from the beginning. Any state left over from
    // a previous parse (cursor, subpattern count, error) is discarded first.
    void parsePattern(JumpList& failures)
    {
        reset();

        parseDisjunction(failures);

        // Parsing the pattern should fully consume it; leftover input is
        // reported as ParenthesesUnmatched. setError() keeps an earlier
        // error if one was already recorded.
        if (peek() != EndOfPattern)
            setError(ParenthesesUnmatched);
    }

    void parseDisjunction(JumpList& failures);
    void parseAlternative(JumpList& failures);
    bool parseTerm(JumpList& failures);
    bool parseNonCharacterEscape(JumpList& failures, const Escape&);
    bool parseParentheses(JumpList& failures);
    bool parseCharacterClass(JumpList& failures);
    bool parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert);
    bool parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId);

private:
    // Snapshot of the parser's cursor. restore() rewinds m_index to the
    // value captured at construction; the subpattern count and the recorded
    // error are not touched.
    class SavedState {
    public:
        SavedState(Parser& parser)
            : m_parser(parser)
            , m_index(parser.m_index)
        {
        }

        void restore()
        {
            m_parser.m_index = m_index;
        }

    private:
        Parser& m_parser;
        unsigned m_index;
    };

    // Returns the parser to its initial state: cursor at the start of the
    // pattern, no subpatterns counted, no error recorded.
    void reset()
    {
        m_index = 0;
        m_numSubpatterns = 0;
        m_error = 0;
    }

    // Records |error| unless an error has already been recorded; only the
    // first error is kept.
    void setError(const char* error)
    {
        if (m_error)
            return;
        m_error = error;
    }

    // Returns the code unit under the cursor without advancing, or
    // EndOfPattern once the cursor has reached the end of the pattern.
    int peek() const
    {
        if (m_index >= m_size)
            return EndOfPattern;
        return m_data[m_index];
    }

    // Returns the code unit under the cursor and advances past it. At the
    // end of the pattern it returns EndOfPattern and leaves the cursor alone.
    int consume()
    {
        if (m_index >= m_size)
            return EndOfPattern;
        return m_data[m_index++];
    }

    bool peekIsDigit() const
    {
        return WTF::isASCIIDigit(peek());
    }

    // Numeric value of the digit under the cursor. The caller must have
    // established that the cursor is on a digit (asserted).
    unsigned peekDigit() const
    {
        ASSERT(peekIsDigit());
        return peek() - '0';
    }

    // Like peekDigit(), but also advances past the digit.
    unsigned consumeDigit()
    {
        ASSERT(peekIsDigit());
        return consume() - '0';
    }

    // Consumes a maximal run of decimal digits and returns its value. The
    // cursor must be on a digit when this is called (asserted by
    // consumeDigit()).
    //
    // The value is accumulated in unsigned arithmetic and clamped to the
    // largest unsigned value, so an arbitrarily long digit run in the pattern
    // cannot trigger signed-overflow undefined behaviour. All digits of the
    // run are consumed even after the value has been clamped.
    unsigned consumeNumber()
    {
        const unsigned maxValue = static_cast<unsigned>(-1);

        unsigned n = consumeDigit();
        while (peekIsDigit()) {
            unsigned digit = consumeDigit();
            // n * 10 + digit fits exactly when n <= (maxValue - digit) / 10.
            if (n > (maxValue - digit) / 10)
                n = maxValue;
            else
                n = n * 10 + digit;
        }
        return n;
    }

    // Consumes |count| hexadecimal digits and returns their value. Returns
    // -1 as soon as a non-hex character (or the end of the pattern) is seen;
    // digits consumed before that point stay consumed, the cursor is not
    // rewound here.
    int consumeHex(int count)
    {
        int n = 0;
        while (count--) {
            if (!WTF::isASCIIHexDigit(peek()))
                return -1;
            n = (n << 4) | WTF::toASCIIHexValue(consume());
        }
        return n;
    }

    // Consumes octal digits and returns their value. Accumulation stops once
    // the value reaches 32 or more, so the result never exceeds
    // 31 * 8 + 7 == 255. Returns 0 without consuming anything if the cursor
    // is not on an octal digit.
    unsigned consumeOctal()
    {
        unsigned n = 0;
        while (n < 32 && WTF::isASCIIOctalDigit(peek()))
            n = n * 8 + consumeDigit();
        return n;
    }

    ALWAYS_INLINE Quantifier consumeGreedyQuantifier();
    Quantifier consumeQuantifier();
    Escape consumeEscape(bool inCharacterClass);
    ParenthesesType consumeParenthesesType();

    // Sentinel returned by peek()/consume() when no input remains.
    static const int EndOfPattern = -1;

    // Error messages. Declared here; the strings are defined elsewhere.
    static const char* QuantifierOutOfOrder;
    static const char* QuantifierWithoutAtom;
    static const char* ParenthesesUnmatched;
    static const char* ParenthesesTypeInvalid;
    static const char* ParenthesesNotSupported;
    static const char* CharacterClassUnmatched;
    static const char* CharacterClassOutOfOrder;
    static const char* EscapeUnterminated;

    Generator m_generator;       // Constructed from *this; see the constructor.
    const UChar* m_data;         // Pattern code units (not owned by the parser).
    unsigned m_size;             // Number of code units in m_data.
    unsigned m_index;            // Cursor: index of the next code unit to read.
    bool m_ignoreCase;
    bool m_multiline;
    unsigned m_numSubpatterns;   // Incremented by recordSubpattern().
    const char* m_error;         // First recorded error, or 0.
};
} } // namespace JSC::WREC
#endif // ENABLE(WREC)
#endif // Parser_h