/*
* Copyright (C) 2009 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h" | |
#include "LiteralParser.h" | |
#include "JSArray.h" | |
#include "JSString.h" | |
#include "Lexer.h" | |
#include "StringBuilder.h" | |
#include <wtf/ASCIICType.h> | |
#include <wtf/dtoa.h> | |
namespace JSC { | |
// Scans the next token starting at m_ptr and describes it in |token|.
//
// Leading ASCII whitespace is skipped first. If the input is exhausted the
// token is marked TokEnd with an empty [start, end) range. Otherwise
// token.start is set to the first non-space character and the token is
// classified from that character:
//   - punctuation ([ ] ( ) { } , :) yields a single-character token;
//   - '"' is handed to lexString<>, selected by m_mode;
//   - 't' / 'f' / 'n' must spell exactly "true" / "false" / "null";
//   - '-' or a digit is handed to lexNumber.
//
// For every token recognized directly in this function the return value is
// the same TokenType that is stored in token.type. Anything unrecognized
// returns TokError, leaving token.type == TokError and m_ptr on the offending
// character.
LiteralParser::TokenType LiteralParser::Lexer::lex(LiteralParserToken& token)
{
    while (m_ptr < m_end && isASCIISpace(*m_ptr))
        ++m_ptr;

    ASSERT(m_ptr <= m_end);
    if (m_ptr >= m_end) {
        token.type = TokEnd;
        token.start = token.end = m_ptr;
        return TokEnd;
    }

    // Assume failure until a token is positively identified below.
    token.type = TokError;
    token.start = m_ptr;
    switch (*m_ptr) {
    case '[':
        token.type = TokLBracket;
        token.end = ++m_ptr;
        return TokLBracket;
    case ']':
        token.type = TokRBracket;
        token.end = ++m_ptr;
        return TokRBracket;
    case '(':
        token.type = TokLParen;
        token.end = ++m_ptr;
        // Must match token.type: returning TokLBracket here would make
        // callers that test the return value mistake '(' for '['.
        return TokLParen;
    case ')':
        token.type = TokRParen;
        token.end = ++m_ptr;
        // Must match token.type: returning TokRBracket here would let ')'
        // terminate an array for callers that test the return value.
        return TokRParen;
    case '{':
        token.type = TokLBrace;
        token.end = ++m_ptr;
        return TokLBrace;
    case '}':
        token.type = TokRBrace;
        token.end = ++m_ptr;
        return TokRBrace;
    case ',':
        token.type = TokComma;
        token.end = ++m_ptr;
        return TokComma;
    case ':':
        token.type = TokColon;
        token.end = ++m_ptr;
        return TokColon;
    case '"':
        if (m_mode == StrictJSON)
            return lexString<StrictJSON>(token);
        return lexString<NonStrictJSON>(token);
    case 't':
        // The length check guards the m_ptr[1..3] reads.
        if (m_end - m_ptr >= 4 && m_ptr[1] == 'r' && m_ptr[2] == 'u' && m_ptr[3] == 'e') {
            m_ptr += 4;
            token.type = TokTrue;
            token.end = m_ptr;
            return TokTrue;
        }
        break;
    case 'f':
        if (m_end - m_ptr >= 5 && m_ptr[1] == 'a' && m_ptr[2] == 'l' && m_ptr[3] == 's' && m_ptr[4] == 'e') {
            m_ptr += 5;
            token.type = TokFalse;
            token.end = m_ptr;
            return TokFalse;
        }
        break;
    case 'n':
        if (m_end - m_ptr >= 4 && m_ptr[1] == 'u' && m_ptr[2] == 'l' && m_ptr[3] == 'l') {
            m_ptr += 4;
            token.type = TokNull;
            token.end = m_ptr;
            return TokNull;
        }
        break;
    case '-':
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
        return lexNumber(token);
    }
    return TokError;
}
template <LiteralParser::ParserMode mode> static inline bool isSafeStringCharacter(UChar c) | |
{ | |
return (c >= ' ' && (mode == LiteralParser::StrictJSON || c <= 0xff) && c != '\\' && c != '"') || c == '\t'; | |
} | |
// Lexes a double-quoted string. On entry m_ptr is on the opening quote; on
// success the decoded contents are stored in token.stringToken, token.end is
// set just past the closing quote, and TokString is returned. Any malformed
// or unterminated string returns TokError.
//
// Backslash escapes are decoded only when |mode| is StrictJSON. In other
// modes the string must consist of a single run of safe characters followed
// directly by the closing quote.
//
// "inline" is required here to help WINSCW compiler resolve specialized argument in templated functions.
template <LiteralParser::ParserMode mode> inline LiteralParser::TokenType LiteralParser::Lexer::lexString(LiteralParserToken& token)
{
    ++m_ptr; // Step over the opening quote.
    StringBuilder builder;
    for (;;) {
        // Copy the longest run of characters that need no processing.
        const UChar* runStart = m_ptr;
        while (m_ptr < m_end && isSafeStringCharacter<mode>(*m_ptr))
            ++m_ptr;
        if (m_ptr > runStart)
            builder.append(runStart, m_ptr - runStart);

        if ((mode == StrictJSON) && m_ptr < m_end && *m_ptr == '\\') {
            ++m_ptr;
            if (m_ptr >= m_end)
                return TokError;
            switch (*m_ptr) {
            case '"':
                builder.append('"');
                break;
            case '\\':
                builder.append('\\');
                break;
            case '/':
                builder.append('/');
                break;
            case 'b':
                builder.append('\b');
                break;
            case 'f':
                builder.append('\f');
                break;
            case 'n':
                builder.append('\n');
                break;
            case 'r':
                builder.append('\r');
                break;
            case 't':
                builder.append('\t');
                break;
            case 'u': {
                // m_ptr is on the 'u'; "uNNNN" needs five characters in total.
                if ((m_end - m_ptr) < 5)
                    return TokError;
                for (int digit = 1; digit <= 4; ++digit) {
                    if (!isASCIIHexDigit(m_ptr[digit]))
                        return TokError;
                }
                builder.append(JSC::Lexer::convertUnicode(m_ptr[1], m_ptr[2], m_ptr[3], m_ptr[4]));
                // Skip 'u' and three digits; the shared increment below
                // consumes the final digit.
                m_ptr += 4;
                break;
            }
            default:
                return TokError;
            }
            ++m_ptr; // Consume the (last character of the) escape.
        }

        // Stop when not in strict mode, when this pass consumed nothing,
        // at end of input, or when the closing quote has been reached.
        if ((mode != StrictJSON) || m_ptr == runStart || m_ptr >= m_end || *m_ptr == '"')
            break;
    }

    if (m_ptr >= m_end || *m_ptr != '"')
        return TokError;

    token.stringToken = builder.build();
    token.type = TokString;
    token.end = ++m_ptr;
    return TokString;
}
// Lexes a numeric literal beginning at m_ptr (token.start has already been
// set by the caller to the same position). On success token.end marks the end
// of the literal, token.numberToken holds its value as converted by
// WTF::strtod, and TokNumber is returned. Text that does not match the
// grammar below returns TokError.
LiteralParser::TokenType LiteralParser::Lexer::lexNumber(LiteralParserToken& token)
{
    // ES5 and json.org define numbers as
    // number
    //     int
    //     int frac? exp?
    //
    // int
    //     -? 0
    //     -? digit1-9 digits?
    //
    // digits
    //     digit digits?
    //
    // -?(0 | [1-9][0-9]*) ('.' [0-9]+)? ([eE][+-]? [0-9]+)?

    // Optional sign: -?
    if (m_ptr < m_end && *m_ptr == '-')
        ++m_ptr;

    // Integer part: (0 | [1-9][0-9]*)
    if (m_ptr >= m_end)
        return TokError;
    if (*m_ptr == '0')
        ++m_ptr;
    else if (*m_ptr >= '1' && *m_ptr <= '9') {
        do {
            ++m_ptr;
        } while (m_ptr < m_end && isASCIIDigit(*m_ptr));
    } else
        return TokError;

    // Fraction: ('.' [0-9]+)?
    if (m_ptr < m_end && *m_ptr == '.') {
        ++m_ptr;
        // At least one digit must follow the decimal point.
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
            return TokError;
        do {
            ++m_ptr;
        } while (m_ptr < m_end && isASCIIDigit(*m_ptr));
    }

    // Exponent: ([eE][+-]? [0-9]+)?
    if (m_ptr < m_end && (*m_ptr == 'e' || *m_ptr == 'E')) {
        ++m_ptr;
        if (m_ptr < m_end && (*m_ptr == '-' || *m_ptr == '+'))
            ++m_ptr;
        // At least one digit must follow the exponent marker and sign.
        if (m_ptr >= m_end || !isASCIIDigit(*m_ptr))
            return TokError;
        do {
            ++m_ptr;
        } while (m_ptr < m_end && isASCIIDigit(*m_ptr));
    }

    token.type = TokNumber;
    token.end = m_ptr;

    // Narrow the literal into a NUL-terminated char buffer for strtod.
    const size_t length = token.end - token.start;
    Vector<char, 64> buffer(length + 1);
    for (size_t index = 0; index < length; ++index) {
        ASSERT(static_cast<char>(token.start[index]) == token.start[index]);
        buffer[index] = static_cast<char>(token.start[index]);
    }
    buffer[length] = 0;

    char* end;
    token.numberToken = WTF::strtod(buffer.data(), &end);
    // strtod is expected to consume exactly the text that was lexed.
    ASSERT(buffer.data() + length == end);
    return TokNumber;
}
// Parses the tokens produced by m_lexer, starting in |initialState|, and
// returns the resulting value. An empty JSValue() is returned on any syntax
// error.
//
// The parser is an explicit state machine rather than a recursive descent:
//   - objectStack holds the arrays/objects currently under construction;
//   - identifierStack holds the pending property name for each open object;
//   - stateStack holds the state to resume once a nested value is complete.
// After a state produces a value in lastValue, control leaves the switch and
// either returns lastValue (nothing left to resume) or pops the next state.
// The goto labels let one state enter another directly without going through
// stateStack.
JSValue LiteralParser::parse(ParserState initialState)
{
    ParserState state = initialState;
    MarkedArgumentBuffer objectStack;
    JSValue lastValue;
    Vector<ParserState, 16> stateStack;
    Vector<Identifier, 16> identifierStack;
    for (;;) {
        switch (state) {
        startParseArray:
        case StartParseArray: {
            objectStack.append(constructEmptyArray(m_exec));
            // fallthrough
        }
        doParseArrayStartExpression:
        case DoParseArrayStartExpression: {
            // Remember what preceded this element so "[1,]" can be rejected.
            const TokenType previousType = m_lexer.currentToken().type;
            if (m_lexer.next() != TokRBracket) {
                stateStack.append(DoParseArrayEndExpression);
                goto startParseExpression;
            }
            if (previousType == TokComma)
                return JSValue();
            m_lexer.next();
            lastValue = objectStack.last();
            objectStack.removeLast();
            break;
        }
        case DoParseArrayEndExpression: {
            // An element has just been parsed; add it to the open array.
            asArray(objectStack.last())->push(m_exec, lastValue);
            const TokenType followingType = m_lexer.currentToken().type;
            if (followingType == TokComma)
                goto doParseArrayStartExpression;
            if (followingType != TokRBracket)
                return JSValue();
            m_lexer.next();
            lastValue = objectStack.last();
            objectStack.removeLast();
            break;
        }
        startParseObject:
        case StartParseObject: {
            objectStack.append(constructEmptyObject(m_exec));
            const TokenType firstType = m_lexer.next();
            if (firstType != TokString) {
                // Only an immediately closed, empty object is valid here.
                if (firstType != TokRBrace)
                    return JSValue();
                m_lexer.next();
                lastValue = objectStack.last();
                objectStack.removeLast();
                break;
            }
            // Copy the key token before advancing the lexer past it.
            Lexer::LiteralParserToken keyToken = m_lexer.currentToken();
            // Check for colon
            if (m_lexer.next() != TokColon)
                return JSValue();
            m_lexer.next();
            identifierStack.append(Identifier(m_exec, keyToken.stringToken));
            stateStack.append(DoParseObjectEndExpression);
            goto startParseExpression;
        }
        doParseObjectStartExpression:
        case DoParseObjectStartExpression: {
            // After a comma a property name is mandatory.
            if (m_lexer.next() != TokString)
                return JSValue();
            // Copy the key token before advancing the lexer past it.
            Lexer::LiteralParserToken keyToken = m_lexer.currentToken();
            // Check for colon
            if (m_lexer.next() != TokColon)
                return JSValue();
            m_lexer.next();
            identifierStack.append(Identifier(m_exec, keyToken.stringToken));
            stateStack.append(DoParseObjectEndExpression);
            goto startParseExpression;
        }
        case DoParseObjectEndExpression: {
            // A property value has just been parsed; store it under its name.
            asObject(objectStack.last())->putDirect(identifierStack.last(), lastValue);
            identifierStack.removeLast();
            const TokenType followingType = m_lexer.currentToken().type;
            if (followingType == TokComma)
                goto doParseObjectStartExpression;
            if (followingType != TokRBrace)
                return JSValue();
            m_lexer.next();
            lastValue = objectStack.last();
            objectStack.removeLast();
            break;
        }
        startParseExpression:
        case StartParseExpression: {
            switch (m_lexer.currentToken().type) {
            case TokLBracket:
                goto startParseArray;
            case TokLBrace:
                goto startParseObject;
            case TokString: {
                // Copy the token first: next() replaces the current token.
                Lexer::LiteralParserToken stringLiteral = m_lexer.currentToken();
                m_lexer.next();
                lastValue = jsString(m_exec, stringLiteral.stringToken);
                break;
            }
            case TokNumber: {
                // Copy the token first: next() replaces the current token.
                Lexer::LiteralParserToken numberLiteral = m_lexer.currentToken();
                m_lexer.next();
                lastValue = jsNumber(m_exec, numberLiteral.numberToken);
                break;
            }
            case TokNull:
                m_lexer.next();
                lastValue = jsNull();
                break;
            case TokTrue:
                m_lexer.next();
                lastValue = jsBoolean(true);
                break;
            case TokFalse:
                m_lexer.next();
                lastValue = jsBoolean(false);
                break;
            default:
                // Error
                return JSValue();
            }
            break;
        }
        case StartParseStatement: {
            switch (m_lexer.currentToken().type) {
            case TokLBracket:
            case TokNumber:
            case TokString:
                goto startParseExpression;
            case TokLParen: {
                // Parenthesized expression: parse the contents, then
                // require the closing parenthesis afterwards.
                m_lexer.next();
                stateStack.append(StartParseStatementEndStatement);
                goto startParseExpression;
            }
            default:
                return JSValue();
            }
        }
        case StartParseStatementEndStatement: {
            ASSERT(stateStack.isEmpty());
            if (m_lexer.currentToken().type != TokRParen)
                return JSValue();
            // Nothing may follow the closing parenthesis.
            return m_lexer.next() == TokEnd ? lastValue : JSValue();
        }
        default:
            ASSERT_NOT_REACHED();
        }

        // A value was produced: finish, or resume the enclosing construct.
        if (stateStack.isEmpty())
            return lastValue;
        state = stateStack.last();
        stateStack.removeLast();
    }
}
} |