/*
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All Rights Reserved.
 *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */
#include "config.h" | |
#include "Lexer.h" | |
#include "JSFunction.h" | |
#include "JSGlobalObjectFunctions.h" | |
#include "NodeInfo.h" | |
#include "Nodes.h" | |
#include "dtoa.h" | |
#include <ctype.h> | |
#include <limits.h> | |
#include <string.h> | |
#include <wtf/Assertions.h> | |
using namespace WTF; | |
using namespace Unicode; | |
// We can't specify the namespace in yacc's C output, so do it here instead. | |
using namespace JSC; | |
#include "Grammar.h" | |
#include "Lookup.h" | |
#include "Lexer.lut.h" | |
namespace JSC { | |
// U+FEFF (byte order mark); the only character the lexer strips from source text.
static const UChar byteOrderMark(0xFEFF);
// Creates a lexer tied to |globalData| that looks keywords up in JSC::mainTable.
// Both scratch buffers get their initial capacity reserved up front.
Lexer::Lexer(JSGlobalData* globalData)
    : m_isReparsing(false)
    , m_globalData(globalData)
    , m_keywordTable(JSC::mainTable)
{
    m_buffer16.reserveInitialCapacity(initialReadBufferCapacity);
    m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
}
// Releases the keyword table held by this lexer.
Lexer::~Lexer()
{
    this->m_keywordTable.deleteTable();
}
// Returns the address of the character held in m_current. m_code runs four
// characters ahead because m_current and m_next1..m_next3 are pre-loaded.
inline const UChar* Lexer::currentCharacter() const
{
    const UChar* const readAhead = m_code;
    return readAhead - 4;
}
inline int Lexer::currentOffset() const | |
{ | |
return currentCharacter() - m_codeStart; | |
} | |
// Advances the 4-character window by one character. Positions past the end of
// the source are filled with -1.
ALWAYS_INLINE void Lexer::shift1()
{
    const int incoming = LIKELY(m_code < m_codeEnd) ? m_code[0] : -1;

    m_current = m_next1;
    m_next1 = m_next2;
    m_next2 = m_next3;
    m_next3 = incoming;

    ++m_code;
}
// Advances the 4-character window by two characters. Positions past the end of
// the source are filled with -1.
ALWAYS_INLINE void Lexer::shift2()
{
    const ptrdiff_t remaining = m_codeEnd - m_code;

    m_current = m_next2;
    m_next1 = m_next3;

    if (LIKELY(remaining > 1)) {
        m_next2 = m_code[0];
        m_next3 = m_code[1];
    } else {
        m_next2 = remaining > 0 ? m_code[0] : -1;
        m_next3 = -1;
    }

    m_code += 2;
}
// Advances the 4-character window by three characters. Positions past the end
// of the source are filled with -1.
ALWAYS_INLINE void Lexer::shift3()
{
    const ptrdiff_t remaining = m_codeEnd - m_code;

    m_current = m_next3;

    if (LIKELY(remaining > 2)) {
        m_next1 = m_code[0];
        m_next2 = m_code[1];
        m_next3 = m_code[2];
    } else {
        m_next1 = remaining > 0 ? m_code[0] : -1;
        m_next2 = remaining > 1 ? m_code[1] : -1;
        m_next3 = -1;
    }

    m_code += 3;
}
// Replaces the whole 4-character window with the next four characters.
// Positions past the end of the source are filled with -1.
ALWAYS_INLINE void Lexer::shift4()
{
    const ptrdiff_t remaining = m_codeEnd - m_code;

    if (LIKELY(remaining > 3)) {
        m_current = m_code[0];
        m_next1 = m_code[1];
        m_next2 = m_code[2];
        m_next3 = m_code[3];
    } else {
        m_current = remaining > 0 ? m_code[0] : -1;
        m_next1 = remaining > 1 ? m_code[1] : -1;
        m_next2 = remaining > 2 ? m_code[2] : -1;
        m_next3 = -1;
    }

    m_code += 4;
}
void Lexer::setCode(const SourceCode& source, ParserArena& arena) | |
{ | |
m_arena = &arena.identifierArena(); | |
m_lineNumber = source.firstLine(); | |
m_delimited = false; | |
m_lastToken = -1; | |
const UChar* data = source.provider()->data(); | |
m_source = &source; | |
m_codeStart = data; | |
m_code = data + source.startOffset(); | |
m_codeEnd = data + source.endOffset(); | |
m_error = false; | |
m_atLineStart = true; | |
// ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters. | |
// See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details. | |
if (source.provider()->hasBOMs()) { | |
for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) { | |
if (UNLIKELY(*p == byteOrderMark)) { | |
copyCodeWithoutBOMs(); | |
break; | |
} | |
} | |
} | |
// Read the first characters into the 4-character buffer. | |
shift4(); | |
ASSERT(currentOffset() == source.startOffset()); | |
} | |
void Lexer::copyCodeWithoutBOMs() | |
{ | |
// Note: In this case, the character offset data for debugging will be incorrect. | |
// If it's important to correctly debug code with extraneous BOMs, then the caller | |
// should strip the BOMs when creating the SourceProvider object and do its own | |
// mapping of offsets within the stripped text to original text offset. | |
m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code); | |
for (const UChar* p = m_code; p < m_codeEnd; ++p) { | |
UChar c = *p; | |
if (c != byteOrderMark) | |
m_codeWithoutBOMs.append(c); | |
} | |
ptrdiff_t startDelta = m_codeStart - m_code; | |
m_code = m_codeWithoutBOMs.data(); | |
m_codeStart = m_code + startDelta; | |
m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size(); | |
} | |
void Lexer::shiftLineTerminator() | |
{ | |
ASSERT(isLineTerminator(m_current)); | |
// Allow both CRLF and LFCR. | |
if (m_current + m_next1 == '\n' + '\r') | |
shift2(); | |
else | |
shift1(); | |
++m_lineNumber; | |
} | |
// Asks the identifier arena for an Identifier built from |length| characters
// starting at |characters| and returns its address.
ALWAYS_INLINE const Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
{
    const Identifier& identifier = m_arena->makeIdentifier(m_globalData, characters, length);
    return &identifier;
}
inline bool Lexer::lastTokenWasRestrKeyword() const | |
{ | |
return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW; | |
} | |
// Slow path of isIdentStart(): true when the Unicode category of |c| is one of
// the letter categories.
static NEVER_INLINE bool isNonASCIIIdentStart(int c)
{
    const int letterCategories = Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other;
    return category(c) & letterCategories;
}
static inline bool isIdentStart(int c) | |
{ | |
return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c); | |
} | |
// Slow path of isIdentPart(): true when the Unicode category of |c| is a letter,
// a non-spacing or spacing-combining mark, a decimal digit or a connector
// punctuation.
static NEVER_INLINE bool isNonASCIIIdentPart(int c)
{
    const int letterCategories = Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other;
    const int extraPartCategories = Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector;
    return category(c) & (letterCategories | extraPartCategories);
}
static inline bool isIdentPart(int c) | |
{ | |
return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c); | |
} | |
// Maps the character following a backslash in a string literal to the value it
// denotes: b, t, n, v, f and r become the matching control character; any other
// character stands for itself.
static inline int singleEscape(int c)
{
    if (c == 'b')
        return 0x08;
    if (c == 't')
        return 0x09;
    if (c == 'n')
        return 0x0A;
    if (c == 'v')
        return 0x0B;
    if (c == 'f')
        return 0x0C;
    if (c == 'r')
        return 0x0D;
    return c;
}
inline void Lexer::record8(int c) | |
{ | |
ASSERT(c >= 0); | |
ASSERT(c <= 0xFF); | |
m_buffer8.append(static_cast<char>(c)); | |
} | |
// Appends one UTF-16 code unit to the 16-bit scratch buffer.
inline void Lexer::record16(UChar c)
{
    this->m_buffer16.append(c);
}
inline void Lexer::record16(int c) | |
{ | |
ASSERT(c >= 0); | |
ASSERT(c <= USHRT_MAX); | |
record16(UChar(static_cast<unsigned short>(c))); | |
} | |
int Lexer::lex(void* p1, void* p2) | |
{ | |
ASSERT(!m_error); | |
ASSERT(m_buffer8.isEmpty()); | |
ASSERT(m_buffer16.isEmpty()); | |
YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1); | |
YYLTYPE* llocp = static_cast<YYLTYPE*>(p2); | |
int token = 0; | |
m_terminator = false; | |
start: | |
while (isWhiteSpace(m_current)) | |
shift1(); | |
int startOffset = currentOffset(); | |
if (m_current == -1) { | |
#ifndef QT_BUILD_SCRIPT_LIB /* the parser takes cate about automatic semicolon. | |
this might add incorrect semicolons */ | |
//m_delimited and m_isReparsing are now useless | |
if (!m_terminator && !m_delimited && !m_isReparsing) { | |
// automatic semicolon insertion if program incomplete | |
token = ';'; | |
goto doneSemicolon; | |
} | |
#endif | |
return 0; | |
} | |
m_delimited = false; | |
switch (m_current) { | |
case '>': | |
if (m_next1 == '>' && m_next2 == '>') { | |
if (m_next3 == '=') { | |
shift4(); | |
token = URSHIFTEQUAL; | |
break; | |
} | |
shift3(); | |
token = URSHIFT; | |
break; | |
} | |
if (m_next1 == '>') { | |
if (m_next2 == '=') { | |
shift3(); | |
token = RSHIFTEQUAL; | |
break; | |
} | |
shift2(); | |
token = RSHIFT; | |
break; | |
} | |
if (m_next1 == '=') { | |
shift2(); | |
token = GE; | |
break; | |
} | |
shift1(); | |
token = '>'; | |
break; | |
case '=': | |
if (m_next1 == '=') { | |
if (m_next2 == '=') { | |
shift3(); | |
token = STREQ; | |
break; | |
} | |
shift2(); | |
token = EQEQ; | |
break; | |
} | |
shift1(); | |
token = '='; | |
break; | |
case '!': | |
if (m_next1 == '=') { | |
if (m_next2 == '=') { | |
shift3(); | |
token = STRNEQ; | |
break; | |
} | |
shift2(); | |
token = NE; | |
break; | |
} | |
shift1(); | |
token = '!'; | |
break; | |
case '<': | |
if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') { | |
// <!-- marks the beginning of a line comment (for www usage) | |
shift4(); | |
goto inSingleLineComment; | |
} | |
if (m_next1 == '<') { | |
if (m_next2 == '=') { | |
shift3(); | |
token = LSHIFTEQUAL; | |
break; | |
} | |
shift2(); | |
token = LSHIFT; | |
break; | |
} | |
if (m_next1 == '=') { | |
shift2(); | |
token = LE; | |
break; | |
} | |
shift1(); | |
token = '<'; | |
break; | |
case '+': | |
if (m_next1 == '+') { | |
shift2(); | |
if (m_terminator) { | |
token = AUTOPLUSPLUS; | |
break; | |
} | |
token = PLUSPLUS; | |
break; | |
} | |
if (m_next1 == '=') { | |
shift2(); | |
token = PLUSEQUAL; | |
break; | |
} | |
shift1(); | |
token = '+'; | |
break; | |
case '-': | |
if (m_next1 == '-') { | |
if (m_atLineStart && m_next2 == '>') { | |
shift3(); | |
goto inSingleLineComment; | |
} | |
shift2(); | |
if (m_terminator) { | |
token = AUTOMINUSMINUS; | |
break; | |
} | |
token = MINUSMINUS; | |
break; | |
} | |
if (m_next1 == '=') { | |
shift2(); | |
token = MINUSEQUAL; | |
break; | |
} | |
shift1(); | |
token = '-'; | |
break; | |
case '*': | |
if (m_next1 == '=') { | |
shift2(); | |
token = MULTEQUAL; | |
break; | |
} | |
shift1(); | |
token = '*'; | |
break; | |
case '/': | |
if (m_next1 == '/') { | |
shift2(); | |
goto inSingleLineComment; | |
} | |
if (m_next1 == '*') | |
goto inMultiLineComment; | |
if (m_next1 == '=') { | |
shift2(); | |
token = DIVEQUAL; | |
break; | |
} | |
shift1(); | |
token = '/'; | |
break; | |
case '&': | |
if (m_next1 == '&') { | |
shift2(); | |
token = AND; | |
break; | |
} | |
if (m_next1 == '=') { | |
shift2(); | |
token = ANDEQUAL; | |
break; | |
} | |
shift1(); | |
token = '&'; | |
break; | |
case '^': | |
if (m_next1 == '=') { | |
shift2(); | |
token = XOREQUAL; | |
break; | |
} | |
shift1(); | |
token = '^'; | |
break; | |
case '%': | |
if (m_next1 == '=') { | |
shift2(); | |
token = MODEQUAL; | |
break; | |
} | |
shift1(); | |
token = '%'; | |
break; | |
case '|': | |
if (m_next1 == '=') { | |
shift2(); | |
token = OREQUAL; | |
break; | |
} | |
if (m_next1 == '|') { | |
shift2(); | |
token = OR; | |
break; | |
} | |
shift1(); | |
token = '|'; | |
break; | |
case '.': | |
if (isASCIIDigit(m_next1)) { | |
record8('.'); | |
shift1(); | |
goto inNumberAfterDecimalPoint; | |
} | |
token = '.'; | |
shift1(); | |
break; | |
case ',': | |
case '~': | |
case '?': | |
case ':': | |
case '(': | |
case ')': | |
case '[': | |
case ']': | |
token = m_current; | |
shift1(); | |
break; | |
case ';': | |
shift1(); | |
m_delimited = true; | |
token = ';'; | |
break; | |
case '{': | |
lvalp->intValue = currentOffset(); | |
shift1(); | |
token = OPENBRACE; | |
break; | |
case '}': | |
lvalp->intValue = currentOffset(); | |
shift1(); | |
m_delimited = true; | |
token = CLOSEBRACE; | |
break; | |
case '\\': | |
goto startIdentifierWithBackslash; | |
case '0': | |
goto startNumberWithZeroDigit; | |
case '1': | |
case '2': | |
case '3': | |
case '4': | |
case '5': | |
case '6': | |
case '7': | |
case '8': | |
case '9': | |
goto startNumber; | |
case '"': | |
case '\'': | |
goto startString; | |
default: | |
if (isIdentStart(m_current)) | |
goto startIdentifierOrKeyword; | |
if (isLineTerminator(m_current)) { | |
shiftLineTerminator(); | |
m_atLineStart = true; | |
m_terminator = true; | |
if (lastTokenWasRestrKeyword()) { | |
token = ';'; | |
goto doneSemicolon; | |
} | |
goto start; | |
} | |
goto returnError; | |
} | |
m_atLineStart = false; | |
goto returnToken; | |
startString: { | |
int stringQuoteCharacter = m_current; | |
shift1(); | |
const UChar* stringStart = currentCharacter(); | |
while (m_current != stringQuoteCharacter) { | |
// Fast check for characters that require special handling. | |
// Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently | |
// as possible, and lets through all common ASCII characters. | |
if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) { | |
m_buffer16.append(stringStart, currentCharacter() - stringStart); | |
goto inString; | |
} | |
shift1(); | |
} | |
lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart); | |
shift1(); | |
m_atLineStart = false; | |
m_delimited = false; | |
token = STRING; | |
goto returnToken; | |
inString: | |
while (m_current != stringQuoteCharacter) { | |
if (m_current == '\\') | |
goto inStringEscapeSequence; | |
if (UNLIKELY(isLineTerminator(m_current))) | |
goto returnError; | |
if (UNLIKELY(m_current == -1)) | |
goto returnError; | |
record16(m_current); | |
shift1(); | |
} | |
goto doneString; | |
inStringEscapeSequence: | |
shift1(); | |
if (m_current == 'x') { | |
shift1(); | |
if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) { | |
record16(convertHex(m_current, m_next1)); | |
shift2(); | |
goto inString; | |
} | |
record16('x'); | |
if (m_current == stringQuoteCharacter) | |
goto doneString; | |
goto inString; | |
} | |
if (m_current == 'u') { | |
shift1(); | |
if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) { | |
record16(convertUnicode(m_current, m_next1, m_next2, m_next3)); | |
shift4(); | |
goto inString; | |
} | |
if (m_current == stringQuoteCharacter) { | |
record16('u'); | |
goto doneString; | |
} | |
goto returnError; | |
} | |
if (isASCIIOctalDigit(m_current)) { | |
if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) { | |
record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0'); | |
shift3(); | |
goto inString; | |
} | |
if (isASCIIOctalDigit(m_next1)) { | |
record16((m_current - '0') * 8 + m_next1 - '0'); | |
shift2(); | |
goto inString; | |
} | |
record16(m_current - '0'); | |
shift1(); | |
goto inString; | |
} | |
if (isLineTerminator(m_current)) { | |
shiftLineTerminator(); | |
goto inString; | |
} | |
if (m_current == -1) | |
goto returnError; | |
record16(singleEscape(m_current)); | |
shift1(); | |
goto inString; | |
} | |
startIdentifierWithBackslash: | |
shift1(); | |
if (UNLIKELY(m_current != 'u')) | |
goto returnError; | |
shift1(); | |
if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3))) | |
goto returnError; | |
token = convertUnicode(m_current, m_next1, m_next2, m_next3); | |
if (UNLIKELY(!isIdentStart(token))) | |
goto returnError; | |
goto inIdentifierAfterCharacterCheck; | |
startIdentifierOrKeyword: { | |
const UChar* identifierStart = currentCharacter(); | |
shift1(); | |
while (isIdentPart(m_current)) | |
shift1(); | |
if (LIKELY(m_current != '\\')) { | |
lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart); | |
goto doneIdentifierOrKeyword; | |
} | |
m_buffer16.append(identifierStart, currentCharacter() - identifierStart); | |
} | |
do { | |
shift1(); | |
if (UNLIKELY(m_current != 'u')) | |
goto returnError; | |
shift1(); | |
if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3))) | |
goto returnError; | |
token = convertUnicode(m_current, m_next1, m_next2, m_next3); | |
if (UNLIKELY(!isIdentPart(token))) | |
goto returnError; | |
inIdentifierAfterCharacterCheck: | |
record16(token); | |
shift4(); | |
while (isIdentPart(m_current)) { | |
record16(m_current); | |
shift1(); | |
} | |
} while (UNLIKELY(m_current == '\\')); | |
goto doneIdentifier; | |
inSingleLineComment: | |
while (!isLineTerminator(m_current)) { | |
if (UNLIKELY(m_current == -1)) | |
return 0; | |
shift1(); | |
} | |
shiftLineTerminator(); | |
m_atLineStart = true; | |
m_terminator = true; | |
if (lastTokenWasRestrKeyword()) | |
goto doneSemicolon; | |
goto start; | |
inMultiLineComment: | |
shift2(); | |
while (m_current != '*' || m_next1 != '/') { | |
if (isLineTerminator(m_current)) | |
shiftLineTerminator(); | |
else { | |
shift1(); | |
if (UNLIKELY(m_current == -1)) | |
goto returnError; | |
} | |
} | |
shift2(); | |
m_atLineStart = false; | |
goto start; | |
startNumberWithZeroDigit: | |
shift1(); | |
if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) { | |
shift1(); | |
goto inHex; | |
} | |
if (m_current == '.') { | |
record8('0'); | |
record8('.'); | |
shift1(); | |
goto inNumberAfterDecimalPoint; | |
} | |
if ((m_current | 0x20) == 'e') { | |
record8('0'); | |
record8('e'); | |
shift1(); | |
goto inExponentIndicator; | |
} | |
if (isASCIIOctalDigit(m_current)) | |
goto inOctal; | |
if (isASCIIDigit(m_current)) | |
goto startNumber; | |
lvalp->doubleValue = 0; | |
goto doneNumeric; | |
inNumberAfterDecimalPoint: | |
while (isASCIIDigit(m_current)) { | |
record8(m_current); | |
shift1(); | |
} | |
if ((m_current | 0x20) == 'e') { | |
record8('e'); | |
shift1(); | |
goto inExponentIndicator; | |
} | |
goto doneNumber; | |
inExponentIndicator: | |
if (m_current == '+' || m_current == '-') { | |
record8(m_current); | |
shift1(); | |
} | |
if (!isASCIIDigit(m_current)) | |
goto returnError; | |
do { | |
record8(m_current); | |
shift1(); | |
} while (isASCIIDigit(m_current)); | |
goto doneNumber; | |
inOctal: { | |
do { | |
record8(m_current); | |
shift1(); | |
} while (isASCIIOctalDigit(m_current)); | |
if (isASCIIDigit(m_current)) | |
goto startNumber; | |
double dval = 0; | |
const char* end = m_buffer8.end(); | |
for (const char* p = m_buffer8.data(); p < end; ++p) { | |
dval *= 8; | |
dval += *p - '0'; | |
} | |
if (dval >= mantissaOverflowLowerBound) | |
dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8); | |
m_buffer8.resize(0); | |
lvalp->doubleValue = dval; | |
goto doneNumeric; | |
} | |
inHex: { | |
do { | |
record8(m_current); | |
shift1(); | |
} while (isASCIIHexDigit(m_current)); | |
double dval = 0; | |
const char* end = m_buffer8.end(); | |
for (const char* p = m_buffer8.data(); p < end; ++p) { | |
dval *= 16; | |
dval += toASCIIHexValue(*p); | |
} | |
if (dval >= mantissaOverflowLowerBound) | |
dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16); | |
m_buffer8.resize(0); | |
lvalp->doubleValue = dval; | |
goto doneNumeric; | |
} | |
startNumber: | |
record8(m_current); | |
shift1(); | |
while (isASCIIDigit(m_current)) { | |
record8(m_current); | |
shift1(); | |
} | |
if (m_current == '.') { | |
record8('.'); | |
shift1(); | |
goto inNumberAfterDecimalPoint; | |
} | |
if ((m_current | 0x20) == 'e') { | |
record8('e'); | |
shift1(); | |
goto inExponentIndicator; | |
} | |
// Fall through into doneNumber. | |
doneNumber: | |
// Null-terminate string for strtod. | |
m_buffer8.append('\0'); | |
lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0); | |
m_buffer8.resize(0); | |
// Fall through into doneNumeric. | |
doneNumeric: | |
// No identifiers allowed directly after numeric literal, e.g. "3in" is bad. | |
if (UNLIKELY(isIdentStart(m_current))) | |
goto returnError; | |
m_atLineStart = false; | |
m_delimited = false; | |
token = NUMBER; | |
goto returnToken; | |
doneSemicolon: | |
token = ';'; | |
m_delimited = true; | |
goto returnToken; | |
doneIdentifier: | |
m_atLineStart = false; | |
m_delimited = false; | |
lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size()); | |
m_buffer16.resize(0); | |
token = IDENT; | |
goto returnToken; | |
doneIdentifierOrKeyword: { | |
m_atLineStart = false; | |
m_delimited = false; | |
m_buffer16.resize(0); | |
const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident); | |
token = entry ? entry->lexerValue() : IDENT; | |
goto returnToken; | |
} | |
doneString: | |
// Atomize constant strings in case they're later used in property lookup. | |
shift1(); | |
m_atLineStart = false; | |
m_delimited = false; | |
lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size()); | |
m_buffer16.resize(0); | |
token = STRING; | |
// Fall through into returnToken. | |
returnToken: { | |
int lineNumber = m_lineNumber; | |
llocp->first_line = lineNumber; | |
llocp->last_line = lineNumber; | |
llocp->first_column = startOffset; | |
llocp->last_column = currentOffset(); | |
m_lastToken = token; | |
return token; | |
} | |
returnError: | |
m_error = true; | |
return -1; | |
} | |
bool Lexer::scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix) | |
{ | |
ASSERT(m_buffer16.isEmpty()); | |
bool lastWasEscape = false; | |
bool inBrackets = false; | |
if (patternPrefix) { | |
ASSERT(!isLineTerminator(patternPrefix)); | |
ASSERT(patternPrefix != '/'); | |
ASSERT(patternPrefix != '['); | |
record16(patternPrefix); | |
} | |
while (true) { | |
int current = m_current; | |
if (isLineTerminator(current) || current == -1) { | |
m_buffer16.resize(0); | |
return false; | |
} | |
shift1(); | |
if (current == '/' && !lastWasEscape && !inBrackets) | |
break; | |
record16(current); | |
if (lastWasEscape) { | |
lastWasEscape = false; | |
continue; | |
} | |
switch (current) { | |
case '[': | |
inBrackets = true; | |
break; | |
case ']': | |
inBrackets = false; | |
break; | |
case '\\': | |
lastWasEscape = true; | |
break; | |
} | |
} | |
pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size()); | |
m_buffer16.resize(0); | |
while (isIdentPart(m_current)) { | |
record16(m_current); | |
shift1(); | |
} | |
flags = makeIdentifier(m_buffer16.data(), m_buffer16.size()); | |
m_buffer16.resize(0); | |
return true; | |
} | |
bool Lexer::skipRegExp() | |
{ | |
bool lastWasEscape = false; | |
bool inBrackets = false; | |
while (true) { | |
int current = m_current; | |
if (isLineTerminator(current) || current == -1) | |
return false; | |
shift1(); | |
if (current == '/' && !lastWasEscape && !inBrackets) | |
break; | |
if (lastWasEscape) { | |
lastWasEscape = false; | |
continue; | |
} | |
switch (current) { | |
case '[': | |
inBrackets = true; | |
break; | |
case ']': | |
inBrackets = false; | |
break; | |
case '\\': | |
lastWasEscape = true; | |
break; | |
} | |
} | |
while (isIdentPart(m_current)) | |
shift1(); | |
return true; | |
} | |
void Lexer::clear() | |
{ | |
m_arena = 0; | |
m_codeWithoutBOMs.clear(); | |
Vector<char> newBuffer8; | |
newBuffer8.reserveInitialCapacity(initialReadBufferCapacity); | |
m_buffer8.swap(newBuffer8); | |
Vector<UChar> newBuffer16; | |
newBuffer16.reserveInitialCapacity(initialReadBufferCapacity); | |
m_buffer16.swap(newBuffer16); | |
m_isReparsing = false; | |
} | |
// Builds a SourceCode covering the text from an opening brace through its
// closing brace.
//
// openBrace / closeBrace: offsets as reported by lex() for the two braces.
// firstLine:              line number passed through to the SourceCode.
//
// When no BOM-stripped copy is in use the offsets already index the provider's
// data. Otherwise they index the stripped text and are mapped back to the
// original data, which still contains the BOMs.
SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
{
    if (m_codeWithoutBOMs.isEmpty())
        return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);

    ASSERT(openBrace < closeBrace);

    const UChar* data = m_source->provider()->data();
    const int end = m_source->endOffset();

    // Walk the original text once, tracking the offset each non-BOM character
    // has in the stripped text. Stripping starts at startOffset(), so both
    // counters start there. Using the stripped offsets as loop bounds over the
    // original text would miss BOMs lying between a stripped offset and the
    // true original position.
    int originalIndex = m_source->startOffset();
    int strippedOffset = originalIndex;

    for (; originalIndex < end; ++originalIndex) {
        if (data[originalIndex] == byteOrderMark)
            continue;
        if (strippedOffset == openBrace)
            break;
        ++strippedOffset;
    }
    ASSERT(originalIndex < end);
    const int originalOpenBrace = originalIndex;

    for (; originalIndex < end; ++originalIndex) {
        if (data[originalIndex] == byteOrderMark)
            continue;
        if (strippedOffset == closeBrace)
            break;
        ++strippedOffset;
    }
    ASSERT(originalIndex < end);

    return SourceCode(m_source->provider(), originalOpenBrace, originalIndex + 1, firstLine);
}
} // namespace JSC |