blob: a5cae0ed5842741292db33c8f1a170d5e241c52d [file] [log] [blame]
/*
* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.nashorn.internal.parser;
import static jdk.nashorn.internal.parser.TokenType.COLON;
import static jdk.nashorn.internal.parser.TokenType.COMMARIGHT;
import static jdk.nashorn.internal.parser.TokenType.EOF;
import static jdk.nashorn.internal.parser.TokenType.ESCSTRING;
import static jdk.nashorn.internal.parser.TokenType.RBRACE;
import static jdk.nashorn.internal.parser.TokenType.RBRACKET;
import static jdk.nashorn.internal.parser.TokenType.STRING;
import java.util.ArrayList;
import java.util.List;
import jdk.nashorn.internal.ir.Expression;
import jdk.nashorn.internal.ir.LiteralNode;
import jdk.nashorn.internal.ir.Node;
import jdk.nashorn.internal.ir.ObjectNode;
import jdk.nashorn.internal.ir.PropertyNode;
import jdk.nashorn.internal.ir.UnaryNode;
import jdk.nashorn.internal.runtime.ErrorManager;
import jdk.nashorn.internal.runtime.Source;
/**
* Parses JSON text and returns the corresponding IR node. This is derived from the objectLiteral production of the main parser.
*
* See: 15.12.1.2 The JSON Syntactic Grammar
*/
public class JSONParser extends AbstractParser {
/**
* Constructor
* @param source the source
* @param errors the error manager
*/
public JSONParser(final Source source, final ErrorManager errors) {
super(source, errors, false, 0);
}
/**
* Implementation of the Quote(value) operation as defined in the ECMA script spec
* It wraps a String value in double quotes and escapes characters within in
*
* @param value string to quote
*
* @return quoted and escaped string
*/
public static String quote(final String value) {
final StringBuilder product = new StringBuilder();
product.append("\"");
for (final char ch : value.toCharArray()) {
// TODO: should use a table?
switch (ch) {
case '\\':
product.append("\\\\");
break;
case '"':
product.append("\\\"");
break;
case '\b':
product.append("\\b");
break;
case '\f':
product.append("\\f");
break;
case '\n':
product.append("\\n");
break;
case '\r':
product.append("\\r");
break;
case '\t':
product.append("\\t");
break;
default:
if (ch < ' ') {
product.append(Lexer.unicodeEscape(ch));
break;
}
product.append(ch);
break;
}
}
product.append("\"");
return product.toString();
}
/**
* Public parsed method - start lexing a new token stream for
* a JSON script
*
* @return the JSON literal
*/
public Node parse() {
stream = new TokenStream();
lexer = new Lexer(source, stream) {
@Override
protected boolean skipComments() {
return false;
}
@Override
protected boolean isStringDelimiter(final char ch) {
return ch == '\"';
}
// ECMA 15.12.1.1 The JSON Lexical Grammar - JSONWhiteSpace
@Override
protected boolean isWhitespace(final char ch) {
return Lexer.isJsonWhitespace(ch);
}
@Override
protected boolean isEOL(final char ch) {
return Lexer.isJsonEOL(ch);
}
// ECMA 15.12.1.1 The JSON Lexical Grammar - JSONNumber
@Override
protected void scanNumber() {
// Record beginning of number.
final int startPosition = position;
// Assume value is a decimal.
TokenType valueType = TokenType.DECIMAL;
// floating point can't start with a "." with no leading digit before
if (ch0 == '.') {
error(Lexer.message("json.invalid.number"), STRING, position, limit);
}
// First digit of number.
final int digit = convertDigit(ch0, 10);
// skip first digit
skip(1);
if (digit != 0) {
// Skip over remaining digits.
while (convertDigit(ch0, 10) != -1) {
skip(1);
}
}
if (ch0 == '.' || ch0 == 'E' || ch0 == 'e') {
// Must be a double.
if (ch0 == '.') {
// Skip period.
skip(1);
boolean mantissa = false;
// Skip mantissa.
while (convertDigit(ch0, 10) != -1) {
mantissa = true;
skip(1);
}
if (! mantissa) {
// no digit after "."
error(Lexer.message("json.invalid.number"), STRING, position, limit);
}
}
// Detect exponent.
if (ch0 == 'E' || ch0 == 'e') {
// Skip E.
skip(1);
// Detect and skip exponent sign.
if (ch0 == '+' || ch0 == '-') {
skip(1);
}
boolean exponent = false;
// Skip exponent.
while (convertDigit(ch0, 10) != -1) {
exponent = true;
skip(1);
}
if (! exponent) {
// no digit after "E"
error(Lexer.message("json.invalid.number"), STRING, position, limit);
}
}
valueType = TokenType.FLOATING;
}
// Add number token.
add(valueType, startPosition);
}
// ECMA 15.12.1.1 The JSON Lexical Grammar - JSONEscapeCharacter
@Override
protected boolean isEscapeCharacter(final char ch) {
switch (ch) {
case '"':
case '/':
case '\\':
case 'b':
case 'f':
case 'n':
case 'r':
case 't':
// could be unicode escape
case 'u':
return true;
default:
return false;
}
}
};
k = -1;
next();
final Node resultNode = jsonLiteral();
expect(EOF);
return resultNode;
}
@SuppressWarnings("fallthrough")
private LiteralNode<?> getStringLiteral() {
final LiteralNode<?> literal = getLiteral();
final String str = (String)literal.getValue();
for (int i = 0; i < str.length(); i++) {
final char ch = str.charAt(i);
switch (ch) {
default:
if (ch > 0x001f) {
break;
}
case '"':
case '\\':
throw error(AbstractParser.message("unexpected.token", str));
}
}
return literal;
}
/**
* Parse a JSON literal from the token stream
* @return the JSON literal as a Node
*/
private Expression jsonLiteral() {
final long literalToken = token;
switch (type) {
case STRING:
return getStringLiteral();
case ESCSTRING:
case DECIMAL:
case FLOATING:
return getLiteral();
case FALSE:
next();
return LiteralNode.newInstance(literalToken, finish, false);
case TRUE:
next();
return LiteralNode.newInstance(literalToken, finish, true);
case NULL:
next();
return LiteralNode.newInstance(literalToken, finish);
case LBRACKET:
return arrayLiteral();
case LBRACE:
return objectLiteral();
/*
* A.8.1 JSON Lexical Grammar
*
* JSONNumber :: See 15.12.1.1
* -opt DecimalIntegerLiteral JSONFractionopt ExponentPartopt
*/
case SUB:
next();
final long realToken = token;
final Object value = getValue();
if (value instanceof Number) {
next();
return new UnaryNode(literalToken, LiteralNode.newInstance(realToken, finish, (Number)value));
}
throw error(AbstractParser.message("expected", "number", type.getNameOrType()));
default:
break;
}
throw error(AbstractParser.message("expected", "json literal", type.getNameOrType()));
}
/**
* Parse an array literal from the token stream
* @return the array literal as a Node
*/
private LiteralNode<Expression[]> arrayLiteral() {
// Unlike JavaScript array literals, elison is not permitted in JSON.
// Capture LBRACKET token.
final long arrayToken = token;
// LBRACKET tested in caller.
next();
LiteralNode<Expression[]> result = null;
// Prepare to accummulating elements.
final List<Expression> elements = new ArrayList<>();
loop:
while (true) {
switch (type) {
case RBRACKET:
next();
result = LiteralNode.newInstance(arrayToken, finish, elements);
break loop;
case COMMARIGHT:
next();
// check for trailing comma - not allowed in JSON
if (type == RBRACKET) {
throw error(AbstractParser.message("trailing.comma.in.json", type.getNameOrType()));
}
break;
default:
// Add expression element.
elements.add(jsonLiteral());
// Comma between array elements is mandatory in JSON.
if (type != COMMARIGHT && type != RBRACKET) {
throw error(AbstractParser.message("expected", ", or ]", type.getNameOrType()));
}
break;
}
}
return result;
}
/**
* Parse an object literal from the token stream
* @return the object literal as a Node
*/
private ObjectNode objectLiteral() {
// Capture LBRACE token.
final long objectToken = token;
// LBRACE tested in caller.
next();
// Prepare to accumulate elements.
final List<PropertyNode> elements = new ArrayList<>();
// Create a block for the object literal.
loop:
while (true) {
switch (type) {
case RBRACE:
next();
break loop;
case COMMARIGHT:
next();
// check for trailing comma - not allowed in JSON
if (type == RBRACE) {
throw error(AbstractParser.message("trailing.comma.in.json", type.getNameOrType()));
}
break;
default:
// Get and add the next property.
final PropertyNode property = propertyAssignment();
elements.add(property);
// Comma between property assigments is mandatory in JSON.
if (type != RBRACE && type != COMMARIGHT) {
throw error(AbstractParser.message("expected", ", or }", type.getNameOrType()));
}
break;
}
}
// Construct new object literal.
return new ObjectNode(objectToken, finish, elements);
}
/**
* Parse a property assignment from the token stream
* @return the property assignment as a Node
*/
private PropertyNode propertyAssignment() {
// Capture firstToken.
final long propertyToken = token;
LiteralNode<?> name = null;
if (type == STRING) {
name = getStringLiteral();
} else if (type == ESCSTRING) {
name = getLiteral();
}
if (name != null) {
expect(COLON);
final Expression value = jsonLiteral();
return new PropertyNode(propertyToken, value.getFinish(), name, value, null, null);
}
// Raise an error.
throw error(AbstractParser.message("expected", "string", type.getNameOrType()));
}
}