/* | |
* Copyright 2005 Maksim Orlovich <maksim@kde.org> | |
* Copyright (C) 2006 Apple Computer, Inc. | |
* Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org> | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions | |
* are met: | |
* | |
* 1. Redistributions of source code must retain the above copyright | |
* notice, this list of conditions and the following disclaimer. | |
* 2. Redistributions in binary form must reproduce the above copyright | |
* notice, this list of conditions and the following disclaimer in the | |
* documentation and/or other materials provided with the distribution. | |
* | |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
#include "config.h" | |
#include "XPathParser.h" | |
#if ENABLE(XPATH) | |
#include "ExceptionCode.h" | |
#include "StringHash.h" | |
#include "XPathEvaluator.h" | |
#include "XPathException.h" | |
#include "XPathNSResolver.h" | |
#include "XPathStep.h" | |
#include <wtf/StdLibExtras.h> | |
int xpathyyparse(void*); | |
using namespace WTF; | |
using namespace Unicode; | |
namespace WebCore { | |
namespace XPath { | |
class LocationPath; | |
#include "XPathGrammar.h" | |
Parser* Parser::currentParser = 0; | |
enum XMLCat { NameStart, NameCont, NotPartOfName }; | |
typedef HashMap<String, Step::Axis> AxisNamesMap; | |
static XMLCat charCat(UChar aChar) | |
{ | |
//### might need to add some special cases from the XML spec. | |
if (aChar == '_') | |
return NameStart; | |
if (aChar == '.' || aChar == '-') | |
return NameCont; | |
CharCategory category = Unicode::category(aChar); | |
if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter)) | |
return NameStart; | |
if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit)) | |
return NameCont; | |
return NotPartOfName; | |
} | |
static void setUpAxisNamesMap(AxisNamesMap& axisNames) | |
{ | |
struct AxisName { | |
const char* name; | |
Step::Axis axis; | |
}; | |
const AxisName axisNameList[] = { | |
{ "ancestor", Step::AncestorAxis }, | |
{ "ancestor-or-self", Step::AncestorOrSelfAxis }, | |
{ "attribute", Step::AttributeAxis }, | |
{ "child", Step::ChildAxis }, | |
{ "descendant", Step::DescendantAxis }, | |
{ "descendant-or-self", Step::DescendantOrSelfAxis }, | |
{ "following", Step::FollowingAxis }, | |
{ "following-sibling", Step::FollowingSiblingAxis }, | |
{ "namespace", Step::NamespaceAxis }, | |
{ "parent", Step::ParentAxis }, | |
{ "preceding", Step::PrecedingAxis }, | |
{ "preceding-sibling", Step::PrecedingSiblingAxis }, | |
{ "self", Step::SelfAxis } | |
}; | |
for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i) | |
axisNames.set(axisNameList[i].name, axisNameList[i].axis); | |
} | |
static bool isAxisName(const String& name, Step::Axis& type) | |
{ | |
DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ()); | |
if (axisNames.isEmpty()) | |
setUpAxisNamesMap(axisNames); | |
AxisNamesMap::iterator it = axisNames.find(name); | |
if (it == axisNames.end()) | |
return false; | |
type = it->second; | |
return true; | |
} | |
static bool isNodeTypeName(const String& name) | |
{ | |
DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ()); | |
if (nodeTypeNames.isEmpty()) { | |
nodeTypeNames.add("comment"); | |
nodeTypeNames.add("text"); | |
nodeTypeNames.add("processing-instruction"); | |
nodeTypeNames.add("node"); | |
} | |
return nodeTypeNames.contains(name); | |
} | |
/* Returns whether the last parsed token matches the [32] Operator rule | |
* (check http://www.w3.org/TR/xpath#exprlex). Necessary to disambiguate | |
* the tokens. | |
*/ | |
bool Parser::isOperatorContext() const | |
{ | |
if (m_nextPos == 0) | |
return false; | |
switch (m_lastTokenType) { | |
case AND: case OR: case MULOP: | |
case '/': case SLASHSLASH: case '|': case PLUS: case MINUS: | |
case EQOP: case RELOP: | |
case '@': case AXISNAME: case '(': case '[': | |
return false; | |
default: | |
return true; | |
} | |
} | |
void Parser::skipWS() | |
{ | |
while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos])) | |
++m_nextPos; | |
} | |
Token Parser::makeTokenAndAdvance(int code, int advance) | |
{ | |
m_nextPos += advance; | |
return Token(code); | |
} | |
Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance) | |
{ | |
m_nextPos += advance; | |
return Token(code, val); | |
} | |
Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance) | |
{ | |
m_nextPos += advance; | |
return Token(code, val); | |
} | |
// Returns next char if it's there and interesting, 0 otherwise | |
char Parser::peekAheadHelper() | |
{ | |
if (m_nextPos + 1 >= m_data.length()) | |
return 0; | |
UChar next = m_data[m_nextPos + 1]; | |
if (next >= 0xff) | |
return 0; | |
return next; | |
} | |
char Parser::peekCurHelper() | |
{ | |
if (m_nextPos >= m_data.length()) | |
return 0; | |
UChar next = m_data[m_nextPos]; | |
if (next >= 0xff) | |
return 0; | |
return next; | |
} | |
Token Parser::lexString() | |
{ | |
UChar delimiter = m_data[m_nextPos]; | |
int startPos = m_nextPos + 1; | |
for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) { | |
if (m_data[m_nextPos] == delimiter) { | |
String value = m_data.substring(startPos, m_nextPos - startPos); | |
if (value.isNull()) | |
value = ""; | |
++m_nextPos; // Consume the char. | |
return Token(LITERAL, value); | |
} | |
} | |
// Ouch, went off the end -- report error. | |
return Token(XPATH_ERROR); | |
} | |
Token Parser::lexNumber() | |
{ | |
int startPos = m_nextPos; | |
bool seenDot = false; | |
// Go until end or a non-digits character. | |
for (; m_nextPos < m_data.length(); ++m_nextPos) { | |
UChar aChar = m_data[m_nextPos]; | |
if (aChar >= 0xff) break; | |
if (aChar < '0' || aChar > '9') { | |
if (aChar == '.' && !seenDot) | |
seenDot = true; | |
else | |
break; | |
} | |
} | |
return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos)); | |
} | |
bool Parser::lexNCName(String& name) | |
{ | |
int startPos = m_nextPos; | |
if (m_nextPos >= m_data.length()) | |
return false; | |
if (charCat(m_data[m_nextPos]) != NameStart) | |
return false; | |
// Keep going until we get a character that's not good for names. | |
for (; m_nextPos < m_data.length(); ++m_nextPos) | |
if (charCat(m_data[m_nextPos]) == NotPartOfName) | |
break; | |
name = m_data.substring(startPos, m_nextPos - startPos); | |
return true; | |
} | |
bool Parser::lexQName(String& name) | |
{ | |
String n1; | |
if (!lexNCName(n1)) | |
return false; | |
skipWS(); | |
// If the next character is :, what we just got it the prefix, if not, | |
// it's the whole thing. | |
if (peekAheadHelper() != ':') { | |
name = n1; | |
return true; | |
} | |
String n2; | |
if (!lexNCName(n2)) | |
return false; | |
name = n1 + ":" + n2; | |
return true; | |
} | |
Token Parser::nextTokenInternal() | |
{ | |
skipWS(); | |
if (m_nextPos >= m_data.length()) | |
return Token(0); | |
char code = peekCurHelper(); | |
switch (code) { | |
case '(': case ')': case '[': case ']': | |
case '@': case ',': case '|': | |
return makeTokenAndAdvance(code); | |
case '\'': | |
case '\"': | |
return lexString(); | |
case '0': case '1': case '2': case '3': case '4': | |
case '5': case '6': case '7': case '8': case '9': | |
return lexNumber(); | |
case '.': { | |
char next = peekAheadHelper(); | |
if (next == '.') | |
return makeTokenAndAdvance(DOTDOT, 2); | |
if (next >= '0' && next <= '9') | |
return lexNumber(); | |
return makeTokenAndAdvance('.'); | |
} | |
case '/': | |
if (peekAheadHelper() == '/') | |
return makeTokenAndAdvance(SLASHSLASH, 2); | |
return makeTokenAndAdvance('/'); | |
case '+': | |
return makeTokenAndAdvance(PLUS); | |
case '-': | |
return makeTokenAndAdvance(MINUS); | |
case '=': | |
return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ); | |
case '!': | |
if (peekAheadHelper() == '=') | |
return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2); | |
return Token(XPATH_ERROR); | |
case '<': | |
if (peekAheadHelper() == '=') | |
return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2); | |
return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT); | |
case '>': | |
if (peekAheadHelper() == '=') | |
return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2); | |
return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT); | |
case '*': | |
if (isOperatorContext()) | |
return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul); | |
++m_nextPos; | |
return Token(NAMETEST, "*"); | |
case '$': { // $ QName | |
m_nextPos++; | |
String name; | |
if (!lexQName(name)) | |
return Token(XPATH_ERROR); | |
return Token(VARIABLEREFERENCE, name); | |
} | |
} | |
String name; | |
if (!lexNCName(name)) | |
return Token(XPATH_ERROR); | |
skipWS(); | |
// If we're in an operator context, check for any operator names | |
if (isOperatorContext()) { | |
if (name == "and") //### hash? | |
return Token(AND); | |
if (name == "or") | |
return Token(OR); | |
if (name == "mod") | |
return Token(MULOP, NumericOp::OP_Mod); | |
if (name == "div") | |
return Token(MULOP, NumericOp::OP_Div); | |
} | |
// See whether we are at a : | |
if (peekCurHelper() == ':') { | |
m_nextPos++; | |
// Any chance it's an axis name? | |
if (peekCurHelper() == ':') { | |
m_nextPos++; | |
//It might be an axis name. | |
Step::Axis axis; | |
if (isAxisName(name, axis)) | |
return Token(AXISNAME, axis); | |
// Ugh, :: is only valid in axis names -> error | |
return Token(XPATH_ERROR); | |
} | |
// Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest | |
skipWS(); | |
if (peekCurHelper() == '*') { | |
m_nextPos++; | |
return Token(NAMETEST, name + ":*"); | |
} | |
// Make a full qname. | |
String n2; | |
if (!lexNCName(n2)) | |
return Token(XPATH_ERROR); | |
name = name + ":" + n2; | |
} | |
skipWS(); | |
if (peekCurHelper() == '(') { | |
//note: we don't swallow the (here! | |
//either node type of function name | |
if (isNodeTypeName(name)) { | |
if (name == "processing-instruction") | |
return Token(PI, name); | |
return Token(NODETYPE, name); | |
} | |
//must be a function name. | |
return Token(FUNCTIONNAME, name); | |
} | |
// At this point, it must be NAMETEST. | |
return Token(NAMETEST, name); | |
} | |
Token Parser::nextToken() | |
{ | |
Token toRet = nextTokenInternal(); | |
m_lastTokenType = toRet.type; | |
return toRet; | |
} | |
Parser::Parser() | |
{ | |
reset(String()); | |
} | |
void Parser::reset(const String& data) | |
{ | |
m_nextPos = 0; | |
m_data = data; | |
m_lastTokenType = 0; | |
m_topExpr = 0; | |
m_gotNamespaceError = false; | |
} | |
int Parser::lex(void* data) | |
{ | |
YYSTYPE* yylval = static_cast<YYSTYPE*>(data); | |
Token tok = nextToken(); | |
switch (tok.type) { | |
case AXISNAME: | |
yylval->axis = tok.axis; | |
break; | |
case MULOP: | |
yylval->numop = tok.numop; | |
break; | |
case RELOP: | |
case EQOP: | |
yylval->eqop = tok.eqop; | |
break; | |
case NODETYPE: | |
case PI: | |
case FUNCTIONNAME: | |
case LITERAL: | |
case VARIABLEREFERENCE: | |
case NUMBER: | |
case NAMETEST: | |
yylval->str = new String(tok.str); | |
registerString(yylval->str); | |
break; | |
} | |
return tok.type; | |
} | |
bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI) | |
{ | |
int colon = qName.find(':'); | |
if (colon >= 0) { | |
if (!m_resolver) | |
return false; | |
namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon)); | |
if (namespaceURI.isNull()) | |
return false; | |
localName = qName.substring(colon + 1); | |
} else | |
localName = qName; | |
return true; | |
} | |
Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec) | |
{ | |
reset(statement); | |
m_resolver = resolver; | |
Parser* oldParser = currentParser; | |
currentParser = this; | |
int parseError = xpathyyparse(this); | |
currentParser = oldParser; | |
if (parseError) { | |
deleteAllValues(m_parseNodes); | |
m_parseNodes.clear(); | |
HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end(); | |
for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) { | |
deleteAllValues(**it); | |
delete *it; | |
} | |
m_predicateVectors.clear(); | |
HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end(); | |
for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) { | |
deleteAllValues(**it); | |
delete *it; | |
} | |
m_expressionVectors.clear(); | |
deleteAllValues(m_strings); | |
m_strings.clear(); | |
deleteAllValues(m_nodeTests); | |
m_nodeTests.clear(); | |
m_topExpr = 0; | |
if (m_gotNamespaceError) | |
ec = NAMESPACE_ERR; | |
else | |
ec = XPathException::INVALID_EXPRESSION_ERR; | |
return 0; | |
} | |
ASSERT(m_parseNodes.size() == 1); | |
ASSERT(*m_parseNodes.begin() == m_topExpr); | |
ASSERT(m_expressionVectors.size() == 0); | |
ASSERT(m_predicateVectors.size() == 0); | |
ASSERT(m_strings.size() == 0); | |
ASSERT(m_nodeTests.size() == 0); | |
m_parseNodes.clear(); | |
Expression* result = m_topExpr; | |
m_topExpr = 0; | |
return result; | |
} | |
void Parser::registerParseNode(ParseNode* node) | |
{ | |
if (node == 0) | |
return; | |
ASSERT(!m_parseNodes.contains(node)); | |
m_parseNodes.add(node); | |
} | |
void Parser::unregisterParseNode(ParseNode* node) | |
{ | |
if (node == 0) | |
return; | |
ASSERT(m_parseNodes.contains(node)); | |
m_parseNodes.remove(node); | |
} | |
void Parser::registerPredicateVector(Vector<Predicate*>* vector) | |
{ | |
if (vector == 0) | |
return; | |
ASSERT(!m_predicateVectors.contains(vector)); | |
m_predicateVectors.add(vector); | |
} | |
void Parser::deletePredicateVector(Vector<Predicate*>* vector) | |
{ | |
if (vector == 0) | |
return; | |
ASSERT(m_predicateVectors.contains(vector)); | |
m_predicateVectors.remove(vector); | |
delete vector; | |
} | |
void Parser::registerExpressionVector(Vector<Expression*>* vector) | |
{ | |
if (vector == 0) | |
return; | |
ASSERT(!m_expressionVectors.contains(vector)); | |
m_expressionVectors.add(vector); | |
} | |
void Parser::deleteExpressionVector(Vector<Expression*>* vector) | |
{ | |
if (vector == 0) | |
return; | |
ASSERT(m_expressionVectors.contains(vector)); | |
m_expressionVectors.remove(vector); | |
delete vector; | |
} | |
void Parser::registerString(String* s) | |
{ | |
if (s == 0) | |
return; | |
ASSERT(!m_strings.contains(s)); | |
m_strings.add(s); | |
} | |
void Parser::deleteString(String* s) | |
{ | |
if (s == 0) | |
return; | |
ASSERT(m_strings.contains(s)); | |
m_strings.remove(s); | |
delete s; | |
} | |
void Parser::registerNodeTest(Step::NodeTest* t) | |
{ | |
if (t == 0) | |
return; | |
ASSERT(!m_nodeTests.contains(t)); | |
m_nodeTests.add(t); | |
} | |
void Parser::deleteNodeTest(Step::NodeTest* t) | |
{ | |
if (t == 0) | |
return; | |
ASSERT(m_nodeTests.contains(t)); | |
m_nodeTests.remove(t); | |
delete t; | |
} | |
} | |
} | |
#endif // ENABLE(XPATH) |