blob: 59a2bda7fb93c7a44f05e4f6738b498e449a4c8f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: ParserForXMLSchema.cpp 568078 2007-08-21 11:43:25Z amassari $
*/
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/regx/ParserForXMLSchema.hpp>
#include <xercesc/util/regx/TokenFactory.hpp>
#include <xercesc/util/regx/RangeToken.hpp>
#include <xercesc/util/regx/TokenInc.hpp>
#include <xercesc/util/regx/RegxDefs.hpp>
#include <xercesc/util/ParseException.hpp>
#include <xercesc/util/RuntimeException.hpp>
#include <xercesc/util/PlatformUtils.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// ParserForXMLSchema: Constructors and Destructors
// ---------------------------------------------------------------------------
// Hands the supplied memory manager to the RegxParser base class; this
// derived class performs no further initialisation of its own.
ParserForXMLSchema::ParserForXMLSchema(MemoryManager* const manager)
    : RegxParser(manager)
{
}
// Empty body: nothing is released at this level.
ParserForXMLSchema::~ParserForXMLSchema()
{
}
// ---------------------------------------------------------------------------
// ParserForXMLSchema: Parsing/Processing methods
// ---------------------------------------------------------------------------
// Advances past the current token and returns a plain character token
// holding chCaret.
Token* ParserForXMLSchema::processCaret()
{
    processNext();

    TokenFactory* const factory = getTokenFactory();
    return factory->createChar(chCaret);
}
// Advances past the current token and returns a plain character token
// holding chDollarSign.
Token* ParserForXMLSchema::processDollar()
{
    processNext();

    TokenFactory* const factory = getTokenFactory();
    return factory->createChar(chDollarSign);
}
// Advances past the current token, then builds the result as the
// concatenation of 'tok' with a closure over 'tok'.
Token* ParserForXMLSchema::processPlus(Token* const tok)
{
    processNext();

    TokenFactory* const factory = getTokenFactory();
    Token* const repeated = factory->createClosure(tok);
    return factory->createConcat(tok, repeated);
}
// Advances past the current token and wraps 'tok' in a closure token.
Token* ParserForXMLSchema::processStar(Token* const tok)
{
    processNext();

    TokenFactory* const factory = getTokenFactory();
    return factory->createClosure(tok);
}
// Advances past the current token and returns a union token whose children
// are, in order, 'tok' and a freshly created T_EMPTY token.
Token* ParserForXMLSchema::processQuestion(Token* const tok)
{
    processNext();

    TokenFactory* const factory = getTokenFactory();
    Token* const alternatives = factory->createUnion();

    // First alternative: the operand itself.
    alternatives->addChild(tok, factory);

    // Second alternative: the empty token.
    Token* const emptyTok = factory->createToken(Token::T_EMPTY);
    alternatives->addChild(emptyTok, factory);

    return alternatives;
}
// Parses a parenthesised sub-expression: skips the current token, parses the
// nested expression via parseRegx(true), and wraps it in a parenthesis token
// created with 0 as the second argument. Raises ParseException
// (Parser_Factor1) when the parser is not in state REGX_T_RPAREN afterwards;
// otherwise that token is consumed as well.
Token* ParserForXMLSchema::processParen()
{
    processNext();

    Token* const inner = parseRegx(true);
    Token* const group = getTokenFactory()->createParenthesis(inner, 0);

    const bool closed = (getState() == REGX_T_RPAREN);
    if (!closed) {
        ThrowXMLwithMemMgr(ParseException, XMLExcepts::Parser_Factor1, getMemoryManager());
    }

    processNext();
    return group;
}
/**
 * Parses a bracketed character class and returns the resulting range token.
 *
 * The parse context is switched to S_INBRACKETS for the duration of the
 * parse and restored to S_NORMAL before returning. When the class starts
 * with chCaret, a base range covering 0..Token::UTF16_MAX is created and the
 * parsed ranges are subtracted from it. A REGX_T_XMLSCHEMA_CC_SUBTRACTION
 * token (not accepted as the first item) triggers a recursive parse of the
 * nested class, whose ranges are subtracted from the ranges collected so far.
 *
 * The returned token has been passed through sortRanges() and
 * compactRanges().
 *
 * Raises ParseException with:
 *   - Parser_CC2  when REGX_T_EOF is reached before the class is closed,
 *   - Parser_CC5  when a subtraction is not followed by chCloseSquare,
 *   - Parser_CC6  for an unescaped '[', ']' or '-' in a disallowed position,
 *   - Parser_Ope3 when a range's start is greater than its end,
 *   - Parser_Atom5 when processBacksolidus_pP() returns 0.
 */
RangeToken* ParserForXMLSchema::parseCharacterClass(const bool) {
    setParseContext(S_INBRACKETS);
    processNext();

    RangeToken* base = 0;
    bool isNRange = false;

    if (getState() == REGX_T_CHAR && getCharData() == chCaret) {
        // Negated class: start from the full range and subtract later.
        isNRange = true;
        processNext();
        base = getTokenFactory()->createRange();
        base->addRange(0, Token::UTF16_MAX);
    }

    // Accumulates the ranges listed inside the brackets.
    RangeToken* tok = getTokenFactory()->createRange();

    int type;
    bool firstLoop = true;
    bool wasDecoded;

    while ( (type = getState()) != REGX_T_EOF) {
        wasDecoded = false;

        // single range | from-to-range | subtraction
        if (type == REGX_T_CHAR && getCharData() == chCloseSquare && !firstLoop) {
            // Closing bracket (never treated as such on the first item).
            if (isNRange) {
                base->subtractRanges(tok);
                tok = base;
            }
            break;
        }

        XMLInt32 ch = getCharData();
        bool end = false;   // true once this item has been fully handled

        if (type == REGX_T_BACKSOLIDUS) {
            switch(ch) {
            case chLatin_d:
            case chLatin_D:
            case chLatin_w:
            case chLatin_W:
            case chLatin_s:
            case chLatin_S:
                {
                    tok->mergeRanges(getTokenForShorthand(ch));
                    end = true;
                }
                break;
            case chLatin_i:
            case chLatin_I:
            case chLatin_c:
            case chLatin_C:
                {
                    ch = processCInCharacterClass(tok, ch);
                    if (ch < 0) {
                        end = true;
                    }
                }
                break;
            case chLatin_p:
            case chLatin_P:
                {
                    RangeToken* tok2 = processBacksolidus_pP(ch);
                    if (tok2 == 0) {
                        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Atom5, getMemoryManager());
                    }
                    tok->mergeRanges(tok2);
                    end = true;
                }
                break;
            case chDash:
                // Remember that this dash came from an escape so the
                // "two consecutive dashes" check below does not reject it.
                wasDecoded = true;
                // fall thru to default.
            default:
                ch = decodeEscaped();
            }
        } // end if REGX_T_BACKSOLIDUS
        else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION && !firstLoop) {
            if (isNRange) {
                base->subtractRanges(tok);
                tok = base;
            }

            // Parse the nested class and remove its ranges from ours.
            RangeToken* rangeTok = parseCharacterClass(false);
            tok->subtractRanges(rangeTok);

            if (getState() != REGX_T_CHAR || getCharData() != chCloseSquare) {
                ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC5, getMemoryManager());
            }
            break;
        } // end if REGX_T_XMLSCHEMA...

        // From here on getState()/getCharData() describe the token that
        // FOLLOWS the item held in 'type'/'ch'.
        processNext();

        if (!end) {
            if (type == REGX_T_CHAR
                && (ch == chOpenSquare
                    || ch == chCloseSquare
                    || (ch == chDash && getCharData() == chCloseSquare && firstLoop))) {
                // if regex = [-] then invalid...
                // '[', ']', '-' not allowed and should be escaped
                // The explicit cast avoids a narrowing conversion
                // (XMLInt32 -> XMLCh) inside the braced initializer.
                XMLCh chStr[] = { static_cast<XMLCh>(ch), chNull };
                ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_CC6, chStr, chStr, getMemoryManager());
            }

            // Two consecutive unescaped dashes are rejected.
            if (ch == chDash && getCharData() == chDash && getState() != REGX_T_BACKSOLIDUS && !wasDecoded) {
                XMLCh chStr[] = { static_cast<XMLCh>(ch), chNull };
                ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_CC6, chStr, chStr, getMemoryManager());
            }

            if (getState() != REGX_T_CHAR || getCharData() != chDash) {
                // Not followed by a dash: a single-character range.
                tok->addRange(ch, ch);
            }
            else {
                // Followed by a dash: skip it and look at what comes next.
                processNext();
                if ((type = getState()) == REGX_T_EOF)
                    ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC2, getMemoryManager());

                if (type == REGX_T_CHAR && getCharData() == chCloseSquare) {
                    // Dash directly before the closing bracket: add both the
                    // character and the dash as single-character ranges.
                    tok->addRange(ch, ch);
                    tok->addRange(chDash, chDash);
                }
                else if (type == REGX_T_XMLSCHEMA_CC_SUBTRACTION) {
                    static const XMLCh dashStr[] = { chDash, chNull};
                    ThrowXMLwithMemMgr2(ParseException, XMLExcepts::Parser_CC6, dashStr, dashStr, getMemoryManager());
                }
                else {
                    XMLInt32 rangeEnd = getCharData();
                    // NOTE(review): this string is captured before a possible
                    // decodeEscaped() below, so for an escaped range end it
                    // holds the raw character data rather than the decoded
                    // value. Kept as-is to preserve the reported text.
                    XMLCh rangeEndStr[] = { static_cast<XMLCh>(rangeEnd), chNull };

                    if (type == REGX_T_CHAR) {
                        if (rangeEnd == chOpenSquare
                            || rangeEnd == chCloseSquare
                            || rangeEnd == chDash)
                            // '[', ']', '-' not allowed and should be escaped
                            ThrowXMLwithMemMgr2(ParseException, XMLExcepts::Parser_CC6, rangeEndStr, rangeEndStr, getMemoryManager());
                    }
                    else if (type == REGX_T_BACKSOLIDUS) {
                        rangeEnd = decodeEscaped();
                    }

                    processNext();

                    if (ch > rangeEnd) {
                        XMLCh chStr[] = { static_cast<XMLCh>(ch), chNull };
                        ThrowXMLwithMemMgr2(ParseException,XMLExcepts::Parser_Ope3, rangeEndStr, chStr, getMemoryManager());
                    }

                    tok->addRange(ch, rangeEnd);
                }
            }
        }
        firstLoop = false;
    }

    // The loop is only left normally via 'break'; reaching EOF means the
    // class was never closed.
    if (getState() == REGX_T_EOF)
        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_CC2, getMemoryManager());

    tok->sortRanges();
    tok->compactRanges();
    setParseContext(S_NORMAL);
    processNext();

    return tok;
}
// Merges the shorthand range selected by 'ch' into 'tok'. Always returns -1.
XMLInt32 ParserForXMLSchema::processCInCharacterClass(RangeToken* const tok,
                                                      const XMLInt32 ch)
{
    Token* const shorthand = getTokenForShorthand(ch);
    tok->mergeRanges(shorthand);

    return -1;
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported. The argument is ignored.
Token* ParserForXMLSchema::processLook(const unsigned short)
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
Token* ParserForXMLSchema::processBacksolidus_A()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
Token* ParserForXMLSchema::processBacksolidus_B()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
Token* ParserForXMLSchema::processBacksolidus_b()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Advances past the current token, then returns the shorthand token that
// getTokenForShorthand() provides for chLatin_C.
Token* ParserForXMLSchema::processBacksolidus_C()
{
    processNext();

    Token* const shorthand = getTokenForShorthand(chLatin_C);
    return shorthand;
}
// Advances past the current token, then returns the shorthand token that
// getTokenForShorthand() provides for chLatin_c.
Token* ParserForXMLSchema::processBacksolidus_c()
{
    processNext();

    Token* const shorthand = getTokenForShorthand(chLatin_c);
    return shorthand;
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
Token* ParserForXMLSchema::processBacksolidus_g()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
Token* ParserForXMLSchema::processBacksolidus_gt()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Advances past the current token, then returns the shorthand token that
// getTokenForShorthand() provides for chLatin_I.
Token* ParserForXMLSchema::processBacksolidus_I()
{
    processNext();

    Token* const shorthand = getTokenForShorthand(chLatin_I);
    return shorthand;
}
// Advances past the current token, then returns the shorthand token that
// getTokenForShorthand() provides for chLatin_i.
Token* ParserForXMLSchema::processBacksolidus_i()
{
    processNext();

    Token* const shorthand = getTokenForShorthand(chLatin_i);
    return shorthand;
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
Token* ParserForXMLSchema::processBacksolidus_lt()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
Token* ParserForXMLSchema::processBacksolidus_X()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
Token* ParserForXMLSchema::processBacksolidus_Z()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
Token* ParserForXMLSchema::processBacksolidus_z()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
Token* ParserForXMLSchema::processBackReference()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
Token* ParserForXMLSchema::processCondition()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
Token* ParserForXMLSchema::processIndependent()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
Token* ParserForXMLSchema::processModifiers()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
Token* ParserForXMLSchema::processParen2()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// Not supported by this parser: unconditionally raises RuntimeException
// with XMLExcepts::Regex_NotSupported.
RangeToken* ParserForXMLSchema::parseSetOperations()
{
    ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, getMemoryManager());

    return 0; // only present to satisfy compilers that demand a return value
}
// ---------------------------------------------------------------------------
// ParserForXMLSchema: Getter methods
// ---------------------------------------------------------------------------
// Maps a shorthand escape letter to the corresponding named range obtained
// from the token factory. Lower-case letters request the named range as-is;
// their upper-case counterparts pass 'true' as the second getRange()
// argument. Any other character yields 0.
Token* ParserForXMLSchema::getTokenForShorthand(const XMLInt32 ch)
{
    TokenFactory* const factory = getTokenFactory();
    Token* shorthand = 0;

    switch (ch) {
    case chLatin_d:
        shorthand = factory->getRange(fgXMLDigit);
        break;
    case chLatin_D:
        shorthand = factory->getRange(fgXMLDigit, true);
        break;
    case chLatin_w:
        shorthand = factory->getRange(fgXMLWord);
        break;
    case chLatin_W:
        shorthand = factory->getRange(fgXMLWord, true);
        break;
    case chLatin_s:
        shorthand = factory->getRange(fgXMLSpace);
        break;
    case chLatin_S:
        shorthand = factory->getRange(fgXMLSpace, true);
        break;
    case chLatin_c:
        shorthand = factory->getRange(fgXMLNameChar);
        break;
    case chLatin_C:
        shorthand = factory->getRange(fgXMLNameChar, true);
        break;
    case chLatin_i:
        shorthand = factory->getRange(fgXMLInitialNameChar);
        break;
    case chLatin_I:
        shorthand = factory->getRange(fgXMLInitialNameChar, true);
        break;
    default:
        // Unknown shorthand: leave the result as 0.
        break;
    }

    return shorthand;
}
// ---------------------------------------------------------------------------
// ParserForXMLSchema: Helper methods
// ---------------------------------------------------------------------------
// Always answers false; the argument is ignored.
bool ParserForXMLSchema::checkQuestion(const int)
{
    return false;
}
/**
 * Decodes the single-character escape at the current parser position.
 *
 * The parser must be in state REGX_T_BACKSOLIDUS; otherwise a
 * ParseException (Parser_Next1) is raised. The current character data is
 * then translated as follows:
 *   - chLatin_n, chLatin_r, chLatin_t become chLF, chCR, chHTab;
 *   - the characters listed in the second case group below are returned
 *     unchanged;
 *   - anything else raises ParseException (Parser_Process2) whose message
 *     argument is the backslash followed by the offending character.
 *
 * This function does not call processNext() itself.
 *
 * @return the decoded character value.
 */
XMLInt32 ParserForXMLSchema::decodeEscaped() {
    if (getState() != REGX_T_BACKSOLIDUS)
        ThrowXMLwithMemMgr(ParseException,XMLExcepts::Parser_Next1, getMemoryManager());

    XMLInt32 ch = getCharData();

    switch (ch) {
    case chLatin_n:
        ch = chLF;
        break;
    case chLatin_r:
        ch = chCR;
        break;
    case chLatin_t:
        ch = chHTab;
        break;
    case chBackSlash:
    case chPipe:
    case chPeriod:
    case chCaret:
    case chDash:
    case chQuestion:
    case chAsterisk:
    case chPlus:
    case chOpenCurly:
    case chCloseCurly:
    case chOpenParen:
    case chCloseParen:
    case chOpenSquare:
    case chCloseSquare:
        // Escaped metacharacter: stands for itself.
        break;
    default:
        {
            // The explicit cast avoids a narrowing conversion
            // (XMLInt32 -> XMLCh) inside the braced initializer, which is
            // ill-formed from C++11 onwards.
            XMLCh chString[] = {chBackSlash, static_cast<XMLCh>(ch), chNull};
            ThrowXMLwithMemMgr1(ParseException,XMLExcepts::Parser_Process2, chString, getMemoryManager());
        }
    }

    return ch;
}
XERCES_CPP_NAMESPACE_END
/**
* End of file ParserForXMLSchema.cpp
*/