/*
 * Copyright (C) 2009 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#ifndef RegexPattern_h | |
#define RegexPattern_h | |
#if ENABLE(YARR) | |
#include <wtf/Vector.h> | |
#include <wtf/unicode/Unicode.h> | |
namespace JSC { namespace Yarr { | |
// Per-term-type "stack space" constants for backtracking information.
// NOTE(review): going by the names, each value is the number of stack slots
// a term of that kind needs for its backtracking state; confirm against the
// code that consumes these macros.
#define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
#define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers.
#define RegexStackSpaceForBackTrackInfoBackReference 2
#define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
#define RegexStackSpaceForBackTrackInfoParentheses 4

// Forward declaration: PatternTerm and PatternAlternative hold pointers to
// PatternDisjunction before the full definition is available.
struct PatternDisjunction;
struct CharacterRange { | |
UChar begin; | |
UChar end; | |
CharacterRange(UChar begin, UChar end) | |
: begin(begin) | |
, end(end) | |
{ | |
} | |
}; | |
struct CharacterClass : FastAllocBase { | |
Vector<UChar> m_matches; | |
Vector<CharacterRange> m_ranges; | |
Vector<UChar> m_matchesUnicode; | |
Vector<CharacterRange> m_rangesUnicode; | |
}; | |
// How a term's quantityCount is to be interpreted. Every PatternTerm starts
// out as QuantifierFixedCount with a count of 1 and may be changed through
// PatternTerm::quantify().
enum QuantifierType {
    QuantifierFixedCount,
    QuantifierGreedy,
    QuantifierNonGreedy // No trailing comma: ill-formed before C++11.
};
struct PatternTerm { | |
enum Type { | |
TypeAssertionBOL, | |
TypeAssertionEOL, | |
TypeAssertionWordBoundary, | |
TypePatternCharacter, | |
TypeCharacterClass, | |
TypeBackReference, | |
TypeForwardReference, | |
TypeParenthesesSubpattern, | |
TypeParentheticalAssertion, | |
} type; | |
bool invertOrCapture; | |
union { | |
UChar patternCharacter; | |
CharacterClass* characterClass; | |
unsigned subpatternId; | |
struct { | |
PatternDisjunction* disjunction; | |
unsigned subpatternId; | |
unsigned lastSubpatternId; | |
bool isCopy; | |
} parentheses; | |
}; | |
QuantifierType quantityType; | |
unsigned quantityCount; | |
int inputPosition; | |
unsigned frameLocation; | |
PatternTerm(UChar ch) | |
: type(PatternTerm::TypePatternCharacter) | |
{ | |
patternCharacter = ch; | |
quantityType = QuantifierFixedCount; | |
quantityCount = 1; | |
} | |
PatternTerm(CharacterClass* charClass, bool invert) | |
: type(PatternTerm::TypeCharacterClass) | |
, invertOrCapture(invert) | |
{ | |
characterClass = charClass; | |
quantityType = QuantifierFixedCount; | |
quantityCount = 1; | |
} | |
PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture) | |
: type(type) | |
, invertOrCapture(invertOrCapture) | |
{ | |
parentheses.disjunction = disjunction; | |
parentheses.subpatternId = subpatternId; | |
parentheses.isCopy = false; | |
quantityType = QuantifierFixedCount; | |
quantityCount = 1; | |
} | |
PatternTerm(Type type, bool invert = false) | |
: type(type) | |
, invertOrCapture(invert) | |
{ | |
quantityType = QuantifierFixedCount; | |
quantityCount = 1; | |
} | |
PatternTerm(unsigned spatternId) | |
: type(TypeBackReference) | |
, invertOrCapture(false) | |
{ | |
subpatternId = spatternId; | |
quantityType = QuantifierFixedCount; | |
quantityCount = 1; | |
} | |
static PatternTerm ForwardReference() | |
{ | |
return PatternTerm(TypeForwardReference); | |
} | |
static PatternTerm BOL() | |
{ | |
return PatternTerm(TypeAssertionBOL); | |
} | |
static PatternTerm EOL() | |
{ | |
return PatternTerm(TypeAssertionEOL); | |
} | |
static PatternTerm WordBoundary(bool invert) | |
{ | |
return PatternTerm(TypeAssertionWordBoundary, invert); | |
} | |
bool invert() | |
{ | |
return invertOrCapture; | |
} | |
bool capture() | |
{ | |
return invertOrCapture; | |
} | |
void quantify(unsigned count, QuantifierType type) | |
{ | |
quantityCount = count; | |
quantityType = type; | |
} | |
}; | |
struct PatternAlternative : FastAllocBase { | |
PatternAlternative(PatternDisjunction* disjunction) | |
: m_parent(disjunction) | |
{ | |
} | |
PatternTerm& lastTerm() | |
{ | |
ASSERT(m_terms.size()); | |
return m_terms[m_terms.size() - 1]; | |
} | |
void removeLastTerm() | |
{ | |
ASSERT(m_terms.size()); | |
m_terms.shrink(m_terms.size() - 1); | |
} | |
Vector<PatternTerm> m_terms; | |
PatternDisjunction* m_parent; | |
unsigned m_minimumSize; | |
bool m_hasFixedSize; | |
}; | |
struct PatternDisjunction : FastAllocBase { | |
PatternDisjunction(PatternAlternative* parent = 0) | |
: m_parent(parent) | |
{ | |
} | |
~PatternDisjunction() | |
{ | |
deleteAllValues(m_alternatives); | |
} | |
PatternAlternative* addNewAlternative() | |
{ | |
PatternAlternative* alternative = new PatternAlternative(this); | |
m_alternatives.append(alternative); | |
return alternative; | |
} | |
Vector<PatternAlternative*> m_alternatives; | |
PatternAlternative* m_parent; | |
unsigned m_minimumSize; | |
unsigned m_callFrameSize; | |
bool m_hasFixedSize; | |
}; | |
// Factories for the built-in character classes.
//
// You probably don't want to call these functions directly: call
// newlineCharacterClass() et al. on a RegexPattern instance instead, which
// invokes the matching factory at most once, caches the result and keeps it
// in the list of classes the pattern deletes.
// NOTE(review): each call presumably allocates a fresh CharacterClass that
// the caller must then own; the definitions are not visible in this header.
CharacterClass* newlineCreate();
CharacterClass* digitsCreate();
CharacterClass* spacesCreate();
CharacterClass* wordcharCreate();
CharacterClass* nondigitsCreate();
CharacterClass* nonspacesCreate();
CharacterClass* nonwordcharCreate();
struct RegexPattern { | |
RegexPattern(bool ignoreCase, bool multiline) | |
: m_ignoreCase(ignoreCase) | |
, m_multiline(multiline) | |
, m_numSubpatterns(0) | |
, m_maxBackReference(0) | |
, newlineCached(0) | |
, digitsCached(0) | |
, spacesCached(0) | |
, wordcharCached(0) | |
, nondigitsCached(0) | |
, nonspacesCached(0) | |
, nonwordcharCached(0) | |
{ | |
} | |
~RegexPattern() | |
{ | |
deleteAllValues(m_disjunctions); | |
deleteAllValues(m_userCharacterClasses); | |
} | |
void reset() | |
{ | |
m_numSubpatterns = 0; | |
m_maxBackReference = 0; | |
newlineCached = 0; | |
digitsCached = 0; | |
spacesCached = 0; | |
wordcharCached = 0; | |
nondigitsCached = 0; | |
nonspacesCached = 0; | |
nonwordcharCached = 0; | |
deleteAllValues(m_disjunctions); | |
m_disjunctions.clear(); | |
deleteAllValues(m_userCharacterClasses); | |
m_userCharacterClasses.clear(); | |
} | |
bool containsIllegalBackReference() | |
{ | |
return m_maxBackReference > m_numSubpatterns; | |
} | |
CharacterClass* newlineCharacterClass() | |
{ | |
if (!newlineCached) | |
m_userCharacterClasses.append(newlineCached = newlineCreate()); | |
return newlineCached; | |
} | |
CharacterClass* digitsCharacterClass() | |
{ | |
if (!digitsCached) | |
m_userCharacterClasses.append(digitsCached = digitsCreate()); | |
return digitsCached; | |
} | |
CharacterClass* spacesCharacterClass() | |
{ | |
if (!spacesCached) | |
m_userCharacterClasses.append(spacesCached = spacesCreate()); | |
return spacesCached; | |
} | |
CharacterClass* wordcharCharacterClass() | |
{ | |
if (!wordcharCached) | |
m_userCharacterClasses.append(wordcharCached = wordcharCreate()); | |
return wordcharCached; | |
} | |
CharacterClass* nondigitsCharacterClass() | |
{ | |
if (!nondigitsCached) | |
m_userCharacterClasses.append(nondigitsCached = nondigitsCreate()); | |
return nondigitsCached; | |
} | |
CharacterClass* nonspacesCharacterClass() | |
{ | |
if (!nonspacesCached) | |
m_userCharacterClasses.append(nonspacesCached = nonspacesCreate()); | |
return nonspacesCached; | |
} | |
CharacterClass* nonwordcharCharacterClass() | |
{ | |
if (!nonwordcharCached) | |
m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate()); | |
return nonwordcharCached; | |
} | |
bool m_ignoreCase; | |
bool m_multiline; | |
unsigned m_numSubpatterns; | |
unsigned m_maxBackReference; | |
PatternDisjunction* m_body; | |
Vector<PatternDisjunction*, 4> m_disjunctions; | |
Vector<CharacterClass*> m_userCharacterClasses; | |
private: | |
CharacterClass* newlineCached; | |
CharacterClass* digitsCached; | |
CharacterClass* spacesCached; | |
CharacterClass* wordcharCached; | |
CharacterClass* nondigitsCached; | |
CharacterClass* nonspacesCached; | |
CharacterClass* nonwordcharCached; | |
}; | |
} } // namespace JSC::Yarr | |
#endif | |
#endif // RegexPattern_h |