// blob: 397d70c4891b91bcda1350db03f5adf4f43ab3eb [file] [log] [blame]
/*
* Copyright (C) 2003 Lars Knoll (knoll@kde.org)
* Copyright (C) 2004, 2005, 2006, 2008, 2009, 2010 Apple Inc. All rights reserved.
* Copyright (C) 2008 Eric Seidel <eric@webkit.org>
* Copyright (C) 2009 - 2010 Torch Mobile (Beijing) Co. Ltd. All rights reserved.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public License
* along with this library; see the file COPYING.LIB. If not, write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef CSSTokenizer_h
#define CSSTokenizer_h
#include "wtf/Noncopyable.h"
#include "wtf/OwnPtr.h"
#include "wtf/text/WTFString.h"
namespace blink {
class BisonCSSParser;
struct CSSParserLocation;
struct CSSParserString;
// Tokenizer bound to a single BisonCSSParser. The source text is held in
// either an 8-bit (LChar) or a 16-bit (UChar) buffer; is8BitSource() reports
// which one is active. lex() forwards to whichever lexing member function is
// currently stored in m_lexFunc.
class CSSTokenizer {
    WTF_MAKE_NONCOPYABLE(CSSTokenizer);
public:
    // FIXME: This should not be needed but there are still some ties between the 2 classes.
    friend class BisonCSSParser;

    // Binds the tokenizer to |parser| and puts every scalar member into a
    // known state. The cursor pointers and m_lexFunc are zeroed here so that
    // using the tokenizer before they have been assigned fails on a null
    // value instead of reading an indeterminate one.
    // The constructor is explicit so that a BisonCSSParser is never silently
    // converted into a tokenizer.
    explicit CSSTokenizer(BisonCSSParser& parser)
        : m_parser(parser)
        , m_parsedTextPrefixLength(0)
        , m_parsedTextSuffixLength(0)
        , m_parsingMode(NormalMode)
        , m_is8BitSource(false)
        , m_currentCharacter8(0)
        , m_currentCharacter16(0)
        , m_length(0)
        , m_token(0)
        , m_lineNumber(0)
        , m_tokenStartLineNumber(0)
        , m_internal(true)
        , m_lexFunc(0)
    {
        m_tokenStart.ptr8 = 0;
    }

    // Defined out of line. Takes a prefix, the text to parse and a suffix,
    // each with an explicit length where it is a raw character pointer.
    void setupTokenizer(const char* prefix, unsigned prefixLength, const String&, const char* suffix, unsigned suffixLength);

    CSSParserLocation currentLocation();

    // Runs the lexing routine currently selected through m_lexFunc and
    // returns its result. |yylval| is passed through untouched.
    inline int lex(void* yylval) { return (this->*m_lexFunc)(yylval); }

    // Returns the start offset of the current token, capped at
    // (m_length - 1 - m_parsedTextSuffixLength) and then reduced by the
    // prefix length.
    // NOTE(review): the arithmetic is unsigned, so this presumably relies on
    // m_length exceeding the suffix length and on the capped offset being at
    // least the prefix length -- confirm against setupTokenizer().
    inline unsigned safeUserStringTokenOffset()
    {
        return std::min(tokenStartOffset(), static_cast<unsigned>(m_length - 1 - m_parsedTextSuffixLength)) - m_parsedTextPrefixLength;
    }

    bool is8BitSource() const { return m_is8BitSource; }

    // FIXME: These 2 functions should be private so that we don't need the definitions below.
    template <typename CharacterType>
    inline CharacterType* tokenStart();
    inline unsigned tokenStartOffset();

private:
    UChar* allocateStringBuffer16(size_t len);

    // Accessors templated on the character width of the source.
    template <typename CharacterType>
    inline CharacterType*& currentCharacter();
    template <typename CharacterType>
    inline CharacterType* dataStart();
    template <typename CharacterType>
    inline void setTokenStart(CharacterType*);
    template <typename CharacterType>
    inline bool isIdentifierStart();
    template <typename CharacterType>
    inline CSSParserLocation tokenLocation();

    // Escape, identifier, string and URI helpers.
    template <typename CharacterType>
    static unsigned parseEscape(CharacterType*&);
    template <typename DestCharacterType>
    static inline void UnicodeToChars(DestCharacterType*&, unsigned);
    template <typename SrcCharacterType, typename DestCharacterType>
    static inline bool parseIdentifierInternal(SrcCharacterType*&, DestCharacterType*&, bool&);
    template <typename SrcCharacterType>
    static size_t peekMaxIdentifierLen(SrcCharacterType*);
    template <typename CharacterType>
    inline void parseIdentifier(CharacterType*&, CSSParserString&, bool&);
    template <typename SrcCharacterType>
    static size_t peekMaxStringLen(SrcCharacterType*, UChar quote);
    template <typename SrcCharacterType, typename DestCharacterType>
    static inline bool parseStringInternal(SrcCharacterType*&, DestCharacterType*&, UChar);
    template <typename CharacterType>
    inline void parseString(CharacterType*&, CSSParserString& resultString, UChar);
    template <typename CharacterType>
    inline bool findURI(CharacterType*& start, CharacterType*& end, UChar& quote);
    template <typename SrcCharacterType>
    static size_t peekMaxURILen(SrcCharacterType*, UChar quote);
    template <typename SrcCharacterType, typename DestCharacterType>
    static inline bool parseURIInternal(SrcCharacterType*&, DestCharacterType*&, UChar quote);
    template <typename CharacterType>
    inline void parseURI(CSSParserString&);
    template <typename CharacterType>
    inline bool parseUnicodeRange();
    template <typename CharacterType>
    bool parseNthChild();
    template <typename CharacterType>
    bool parseNthChildExtra();

    // Token classification helpers.
    template <typename CharacterType>
    inline bool detectFunctionTypeToken(int);
    template <typename CharacterType>
    inline void detectMediaQueryToken(int);
    template <typename CharacterType>
    inline void detectNumberToken(CharacterType*, int);
    template <typename CharacterType>
    inline void detectDashToken(int);
    template <typename CharacterType>
    inline void detectAtToken(int, bool);
    template <typename CharacterType>
    inline void detectSupportsToken(int);

    // Lexing routine templated on the source character type; its signature
    // matches what m_lexFunc can point to.
    template <typename SourceCharacterType>
    int realLex(void* yylval);

    BisonCSSParser& m_parser;

    // Lengths subtracted or clamped against in safeUserStringTokenOffset().
    size_t m_parsedTextPrefixLength;
    size_t m_parsedTextSuffixLength;

    enum ParsingMode {
        NormalMode,
        MediaQueryMode,
        SupportsMode,
        NthChildMode
    };
    ParsingMode m_parsingMode;

    // Selects between the 8-bit and 16-bit members below.
    bool m_is8BitSource;
    OwnPtr<LChar[]> m_dataStart8;
    OwnPtr<UChar[]> m_dataStart16;
    LChar* m_currentCharacter8;
    UChar* m_currentCharacter16;

    // During parsing of an ASCII stylesheet we might locate escape
    // sequences that expand into UTF-16 code points. Strings,
    // identifiers and URIs containing such escape sequences are
    // stored in m_cssStrings16 so that we don't have to store the
    // whole stylesheet as UTF-16.
    Vector<OwnPtr<UChar[]> > m_cssStrings16;

    // Start of the current token, viewed as either character width.
    union {
        LChar* ptr8;
        UChar* ptr16;
    } m_tokenStart;

    unsigned m_length;
    int m_token;
    int m_lineNumber;
    int m_tokenStartLineNumber;

    // FIXME: This boolean is misnamed. Also it would be nice if we could consolidate it
    // with the CSSParserMode logic to determine if internal properties are allowed.
    bool m_internal;

    // Lexing routine invoked by lex(); null until one is assigned.
    int (CSSTokenizer::*m_lexFunc)(void*);
};
// Distance, in characters, from the beginning of the active source buffer to
// the start of the current token. The 8-bit or 16-bit pointer pair is chosen
// according to is8BitSource().
inline unsigned CSSTokenizer::tokenStartOffset()
{
    return static_cast<unsigned>(is8BitSource()
        ? m_tokenStart.ptr8 - m_dataStart8.get()
        : m_tokenStart.ptr16 - m_dataStart16.get());
}
// LChar specialization: returns the ptr8 member of the m_tokenStart union.
template <>
inline LChar* CSSTokenizer::tokenStart<LChar>()
{
    LChar* const start8 = m_tokenStart.ptr8;
    return start8;
}
// UChar specialization: returns the ptr16 member of the m_tokenStart union.
template <>
inline UChar* CSSTokenizer::tokenStart<UChar>()
{
    UChar* const start16 = m_tokenStart.ptr16;
    return start16;
}
} // namespace blink
#endif // CSSTokenizer_h