/* | |
* Copyright (C) 2009 Google Inc. All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions are | |
* met: | |
* | |
* * Redistributions of source code must retain the above copyright | |
* notice, this list of conditions and the following disclaimer. | |
* * Redistributions in binary form must reproduce the above | |
* copyright notice, this list of conditions and the following disclaimer | |
* in the documentation and/or other materials provided with the | |
* distribution. | |
* * Neither the name of Google Inc. nor the names of its | |
* contributors may be used to endorse or promote products derived from | |
* this software without specific prior written permission. | |
* | |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
*/ | |
// Generate js file as follows:
//
// re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \
//   | sed 's|^yy\([^:]*\)*\:|case \1:|' \
//   | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \
//   | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \
//   | sed 's|[*]cursor|this._charAt(cursor)|' \
//   | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \
//   | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \
//   | sed 's|unsigned\ int|var|' \
//   | sed 's|var\ yych|case 1: var yych|'
/**
 * Tokenizer for HTML source text.
 *
 * Sets up the lexer condition ids, the switch labels used by the generated
 * state machine, the parse-condition bit flags, and the initial condition.
 *
 * @constructor
 * @extends {WebInspector.SourceTokenizer}
 */
WebInspector.SourceHTMLTokenizer = function()
{
    WebInspector.SourceTokenizer.call(this);

    // Lexer condition ids. The order is determined by the generated code.
    var lexConditions = {
        INITIAL: 0,
        COMMENT: 1,
        DOCTYPE: 2,
        TAG: 3,
        DSTRING: 4,
        SSTRING: 5
    };
    this._lexConditions = lexConditions;

    // Switch labels for the per-condition entry points of the state machine;
    // the lexer template refers to them through its "case this.case_" prefix.
    this.case_INITIAL = 1000;
    this.case_COMMENT = 1001;
    this.case_DOCTYPE = 1002;
    this.case_TAG = 1003;
    this.case_DSTRING = 1004;
    this.case_SSTRING = 1005;

    // Parse-condition flags. Every non-zero value is a distinct bit so that
    // several of them can be combined in a single parseCondition number.
    var parseConditions = {
        INITIAL: 0,
        ATTRIBUTE: 1,
        ATTRIBUTE_VALUE: 2,
        LINKIFY: 4,
        A_NODE: 8,
        SCRIPT: 16
    };
    this._parseConditions = parseConditions;

    // NOTE(review): condition is assigned the very same object as
    // initialCondition, and the prototype methods modify the parse condition
    // in place - confirm that the base class or the callers copy it.
    this.initialCondition = {
        lexCondition: lexConditions.INITIAL,
        parseCondition: parseConditions.INITIAL
    };
    this.condition = this.initialCondition;
};
WebInspector.SourceHTMLTokenizer.prototype = { | |
set line(line) { | |
if (this._internalJavaScriptTokenizer) { | |
var match = /<\/script/i.exec(line); | |
if (match) { | |
this._internalJavaScriptTokenizer.line = line.substring(0, match.index); | |
} else | |
this._internalJavaScriptTokenizer.line = line; | |
} | |
this._line = line; | |
}, | |
_isExpectingAttribute: function() | |
{ | |
return this._condition.parseCondition & this._parseConditions.ATTRIBUTE; | |
}, | |
_isExpectingAttributeValue: function() | |
{ | |
return this._condition.parseCondition & this._parseConditions.ATTRIBUTE_VALUE; | |
}, | |
_setExpectingAttribute: function() | |
{ | |
if (this._isExpectingAttributeValue()) | |
this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE; | |
this._condition.parseCondition |= this._parseConditions.ATTRIBUTE; | |
}, | |
_setExpectingAttributeValue: function() | |
{ | |
if (this._isExpectingAttribute()) | |
this._condition.parseCondition ^= this._parseConditions.ATTRIBUTE; | |
this._condition.parseCondition |= this._parseConditions.ATTRIBUTE_VALUE; | |
}, | |
_stringToken: function(cursor, stringEnds) | |
{ | |
if (!this._isExpectingAttributeValue()) { | |
this.tokenType = null; | |
return cursor; | |
} | |
this.tokenType = this._attrValueTokenType(); | |
if (stringEnds) | |
this._setExpectingAttribute(); | |
return cursor; | |
}, | |
_attrValueTokenType: function() | |
{ | |
if (this._condition.parseCondition & this._parseConditions.LINKIFY) { | |
if (this._condition.parseCondition & this._parseConditions.A_NODE) | |
return "html-external-link"; | |
return "html-resource-link"; | |
} | |
return "html-attribute-value"; | |
}, | |
nextToken: function(cursor) | |
{ | |
if (this._internalJavaScriptTokenizer) { | |
// Re-set line to force </script> detection first. | |
this.line = this._line; | |
if (cursor !== this._internalJavaScriptTokenizer._line.length) { | |
// Tokenizer is stateless, so restore its condition before tokenizing and save it after. | |
this._internalJavaScriptTokenizer.condition = this._condition.internalJavaScriptTokenizerCondition; | |
var result = this._internalJavaScriptTokenizer.nextToken(cursor); | |
this.tokenType = this._internalJavaScriptTokenizer.tokenType; | |
this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.condition; | |
return result; | |
} else if (cursor !== this._line.length) | |
delete this._internalJavaScriptTokenizer; | |
} | |
var cursorOnEnter = cursor; | |
var gotoCase = 1; | |
while (1) { | |
switch (gotoCase) | |
// Following comment is replaced with generated state machine. | |
/*!re2c | |
re2c:define:YYCTYPE = "var"; | |
re2c:define:YYCURSOR = cursor; | |
re2c:define:YYGETCONDITION = "this.getLexCondition"; | |
re2c:define:YYSETCONDITION = "this.setLexCondition"; | |
re2c:condprefix = "case this.case_"; | |
re2c:condenumprefix = "this._lexConditions."; | |
re2c:yyfill:enable = 0; | |
re2c:labelprefix = "case "; | |
re2c:indent:top = 2; | |
re2c:indent:string = " "; | |
CommentContent = ([^-\r\n] | ("--" [^>]))*; | |
Comment = "<!--" CommentContent "-->"; | |
CommentStart = "<!--" CommentContent [\r\n]; | |
CommentEnd = CommentContent "-->"; | |
DocTypeStart = "<!" [Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee]; | |
DocTypeContent = [^\r\n>]*; | |
ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt]; | |
ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt]; | |
LT = "<" | "</"; | |
GT = ">"; | |
EqualSign = "="; | |
DoubleStringContent = [^\r\n\"]*; | |
SingleStringContent = [^\r\n\']*; | |
StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'"; | |
DoubleStringStart = "\"" DoubleStringContent [\r\n]; | |
DoubleStringEnd = DoubleStringContent "\""; | |
SingleStringStart = "'" SingleStringContent [\r\n]; | |
SingleStringEnd = SingleStringContent "'"; | |
Identifier = [^ \r\n"'<>\[\]=]+; | |
<INITIAL> Comment { this.tokenType = "html-comment"; return cursor; } | |
<INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; } | |
<COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; } | |
<COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; } | |
<INITIAL> DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; } | |
<DOCTYPE> DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; } | |
<DOCTYPE> GT => INITIAL { this.tokenType = "html-doctype"; return cursor; } | |
<INITIAL> ScriptStart => TAG | |
{ | |
if (this._condition.parseCondition & this._parseConditions.SCRIPT) { | |
// Do not tokenize script tag contents, keep lexer state although processing "<". | |
this.setLexCondition(this._lexConditions.INITIAL); | |
this.tokenType = null; | |
return cursor; | |
} | |
this.tokenType = "html-tag"; | |
this._condition.parseCondition = this._parseConditions.SCRIPT; | |
this._setExpectingAttribute(); | |
return cursor; | |
} | |
<INITIAL> ScriptEnd => TAG | |
{ | |
this.tokenType = "html-tag"; | |
this._condition.parseCondition = this._parseConditions.INITIAL; | |
return cursor; | |
} | |
<INITIAL> LT => TAG | |
{ | |
if (this._condition.parseCondition & this._parseConditions.SCRIPT) { | |
// Do not tokenize script tag contents, keep lexer state although processing "<". | |
this.setLexCondition(this._lexConditions.INITIAL); | |
this.tokenType = null; | |
return cursor; | |
} | |
this._condition.parseCondition = this._parseConditions.INITIAL; | |
this.tokenType = "html-tag"; | |
return cursor; | |
} | |
<TAG> GT => INITIAL | |
{ | |
this.tokenType = "html-tag"; | |
if (this._condition.parseCondition & this._parseConditions.SCRIPT) { | |
if (!this._internalJavaScriptTokenizer) { | |
this._internalJavaScriptTokenizer = WebInspector.SourceTokenizer.Registry.getInstance().getTokenizer("text/javascript"); | |
this._condition.internalJavaScriptTokenizerCondition = this._internalJavaScriptTokenizer.initialCondition; | |
} | |
// Do not tokenize script tag contents. | |
return cursor; | |
} | |
this._condition.parseCondition = this._parseConditions.INITIAL; | |
return cursor; | |
} | |
<TAG> StringLiteral { return this._stringToken(cursor, true); } | |
<TAG> DoubleStringStart => DSTRING { return this._stringToken(cursor); } | |
<DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); } | |
<DSTRING> DoubleStringEnd => TAG { return this._stringToken(cursor, true); } | |
<TAG> SingleStringStart => SSTRING { return this._stringToken(cursor); } | |
<SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); } | |
<SSTRING> SingleStringEnd => TAG { return this._stringToken(cursor, true); } | |
<TAG> EqualSign => TAG | |
{ | |
if (this._isExpectingAttribute()) | |
this._setExpectingAttributeValue(); | |
this.tokenType = null; | |
return cursor; | |
} | |
<TAG> Identifier | |
{ | |
if (this._condition.parseCondition === this._parseConditions.SCRIPT) { | |
// Fall through if expecting attributes. | |
this.tokenType = null; | |
return cursor; | |
} | |
if (this._condition.parseCondition === this._parseConditions.INITIAL) { | |
this.tokenType = "html-tag"; | |
this._setExpectingAttribute(); | |
var token = this._line.substring(cursorOnEnter, cursor); | |
if (token === "a") | |
this._condition.parseCondition |= this._parseConditions.A_NODE; | |
else if (this._condition.parseCondition & this._parseConditions.A_NODE) | |
this._condition.parseCondition ^= this._parseConditions.A_NODE; | |
} else if (this._isExpectingAttribute()) { | |
var token = this._line.substring(cursorOnEnter, cursor); | |
if (token === "href" || token === "src") | |
this._condition.parseCondition |= this._parseConditions.LINKIFY; | |
else if (this._condition.parseCondition |= this._parseConditions.LINKIFY) | |
this._condition.parseCondition ^= this._parseConditions.LINKIFY; | |
this.tokenType = "html-attribute-name"; | |
} else if (this._isExpectingAttributeValue()) | |
this.tokenType = this._attrValueTokenType(); | |
else | |
this.tokenType = null; | |
return cursor; | |
} | |
<*> [^] { this.tokenType = null; return cursor; } | |
*/ | |
} | |
} | |
} | |
WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype; |