| /* This is automatically generated code. Do not modify it. */ |
| package com.intellij.lexer; |
| |
| import com.intellij.psi.tree.IElementType; |
| import com.intellij.psi.*; |
| import com.intellij.psi.xml.*; |
| |
| %% |
| |
| /* Generate a scanner that operates on Unicode characters. */ |
| %unicode |
| |
| %{ |
| /* No-argument constructor: delegates to the Reader-based constructor, passing a null reader. */ |
| /* NOTE(review): presumably the input is supplied later through the FlexLexer API - confirm. */ |
| public _HtmlLexer() { |
| this((java.io.Reader)null); |
| } |
| %} |
| |
| /* The generated scanner is the public class _HtmlLexer, which implements FlexLexer. */ |
| /* Its scanning method is named advance() and returns an IElementType. */ |
| %class _HtmlLexer |
| %public |
| %implements FlexLexer |
| %function advance |
| %type IElementType |
| /* Code run when the end of input is reached: nothing to do, just return. */ |
| %eof{ return; |
| %eof} |
| |
| /* |
| * Lexical states used by the rules below, in addition to the implicit YYINITIAL state. |
| * They are declared with %state, so rules written without a state list apply in all of them. |
| * Where each state is entered (see the yybegin() calls in the rules): |
| * DOC_TYPE - after the DOCTYPE macro matched in YYINITIAL |
| * COMMENT - after the comment opener in YYINITIAL |
| * START_TAG_NAME / END_TAG_NAME - at a tag opener followed by a TAG_NAME |
| * TAG_ATTRIBUTES - after a TAG_NAME, and again after each attribute value |
| * ATTRIBUTE_VALUE_START - after "=" in TAG_ATTRIBUTES |
| * ATTRIBUTE_VALUE_DQ / ATTRIBUTE_VALUE_SQ - after an opening double / single quote |
| * PROCESSING_INSTRUCTION - after the processing-instruction opener in YYINITIAL |
| * START_TAG_NAME2 / END_TAG_NAME2 - at a tag opener followed by a TAG_NAME_FWT |
| * TAG_CHARACTERS - after a TAG_NAME_FWT in START_TAG_NAME2 |
| * C_COMMENT_START / C_COMMENT_END - after "[" / "<![" inside COMMENT |
| */ |
| %state DOC_TYPE |
| %state COMMENT |
| %state START_TAG_NAME |
| %state END_TAG_NAME |
| %state TAG_ATTRIBUTES |
| %state ATTRIBUTE_VALUE_START |
| %state ATTRIBUTE_VALUE_DQ |
| %state ATTRIBUTE_VALUE_SQ |
| %state PROCESSING_INSTRUCTION |
| %state START_TAG_NAME2 |
| %state END_TAG_NAME2 |
| %state TAG_CHARACTERS |
| %state C_COMMENT_START |
| %state C_COMMENT_END |
| /* IMPORTANT! number of states should not exceed 16. See JspHighlightingLexer. */ |
| |
| /* Basic character classes: any letter, an ASCII digit, and a non-empty run of */ |
| /* space, line feed, carriage return, tab or form feed. */ |
| ALPHA=[:letter:] |
| DIGIT=[0-9] |
| WHITE_SPACE_CHARS=[ \n\r\t\f]+ |
| |
| /* TAG_NAME: starts with a letter, "_" or ":", continues with letters, digits, "_", ":", "." or "-". */ |
| TAG_NAME=({ALPHA}|"_"|":")({ALPHA}|{DIGIT}|"_"|":"|"."|"-")* |
| /* TAG_NAME_FWT: a "#" followed by zero or more of the same continuation characters. */ |
| TAG_NAME_FWT=("#")({ALPHA}|{DIGIT}|"_"|":"|"."|"-")* |
| /* see http://www.w3.org/TR/html5/syntax.html#syntax-attribute-name */ |
| /* ATTRIBUTE_NAME: same first character as TAG_NAME, then anything except whitespace, quotes, "<", ">", "/" and "=". */ |
| ATTRIBUTE_NAME=({ALPHA}|"_"|":")([^ \n\r\t\f\"\'<>/=])* |
| |
| /* DTD_REF: a double-quoted or single-quoted string (no escaping of the quote character). */ |
| DTD_REF= "\"" [^\"]* "\"" | "'" [^']* "'" |
| /* DOCTYPE, HTML, PUBLIC: the corresponding keywords, matched case-insensitively letter by letter. */ |
| DOCTYPE= "<!" (D|d)(O|o)(C|c)(T|t)(Y|y)(P|p)(E|e) |
| HTML= (H|h)(T|t)(M|m)(L|l) |
| PUBLIC= (P|p)(U|u)(B|b)(L|l)(I|i)(C|c) |
| /* EL_EMBEDDMENT: "${", any characters other than "<" and "}", then "}". */ |
| /* NOTE(review): not referenced by any rule visible in this file - confirm whether it is still needed. */ |
| EL_EMBEDDMENT="${" [^<\}]* "}" |
| |
| /* END_COMMENT: "--", optional whitespace, then ">". */ |
| END_COMMENT="--"[ \n\r\t\f]*">" |
| |
| /* CONDITIONAL_COMMENT_CONDITION: a letter followed by letters, whitespace, digits or any of . ( ) | ! & */ |
| CONDITIONAL_COMMENT_CONDITION=({ALPHA})({ALPHA}|{WHITE_SPACE_CHARS}|{DIGIT}|"."|"("|")"|"|"|"!"|"&")* |
| %% |
| |
| /* Processing instructions: "<?" in content opens one and switches to PROCESSING_INSTRUCTION. */ |
| <YYINITIAL> "<?" { yybegin(PROCESSING_INSTRUCTION); return XmlTokenType.XML_PI_START; } |
| /* ">" with an optional preceding "?" closes it and returns to YYINITIAL. */ |
| <PROCESSING_INSTRUCTION> "?"? ">" { yybegin(YYINITIAL); return XmlTokenType.XML_PI_END; } |
| /* Body text: characters other than "?" and ">", or a "?" followed by a character other than ">". */ |
| <PROCESSING_INSTRUCTION> ([^\?\>] | (\?[^\>]))* { return XmlTokenType.XML_PI_TARGET; } |
| |
| /* DOCTYPE declaration: the case-insensitive opener switches to DOC_TYPE. */ |
| <YYINITIAL> {DOCTYPE} { yybegin(DOC_TYPE); return XmlTokenType.XML_DOCTYPE_START; } |
| /* Inside DOC_TYPE: the HTML and PUBLIC keywords, quoted references, and the closing ">". */ |
| <DOC_TYPE> {HTML} { return XmlTokenType.XML_NAME; } |
| <DOC_TYPE> {PUBLIC} { return XmlTokenType.XML_DOCTYPE_PUBLIC; } |
| <DOC_TYPE> {DTD_REF} { return XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN;} |
| <DOC_TYPE> ">" { yybegin(YYINITIAL); return XmlTokenType.XML_DOCTYPE_END; } |
| /* Whitespace in content is XML_REAL_WHITE_SPACE; whitespace in the markup states listed below is XML_WHITE_SPACE. */ |
| <YYINITIAL> {WHITE_SPACE_CHARS} { return XmlTokenType.XML_REAL_WHITE_SPACE; } |
| <DOC_TYPE,TAG_ATTRIBUTES,ATTRIBUTE_VALUE_START,PROCESSING_INSTRUCTION, START_TAG_NAME, END_TAG_NAME, END_TAG_NAME2, TAG_CHARACTERS> {WHITE_SPACE_CHARS} { return XmlTokenType.XML_WHITE_SPACE; } |
| /* Start tags. When "<" is followed by a tag name, the whole match is pushed back and only the */ |
| /* state changes; these actions return no token, so "<" and the name are then scanned in the new state. */ |
| <YYINITIAL> "<" {TAG_NAME} { yybegin(START_TAG_NAME); yypushback(yylength()); } |
| <YYINITIAL> "<" {TAG_NAME_FWT} { yybegin(START_TAG_NAME2); yypushback(yylength()); } |
| /* The pushed-back "<" is emitted here as the start-tag opener. */ |
| <START_TAG_NAME, START_TAG_NAME2, TAG_CHARACTERS> "<" { return XmlTokenType.XML_START_TAG_START; } |
| |
| /* End tags use the same push-back technique for "</" followed by a tag name. */ |
| <YYINITIAL> "</" {TAG_NAME} { yybegin(END_TAG_NAME); yypushback(yylength()); } |
| <YYINITIAL> "</" {TAG_NAME_FWT} { yybegin(END_TAG_NAME2); yypushback(yylength()); } |
| /* "</" itself; in YYINITIAL this rule is the one that applies when no tag name follows. */ |
| <YYINITIAL, END_TAG_NAME, END_TAG_NAME2> "</" { return XmlTokenType.XML_END_TAG_START; } |
| |
| /* Comments: "<!--" in content opens a comment and switches to COMMENT. */ |
| <YYINITIAL> "<!--" { yybegin(COMMENT); return XmlTokenType.XML_COMMENT_START; } |
| /* "[" and "<![" inside a comment switch to the conditional-comment states. */ |
| <COMMENT> "[" { yybegin(C_COMMENT_START); return XmlTokenType.XML_CONDITIONAL_COMMENT_START; } |
| <COMMENT> "<![" { yybegin(C_COMMENT_END); return XmlTokenType.XML_CONDITIONAL_COMMENT_END_START; } |
| /* "--", optional whitespace and ">" close the comment. */ |
| <COMMENT> {END_COMMENT} { yybegin(YYINITIAL); return XmlTokenType.XML_COMMENT_END; } |
| <COMMENT> ">" { |
| // according to HTML spec (http://www.w3.org/html/wg/drafts/html/master/syntax.html#comments) |
| // comments should start with <!-- and end with --> thus making <!--> absolutely valid comment |
| // please note that it's not true for XML (http://www.w3.org/TR/REC-xml/#sec-comments) |
| // |
| // The two characters before this ">" are read straight from the buffer. The loc >= 2 check |
| // keeps charAt() from being called with a negative index when the token starts closer than |
| // two characters to the beginning of the buffer; in that case ">" is plain comment text. |
| int loc = getTokenStart(); |
| if (loc >= 2 && zzBuffer.charAt(loc - 1) == '-' && zzBuffer.charAt(loc - 2) == '-') { |
| yybegin(YYINITIAL); return XmlTokenType.XML_COMMENT_END; |
| } |
| return XmlTokenType.XML_COMMENT_CHARACTERS; |
| } |
| /* Any other single character is comment text. */ |
| <COMMENT> [^] { return XmlTokenType.XML_COMMENT_CHARACTERS; } |
| |
| /* Conditional comments. The condition text is reported as ordinary comment characters. */ |
| <C_COMMENT_START,C_COMMENT_END> {CONDITIONAL_COMMENT_CONDITION} { return XmlTokenType.XML_COMMENT_CHARACTERS; } |
| /* In C_COMMENT_START any other single character falls back to COMMENT; the two-character "]>" */ |
| /* below is a longer match, so it is preferred over this rule when both apply. */ |
| <C_COMMENT_START> [^] { yybegin(COMMENT); return XmlTokenType.XML_COMMENT_CHARACTERS; } |
| <C_COMMENT_START> "]>" { yybegin(COMMENT); return XmlTokenType.XML_CONDITIONAL_COMMENT_START_END; } |
| /* The comment terminator ends the whole comment from either conditional state. */ |
| <C_COMMENT_START,C_COMMENT_END> {END_COMMENT} { yybegin(YYINITIAL); return XmlTokenType.XML_COMMENT_END; } |
| /* In C_COMMENT_END, "]" closes the conditional part; any other character falls back to COMMENT. */ |
| <C_COMMENT_END> "]" { yybegin(COMMENT); return XmlTokenType.XML_CONDITIONAL_COMMENT_END; } |
| <C_COMMENT_END> [^] { yybegin(COMMENT); return XmlTokenType.XML_COMMENT_CHARACTERS; } |
| |
| /* A backslash immediately followed by "$" in content is reported as plain data characters. */ |
| <YYINITIAL> \\\$ { |
| return XmlTokenType.XML_DATA_CHARACTERS; |
| } |
| |
| /* Tag names. After a regular name (start or end tag) scanning continues in TAG_ATTRIBUTES. */ |
| <START_TAG_NAME, END_TAG_NAME> {TAG_NAME} { yybegin(TAG_ATTRIBUTES); return XmlTokenType.XML_NAME; } |
| /* "#"-prefixed names: the end-tag variant stays in END_TAG_NAME2, the start-tag variant moves to TAG_CHARACTERS. */ |
| <END_TAG_NAME2> {TAG_NAME_FWT} { return XmlTokenType.XML_NAME; } |
| <START_TAG_NAME2> {TAG_NAME_FWT} { yybegin(TAG_CHARACTERS); return XmlTokenType.XML_NAME; } |
| |
| /* ">" and "/>" close the tag and return to content. */ |
| <TAG_ATTRIBUTES, END_TAG_NAME2, TAG_CHARACTERS> ">" { yybegin(YYINITIAL); return XmlTokenType.XML_TAG_END; } |
| <TAG_ATTRIBUTES, TAG_CHARACTERS> "/>" { yybegin(YYINITIAL); return XmlTokenType.XML_EMPTY_ELEMENT_END; } |
| /* Attribute name, and "=" which switches to ATTRIBUTE_VALUE_START. */ |
| <TAG_ATTRIBUTES> {ATTRIBUTE_NAME} { return XmlTokenType.XML_NAME; } |
| <TAG_ATTRIBUTES> "=" { yybegin(ATTRIBUTE_VALUE_START); return XmlTokenType.XML_EQ; } |
| /* Any other character: switch back to YYINITIAL and push the character back so it is scanned */ |
| /* again there. This action returns no token. */ |
| /* NOTE(review): "break" presumably leaves the generated action switch so scanning continues - confirm against the skeleton. */ |
| <TAG_ATTRIBUTES,START_TAG_NAME, END_TAG_NAME, END_TAG_NAME2> [^] { yybegin(YYINITIAL); yypushback(1); break; } |
| |
| /* In TAG_CHARACTERS every other single character is reported as XML_TAG_CHARACTERS. */ |
| <TAG_CHARACTERS> [^] { return XmlTokenType.XML_TAG_CHARACTERS; } |
| |
| /* After "=": the tag may end immediately, without a value. */ |
| <ATTRIBUTE_VALUE_START> ">" { yybegin(YYINITIAL); return XmlTokenType.XML_TAG_END; } |
| <ATTRIBUTE_VALUE_START> "/>" { yybegin(YYINITIAL); return XmlTokenType.XML_EMPTY_ELEMENT_END; } |
| |
| /* Unquoted value: first character is not whitespace, a quote or ">"; it continues with characters */ |
| /* other than whitespace and ">", or with "/" followed by a character other than ">". */ |
| <ATTRIBUTE_VALUE_START> [^ \n\r\t\f'\"\>]([^ \n\r\t\f\>]|(\/[^\>]))* { yybegin(TAG_ATTRIBUTES); return XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN; } |
| /* An opening quote switches to the matching quoted-value state. */ |
| <ATTRIBUTE_VALUE_START> "\"" { yybegin(ATTRIBUTE_VALUE_DQ); return XmlTokenType.XML_ATTRIBUTE_VALUE_START_DELIMITER; } |
| <ATTRIBUTE_VALUE_START> "'" { yybegin(ATTRIBUTE_VALUE_SQ); return XmlTokenType.XML_ATTRIBUTE_VALUE_START_DELIMITER; } |
| |
| /* Double-quoted attribute value: the closing quote returns to TAG_ATTRIBUTES; a backslash followed */ |
| /* by "$" and any other single character are both reported as value tokens. */ |
| <ATTRIBUTE_VALUE_DQ> { |
| "\"" { yybegin(TAG_ATTRIBUTES); return XmlTokenType.XML_ATTRIBUTE_VALUE_END_DELIMITER; } |
| \\\$ { return XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN; } |
| [^] { return XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN;} |
| } |
| |
| /* Single-quoted attribute value: same rules as above with "'" as the delimiter. */ |
| <ATTRIBUTE_VALUE_SQ> { |
| "'" { yybegin(TAG_ATTRIBUTES); return XmlTokenType.XML_ATTRIBUTE_VALUE_END_DELIMITER; } |
| \\\$ { return XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN; } |
| [^] { return XmlTokenType.XML_ATTRIBUTE_VALUE_TOKEN;} |
| } |
| |
| /* Character entity references: the named references listed here plus decimal ("&#...;") and */ |
| /* hexadecimal ("&#x...;") numeric references. The rules have no state list, so they apply in */ |
| /* YYINITIAL and in every state declared with %state. The named literals must be spelled as full */ |
| /* entity references, including the leading "&" and the trailing ";". */ |
| "&lt;" | |
| "&gt;" | |
| "&apos;" | |
| "&quot;" | |
| "&nbsp;" | |
| "&amp;" | |
| "&#"{DIGIT}+";" | |
| "&#x"({DIGIT}|[a-fA-F])+";" { return XmlTokenType.XML_CHAR_ENTITY_REF; } |
| /* Any other "&name;" is a general entity reference. The named references above match this */ |
| /* pattern with the same length, but they are listed first and therefore take priority. */ |
| "&"{TAG_NAME}";" { return XmlTokenType.XML_ENTITY_REF_TOKEN; } |
| |
| /* Content data: a run of characters other than "<", "&", "$", "#" and whitespace, in which a */ |
| /* backslash followed by "$" or "#" is accepted as part of the run. */ |
| <YYINITIAL> ([^<&\$# \n\r\t\f]|(\\\$)|(\\#))* { return XmlTokenType.XML_DATA_CHARACTERS; } |
| /* Any single content character not covered by a longer or earlier rule is data as well. */ |
| <YYINITIAL> [^] { return XmlTokenType.XML_DATA_CHARACTERS; } |
| /* Catch-all without a state list: a character no other rule accepts is a bad character. */ |
| /* In YYINITIAL the rule above is listed first and wins for single characters. */ |
| [^] { return XmlTokenType.XML_BAD_CHARACTER; } |