blob: 55ab6d28fdd88d193cc4f1947c5452e19b1008d7 [file] [log] [blame]
/*
* Copyright 2000-2009 JetBrains s.r.o.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* @author max
*/
package com.intellij.lang.html;
import com.intellij.codeInsight.completion.CompletionUtilCore;
import com.intellij.codeInsight.daemon.XmlErrorMessages;
import com.intellij.lang.PsiBuilder;
import com.intellij.openapi.util.text.StringUtil;
import com.intellij.psi.tree.CustomParsingType;
import com.intellij.psi.tree.IElementType;
import com.intellij.psi.tree.ILazyParseableElementType;
import com.intellij.psi.xml.XmlElementType;
import com.intellij.psi.xml.XmlTokenType;
import com.intellij.util.containers.Stack;
import com.intellij.xml.util.HtmlUtil;
import org.jetbrains.annotations.NonNls;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
public class HtmlParsing {
// Lower-cased tag names that get special treatment when deciding implicit tag termination.
@NonNls private static final String TR_TAG = "tr";
@NonNls private static final String TD_TAG = "td";
@NonNls private static final String DD_TAG = "dd";
@NonNls private static final String DT_TAG = "dt";
@NonNls private static final String TABLE_TAG = "table";
// Builder that supplies the tokens and receives every marker produced by this parser.
private final PsiBuilder myBuilder;
// Lower-cased names of the tags whose content is currently being parsed (innermost on top).
private final Stack<String> myTagNamesStack = new Stack<String>();
// Markers of the tags currently being parsed; parseTag pushes a marker as soon as a tag starts.
private final Stack<PsiBuilder.Marker> myTagMarkersStack = new Stack<PsiBuilder.Marker>();
// Lower-cased completion dummy identifier. It is matched against closing tag names that were
// lower-cased with StringUtil.toLowerCase, so the very same helper is used here; the plain
// String.toLowerCase() depends on the default locale and could produce a value that never matches.
@NonNls private static final String COMPLETION_NAME = StringUtil.toLowerCase(CompletionUtilCore.DUMMY_IDENTIFIER_TRIMMED);
/**
 * Creates a parser that reads tokens from, and reports markers to, the given builder.
 *
 * @param builder the builder this parser works on
 */
public HtmlParsing(final PsiBuilder builder) {
  this.myBuilder = builder;
}
/**
 * Parses the whole input as an HTML document: leading comments, the prolog and then the
 * top-level content until the end of input. Runs of tokens that fit nowhere are wrapped into
 * error elements, and the result is completed as {@code XmlElementType.HTML_DOCUMENT}.
 */
public void parseDocument() {
  final PsiBuilder.Marker documentMarker = mark();
  while (token() == XmlTokenType.XML_COMMENT_START) {
    parseComment();
  }
  parseProlog();

  // Open marker covering the current run of unexpected tokens, or null when there is no such run.
  PsiBuilder.Marker pendingError = null;
  while (!eof()) {
    final IElementType type = token();
    if (type == XmlTokenType.XML_CHAR_ENTITY_REF || type == XmlTokenType.XML_ENTITY_REF_TOKEN) {
      // References are parsed without closing a pending run of unexpected tokens.
      parseReference();
    }
    else if (type == XmlTokenType.XML_END_TAG_START) {
      // A closing tag at the top level has nothing to close: consume "</name>" as one error.
      final PsiBuilder.Marker strayClosingTag = myBuilder.mark();
      advance();
      if (token() == XmlTokenType.XML_NAME) {
        advance();
        if (token() == XmlTokenType.XML_TAG_END) {
          advance();
        }
      }
      strayClosingTag.error(XmlErrorMessages.message("xml.parsing.closing.tag.matches.nothing"));
    }
    else if (type == XmlTokenType.XML_START_TAG_START) {
      pendingError = flushError(pendingError);
      parseTag("");
      // The top-level tag is finished; forget whatever it left on the stacks.
      myTagMarkersStack.clear();
      myTagNamesStack.clear();
    }
    else if (type == XmlTokenType.XML_COMMENT_START) {
      pendingError = flushError(pendingError);
      parseComment();
    }
    else if (type == XmlTokenType.XML_PI_START) {
      pendingError = flushError(pendingError);
      parseProcessingInstruction();
    }
    else if (type == XmlTokenType.XML_REAL_WHITE_SPACE || type == XmlTokenType.XML_DATA_CHARACTERS) {
      pendingError = flushError(pendingError);
      advance();
    }
    else {
      // Anything else starts (or extends) a run of unexpected tokens.
      if (pendingError == null) {
        pendingError = mark();
      }
      advance();
    }
  }

  if (pendingError != null) {
    pendingError.error(XmlErrorMessages.message("top.level.element.is.not.completed"));
  }
  documentMarker.done(XmlElementType.HTML_DOCUMENT);
}
/**
 * Completes a pending "unexpected tokens" error marker, if there is one.
 *
 * @param error the open error marker, or null when nothing is pending
 * @return always null, so that callers can reset their variable with the result
 */
@Nullable
private static PsiBuilder.Marker flushError(PsiBuilder.Marker error) {
  if (error == null) {
    return null;
  }
  error.error(XmlErrorMessages.message("xml.parsing.unexpected.tokens"));
  return null;
}
/**
 * Parses a doctype declaration: everything from the doctype start token up to and including
 * the doctype end token. Reports an error if the input ends before the end token is seen.
 */
private void parseDoctype() {
  assert token() == XmlTokenType.XML_DOCTYPE_START : "Doctype start expected";
  final PsiBuilder.Marker doctypeMarker = mark();
  advance();
  // Skip the doctype body.
  while (!eof() && token() != XmlTokenType.XML_DOCTYPE_END) {
    advance();
  }
  if (!eof()) {
    // Consume the doctype end token itself.
    advance();
  }
  else {
    error(XmlErrorMessages.message("xml.parsing.unexpected.end.of.file"));
  }
  doctypeMarker.done(XmlElementType.XML_DOCTYPE);
}
/**
 * Tells whether the given (lower-cased) tag name is "dd" or "dt".
 *
 * @param name tag name to check
 * @return true for "dd" and "dt", false for anything else
 */
private static boolean ddordt(String name) {
  if (DD_TAG.equals(name)) {
    return true;
  }
  return DT_TAG.equals(name);
}
/**
 * Parses one tag starting at the current XML_START_TAG_START token: its header (name and
 * attributes) and, unless the tag is self-closed or a "single" tag, its content up to the
 * closing tag or up to the point where the tag is terminated implicitly.
 * <p>
 * The tag's marker is pushed on myTagMarkersStack right away and its lower-cased name on
 * myTagNamesStack once the tag is accepted. When true is returned both entries are still on
 * the stacks and it is up to the caller to pop (or clear) them.
 *
 * @param parentName lower-cased name of the enclosing tag, "" when called for a top-level tag
 * @return false if the tag refused to nest under the parent (everything consumed was rolled back
 *         and the marker was popped again); true if the tag marker was completed
 */
private boolean parseTag(String parentName) {
assert token() == XmlTokenType.XML_START_TAG_START : "Tag start expected";
final PsiBuilder.Marker tag = mark();
myTagMarkersStack.push(tag);
// Start tag header
advance();
final String originalTagName;
if (token() != XmlTokenType.XML_NAME) {
error(XmlErrorMessages.message("xml.parsing.tag.name.expected"));
originalTagName = "";
}
else {
originalTagName = myBuilder.getTokenText();
advance();
}
String tagName = StringUtil.toLowerCase(originalTagName);
// Refuse to nest in three cases: dd/dt directly inside dd/dt, a tag with an optional end inside a
// parent of the same name, or too many open tags. The caller then closes the parent first.
if (ddordt(tagName) && ddordt(parentName) ||
tagName.equals(parentName) && HtmlUtil.isOptionalEndForHtmlTagL(tagName) ||
myTagMarkersStack.size() > MAGIC_FRAME_COUNT // no chance for evil guys wanting us to have stack overflow
) {
tag.rollbackTo();
myTagMarkersStack.pop();
return false;
}
myTagNamesStack.push(tagName);
// Names starting with '#' (presumably FreeMarker directives, judging by the variable name) do not
// get their header parsed as attributes; its tokens are simply skipped.
boolean freeMakerTag = !tagName.isEmpty() && '#' == tagName.charAt(0);
do {
final IElementType tt = token();
if (freeMakerTag) {
if (tt == XmlTokenType.XML_EMPTY_ELEMENT_END ||
tt == XmlTokenType.XML_TAG_END ||
tt == XmlTokenType.XML_END_TAG_START ||
tt == XmlTokenType.XML_START_TAG_START) break;
advance();
}
else {
if (tt == XmlTokenType.XML_NAME) {
parseAttribute();
}
else if (tt == XmlTokenType.XML_CHAR_ENTITY_REF || tt == XmlTokenType.XML_ENTITY_REF_TOKEN) {
parseReference();
}
else {
break;
}
}
}
while (!eof());
// Self-closed tag: nothing more to parse.
if (token() == XmlTokenType.XML_EMPTY_ELEMENT_END) {
advance();
tag.done(XmlElementType.HTML_TAG);
return true;
}
if (token() == XmlTokenType.XML_TAG_END) {
advance();
}
else {
// The header was never closed; finish the tag with what has been consumed so far.
error(XmlErrorMessages.message("tag.start.is.not.closed"));
tag.done(XmlElementType.HTML_TAG);
return true;
}
// Single tags (per HtmlUtil.isSingleHtmlTagL) get no content: accept a matching closing tag placed
// right after the header, otherwise undo the look-ahead and finish the tag here.
if (HtmlUtil.isSingleHtmlTagL(tagName)) {
final PsiBuilder.Marker footer = mark();
if (token() == XmlTokenType.XML_END_TAG_START) {
advance();
if (token() == XmlTokenType.XML_NAME) {
if (tagName.equalsIgnoreCase(myBuilder.getTokenText())) {
advance();
footer.drop();
if (token() == XmlTokenType.XML_TAG_END) {
advance();
}
tag.done(XmlElementType.HTML_TAG);
return true;
}
}
}
footer.rollbackTo();
tag.done(XmlElementType.HTML_TAG);
return true;
}
// Done header, start content
boolean isInlineTagContainer = HtmlUtil.isInlineTagContainerL(tagName);
boolean isOptionalTagEnd = HtmlUtil.isOptionalEndForHtmlTagL(tagName);
// Marker of the first block-level child that may end this tag; used as the cut-off point when
// this tag turns out to have no closing tag of its own.
PsiBuilder.Marker firstBlockChild = null;
// Open marker of the text run currently being collected, or null when no text is open.
PsiBuilder.Marker xmlText = null;
while (!eof()) {
final IElementType tt = token();
if (tt == XmlTokenType.XML_START_TAG_START) {
xmlText = terminateText(xmlText);
if (!parseTag(tagName)) {
// The child refused to nest under this tag and rolled itself back: end this tag here, the
// caller will see the child's start token again.
tag.done(XmlElementType.HTML_TAG);
return true;
}
// The child left its marker and name on the stacks; take them off again.
PsiBuilder.Marker childMarker = myTagMarkersStack.pop();
String childName = myTagNamesStack.pop();
if (isOptionalTagEnd) {
boolean foundMatch = childTerminatesParentInStack(childName, true);
if (foundMatch) {
// allow only one promotion per tag, otherwise last row in table
// will make it up to the first one moving all tags in between under first node
if (!canTerminate(childName, tagName)) {
// Replace this tag's stack entries with the child's ones, so that the caller pops the child.
myTagMarkersStack.pop();
myTagNamesStack.pop();
myTagMarkersStack.push(childMarker);
myTagNamesStack.push(childName);
}
tag.doneBefore(XmlElementType.HTML_TAG, childMarker);
return true;
}
}
if (isInlineTagContainer && HtmlUtil.isHtmlBlockTagL(childName) && isOptionalTagEnd && !HtmlUtil.isPossiblyInlineTag(childName)) {
// A block child inside an inline container with an optional end: the container ends before it.
tag.doneBefore(XmlElementType.HTML_TAG, childMarker);
return true;
}
else if (isOptionalTagEnd && firstBlockChild == null && HtmlUtil.isHtmlBlockTagL(childName) && !HtmlUtil.isHtmlBlockTagL(tagName) && canTerminate(childName, tagName)) {
firstBlockChild = childMarker;
}
}
else if (tt == XmlTokenType.XML_PI_START) {
xmlText = terminateText(xmlText);
parseProcessingInstruction();
}
else if (tt == XmlTokenType.XML_ENTITY_REF_TOKEN) {
xmlText = terminateText(xmlText);
parseReference();
}
else if (tt == XmlTokenType.XML_CHAR_ENTITY_REF) {
xmlText = startText(xmlText);
parseReference();
}
else if (tt == XmlTokenType.XML_CDATA_START) {
xmlText = startText(xmlText);
parseCData();
}
else if (tt == XmlTokenType.XML_COMMENT_START) {
xmlText = startText(xmlText);
parseComment();
}
else if (tt == XmlTokenType.XML_BAD_CHARACTER) {
xmlText = startText(xmlText);
final PsiBuilder.Marker error = mark();
advance();
error.error(XmlErrorMessages.message("unescaped.ampersand.or.nonterminated.character.entity.reference"));
}
else if (tt instanceof CustomParsingType || tt instanceof ILazyParseableElementType) {
xmlText = terminateText(xmlText);
advance();
}
else if (token() == XmlTokenType.XML_END_TAG_START) {
xmlText = terminateText(xmlText);
// Kept open until it is known whether this closing tag is ours.
final PsiBuilder.Marker footer = mark();
advance();
if (token() == XmlTokenType.XML_NAME) {
String endName = StringUtil.toLowerCase(myBuilder.getTokenText());
// A name ending with the completion dummy identifier is accepted as this tag's closing name.
if (!tagName.equals(endName) && !endName.endsWith(COMPLETION_NAME)) {
final boolean hasChancesToMatch = HtmlUtil.isOptionalEndForHtmlTagL(endName) ? childTerminatesParentInStack(endName, false) : myTagNamesStack.contains(endName);
if (hasChancesToMatch) {
// The closing tag may belong to an enclosing tag: un-consume it and finish this tag.
footer.rollbackTo();
if (isOptionalTagEnd) {
if (firstBlockChild != null) {
tag.doneBefore(XmlElementType.HTML_TAG, firstBlockChild);
}
else {
tag.done(XmlElementType.HTML_TAG);
}
}
else {
error(XmlErrorMessages.message("named.element.is.not.closed", originalTagName));
tag.done(XmlElementType.HTML_TAG);
}
return true;
}
else {
// The closing tag matches nothing that is open: consume it as an error and go on with content.
advance();
if (token() == XmlTokenType.XML_TAG_END) advance();
footer.error(XmlErrorMessages.message("xml.parsing.closing.tag.matches.nothing"));
continue;
}
}
// Our own closing tag: consume the name, then flag anything that precedes the tag end.
advance();
while (token() != XmlTokenType.XML_TAG_END && token() != XmlTokenType.XML_START_TAG_START && token() != XmlTokenType.XML_END_TAG_START && !eof()) {
error(XmlErrorMessages.message("xml.parsing.unexpected.token"));
advance();
}
}
else {
error(XmlErrorMessages.message("xml.parsing.closing.tag.name.missing"));
}
footer.drop();
if (token() == XmlTokenType.XML_TAG_END) {
advance();
}
else {
error(XmlErrorMessages.message("xml.parsing.closing.tag.is.not.done"));
}
tag.done(XmlElementType.HTML_TAG);
return true;
}
else {
// Any other token is part of the tag's text.
xmlText = startText(xmlText);
advance();
}
}
// End of input reached while still inside the tag's content.
terminateText(xmlText);
if (isOptionalTagEnd || "body".equalsIgnoreCase(tagName) || "html".equalsIgnoreCase(tagName)) {
if (firstBlockChild != null) {
tag.doneBefore(XmlElementType.HTML_TAG, firstBlockChild);
}
else {
tag.done(XmlElementType.HTML_TAG);
}
}
else {
error(XmlErrorMessages.message("named.element.is.not.closed", originalTagName));
tag.done(XmlElementType.HTML_TAG);
}
return true;
}
/**
 * Tells whether a starting child tag may terminate the given tag: either both names are equal
 * or {@code HtmlUtil.canTerminate} says so.
 *
 * @param childTagName name of the tag that starts
 * @param tagName      name of the tag that would be terminated
 * @return true if the child may terminate the tag
 */
private static boolean canTerminate(final String childTagName, final String tagName) {
  if (childTagName.equals(tagName)) {
    return true;
  }
  return HtmlUtil.canTerminate(childTagName, tagName);
}
/**
 * Walks the stack of open tag names from the innermost tag outwards, looking for a tag that the
 * given tag can terminate.
 *
 * @param childName                 lower-cased name of the tag doing the terminating
 * @param terminateOnNonOptionalTag if true, the search gives up at the first open tag whose end is
 *                                  not optional
 * @return true if a terminable open tag was found before the search was stopped
 */
private boolean childTerminatesParentInStack(final String childName, final boolean terminateOnNonOptionalTag) {
  final boolean tableRow = TR_TAG.equals(childName);
  final boolean tableCell = TD_TAG.equals(childName) || "th".equals(childName);

  int index = myTagNamesStack.size();
  while (--index >= 0) {
    final String openTag = myTagNamesStack.get(index);
    if (terminateOnNonOptionalTag && !HtmlUtil.isOptionalEndForHtmlTagL(openTag)) {
      return false;
    }

    // Cells never look past their row or table, rows never look past their table.
    final boolean reachedTable = TABLE_TAG.equals(openTag);
    if (tableCell && (TR_TAG.equals(openTag) || reachedTable)) {
      return false;
    }
    if (tableRow && reachedTable) {
      return false;
    }

    if (canTerminate(childName, openTag)) {
      return true;
    }
  }
  return false;
}
/**
 * Makes sure a text marker is open.
 *
 * @param xmlText the currently open text marker, or null if there is none
 * @return the given marker when it is not null, otherwise a marker freshly set at the current position
 */
@NotNull
private PsiBuilder.Marker startText(@Nullable PsiBuilder.Marker xmlText) {
  if (xmlText != null) {
    return xmlText;
  }
  final PsiBuilder.Marker freshText = mark();
  assert freshText != null;
  return freshText;
}
/**
 * Shortcut for {@code myBuilder.mark()}.
 *
 * @return the marker created by the builder
 */
protected final PsiBuilder.Marker mark() {
return myBuilder.mark();
}
/**
 * Completes an open text marker as {@code XmlElementType.XML_TEXT}, if there is one.
 *
 * @param xmlText the open text marker, or null when no text is open
 * @return always null, so that callers can reset their variable with the result
 */
@Nullable
private static PsiBuilder.Marker terminateText(@Nullable PsiBuilder.Marker xmlText) {
  if (xmlText == null) {
    return null;
  }
  xmlText.done(XmlElementType.XML_TEXT);
  return null;
}
/**
 * Parses a CDATA section: all tokens from the CDATA start token up to and including the CDATA
 * end token, or up to the end of input when the end token is missing.
 */
private void parseCData() {
  assert token() == XmlTokenType.XML_CDATA_START;
  final PsiBuilder.Marker cdataMarker = mark();
  while (!eof()) {
    final boolean closing = token() == XmlTokenType.XML_CDATA_END;
    advance();
    if (closing) {
      // The end token has just been consumed; the section is complete.
      break;
    }
  }
  cdataMarker.done(XmlElementType.XML_CDATA);
}
/**
 * Parses a comment starting at the current token. Comment characters, character entity
 * references and conditional-comment tokens are consumed as content, bad characters are wrapped
 * into error elements, and the comment end token, when present, closes the comment. Any other
 * token stops the comment without being consumed.
 */
protected void parseComment() {
  final PsiBuilder.Marker commentMarker = mark();
  advance();

  boolean insideComment = true;
  while (insideComment) {
    final IElementType type = token();
    if (type == XmlTokenType.XML_BAD_CHARACTER) {
      final PsiBuilder.Marker badCharacter = mark();
      advance();
      badCharacter.error(XmlErrorMessages.message("xml.parsing.bad.character"));
    }
    else if (type == XmlTokenType.XML_COMMENT_CHARACTERS
             || type == XmlTokenType.XML_CHAR_ENTITY_REF
             || type == XmlTokenType.XML_CONDITIONAL_COMMENT_START
             || type == XmlTokenType.XML_CONDITIONAL_COMMENT_START_END
             || type == XmlTokenType.XML_CONDITIONAL_COMMENT_END_START
             || type == XmlTokenType.XML_CONDITIONAL_COMMENT_END) {
      advance();
    }
    else {
      if (type == XmlTokenType.XML_COMMENT_END) {
        advance();
      }
      insideComment = false;
    }
  }

  commentMarker.done(XmlElementType.XML_COMMENT);
}
/**
 * Consumes a reference token. A character entity reference is just skipped, an entity reference
 * token is wrapped into an {@code XmlElementType.XML_ENTITY_REF} element. Must only be called
 * when the current token is one of the two.
 */
private void parseReference() {
  final IElementType type = token();
  if (type == XmlTokenType.XML_CHAR_ENTITY_REF) {
    advance();
    return;
  }
  if (type == XmlTokenType.XML_ENTITY_REF_TOKEN) {
    final PsiBuilder.Marker entityRef = mark();
    advance();
    entityRef.done(XmlElementType.XML_ENTITY_REF);
    return;
  }
  assert false : "Unexpected token";
}
/**
 * Parses an attribute starting at its name token: the name and, when an equals sign follows,
 * the sign together with the attribute value.
 */
private void parseAttribute() {
  assert token() == XmlTokenType.XML_NAME;
  final PsiBuilder.Marker attribute = mark();
  advance();
  final boolean hasValue = token() == XmlTokenType.XML_EQ;
  if (hasValue) {
    advance();
    parseAttributeValue();
  }
  attribute.done(XmlElementType.XML_ATTRIBUTE);
}
/**
 * Parses an attribute value at the current position. A delimited value runs from the start
 * delimiter to the end delimiter (reporting an error when the latter is missing); otherwise a
 * single token is taken as the value unless it closes the tag.
 */
private void parseAttributeValue() {
  final PsiBuilder.Marker valueMarker = mark();

  if (token() != XmlTokenType.XML_ATTRIBUTE_VALUE_START_DELIMITER) {
    final IElementType single = token();
    if (single != XmlTokenType.XML_TAG_END && single != XmlTokenType.XML_EMPTY_ELEMENT_END) {
      advance(); // Single token att value
    }
    valueMarker.done(XmlElementType.XML_ATTRIBUTE_VALUE);
    return;
  }

  // The first iteration consumes the start delimiter itself.
  IElementType type = token();
  while (type != null
         && type != XmlTokenType.XML_ATTRIBUTE_VALUE_END_DELIMITER
         && type != XmlTokenType.XML_END_TAG_START
         && type != XmlTokenType.XML_EMPTY_ELEMENT_END
         && type != XmlTokenType.XML_START_TAG_START) {
    if (type == XmlTokenType.XML_ENTITY_REF_TOKEN) {
      parseReference();
    }
    else if (type == XmlTokenType.XML_BAD_CHARACTER) {
      final PsiBuilder.Marker badCharacter = mark();
      advance();
      badCharacter.error(XmlErrorMessages.message("unescaped.ampersand.or.nonterminated.character.entity.reference"));
    }
    else {
      advance();
    }
    type = token();
  }

  if (type == XmlTokenType.XML_ATTRIBUTE_VALUE_END_DELIMITER) {
    advance();
  }
  else {
    error(XmlErrorMessages.message("xml.parsing.unclosed.attribute.value"));
  }
  valueMarker.done(XmlElementType.XML_ATTRIBUTE_VALUE);
}
/**
 * Parses the document prolog. Comments and white space in front of it are consumed first and
 * stay outside the prolog element; the prolog itself collects processing instructions, doctype
 * declarations, comments and white space until any other token shows up.
 */
private void parseProlog() {
  // Leading comments and white space are not part of the prolog element.
  for (IElementType type = token();
       type == XmlTokenType.XML_COMMENT_START || type == XmlTokenType.XML_REAL_WHITE_SPACE;
       type = token()) {
    if (type == XmlTokenType.XML_COMMENT_START) {
      parseComment();
    }
    else {
      advance();
    }
  }

  final PsiBuilder.Marker prologMarker = mark();
  boolean insideProlog = true;
  while (insideProlog) {
    final IElementType type = token();
    if (type == XmlTokenType.XML_REAL_WHITE_SPACE) {
      advance();
    }
    else if (type == XmlTokenType.XML_COMMENT_START) {
      parseComment();
    }
    else if (type == XmlTokenType.XML_DOCTYPE_START) {
      parseDoctype();
    }
    else if (type == XmlTokenType.XML_PI_START) {
      parseProcessingInstruction();
    }
    else {
      insideProlog = false;
    }
  }
  prologMarker.done(XmlElementType.XML_PROLOG);
}
/**
 * Parses a processing instruction: the start token, an optional target, a sequence of
 * name/value pairs and the end token. A missing equals sign and a missing end token are
 * reported as errors.
 */
private void parseProcessingInstruction() {
  assert token() == XmlTokenType.XML_PI_START;
  final PsiBuilder.Marker instruction = mark();
  advance();

  final IElementType target = token();
  if (target == XmlTokenType.XML_NAME || target == XmlTokenType.XML_PI_TARGET) {
    advance();
  }

  // Pseudo-attributes: name, expected '=', value.
  while (token() == XmlTokenType.XML_NAME) {
    advance();
    if (token() != XmlTokenType.XML_EQ) {
      error(XmlErrorMessages.message("expected.attribute.eq.sign"));
    }
    else {
      advance();
    }
    parseAttributeValue();
  }

  if (token() != XmlTokenType.XML_PI_END) {
    error(XmlErrorMessages.message("xml.parsing.unterminated.processing.instruction"));
  }
  else {
    advance();
  }
  instruction.done(XmlElementType.XML_PROCESSING_INSTRUCTION);
}
/**
 * Shortcut for {@code myBuilder.getTokenType()}.
 *
 * @return the type of the current token as reported by the builder
 */
protected final IElementType token() {
return myBuilder.getTokenType();
}
/**
 * Shortcut for {@code myBuilder.eof()}.
 *
 * @return whatever the builder reports as its end-of-input state
 */
protected final boolean eof() {
return myBuilder.eof();
}
/**
 * Shortcut for {@code myBuilder.advanceLexer()}: moves on to the next token.
 */
protected final void advance() {
myBuilder.advanceLexer();
}
/**
 * Shortcut for {@code myBuilder.error(message)}: reports an error at the current position.
 *
 * @param message the error text handed to the builder
 */
private void error(final String message) {
myBuilder.error(message);
}
// Limit on the number of simultaneously open tag markers: parseTag rolls a tag back instead of
// descending further once myTagMarkersStack grows beyond this size, which guards the recursive
// parser against stack overflow on deeply nested input.
private static final int MAGIC_FRAME_COUNT = 450;
}