blob: 3a8a9766c1f25a38f6a59178c9ddefbc0a73ea6e [file] [log] [blame]
/*
* [The "BSD license"]
* Copyright (c) 2010 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.analysis;
import org.antlr.misc.IntSet;
import org.antlr.misc.IntervalSet;
import org.antlr.tool.Grammar;
/** A state machine transition label. A label can be either a simple
* label such as a token or character. A label can be a set of char or
* tokens. It can be an epsilon transition. It can be a semantic predicate
* (which assumes an epsilon transition) or a tree of predicates (in a DFA).
* Special label types have to be < 0 to avoid conflict with char.
*/
public class Label implements Comparable, Cloneable {
public static final int INVALID = -7;
public static final int ACTION = -6;
public static final int EPSILON = -5;
public static final String EPSILON_STR = "<EPSILON>";
/** label is a semantic predicate; implies label is epsilon also */
public static final int SEMPRED = -4;
/** label is a set of tokens or char */
public static final int SET = -3;
/** End of Token is like EOF for lexer rules. It implies that no more
* characters are available and that NFA conversion should terminate
* for this path. For example
*
* A : 'a' 'b' | 'a' ;
*
* yields a DFA predictor:
*
* o-a->o-b->1 predict alt 1
* |
* |-EOT->o predict alt 2
*
* To generate code for EOT, treat it as the "default" path, which
* implies there is no way to mismatch a char for the state from
* which the EOT emanates.
*/
public static final int EOT = -2;
public static final int EOF = -1;
/** We have labels like EPSILON that are below 0; it's hard to
* store them in an array with negative index so use this
* constant as an index shift when accessing arrays based upon
* token type. If real token type is i, then array index would be
* NUM_FAUX_LABELS + i.
*/
public static final int NUM_FAUX_LABELS = -INVALID;
/** Anything at this value or larger can be considered a simple atom int
* for easy comparison during analysis only; faux labels are not used
* during parse time for real token types or char values.
*/
public static final int MIN_ATOM_VALUE = EOT;
// TODO: is 0 a valid unicode char? max is FFFF -1, right?
public static final int MIN_CHAR_VALUE = '\u0000';
public static final int MAX_CHAR_VALUE = '\uFFFF';
/** End of rule token type; imaginary token type used only for
* local, partial FOLLOW sets to indicate that the local FOLLOW
* hit the end of rule. During error recovery, the local FOLLOW
* of a token reference may go beyond the end of the rule and have
* to use FOLLOW(rule). I have to just shift the token types to 2..n
* rather than 1..n to accommodate this imaginary token in my bitsets.
* If I didn't use a bitset implementation for runtime sets, I wouldn't
* need this. EOF is another candidate for a run time token type for
* parsers. Follow sets are not computed for lexers so we do not have
* this issue.
*/
public static final int EOR_TOKEN_TYPE =
org.antlr.runtime.Token.EOR_TOKEN_TYPE;
public static final int DOWN = org.antlr.runtime.Token.DOWN;
public static final int UP = org.antlr.runtime.Token.UP;
/** tokens and char range overlap; tokens are MIN_TOKEN_TYPE..n */
public static final int MIN_TOKEN_TYPE =
org.antlr.runtime.Token.MIN_TOKEN_TYPE;
/** The wildcard '.' char atom implies all valid characters==UNICODE */
//public static final IntSet ALLCHAR = IntervalSet.of(MIN_CHAR_VALUE,MAX_CHAR_VALUE);
/** The token type or character value; or, signifies special label. */
protected int label;
/** A set of token types or character codes if label==SET */
// TODO: try IntervalSet for everything
protected IntSet labelSet;
public Label(int label) {
this.label = label;
}
/** Make a set label */
public Label(IntSet labelSet) {
if ( labelSet==null ) {
this.label = SET;
this.labelSet = IntervalSet.of(INVALID);
return;
}
int singleAtom = labelSet.getSingleElement();
if ( singleAtom!=INVALID ) {
// convert back to a single atomic element if |labelSet|==1
label = singleAtom;
return;
}
this.label = SET;
this.labelSet = labelSet;
}
public Object clone() {
Label l;
try {
l = (Label)super.clone();
l.label = this.label;
l.labelSet = new IntervalSet();
l.labelSet.addAll(this.labelSet);
}
catch (CloneNotSupportedException e) {
throw new InternalError();
}
return l;
}
public void add(Label a) {
if ( isAtom() ) {
labelSet = IntervalSet.of(label);
label=SET;
if ( a.isAtom() ) {
labelSet.add(a.getAtom());
}
else if ( a.isSet() ) {
labelSet.addAll(a.getSet());
}
else {
throw new IllegalStateException("can't add element to Label of type "+label);
}
return;
}
if ( isSet() ) {
if ( a.isAtom() ) {
labelSet.add(a.getAtom());
}
else if ( a.isSet() ) {
labelSet.addAll(a.getSet());
}
else {
throw new IllegalStateException("can't add element to Label of type "+label);
}
return;
}
throw new IllegalStateException("can't add element to Label of type "+label);
}
public boolean isAtom() {
return label>=MIN_ATOM_VALUE;
}
public boolean isEpsilon() {
return label==EPSILON;
}
public boolean isSemanticPredicate() {
return false;
}
public boolean isAction() {
return false;
}
public boolean isSet() {
return label==SET;
}
/** return the single atom label or INVALID if not a single atom */
public int getAtom() {
if ( isAtom() ) {
return label;
}
return INVALID;
}
public IntSet getSet() {
if ( label!=SET ) {
// convert single element to a set if they ask for it.
return IntervalSet.of(label);
}
return labelSet;
}
public void setSet(IntSet set) {
label=SET;
labelSet = set;
}
public SemanticContext getSemanticContext() {
return null;
}
public boolean matches(int atom) {
if ( label==atom ) {
return true; // handle the single atom case efficiently
}
if ( isSet() ) {
return labelSet.member(atom);
}
return false;
}
public boolean matches(IntSet set) {
if ( isAtom() ) {
return set.member(getAtom());
}
if ( isSet() ) {
// matches if intersection non-nil
return !getSet().and(set).isNil();
}
return false;
}
public boolean matches(Label other) {
if ( other.isSet() ) {
return matches(other.getSet());
}
if ( other.isAtom() ) {
return matches(other.getAtom());
}
return false;
}
public int hashCode() {
if (label==SET) {
return labelSet.hashCode();
}
else {
return label;
}
}
// TODO: do we care about comparing set {A} with atom A? Doesn't now.
public boolean equals(Object o) {
if ( o==null ) {
return false;
}
if ( this == o ) {
return true; // equals if same object
}
// labels must be the same even if epsilon or set or sempred etc...
if ( label!=((Label)o).label ) {
return false;
}
if ( label==SET ) {
return this.labelSet.equals(((Label)o).labelSet);
}
return true; // label values are same, so true
}
public int compareTo(Object o) {
return this.label-((Label)o).label;
}
/** Predicates are lists of AST nodes from the NFA created from the
* grammar, but the same predicate could be cut/paste into multiple
* places in the grammar. I must compare the text of all the
* predicates to truly answer whether {p1,p2} .equals {p1,p2}.
* Unfortunately, I cannot rely on the AST.equals() to work properly
* so I must do a brute force O(n^2) nested traversal of the Set
* doing a String compare.
*
* At this point, Labels are not compared for equals when they are
* predicates, but here's the code for future use.
*/
/*
protected boolean predicatesEquals(Set others) {
Iterator iter = semanticContext.iterator();
while (iter.hasNext()) {
AST predAST = (AST) iter.next();
Iterator inner = semanticContext.iterator();
while (inner.hasNext()) {
AST otherPredAST = (AST) inner.next();
if ( !predAST.getText().equals(otherPredAST.getText()) ) {
return false;
}
}
}
return true;
}
*/
public String toString() {
switch (label) {
case SET :
return labelSet.toString();
default :
return String.valueOf(label);
}
}
public String toString(Grammar g) {
switch (label) {
case SET :
return labelSet.toString(g);
default :
return g.getTokenDisplayName(label);
}
}
/*
public String predicatesToString() {
if ( semanticContext==NFAConfiguration.DEFAULT_CLAUSE_SEMANTIC_CONTEXT ) {
return "!other preds";
}
StringBuffer buf = new StringBuffer();
Iterator iter = semanticContext.iterator();
while (iter.hasNext()) {
AST predAST = (AST) iter.next();
buf.append(predAST.getText());
if ( iter.hasNext() ) {
buf.append("&");
}
}
return buf.toString();
}
*/
public static boolean intersect(Label label, Label edgeLabel) {
boolean hasIntersection = false;
boolean labelIsSet = label.isSet();
boolean edgeIsSet = edgeLabel.isSet();
if ( !labelIsSet && !edgeIsSet && edgeLabel.label==label.label ) {
hasIntersection = true;
}
else if ( labelIsSet && edgeIsSet &&
!edgeLabel.getSet().and(label.getSet()).isNil() ) {
hasIntersection = true;
}
else if ( labelIsSet && !edgeIsSet &&
label.getSet().member(edgeLabel.label) ) {
hasIntersection = true;
}
else if ( !labelIsSet && edgeIsSet &&
edgeLabel.getSet().member(label.label) ) {
hasIntersection = true;
}
return hasIntersection;
}
}