| /* |
| * [The "BSD license"] |
| * Copyright (c) 2010 Terence Parr |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. The name of the author may not be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| package org.antlr.analysis; |
| |
| import org.antlr.misc.IntSet; |
| import org.antlr.misc.IntervalSet; |
| import org.antlr.tool.Grammar; |
| |
| /** A state machine transition label. A label can be either a simple |
| * label such as a token or character. A label can be a set of char or |
| * tokens. It can be an epsilon transition. It can be a semantic predicate |
| * (which assumes an epsilon transition) or a tree of predicates (in a DFA). |
| * Special label types have to be < 0 to avoid conflict with char. |
| */ |
| public class Label implements Comparable, Cloneable { |
| public static final int INVALID = -7; |
| |
| public static final int ACTION = -6; |
| |
| public static final int EPSILON = -5; |
| |
| public static final String EPSILON_STR = "<EPSILON>"; |
| |
| /** label is a semantic predicate; implies label is epsilon also */ |
| public static final int SEMPRED = -4; |
| |
| /** label is a set of tokens or char */ |
| public static final int SET = -3; |
| |
| /** End of Token is like EOF for lexer rules. It implies that no more |
| * characters are available and that NFA conversion should terminate |
| * for this path. For example |
| * |
| * A : 'a' 'b' | 'a' ; |
| * |
| * yields a DFA predictor: |
| * |
| * o-a->o-b->1 predict alt 1 |
| * | |
| * |-EOT->o predict alt 2 |
| * |
| * To generate code for EOT, treat it as the "default" path, which |
| * implies there is no way to mismatch a char for the state from |
| * which the EOT emanates. |
| */ |
| public static final int EOT = -2; |
| |
| public static final int EOF = -1; |
| |
| /** We have labels like EPSILON that are below 0; it's hard to |
| * store them in an array with negative index so use this |
| * constant as an index shift when accessing arrays based upon |
| * token type. If real token type is i, then array index would be |
| * NUM_FAUX_LABELS + i. |
| */ |
| public static final int NUM_FAUX_LABELS = -INVALID; |
| |
| /** Anything at this value or larger can be considered a simple atom int |
| * for easy comparison during analysis only; faux labels are not used |
| * during parse time for real token types or char values. |
| */ |
| public static final int MIN_ATOM_VALUE = EOT; |
| |
| // TODO: is 0 a valid unicode char? max is FFFF -1, right? |
| public static final int MIN_CHAR_VALUE = '\u0000'; |
| public static final int MAX_CHAR_VALUE = '\uFFFF'; |
| |
| /** End of rule token type; imaginary token type used only for |
| * local, partial FOLLOW sets to indicate that the local FOLLOW |
| * hit the end of rule. During error recovery, the local FOLLOW |
| * of a token reference may go beyond the end of the rule and have |
| * to use FOLLOW(rule). I have to just shift the token types to 2..n |
| * rather than 1..n to accommodate this imaginary token in my bitsets. |
| * If I didn't use a bitset implementation for runtime sets, I wouldn't |
| * need this. EOF is another candidate for a run time token type for |
| * parsers. Follow sets are not computed for lexers so we do not have |
| * this issue. |
| */ |
| public static final int EOR_TOKEN_TYPE = |
| org.antlr.runtime.Token.EOR_TOKEN_TYPE; |
| |
| public static final int DOWN = org.antlr.runtime.Token.DOWN; |
| public static final int UP = org.antlr.runtime.Token.UP; |
| |
| /** tokens and char range overlap; tokens are MIN_TOKEN_TYPE..n */ |
| public static final int MIN_TOKEN_TYPE = |
| org.antlr.runtime.Token.MIN_TOKEN_TYPE; |
| |
| /** The wildcard '.' char atom implies all valid characters==UNICODE */ |
| //public static final IntSet ALLCHAR = IntervalSet.of(MIN_CHAR_VALUE,MAX_CHAR_VALUE); |
| |
| /** The token type or character value; or, signifies special label. */ |
| protected int label; |
| |
| /** A set of token types or character codes if label==SET */ |
| // TODO: try IntervalSet for everything |
| protected IntSet labelSet; |
| |
| public Label(int label) { |
| this.label = label; |
| } |
| |
| /** Make a set label */ |
| public Label(IntSet labelSet) { |
| if ( labelSet==null ) { |
| this.label = SET; |
| this.labelSet = IntervalSet.of(INVALID); |
| return; |
| } |
| int singleAtom = labelSet.getSingleElement(); |
| if ( singleAtom!=INVALID ) { |
| // convert back to a single atomic element if |labelSet|==1 |
| label = singleAtom; |
| return; |
| } |
| this.label = SET; |
| this.labelSet = labelSet; |
| } |
| |
| public Object clone() { |
| Label l; |
| try { |
| l = (Label)super.clone(); |
| l.label = this.label; |
| l.labelSet = new IntervalSet(); |
| l.labelSet.addAll(this.labelSet); |
| } |
| catch (CloneNotSupportedException e) { |
| throw new InternalError(); |
| } |
| return l; |
| } |
| |
| public void add(Label a) { |
| if ( isAtom() ) { |
| labelSet = IntervalSet.of(label); |
| label=SET; |
| if ( a.isAtom() ) { |
| labelSet.add(a.getAtom()); |
| } |
| else if ( a.isSet() ) { |
| labelSet.addAll(a.getSet()); |
| } |
| else { |
| throw new IllegalStateException("can't add element to Label of type "+label); |
| } |
| return; |
| } |
| if ( isSet() ) { |
| if ( a.isAtom() ) { |
| labelSet.add(a.getAtom()); |
| } |
| else if ( a.isSet() ) { |
| labelSet.addAll(a.getSet()); |
| } |
| else { |
| throw new IllegalStateException("can't add element to Label of type "+label); |
| } |
| return; |
| } |
| throw new IllegalStateException("can't add element to Label of type "+label); |
| } |
| |
| public boolean isAtom() { |
| return label>=MIN_ATOM_VALUE; |
| } |
| |
| public boolean isEpsilon() { |
| return label==EPSILON; |
| } |
| |
| public boolean isSemanticPredicate() { |
| return false; |
| } |
| |
| public boolean isAction() { |
| return false; |
| } |
| |
| public boolean isSet() { |
| return label==SET; |
| } |
| |
| /** return the single atom label or INVALID if not a single atom */ |
| public int getAtom() { |
| if ( isAtom() ) { |
| return label; |
| } |
| return INVALID; |
| } |
| |
| public IntSet getSet() { |
| if ( label!=SET ) { |
| // convert single element to a set if they ask for it. |
| return IntervalSet.of(label); |
| } |
| return labelSet; |
| } |
| |
| public void setSet(IntSet set) { |
| label=SET; |
| labelSet = set; |
| } |
| |
| public SemanticContext getSemanticContext() { |
| return null; |
| } |
| |
| public boolean matches(int atom) { |
| if ( label==atom ) { |
| return true; // handle the single atom case efficiently |
| } |
| if ( isSet() ) { |
| return labelSet.member(atom); |
| } |
| return false; |
| } |
| |
| public boolean matches(IntSet set) { |
| if ( isAtom() ) { |
| return set.member(getAtom()); |
| } |
| if ( isSet() ) { |
| // matches if intersection non-nil |
| return !getSet().and(set).isNil(); |
| } |
| return false; |
| } |
| |
| |
| public boolean matches(Label other) { |
| if ( other.isSet() ) { |
| return matches(other.getSet()); |
| } |
| if ( other.isAtom() ) { |
| return matches(other.getAtom()); |
| } |
| return false; |
| } |
| |
| public int hashCode() { |
| if (label==SET) { |
| return labelSet.hashCode(); |
| } |
| else { |
| return label; |
| } |
| } |
| |
| // TODO: do we care about comparing set {A} with atom A? Doesn't now. |
| public boolean equals(Object o) { |
| if ( o==null ) { |
| return false; |
| } |
| if ( this == o ) { |
| return true; // equals if same object |
| } |
| // labels must be the same even if epsilon or set or sempred etc... |
| if ( label!=((Label)o).label ) { |
| return false; |
| } |
| if ( label==SET ) { |
| return this.labelSet.equals(((Label)o).labelSet); |
| } |
| return true; // label values are same, so true |
| } |
| |
| public int compareTo(Object o) { |
| return this.label-((Label)o).label; |
| } |
| |
| /** Predicates are lists of AST nodes from the NFA created from the |
| * grammar, but the same predicate could be cut/paste into multiple |
| * places in the grammar. I must compare the text of all the |
| * predicates to truly answer whether {p1,p2} .equals {p1,p2}. |
| * Unfortunately, I cannot rely on the AST.equals() to work properly |
| * so I must do a brute force O(n^2) nested traversal of the Set |
| * doing a String compare. |
| * |
| * At this point, Labels are not compared for equals when they are |
| * predicates, but here's the code for future use. |
| */ |
| /* |
| protected boolean predicatesEquals(Set others) { |
| Iterator iter = semanticContext.iterator(); |
| while (iter.hasNext()) { |
| AST predAST = (AST) iter.next(); |
| Iterator inner = semanticContext.iterator(); |
| while (inner.hasNext()) { |
| AST otherPredAST = (AST) inner.next(); |
| if ( !predAST.getText().equals(otherPredAST.getText()) ) { |
| return false; |
| } |
| } |
| } |
| return true; |
| } |
| */ |
| |
| public String toString() { |
| switch (label) { |
| case SET : |
| return labelSet.toString(); |
| default : |
| return String.valueOf(label); |
| } |
| } |
| |
| public String toString(Grammar g) { |
| switch (label) { |
| case SET : |
| return labelSet.toString(g); |
| default : |
| return g.getTokenDisplayName(label); |
| } |
| } |
| |
| /* |
| public String predicatesToString() { |
| if ( semanticContext==NFAConfiguration.DEFAULT_CLAUSE_SEMANTIC_CONTEXT ) { |
| return "!other preds"; |
| } |
| StringBuffer buf = new StringBuffer(); |
| Iterator iter = semanticContext.iterator(); |
| while (iter.hasNext()) { |
| AST predAST = (AST) iter.next(); |
| buf.append(predAST.getText()); |
| if ( iter.hasNext() ) { |
| buf.append("&"); |
| } |
| } |
| return buf.toString(); |
| } |
| */ |
| |
| public static boolean intersect(Label label, Label edgeLabel) { |
| boolean hasIntersection = false; |
| boolean labelIsSet = label.isSet(); |
| boolean edgeIsSet = edgeLabel.isSet(); |
| if ( !labelIsSet && !edgeIsSet && edgeLabel.label==label.label ) { |
| hasIntersection = true; |
| } |
| else if ( labelIsSet && edgeIsSet && |
| !edgeLabel.getSet().and(label.getSet()).isNil() ) { |
| hasIntersection = true; |
| } |
| else if ( labelIsSet && !edgeIsSet && |
| label.getSet().member(edgeLabel.label) ) { |
| hasIntersection = true; |
| } |
| else if ( !labelIsSet && edgeIsSet && |
| edgeLabel.getSet().member(label.label) ) { |
| hasIntersection = true; |
| } |
| return hasIntersection; |
| } |
| } |