blob: 728e0203efd002c4e0575786df2f3072136ea2d4 [file] [log] [blame]
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* JFlex 1.4.3 *
* Copyright (C) 1998-2009 Gerwin Klein <lsf@jflex.de> *
* All rights reserved. *
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License. See the file *
* COPYRIGHT for more information. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License along *
* with this program; if not, write to the Free Software Foundation, Inc., *
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *
* *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
package JFlex;
import java.util.*;
import java.io.*;
/**
* NFA representation in JFlex.
*
* Contains algorithms RegExp -> NFA and NFA -> DFA.
*
* @author Gerwin Klein
* @version $Revision: 1.4.3 $, $Date: 2009/12/21 15:58:48 $
*/
final public class NFA {
/** table[current_state][next_char] is the set of states that can be reached
/* from current_state with an input next_char */
StateSet [][] table;
/** epsilon[current_state] is the set of states that can be reached
/* from current_state via epsilon edges */
StateSet [] epsilon;
/** isFinal[state] == true <=> state is a final state of the NFA */
boolean [] isFinal;
/** action[current_state]: the action associated with the state
/* current_state (null, if there is no action for the state) */
Action [] action;
/** the number of states in this NFA */
int numStates;
/** the current maximum number of input characters */
int numInput;
/** the number of lexical States. Lexical states have the indices
/* 0..numLexStates-1 in the transition table */
int numLexStates;
/** estimated size of the NFA (before actual construction) */
int estSize = 256;
Macros macros;
CharClasses classes;
LexScan scanner;
RegExps regExps;
// will be reused by several methods (avoids excessive object creation)
private static StateSetEnumerator states = new StateSetEnumerator();
private static StateSet tempStateSet = new StateSet();
public NFA(int numInput, int estSize) {
this.numInput = numInput;
this.estSize = estSize;
numStates = 0;
epsilon = new StateSet [estSize];
action = new Action [estSize];
isFinal = new boolean [estSize];
table = new StateSet [estSize][numInput];
}
/**
* Construct new NFA.
*
* Assumes that lookahead cases and numbers are already resolved in RegExps.
* @see RegExps#checkLookAheads()
*/
public NFA(int numInput, LexScan scanner, RegExps regExps,
Macros macros, CharClasses classes) {
this(numInput, regExps.NFASize(macros)+2*scanner.states.number());
this.scanner = scanner;
this.regExps = regExps;
this.macros = macros;
this.classes = classes;
numLexStates = scanner.states.number();
// ensureCapacity assumes correctly set up numStates.
int new_num = numEntryStates();
ensureCapacity(new_num);
numStates = new_num;
}
public int numEntryStates() {
return 2*(numLexStates+regExps.gen_look_count);
}
/**
* Add a standalone rule that has minimum priority, fires a transition
* on all single input characters and has a "print yytext" action.
*/
public void addStandaloneRule() {
int start = numStates;
int end = numStates+1;
for (int c = 0; c < classes.getNumClasses(); c++)
addTransition(start, c, end);
for (int i = 0; i < numLexStates*2; i++)
addEpsilonTransition(i, start);
action[end] = new Action("System.out.print(yytext());", Integer.MAX_VALUE);
isFinal[end] = true;
}
/**
* Add a regexp to this NFA.
*
* @param regExpNum the number of the regexp to add.
*/
public void addRegExp(int regExpNum) {
if (Options.DEBUG)
Out.debug("Adding nfa for regexp "+regExpNum+" :"+Out.NL+regExps.getRegExp(regExpNum));
IntPair nfa = insertNFA( regExps.getRegExp(regExpNum) );
Enumeration lexStates = regExps.getStates(regExpNum).elements();
if ( !lexStates.hasMoreElements() )
lexStates = scanner.states.getInclusiveStates();
while ( lexStates.hasMoreElements() ) {
int stateNum = ((Integer)lexStates.nextElement()).intValue();
if ( !regExps.isBOL(regExpNum) )
addEpsilonTransition(2*stateNum, nfa.start);
addEpsilonTransition(2*stateNum+1, nfa.start);
}
if ( regExps.getLookAhead(regExpNum) != null ) {
Action a = regExps.getAction(regExpNum);
if (a.lookAhead() == Action.FINITE_CHOICE) {
insertLookAheadChoices(nfa.end, a, regExps.getLookAhead(regExpNum));
// remove the original action from the collection: it will never
// be matched directly, only its copies will.
scanner.actions.remove(a);
}
else {
RegExp r1 = regExps.getRegExp(regExpNum);
RegExp r2 = regExps.getLookAhead(regExpNum);
IntPair look = insertNFA(r2);
addEpsilonTransition(nfa.end, look.start);
action[look.end] = a;
isFinal[look.end] = true;
if (a.lookAhead() == Action.GENERAL_LOOK) {
// base forward pass
IntPair forward = insertNFA(r1);
// lookahead backward pass
IntPair backward = insertNFA(r2.rev(macros));
isFinal[forward.end] = true;
action[forward.end] = new Action(Action.FORWARD_ACTION);
isFinal[backward.end] = true;
action[backward.end] = new Action(Action.BACKWARD_ACTION);
int entry = 2*(regExps.getLookEntry(regExpNum) + numLexStates);
addEpsilonTransition(entry, forward.start);
addEpsilonTransition(entry+1, backward.start);
a.setEntryState(entry);
}
}
}
else {
action[nfa.end] = regExps.getAction(regExpNum);
isFinal[nfa.end] = true;
}
}
/**
* Insert NFAs for the (finitely many) fixed length lookahead choices.
*
* @param lookAhead a lookahead of which isFiniteChoice is true
* @param baseEnd the end state of the base expression NFA
* @param a the action of the expression
*
* @see SemCheck#isFiniteChoice(RegExp)
*/
private void insertLookAheadChoices(int baseEnd, Action a, RegExp lookAhead) {
if (lookAhead.type == sym.BAR) {
RegExp2 r = (RegExp2) lookAhead;
insertLookAheadChoices(baseEnd, a, r.r1);
insertLookAheadChoices(baseEnd, a, r.r2);
}
else if (lookAhead.type == sym.MACROUSE) {
RegExp1 r = (RegExp1) lookAhead;
insertLookAheadChoices(baseEnd, a, macros.getDefinition((String) r.content));
}
else {
int len = SemCheck.length(lookAhead);
if (len >= 0) {
// termination case
IntPair look = insertNFA(lookAhead);
addEpsilonTransition(baseEnd, look.start);
Action x = a.copyChoice(len);
action[look.end] = x;
isFinal[look.end] = true;
// add new copy to the collection of known actions such that
// it can be checked for the NEVER_MATCH warning.
scanner.actions.add(x);
}
else {
// should never happen
throw new Error("When inserting lookahead expression: unkown expression type "+lookAhead.type+" in "+lookAhead); //$NON-NLS-1$ //$NON-NLS-2$
}
}
}
/**
* Make sure the NFA can contain at least newNumStates states.
*
* @param newNumStates the minimu number of states.
*/
private void ensureCapacity(int newNumStates) {
int oldLength = epsilon.length;
if ( newNumStates < oldLength ) return;
int newStatesLength = Math.max(oldLength*2, newNumStates);
boolean [] newFinal = new boolean [newStatesLength];
boolean [] newIsPush = new boolean [newStatesLength];
Action [] newAction = new Action [newStatesLength];
StateSet [] [] newTable = new StateSet [newStatesLength] [numInput];
StateSet [] newEpsilon = new StateSet [newStatesLength];
System.arraycopy(isFinal,0,newFinal,0,numStates);
System.arraycopy(action,0,newAction,0,numStates);
System.arraycopy(epsilon,0,newEpsilon,0,numStates);
System.arraycopy(table,0,newTable,0,numStates);
isFinal = newFinal;
action = newAction;
epsilon = newEpsilon;
table = newTable;
}
public void addTransition(int start, int input, int dest) {
Out.debug("Adding transition ("+start+", "+input+", "+dest+")");
int maxS = Math.max(start,dest)+1;
ensureCapacity( maxS );
if (maxS > numStates) numStates = maxS;
if ( table[start][input] != null )
table[start][input].addState(dest);
else
table[start][input] = new StateSet(estSize,dest);
}
public void addEpsilonTransition(int start, int dest) {
int max = Math.max(start,dest)+1;
ensureCapacity( max );
if (max > numStates) numStates = max;
if (epsilon[start] != null)
epsilon[start].addState(dest);
else
epsilon[start] = new StateSet(estSize,dest);
}
/**
* Returns <code>true</code>, iff the specified set of states
* contains a final state.
*
* @param set the set of states that is tested for final states.
*/
private boolean containsFinal(StateSet set) {
states.reset(set);
while ( states.hasMoreElements() )
if ( isFinal[states.nextElement()] ) return true;
return false;
}
/**
* Returns <code>true</code>, iff the specified set of states
* contains a pushback-state.
*
* @param set the set of states that is tested for pushback-states.
private boolean containsPushback(StateSet set) {
states.reset(set);
while ( states.hasMoreElements() )
if ( isPushback[states.nextElement()] ) return true;
return false;
}
*/
/**
* Returns the action with highest priority in the specified
* set of states.
*
* @param set the set of states for which to determine the action
*/
private Action getAction(StateSet set) {
states.reset(set);
Action maxAction = null;
Out.debug("Determining action of : "+set);
while ( states.hasMoreElements() ) {
Action currentAction = action[ states.nextElement() ];
if ( currentAction != null ) {
if (maxAction == null)
maxAction = currentAction;
else
maxAction = maxAction.getHigherPriority(currentAction);
}
}
return maxAction;
}
/**
* Calculates the epsilon closure for a specified set of states.
*
* The epsilon closure for set a is the set of states that can be reached
* by epsilon edges from a.
*
* @param set the set of states to calculate the epsilon closure for
*
* @return the epsilon closure of the specified set of states
* in this NFA
*/
private StateSet closure(int startState) {
// Out.debug("Calculating closure of "+set);
StateSet notvisited = tempStateSet;
StateSet closure = new StateSet(numStates,startState);
notvisited.clear();
notvisited.addState(startState);
while ( notvisited.containsElements() ) {
// Out.debug("closure is now "+closure);
// Out.debug("notvisited is "+notvisited);
int state = notvisited.getAndRemoveElement();
// Out.debug("removed element "+state+" of "+notvisited);
// Out.debug("epsilon[states] = "+epsilon[state]);
notvisited.add(closure.complement(epsilon[state]));
closure.add(epsilon[state]);
}
// Out.debug("Closure is : "+closure);
return closure;
}
/**
* Returns the epsilon closure of a set of states
*/
private StateSet closure(StateSet startStates) {
StateSet result = new StateSet(numStates);
if (startStates != null) {
states.reset(startStates);
while (states.hasMoreElements())
result.add( closure(states.nextElement()) );
}
return result;
}
private void epsilonFill() {
for (int i = 0; i < numStates; i++) {
epsilon[i] = closure(i);
}
}
/**
* Calculates the set of states that can be reached from another
* set of states <code>start</code> with an specified input
* character <code>input</code>
*
* @param start the set of states to start from
* @param input the input character for which to search the next states
*
* @return the set of states that are reached from <code>start</code>
* via <code>input</code>
*/
private StateSet DFAEdge(StateSet start, char input) {
// Out.debug("Calculating DFAEdge for state set "+start+" and input '"+input+"'");
tempStateSet.clear();
states.reset(start);
while ( states.hasMoreElements() )
tempStateSet.add( table[states.nextElement()][input] );
StateSet result = new StateSet(tempStateSet);
states.reset(tempStateSet);
while ( states.hasMoreElements() )
result.add( epsilon[states.nextElement()] );
// Out.debug("DFAEdge is : "+result);
return result;
}
/**
* Returns an DFA that accepts the same language as this NFA.
* This DFA is usually not minimal.
*/
public DFA getDFA() {
Hashtable dfaStates = new Hashtable(numStates);
Vector dfaVector = new Vector(numStates);
DFA dfa = new DFA(numEntryStates(), numInput, numLexStates);
int numDFAStates = 0;
int currentDFAState = 0;
Out.println("Converting NFA to DFA : ");
epsilonFill();
StateSet currentState, newState;
// create the initial states of the DFA
for ( int i = 0; i < numEntryStates(); i++ ) {
newState = epsilon[i];
dfaStates.put(newState, new Integer(numDFAStates));
dfaVector.addElement(newState);
dfa.setEntryState( i, numDFAStates );
dfa.setFinal( numDFAStates, containsFinal(newState) );
dfa.setAction( numDFAStates, getAction(newState) );
numDFAStates++;
}
numDFAStates--;
if (Options.DEBUG)
Out.debug("DFA start states are :"+Out.NL+dfaStates+Out.NL+Out.NL+"ordered :"+Out.NL+dfaVector);
currentDFAState = 0;
StateSet tempStateSet = NFA.tempStateSet;
StateSetEnumerator states = NFA.states;
// will be reused
newState = new StateSet(numStates);
while ( currentDFAState <= numDFAStates ) {
currentState = (StateSet) dfaVector.elementAt(currentDFAState);
for (char input = 0; input < numInput; input++) {
// newState = DFAEdge(currentState, input);
// inlining DFAEdge for performance:
// Out.debug("Calculating DFAEdge for state set "+currentState+" and input '"+input+"'");
tempStateSet.clear();
states.reset(currentState);
while ( states.hasMoreElements() )
tempStateSet.add( table[states.nextElement()][input] );
newState.copy(tempStateSet);
states.reset(tempStateSet);
while ( states.hasMoreElements() )
newState.add( epsilon[states.nextElement()] );
// Out.debug("DFAEdge is : "+newState);
if ( newState.containsElements() ) {
// Out.debug("DFAEdge for input "+(int)input+" and state set "+currentState+" is "+newState);
// Out.debug("Looking for state set "+newState);
Integer nextDFAState = (Integer) dfaStates.get(newState);
if ( nextDFAState != null ) {
// Out.debug("FOUND!");
dfa.addTransition(currentDFAState, input, nextDFAState.intValue());
}
else {
if (Options.progress) Out.print(".");
// Out.debug("NOT FOUND!");
// Out.debug("Table was "+dfaStates);
numDFAStates++;
// make a new copy of newState to store in dfaStates
StateSet storeState = new StateSet(newState);
dfaStates.put(storeState, new Integer(numDFAStates));
dfaVector.addElement(storeState);
dfa.addTransition(currentDFAState, input, numDFAStates);
dfa.setFinal( numDFAStates, containsFinal(storeState) );
dfa.setAction( numDFAStates, getAction(storeState) );
}
}
}
currentDFAState++;
}
if (Options.verbose) Out.println("");
return dfa;
}
public void dumpTable() {
Out.dump(toString());
}
public String toString() {
StringBuffer result = new StringBuffer();
for (int i=0; i < numStates; i++) {
result.append("State");
if ( isFinal[i] ) {
result.append("[FINAL");
String l = action[i].lookString();
if (!l.equals("")) {
result.append(", ");
result.append(l);
}
result.append("]");
}
result.append(" "+i+Out.NL);
for (char input = 0; input < numInput; input++) {
if ( table[i][input] != null && table[i][input].containsElements() )
result.append(" with "+((int) input)+" in "+table[i][input]+Out.NL);
}
if ( epsilon[i] != null && epsilon[i].containsElements() )
result.append(" with epsilon in "+epsilon[i]+Out.NL);
}
return result.toString();
}
public void writeDot(File file) {
try {
PrintWriter writer = new PrintWriter(new FileWriter(file));
writer.println(dotFormat());
writer.close();
}
catch (IOException e) {
Out.error(ErrorMessages.FILE_WRITE, file);
throw new GeneratorException();
}
}
public String dotFormat() {
StringBuffer result = new StringBuffer();
result.append("digraph NFA {"+Out.NL);
result.append("rankdir = LR"+Out.NL);
for (int i=0; i < numStates; i++) {
if ( isFinal[i] ) {
result.append(i);
result.append(" [shape = doublecircle]");
result.append(Out.NL);
}
}
for (int i=0; i < numStates; i++) {
for (int input = 0; input < numInput; input++) {
if ( table[i][input] != null ) {
StateSetEnumerator states = table[i][input].states();
while (states.hasMoreElements()) {
int s = states.nextElement();
result.append(i+" -> "+s);
result.append(" [label=\""+classes.toString(input)+"\"]"+Out.NL);
}
}
}
if ( epsilon[i] != null ) {
StateSetEnumerator states = epsilon[i].states();
while (states.hasMoreElements()) {
int s = states.nextElement();
result.append(i+" -> "+s+" [style=dotted]"+Out.NL);
}
}
}
result.append("}"+Out.NL);
return result.toString();
}
//-----------------------------------------------------------------------
// Functions for constructing NFAs out of regular expressions.
private void insertLetterNFA(boolean caseless, char letter, int start, int end) {
if (caseless) {
int lower = classes.getClassCode(Character.toLowerCase(letter));
int upper = classes.getClassCode(Character.toUpperCase(letter));
addTransition(start, lower, end);
if (upper != lower) addTransition(start, upper, end);
}
else {
addTransition(start, classes.getClassCode(letter), end);
}
}
private IntPair insertStringNFA(boolean caseless, String letters) {
int start = numStates;
int i;
for (i = 0; i < letters.length(); i++) {
if (caseless) {
char c = letters.charAt(i);
int lower = classes.getClassCode(Character.toLowerCase(c));
int upper = classes.getClassCode(Character.toUpperCase(c));
addTransition(i+start, lower, i+start+1);
if (upper != lower) addTransition(i+start, upper, i+start+1);
}
else {
addTransition(i+start, classes.getClassCode(letters.charAt(i)), i+start+1);
}
}
return new IntPair(start, i+start);
}
private void insertClassNFA(Vector intervalls, int start, int end) {
// empty char class is ok:
if (intervalls == null) return;
int [] cl = classes.getClassCodes(intervalls);
for (int i = 0; i < cl.length; i++)
addTransition(start, cl[i], end);
}
private void insertNotClassNFA(Vector intervalls, int start, int end) {
int [] cl = classes.getNotClassCodes(intervalls);
for (int i = 0; i < cl.length; i++)
addTransition(start, cl[i], end);
}
/**
* Constructs an NFA accepting the complement of the language
* of a given NFA.
*
* Converts the NFA into a DFA, then negates that DFA.
* Exponential state blowup possible and common.
*
* @param the NFA to construct the complement for.
*
* @return a pair of integers denoting the index of start
* and end state of the complement NFA.
*/
private IntPair complement(IntPair nfa) {
if (Options.DEBUG) {
Out.debug("complement for "+nfa);
Out.debug("NFA is :"+Out.NL+this);
}
int dfaStart = nfa.end+1;
// FIXME: only need epsilon closure of states reachable from nfa.start
epsilonFill();
Hashtable dfaStates = new Hashtable(numStates);
Vector dfaVector = new Vector(numStates);
int numDFAStates = 0;
int currentDFAState = 0;
StateSet currentState, newState;
newState = epsilon[nfa.start];
dfaStates.put(newState, new Integer(numDFAStates));
dfaVector.addElement(newState);
if (Options.DEBUG)
Out.debug("pos DFA start state is :"+Out.NL+dfaStates+Out.NL+Out.NL+"ordered :"+Out.NL+dfaVector);
currentDFAState = 0;
while ( currentDFAState <= numDFAStates ) {
currentState = (StateSet) dfaVector.elementAt(currentDFAState);
for (char input = 0; input < numInput; input++) {
newState = DFAEdge(currentState, input);
if ( newState.containsElements() ) {
// Out.debug("DFAEdge for input "+(int)input+" and state set "+currentState+" is "+newState);
// Out.debug("Looking for state set "+newState);
Integer nextDFAState = (Integer) dfaStates.get(newState);
if ( nextDFAState != null ) {
// Out.debug("FOUND!");
addTransition(dfaStart+currentDFAState, input, dfaStart+nextDFAState.intValue());
}
else {
if (Options.dump) Out.print("+");
// Out.debug("NOT FOUND!");
// Out.debug("Table was "+dfaStates);
numDFAStates++;
dfaStates.put(newState, new Integer(numDFAStates));
dfaVector.addElement(newState);
addTransition(dfaStart+currentDFAState, input, dfaStart+numDFAStates);
}
}
}
currentDFAState++;
}
// We have a dfa accepting the positive regexp.
// Now the complement:
if (Options.DEBUG)
Out.debug("dfa finished, nfa is now :"+Out.NL+this);
int start = dfaStart+numDFAStates+1;
int error = dfaStart+numDFAStates+2;
int end = dfaStart+numDFAStates+3;
addEpsilonTransition(start, dfaStart);
for (int i = 0; i < numInput; i++)
addTransition(error, i, error);
addEpsilonTransition(error, end);
for (int s = 0; s <= numDFAStates; s++) {
currentState = (StateSet) dfaVector.elementAt(s);
currentDFAState = dfaStart+s;
// if it was not a final state, it is now in the complement
if (!currentState.isElement(nfa.end))
addEpsilonTransition(currentDFAState, end);
// all inputs not present (formerly leading to an implicit error)
// now lead to an explicit (final) state accepting everything.
for (int i = 0; i < numInput; i++)
if (table[currentDFAState][i] == null)
addTransition(currentDFAState, i, error);
}
// eliminate transitions leading to dead states
if (live == null || live.length < numStates) {
live = new boolean [2*numStates];
visited = new boolean [2*numStates];
}
removeDead(dfaStart, end);
if (Options.DEBUG)
Out.debug("complement finished, nfa ("+start+","+end+") is now :"+this);
return new IntPair(start, end);
}
// "global" data for use in method removeDead only:
// live[s] == false <=> no final state can be reached from s
private boolean [] live; // = new boolean [estSize];
private boolean [] visited; // = new boolean [estSize];
private void removeDead(int start, int end) {
// Out.debug("removeDead ("+start+")");
if ( visited[start] || live[start] ) return;
visited[start] = true;
// Out.debug("not yet visited");
if (closure(start).isElement(end))
live[start] = true;
// Out.debug("is final :"+live[start]);
for (int i = 0; i < numInput; i++) {
StateSet nextState = closure(table[start][i]);
StateSetEnumerator states = nextState.states();
while (states.hasMoreElements()) {
int next = states.nextElement();
if (next != start) {
removeDead(next,end);
if (live[next])
live[start] = true;
else
table[start][i] = null;
}
}
}
StateSet nextState = closure(epsilon[start]);
StateSetEnumerator states = nextState.states();
while (states.hasMoreElements()) {
int next = states.nextElement();
if (next != start) {
removeDead(next,end);
if (live[next])
live[start] = true;
}
}
// Out.debug("state "+start+" is live :"+live[start]);
}
/**
* Constructs a two state NFA for char class regexps,
* such that the NFA has
*
* exactly one start state,
* exactly one end state,
* no transitions leading out of the end state
* no transitions leading into the start state
*
* Assumes that regExp.isCharClass(macros) == true
*
* @param regExp the regular expression to construct the
* NFA for
*
* @return a pair of integers denoting the index of start
* and end state of the NFA.
*/
private void insertCCLNFA(RegExp regExp, int start, int end) {
switch (regExp.type) {
case sym.BAR:
RegExp2 r = (RegExp2) regExp;
insertCCLNFA(r.r1, start, end);
insertCCLNFA(r.r2, start, end);
return;
case sym.CCLASS:
insertClassNFA( (Vector) ((RegExp1) regExp).content, start, end);
return;
case sym.CCLASSNOT:
insertNotClassNFA( (Vector) ((RegExp1) regExp).content, start, end);
return;
case sym.CHAR:
insertLetterNFA(
false, ((Character) ((RegExp1) regExp).content).charValue(),
start, end);
return;
case sym.CHAR_I:
insertLetterNFA(
true, ((Character) ((RegExp1) regExp).content).charValue(),
start, end);
return;
case sym.MACROUSE:
insertCCLNFA(macros.getDefinition((String) ((RegExp1) regExp).content),
start, end);
return;
}
throw new Error("Unknown expression type "+regExp.type+" in NFA construction");
}
/**
* Constructs an NFA for regExp such that the NFA has
*
* exactly one start state,
* exactly one end state,
* no transitions leading out of the end state
* no transitions leading into the start state
*
* @param regExp the regular expression to construct the
* NFA for
*
* @return a pair of integers denoting the index of start
* and end state of the NFA.
*/
public IntPair insertNFA(RegExp regExp) {
IntPair nfa1, nfa2;
int start, end;
RegExp2 r;
if (Options.DEBUG)
Out.debug("Inserting RegExp : "+regExp);
if (regExp.isCharClass(macros)) {
start = numStates;
end = numStates+1;
ensureCapacity(end+1);
if (end+1 > numStates) numStates = end+1;
insertCCLNFA(regExp, start, end);
return new IntPair(start, end);
}
switch (regExp.type) {
case sym.BAR:
r = (RegExp2) regExp;
nfa1 = insertNFA(r.r1);
nfa2 = insertNFA(r.r2);
start = nfa2.end+1;
end = nfa2.end+2;
addEpsilonTransition(start, nfa1.start);
addEpsilonTransition(start, nfa2.start);
addEpsilonTransition(nfa1.end, end);
addEpsilonTransition(nfa2.end, end);
return new IntPair(start, end);
case sym.CONCAT:
r = (RegExp2) regExp;
nfa1 = insertNFA(r.r1);
nfa2 = insertNFA(r.r2);
addEpsilonTransition(nfa1.end, nfa2.start);
return new IntPair(nfa1.start, nfa2.end);
case sym.STAR:
nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content );
start = nfa1.end+1;
end = nfa1.end+2;
addEpsilonTransition(nfa1.end, end);
addEpsilonTransition(start, nfa1.start);
addEpsilonTransition(start, end);
addEpsilonTransition(nfa1.end, nfa1.start);
return new IntPair(start, end);
case sym.PLUS:
nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content );
start = nfa1.end+1;
end = nfa1.end+2;
addEpsilonTransition(nfa1.end, end);
addEpsilonTransition(start, nfa1.start);
addEpsilonTransition(nfa1.end, nfa1.start);
return new IntPair(start, end);
case sym.QUESTION:
nfa1 = insertNFA( (RegExp) ((RegExp1) regExp).content );
addEpsilonTransition(nfa1.start, nfa1.end);
return new IntPair(nfa1.start, nfa1.end);
case sym.BANG:
return complement(insertNFA((RegExp) ((RegExp1) regExp).content));
case sym.TILDE:
return insertNFA(regExp.resolveTilde(macros));
case sym.STRING:
return insertStringNFA(false, (String) ((RegExp1) regExp).content );
case sym.STRING_I:
return insertStringNFA(true, (String) ((RegExp1) regExp).content );
case sym.MACROUSE:
return insertNFA(macros.getDefinition((String) ((RegExp1) regExp).content));
}
throw new Error("Unknown expression type "+regExp.type+" in NFA construction");
}
}