blob: fe4e95c505c918fa607a32721f521f587f189b6f [file] [log] [blame]
/*
* [The "BSD license"]
* Copyright (c) 2010 Terence Parr
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.tool;
import org.antlr.analysis.DFA;
import org.antlr.analysis.*;
import org.antlr.misc.IntervalSet;
import org.antlr.runtime.*;
import org.antlr.runtime.debug.BlankDebugEventListener;
import org.antlr.runtime.debug.DebugEventListener;
import org.antlr.runtime.debug.ParseTreeBuilder;
import org.antlr.runtime.tree.ParseTree;
import java.util.List;
import java.util.Stack;
/** The recognition interpreter/engine for grammars. Separated
* out of Grammar as it's related, but technically not a Grammar function.
* You create an interpreter for a grammar and an input stream. This object
* can act as a TokenSource so that you can hook up two grammars (via
* a CommonTokenStream) to lex/parse. Being a token source only makes sense
* for a lexer grammar of course.
*/
public class Interpreter implements TokenSource {
    /** The grammar whose NFA (and lookahead DFAs) drive recognition. */
    protected Grammar grammar;

    /** The input to recognize. For a lexer grammar this must actually be a
     *  CharStream (the code casts it); for parser/combined grammars it is a
     *  TokenStream.
     */
    protected IntStream input;

    /** A lexer listener that just creates token objects as they
     *  are matched.  scan() uses this listener to get a single object.
     *  To get a stream of tokens, you must call scan() multiple times,
     *  recording the token object result after each call.
     */
    class LexerActionGetTokenType extends BlankDebugEventListener {
        /** Last token built; overwritten on each non-artificial rule exit. */
        public CommonToken token;
        Grammar g;

        public LexerActionGetTokenType(Grammar g) {
            this.g = g;
        }

        /** Upon exiting a real token rule (anything but the artificial
         *  start rule that dispatches to all tokens), create a token whose
         *  type is derived from the rule name.  Start/stop char indexes are
         *  left 0,0 here; nextToken() patches line/position/indexes later.
         */
        public void exitRule(String grammarFileName, String ruleName) {
            if ( !ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ){
                int type = g.getTokenType(ruleName);
                int channel = Token.DEFAULT_CHANNEL;
                token = new CommonToken((CharStream)input,type,channel,0,0);
            }
        }
    }

    public Interpreter(Grammar grammar, IntStream input) {
        this.grammar = grammar;
        this.input = input;
    }

    /** TokenSource implementation: return the next token matched from the
     *  underlying CharStream, or an EOF token at end of input.  Only valid
     *  for lexer grammars; returns null for any other grammar type.
     *  Scan errors are reported (see reportScanError) and scanning retries
     *  from the next character rather than aborting.
     */
    public Token nextToken() {
        if ( grammar.type!=Grammar.LEXER ) {
            return null;
        }
        if ( input.LA(1)==CharStream.EOF ) {
            return new CommonToken((CharStream)input,Token.EOF,Token.DEFAULT_CHANNEL,input.index(),input.index());
        }
        int start = input.index();
        int charPos = ((CharStream)input).getCharPositionInLine();
        CommonToken token = null;
        loop:
        while (input.LA(1)!=CharStream.EOF) {
            try {
                token = scan(Grammar.ARTIFICIAL_TOKENS_RULENAME, null);
                break;
            }
            catch (RecognitionException re) {
                // report a problem and try for another
                reportScanError(re);
                continue loop;
            }
        }
        // the scan can only set type
        // we must set the line, and other junk here to make it a complete token
        int stop = input.index()-1;
        if ( token==null ) {
            // every remaining char failed to match; treat as EOF
            return new CommonToken((CharStream)input,Token.EOF,Token.DEFAULT_CHANNEL,start,start);
        }
        token.setLine(((CharStream)input).getLine());
        token.setStartIndex(start);
        token.setStopIndex(stop);
        token.setCharPositionInLine(charPos);
        return token;
    }

    /** For a given input char stream, try to match against the NFA
     *  starting at startRule.  This is a deterministic parse even though
     *  it is using an NFA because it uses DFAs at each decision point to
     *  predict which alternative will succeed.  This is exactly what the
     *  generated parser will do.
     *
     *  This only does lexer grammars; it silently returns for any other
     *  grammar type.
     *
     *  The matched token's type is not returned here (note the void return);
     *  it is delivered through the DebugEventListener -- see
     *  LexerActionGetTokenType and the CommonToken-returning scan() overloads.
     *
     *  Lazily builds the NFA and lookahead DFAs on first use.
     *
     *  @param actions       listener receiving enter/exit-rule and error
     *                       events; may be null
     *  @param visitedStates if non-null, filled with every NFAState visited
     *  @throws RecognitionException if the input does not match
     */
    public void scan(String startRule,
                     DebugEventListener actions,
                     List visitedStates)
        throws RecognitionException
    {
        if ( grammar.type!=Grammar.LEXER ) {
            return;
        }
        CharStream in = (CharStream)this.input;
        //System.out.println("scan("+startRule+",'"+in.substring(in.index(),in.size()-1)+"')");
        // Build NFAs/DFAs from the grammar AST if NFAs haven't been built yet
        if ( grammar.getRuleStartState(startRule)==null ) {
            grammar.buildNFA();
        }
        if ( !grammar.allDecisionDFAHaveBeenCreated() ) {
            // Create the DFA predictors for each decision
            grammar.createLookaheadDFAs();
        }
        // do the parse
        Stack ruleInvocationStack = new Stack();
        NFAState start = grammar.getRuleStartState(startRule);
        NFAState stop = grammar.getRuleStopState(startRule);
        parseEngine(startRule, start, stop, in, ruleInvocationStack,
                    actions, visitedStates);
    }

    /** Convenience: scan one token starting at startRule, without recording
     *  visited states.  Returns the matched token (type only; position info
     *  is filled in by nextToken()).
     */
    public CommonToken scan(String startRule)
        throws RecognitionException
    {
        return scan(startRule, null);
    }

    /** Scan one token starting at startRule, using a
     *  LexerActionGetTokenType listener to capture the token object.
     *  @param visitedStates if non-null, filled with every NFAState visited
     */
    public CommonToken scan(String startRule,
                            List visitedStates)
        throws RecognitionException
    {
        LexerActionGetTokenType actions = new LexerActionGetTokenType(grammar);
        scan(startRule, actions, visitedStates);
        return actions.token;
    }

    /** Parse the token stream starting at startRule, firing events at the
     *  given listener.  Mirrors scan() but for parser/combined grammars;
     *  lazily builds the NFA and lookahead DFAs on first use.
     *  @param actions       listener receiving parse events; may be null
     *  @param visitedStates if non-null, filled with every NFAState visited
     *  @throws RecognitionException if the input does not match
     */
    public void parse(String startRule,
                      DebugEventListener actions,
                      List visitedStates)
        throws RecognitionException
    {
        //System.out.println("parse("+startRule+")");
        // Build NFAs/DFAs from the grammar AST if NFAs haven't been built yet
        if ( grammar.getRuleStartState(startRule)==null ) {
            grammar.buildNFA();
        }
        if ( !grammar.allDecisionDFAHaveBeenCreated() ) {
            // Create the DFA predictors for each decision
            grammar.createLookaheadDFAs();
        }
        // do the parse
        Stack ruleInvocationStack = new Stack();
        NFAState start = grammar.getRuleStartState(startRule);
        NFAState stop = grammar.getRuleStopState(startRule);
        parseEngine(startRule, start, stop, input, ruleInvocationStack,
                    actions, visitedStates);
    }

    /** Convenience: parse from startRule and return the resulting parse
     *  tree, without recording visited states.
     */
    public ParseTree parse(String startRule)
        throws RecognitionException
    {
        return parse(startRule, null);
    }

    /** Parse from startRule, building a ParseTree via ParseTreeBuilder.
     *  Recognition errors do not propagate; they are recorded in the tree.
     */
    public ParseTree parse(String startRule, List visitedStates)
        throws RecognitionException
    {
        ParseTreeBuilder actions = new ParseTreeBuilder(grammar.name);
        try {
            parse(startRule, actions, visitedStates);
        }
        catch (RecognitionException re) {
            // Errors are tracked via the ANTLRDebugInterface
            // Exceptions are used just to blast out of the parse engine
            // The error will be in the parse tree.
        }
        return actions.getTree();
    }

    /** The core simulation loop: walk the NFA from start to stop, using
    *  lookahead DFAs at decision states and a rule-invocation stack for
    *  rule calls.  Fires enter/exit-rule, consume, and error events at the
    *  listener.  Fill a list of all NFA states visited during the parse,
    *  if visitedStates is non-null.
    *
    *  On mismatch the engine consumes one symbol (single-symbol recovery)
    *  and then throws; callers such as nextToken() rely on that consume to
    *  make progress when retrying.
    *
    *  @throws RecognitionException on no-viable-alt, mismatched token/set,
    *          or failed predicate
    */
    protected void parseEngine(String startRule,
                               NFAState start,
                               NFAState stop,
                               IntStream input,
                               Stack ruleInvocationStack,
                               DebugEventListener actions,
                               List visitedStates)
        throws RecognitionException
    {
        NFAState s = start;
        if ( actions!=null ) {
            actions.enterRule(s.nfa.grammar.getFileName(), start.enclosingRule.name);
        }
        int t = input.LA(1);
        while ( s!=stop ) {
            if ( visitedStates!=null ) {
                visitedStates.add(s);
            }
            /*
            System.out.println("parse state "+s.stateNumber+" input="+
            s.nfa.grammar.getTokenDisplayName(t));
            */
            // CASE 1: decision state
            if ( s.getDecisionNumber()>0 && s.nfa.grammar.getNumberOfAltsForDecisionNFA(s)>1 ) {
                // decision point, must predict and jump to alt
                DFA dfa = s.nfa.grammar.getLookaheadDFA(s.getDecisionNumber());
                /*
                if ( s.nfa.grammar.type!=Grammar.LEXER ) {
                    System.out.println("decision: "+
                    dfa.getNFADecisionStartState().getDescription()+
                    " input="+s.nfa.grammar.getTokenDisplayName(t));
                }
                */
                // mark so we can rewind after the DFA consumes lookahead
                int m = input.mark();
                int predictedAlt = predict(dfa);
                if ( predictedAlt == NFA.INVALID_ALT_NUMBER ) {
                    String description = dfa.getNFADecisionStartState().getDescription();
                    NoViableAltException nvae =
                        new NoViableAltException(description,
                                                 dfa.getDecisionNumber(),
                                                 s.stateNumber,
                                                 input);
                    if ( actions!=null ) {
                        actions.recognitionException(nvae);
                    }
                    input.consume(); // recover
                    throw nvae;
                }
                // prediction only peeked; rewind so the NFA walk re-matches
                input.rewind(m);
                int parseAlt =
                    s.translateDisplayAltToWalkAlt(predictedAlt);
                /*
                if ( s.nfa.grammar.type!=Grammar.LEXER ) {
                    System.out.println("predicted alt "+predictedAlt+", parseAlt "+
                    parseAlt);
                }
                */
                NFAState alt;
                if ( parseAlt > s.nfa.grammar.getNumberOfAltsForDecisionNFA(s) ) {
                    // implied branch of loop etc...
                    alt = s.nfa.grammar.nfa.getState( s.endOfBlockStateNumber );
                }
                else {
                    alt = s.nfa.grammar.getNFAStateForAltOfDecision(s, parseAlt);
                }
                s = (NFAState)alt.transition[0].target;
                continue;
            }

            // CASE 2: finished matching a rule
            if ( s.isAcceptState() ) { // end of rule node
                if ( actions!=null ) {
                    actions.exitRule(s.nfa.grammar.getFileName(), s.enclosingRule.name);
                }
                if ( ruleInvocationStack.empty() ) {
                    // done parsing. Hit the start state.
                    //System.out.println("stack empty in stop state for "+s.getEnclosingRule());
                    break;
                }
                // pop invoking state off the stack to know where to return to
                NFAState invokingState = (NFAState)ruleInvocationStack.pop();
                RuleClosureTransition invokingTransition =
                    (RuleClosureTransition)invokingState.transition[0];
                // move to node after state that invoked this rule
                s = invokingTransition.followState;
                continue;
            }

            Transition trans = s.transition[0];
            Label label = trans.label;
            if ( label.isSemanticPredicate() ) {
                // NOTE(review): the predicate failure is reported to the
                // listener but NOT thrown here; control falls through to the
                // epsilon/match cases below.  Presumably deliberate (the
                // interpreter cannot evaluate predicates) -- confirm.
                FailedPredicateException fpe =
                    new FailedPredicateException(input,
                                                 s.enclosingRule.name,
                                                 "can't deal with predicates yet");
                if ( actions!=null ) {
                    actions.recognitionException(fpe);
                }
            }

            // CASE 3: epsilon transition
            if ( label.isEpsilon() ) {
                // CASE 3a: rule invocation state
                if ( trans instanceof RuleClosureTransition ) {
                    ruleInvocationStack.push(s);
                    s = (NFAState)trans.target;
                    //System.out.println("call "+s.enclosingRule.name+" from "+s.nfa.grammar.getFileName());
                    if ( actions!=null ) {
                        actions.enterRule(s.nfa.grammar.getFileName(), s.enclosingRule.name);
                    }
                    // could be jumping to new grammar, make sure DFA created
                    if ( !s.nfa.grammar.allDecisionDFAHaveBeenCreated() ) {
                        s.nfa.grammar.createLookaheadDFAs();
                    }
                }
                // CASE 3b: plain old epsilon transition, just move
                else {
                    s = (NFAState)trans.target;
                }
            }

            // CASE 4: match label on transition
            else if ( label.matches(t) ) {
                if ( actions!=null ) {
                    if ( s.nfa.grammar.type == Grammar.PARSER ||
                         s.nfa.grammar.type == Grammar.COMBINED )
                    {
                        actions.consumeToken(((TokenStream)input).LT(1));
                    }
                }
                s = (NFAState)s.transition[0].target;
                input.consume();
                t = input.LA(1);
            }

            // CASE 5: error condition; label is inconsistent with input
            else {
                if ( label.isAtom() ) {
                    MismatchedTokenException mte =
                        new MismatchedTokenException(label.getAtom(), input);
                    if ( actions!=null ) {
                        actions.recognitionException(mte);
                    }
                    input.consume(); // recover
                    throw mte;
                }
                else if ( label.isSet() ) {
                    MismatchedSetException mse =
                        new MismatchedSetException(((IntervalSet)label.getSet()).toRuntimeBitSet(),
                                                   input);
                    if ( actions!=null ) {
                        actions.recognitionException(mse);
                    }
                    input.consume(); // recover
                    throw mse;
                }
                else if ( label.isSemanticPredicate() ) {
                    FailedPredicateException fpe =
                        new FailedPredicateException(input,
                                                     s.enclosingRule.name,
                                                     label.getSemanticContext().toString());
                    if ( actions!=null ) {
                        actions.recognitionException(fpe);
                    }
                    input.consume(); // recover
                    throw fpe;
                }
                else {
                    throw new RecognitionException(input); // unknown error
                }
            }
        }
        //System.out.println("hit stop state for "+stop.getEnclosingRule());
        if ( actions!=null ) {
            actions.exitRule(s.nfa.grammar.getFileName(), stop.enclosingRule.name);
        }
    }

    /** Given an input stream, return the unique alternative predicted by
    *  matching the input.  Upon error, return NFA.INVALID_ALT_NUMBER
    *  The first symbol of lookahead is presumed to be primed; that is,
    *  input.lookahead(1) must point at the input symbol you want to start
    *  predicting with.
    *
    *  Consumes input while walking the DFA; the caller is responsible for
    *  mark()/rewind() around this call (see parseEngine).
    */
    public int predict(DFA dfa) {
        DFAState s = dfa.startState;
        int c = input.LA(1);
        Transition eotTransition = null;
        dfaLoop:
        while ( !s.isAcceptState() ) {
            /*
            System.out.println("DFA.predict("+s.getStateNumber()+", "+
            dfa.getNFA().getGrammar().getTokenName(c)+")");
            */
            // for each edge of s, look for intersection with current char
            for (int i=0; i<s.getNumberOfTransitions(); i++) {
                Transition t = s.transition(i);
                // special case: EOT matches any char
                if ( t.label.matches(c) ) {
                    // take transition i
                    s = (DFAState)t.target;
                    input.consume();
                    c = input.LA(1);
                    continue dfaLoop;
                }
                if ( t.label.getAtom()==Label.EOT ) {
                    // remember the EOT edge; taken only if no edge matched c
                    eotTransition = t;
                }
            }
            if ( eotTransition!=null ) {
                // EOT acts as a default edge: take it without consuming
                s = (DFAState)eotTransition.target;
                continue dfaLoop;
            }
            /*
            ErrorManager.error(ErrorManager.MSG_NO_VIABLE_DFA_ALT,
                               s,
                               dfa.nfa.grammar.getTokenName(c));
            */
            return NFA.INVALID_ALT_NUMBER;
        }
        // woohoo! We know which alt to predict
        // nothing emanates from a stop state; must terminate anyway
        /*
        System.out.println("DFA stop state "+s.getStateNumber()+" predicts "+
        s.getUniquelyPredictedAlt());
        */
        return s.getUniquelyPredictedAlt();
    }

    /** Report a lexing error to stderr with line:column position.
     *  Used by nextToken() before retrying from the next character.
     */
    public void reportScanError(RecognitionException re) {
        CharStream cs = (CharStream)input;
        // print as good of a message as we can, given that we do not have
        // a Lexer object and, hence, cannot call the routine to get a
        // decent error message.
        System.err.println("problem matching token at "+
            cs.getLine()+":"+cs.getCharPositionInLine()+" "+re);
    }

    /** TokenSource implementation: delegate to the underlying stream. */
    public String getSourceName() {
        return input.getSourceName();
    }
}