| /* |
| * [The "BSD license"] |
| * Copyright (c) 2010 Terence Parr |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. The name of the author may not be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| package org.antlr.analysis; |
| |
| import org.antlr.misc.Utils; |
| import org.antlr.tool.Grammar; |
| |
| import java.util.HashSet; |
| import java.util.Set; |
| |
| /** A module to perform optimizations on DFAs. |
| * |
| * I could more easily (and more quickly) do some optimizations (such as |
| * PRUNE_EBNF_EXIT_BRANCHES) during DFA construction, but then it |
| * messes up the determinism checking. For example, it looks like |
| * loop exit branches are unreachable if you prune exit branches |
| * during DFA construction and before determinism checks. |
| * |
| * In general, ANTLR's NFA->DFA->codegen pipeline seems very robust |
| * to me which I attribute to a uniform and consistent set of data |
| * structures. Regardless of what I want to "say"/implement, I do so |
| * within the confines of, for example, a DFA. The code generator |
| * can then just generate code--it doesn't have to do much thinking. |
| * Putting optimizations in the code gen code really starts to make |
| * it a spagetti factory (uh oh, now I'm hungry!). The pipeline is |
| * very testable; each stage has well defined input/output pairs. |
| * |
| * ### Optimization: PRUNE_EBNF_EXIT_BRANCHES |
| * |
| * There is no need to test EBNF block exit branches. Not only is it |
| * an unneeded computation, but counter-intuitively, you actually get |
| * better errors. You can report an error at the missing or extra |
| * token rather than as soon as you've figured out you will fail. |
| * |
| * Imagine optional block "( DOT CLASS )? SEMI". ANTLR generates: |
| * |
| * int alt=0; |
| * if ( input.LA(1)==DOT ) { |
| * alt=1; |
| * } |
| * else if ( input.LA(1)==SEMI ) { |
| * alt=2; |
| * } |
| * |
| * Clearly, since Parser.match() will ultimately find the error, we |
| * do not want to report an error nor do we want to bother testing |
| * lookahead against what follows the (...)? We want to generate |
| * simply "should I enter the subrule?": |
| * |
| * int alt=2; |
| * if ( input.LA(1)==DOT ) { |
| * alt=1; |
| * } |
| * |
| * NOTE 1. Greedy loops cannot be optimized in this way. For example, |
| * "(greedy=false:'x'|.)* '\n'". You specifically need the exit branch |
| * to tell you when to terminate the loop as the same input actually |
| * predicts one of the alts (i.e., staying in the loop). |
| * |
| * NOTE 2. I do not optimize cyclic DFAs at the moment as it doesn't |
| * seem to work. ;) I'll have to investigate later to see what work I |
| * can do on cyclic DFAs to make them have fewer edges. Might have |
| * something to do with the EOT token. |
| * |
| * ### PRUNE_SUPERFLUOUS_EOT_EDGES |
| * |
| * When a token is a subset of another such as the following rules, ANTLR |
| * quietly assumes the first token to resolve the ambiguity. |
| * |
| * EQ : '=' ; |
| * ASSIGNOP : '=' | '+=' ; |
| * |
| * It can yield states that have only a single edge on EOT to an accept |
| * state. This is a waste and messes up my code generation. ;) If |
| * Tokens rule DFA goes |
| * |
| * s0 -'='-> s3 -EOT-> s5 (accept) |
| * |
| * then s5 should be pruned and s3 should be made an accept. Do NOT do this |
| * for keyword versus ID as the state with EOT edge emanating from it will |
| * also have another edge. |
| * |
| * ### Optimization: COLLAPSE_ALL_INCIDENT_EDGES |
| * |
| * Done during DFA construction. See method addTransition() in |
| * NFAToDFAConverter. |
| * |
| * ### Optimization: MERGE_STOP_STATES |
| * |
| * Done during DFA construction. See addDFAState() in NFAToDFAConverter. |
| */ |
| public class DFAOptimizer { |
| public static boolean PRUNE_EBNF_EXIT_BRANCHES = true; |
| public static boolean PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES = true; |
| public static boolean COLLAPSE_ALL_PARALLEL_EDGES = true; |
| public static boolean MERGE_STOP_STATES = true; |
| |
| /** Used by DFA state machine generator to avoid infinite recursion |
| * resulting from cycles int the DFA. This is a set of int state #s. |
| * This is a side-effect of calling optimize; can't clear after use |
| * because code gen needs it. |
| */ |
| protected Set visited = new HashSet(); |
| |
| protected Grammar grammar; |
| |
| public DFAOptimizer(Grammar grammar) { |
| this.grammar = grammar; |
| } |
| |
| public void optimize() { |
| // optimize each DFA in this grammar |
| for (int decisionNumber=1; |
| decisionNumber<=grammar.getNumberOfDecisions(); |
| decisionNumber++) |
| { |
| DFA dfa = grammar.getLookaheadDFA(decisionNumber); |
| optimize(dfa); |
| } |
| } |
| |
| protected void optimize(DFA dfa) { |
| if ( dfa==null ) { |
| return; // nothing to do |
| } |
| /* |
| System.out.println("Optimize DFA "+dfa.decisionNFAStartState.decisionNumber+ |
| " num states="+dfa.getNumberOfStates()); |
| */ |
| //long start = System.currentTimeMillis(); |
| if ( PRUNE_EBNF_EXIT_BRANCHES && dfa.canInlineDecision() ) { |
| visited.clear(); |
| int decisionType = |
| dfa.getNFADecisionStartState().decisionStateType; |
| if ( dfa.isGreedy() && |
| (decisionType==NFAState.OPTIONAL_BLOCK_START || |
| decisionType==NFAState.LOOPBACK) ) |
| { |
| optimizeExitBranches(dfa.startState); |
| } |
| } |
| // If the Tokens rule has syntactically ambiguous rules, try to prune |
| if ( PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES && |
| dfa.isTokensRuleDecision() && |
| dfa.probe.stateToSyntacticallyAmbiguousTokensRuleAltsMap.size()>0 ) |
| { |
| visited.clear(); |
| optimizeEOTBranches(dfa.startState); |
| } |
| |
| /* ack...code gen needs this, cannot optimize |
| visited.clear(); |
| unlinkUnneededStateData(dfa.startState); |
| */ |
| //long stop = System.currentTimeMillis(); |
| //System.out.println("minimized in "+(int)(stop-start)+" ms"); |
| } |
| |
| protected void optimizeExitBranches(DFAState d) { |
| Integer sI = Utils.integer(d.stateNumber); |
| if ( visited.contains(sI) ) { |
| return; // already visited |
| } |
| visited.add(sI); |
| int nAlts = d.dfa.getNumberOfAlts(); |
| for (int i = 0; i < d.getNumberOfTransitions(); i++) { |
| Transition edge = (Transition) d.transition(i); |
| DFAState edgeTarget = ((DFAState)edge.target); |
| /* |
| System.out.println(d.stateNumber+"-"+ |
| edge.label.toString(d.dfa.nfa.grammar)+"->"+ |
| edgeTarget.stateNumber); |
| */ |
| // if target is an accept state and that alt is the exit alt |
| if ( edgeTarget.isAcceptState() && |
| edgeTarget.getUniquelyPredictedAlt()==nAlts) |
| { |
| /* |
| System.out.println("ignoring transition "+i+" to max alt "+ |
| d.dfa.getNumberOfAlts()); |
| */ |
| d.removeTransition(i); |
| i--; // back up one so that i++ of loop iteration stays within bounds |
| } |
| optimizeExitBranches(edgeTarget); |
| } |
| } |
| |
| protected void optimizeEOTBranches(DFAState d) { |
| Integer sI = Utils.integer(d.stateNumber); |
| if ( visited.contains(sI) ) { |
| return; // already visited |
| } |
| visited.add(sI); |
| for (int i = 0; i < d.getNumberOfTransitions(); i++) { |
| Transition edge = (Transition) d.transition(i); |
| DFAState edgeTarget = ((DFAState)edge.target); |
| /* |
| System.out.println(d.stateNumber+"-"+ |
| edge.label.toString(d.dfa.nfa.grammar)+"->"+ |
| edgeTarget.stateNumber); |
| */ |
| // if only one edge coming out, it is EOT, and target is accept prune |
| if ( PRUNE_TOKENS_RULE_SUPERFLUOUS_EOT_EDGES && |
| edgeTarget.isAcceptState() && |
| d.getNumberOfTransitions()==1 && |
| edge.label.isAtom() && |
| edge.label.getAtom()==Label.EOT ) |
| { |
| //System.out.println("state "+d+" can be pruned"); |
| // remove the superfluous EOT edge |
| d.removeTransition(i); |
| d.setAcceptState(true); // make it an accept state |
| // force it to uniquely predict the originally predicted state |
| d.cachedUniquelyPredicatedAlt = |
| edgeTarget.getUniquelyPredictedAlt(); |
| i--; // back up one so that i++ of loop iteration stays within bounds |
| } |
| optimizeEOTBranches(edgeTarget); |
| } |
| } |
| |
| /** Walk DFA states, unlinking the nfa configs and whatever else I |
| * can to reduce memory footprint. |
| protected void unlinkUnneededStateData(DFAState d) { |
| Integer sI = Utils.integer(d.stateNumber); |
| if ( visited.contains(sI) ) { |
| return; // already visited |
| } |
| visited.add(sI); |
| d.nfaConfigurations = null; |
| for (int i = 0; i < d.getNumberOfTransitions(); i++) { |
| Transition edge = (Transition) d.transition(i); |
| DFAState edgeTarget = ((DFAState)edge.target); |
| unlinkUnneededStateData(edgeTarget); |
| } |
| } |
| */ |
| |
| } |