| """ANTLR3 runtime package""" |
| |
| # begin[licence] |
| # |
| # [The "BSD licence"] |
| # Copyright (c) 2005-2012 Terence Parr |
| # All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions |
| # are met: |
| # 1. Redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer. |
| # 2. Redistributions in binary form must reproduce the above copyright |
| # notice, this list of conditions and the following disclaimer in the |
| # documentation and/or other materials provided with the distribution. |
| # 3. The name of the author may not be used to endorse or promote products |
| # derived from this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| # IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| # OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| # IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| # |
| # end[licence] |
| |
| from .constants import EOF |
| from .exceptions import NoViableAltException, BacktrackingFailed |
| |
| |
| class DFA(object): |
| """@brief A DFA implemented as a set of transition tables. |
| |
| Any state that has a semantic predicate edge is special; those states |
| are generated with if-then-else structures in a specialStateTransition() |
| which is generated by cyclicDFA template. |
| |
| """ |
| |
| def __init__( |
| self, |
| recognizer, decisionNumber, |
| eot, eof, min, max, accept, special, transition |
| ): |
| ## Which recognizer encloses this DFA? Needed to check backtracking |
| self.recognizer = recognizer |
| |
| self.decisionNumber = decisionNumber |
| self.eot = eot |
| self.eof = eof |
| self.min = min |
| self.max = max |
| self.accept = accept |
| self.special = special |
| self.transition = transition |
| |
| |
| def predict(self, input): |
| """ |
| From the input stream, predict what alternative will succeed |
| using this DFA (representing the covering regular approximation |
| to the underlying CFL). Return an alternative number 1..n. Throw |
| an exception upon error. |
| """ |
| mark = input.mark() |
| s = 0 # we always start at s0 |
| try: |
| for _ in range(50000): |
| specialState = self.special[s] |
| if specialState >= 0: |
| s = self.specialStateTransition(specialState, input) |
| if s == -1: |
| self.noViableAlt(s, input) |
| return 0 |
| input.consume() |
| continue |
| |
| if self.accept[s] >= 1: |
| return self.accept[s] |
| |
| # look for a normal char transition |
| c = input.LA(1) |
| |
| if c >= self.min[s] and c <= self.max[s]: |
| # move to next state |
| snext = self.transition[s][c-self.min[s]] |
| |
| if snext < 0: |
| # was in range but not a normal transition |
| # must check EOT, which is like the else clause. |
| # eot[s]>=0 indicates that an EOT edge goes to another |
| # state. |
| if self.eot[s] >= 0: # EOT Transition to accept state? |
| s = self.eot[s] |
| input.consume() |
| # TODO: I had this as return accept[eot[s]] |
| # which assumed here that the EOT edge always |
| # went to an accept...faster to do this, but |
| # what about predicated edges coming from EOT |
| # target? |
| continue |
| |
| self.noViableAlt(s, input) |
| return 0 |
| |
| s = snext |
| input.consume() |
| continue |
| |
| if self.eot[s] >= 0: |
| s = self.eot[s] |
| input.consume() |
| continue |
| |
| # EOF Transition to accept state? |
| if c == EOF and self.eof[s] >= 0: |
| return self.accept[self.eof[s]] |
| |
| # not in range and not EOF/EOT, must be invalid symbol |
| self.noViableAlt(s, input) |
| return 0 |
| |
| else: |
| raise RuntimeError("DFA bang!") |
| |
| finally: |
| input.rewind(mark) |
| |
| |
| def noViableAlt(self, s, input): |
| if self.recognizer._state.backtracking > 0: |
| raise BacktrackingFailed |
| |
| nvae = NoViableAltException( |
| self.getDescription(), |
| self.decisionNumber, |
| s, |
| input |
| ) |
| |
| self.error(nvae) |
| raise nvae |
| |
| |
| def error(self, nvae): |
| """A hook for debugging interface""" |
| pass |
| |
| |
| def specialStateTransition(self, s, input): |
| return -1 |
| |
| |
| def getDescription(self): |
| return "n/a" |
| |
| |
| ## def specialTransition(self, state, symbol): |
| ## return 0 |
| |
| |
| @classmethod |
| def unpack(cls, string): |
| """@brief Unpack the runlength encoded table data. |
| |
| Terence implemented packed table initializers, because Java has a |
| size restriction on .class files and the lookup tables can grow |
| pretty large. The generated JavaLexer.java of the Java.g example |
| would be about 15MB with uncompressed array initializers. |
| |
| Python does not have any size restrictions, but the compilation of |
| such large source files seems to be pretty memory hungry. The memory |
| consumption of the python process grew to >1.5GB when importing a |
| 15MB lexer, eating all my swap space and I was to impacient to see, |
| if it could finish at all. With packed initializers that are unpacked |
| at import time of the lexer module, everything works like a charm. |
| |
| """ |
| |
| ret = [] |
| for i in range(0, len(string) - 1, 2): |
| (n, v) = ord(string[i]), ord(string[i + 1]) |
| |
| if v == 0xFFFF: |
| v = -1 |
| |
| ret += [v] * n |
| |
| return ret |