| unit Antlr.Runtime; |
| (* |
| [The "BSD licence"] |
| Copyright (c) 2008 Erik van Bilsen |
| Copyright (c) 2005-2007 Kunle Odutola |
| All rights reserved. |
| |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions |
| are met: |
| 1. Redistributions of source code MUST RETAIN the above copyright |
| notice, this list of conditions and the following disclaimer. |
| 2. Redistributions in binary form MUST REPRODUCE the above copyright |
| notice, this list of conditions and the following disclaimer in |
| the documentation and/or other materials provided with the |
| distribution. |
| 3. The name of the author may not be used to endorse or promote products |
| derived from this software without specific prior WRITTEN permission. |
| 4. Unless explicitly state otherwise, any contribution intentionally |
| submitted for inclusion in this work to the copyright owner or licensor |
| shall be under the terms and conditions of this license, without any |
| additional terms or conditions. |
| |
| THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| *) |
| |
| interface |
| |
| {$IF CompilerVersion < 20} |
| {$MESSAGE ERROR 'You need Delphi 2009 or higher to use the Antlr runtime'} |
| {$IFEND} |
| |
| uses |
| SysUtils, |
| Classes, |
| Generics.Defaults, |
| Generics.Collections, |
| Antlr.Runtime.Tools, |
| Antlr.Runtime.Collections; |
| |
| type |
| /// <summary>Constants for character streams; cscEOF (-1) is the end-of-file marker.</summary> |
| TCharStreamConstants = (cscEOF = -1); |
| |
| type |
| { Forward class declarations, so that the interfaces declared below can refer to these classes (e.g. ERecognitionException as a parameter type) } |
| ERecognitionException = class; |
| ENoViableAltException = class; |
| |
| /// <summary> |
| /// A simple stream of integers. This is useful when all we care about is the char |
| /// or token type sequence (such as for interpretation). |
| /// </summary> |
| IIntStream = interface(IANTLRInterface) |
| ['{6B851BDB-DD9C-422B-AD1E-567E52D2654F}'] |
| { Property accessors } |
| function GetSourceName: String; |
| |
| { Methods } |
| /// <summary> |
| /// Advances the read position of the stream. Updates line and column state. |
| /// </summary> |
| procedure Consume; |
| |
| /// <summary> |
| /// Get int at current input pointer + I ahead (where I=1 is next int). |
| /// Negative indexes are allowed. LA(-1) is previous token (token just matched). |
| /// LA(-i) where i is before first token should yield -1, invalid char or EOF. |
| /// </summary> |
| function LA(I: Integer): Integer; |
| /// <summary> |
| /// Char-typed lookahead taking the same argument as LA. |
| /// NOTE(review): presumably returns the same symbol as LA(I) as a Char; confirm against the implementations. |
| /// </summary> |
| function LAChar(I: Integer): Char; |
| |
| /// <summary>Tell the stream to start buffering if it hasn't already.</summary> |
| /// <remarks> |
| /// Executing Rewind(Mark()) on a stream should not affect the input position. |
| /// The Lexer tracks line/col info as well as input index so its markers are |
| /// not pure input indexes. Same for tree node streams. |
| /// </remarks> |
| /// <returns>Return a marker that can be passed to |
| /// <see cref="IIntStream.Rewind(Integer)"/> to return to the current position. |
| /// This could be the current input position, a value returned from |
| /// <see cref="IIntStream.Index"/>, or some other marker.</returns> |
| function Mark: Integer; |
| |
| /// <summary> |
| /// Return the current input symbol index 0..N where N indicates the |
| /// last symbol has been read. The index is the symbol about to be |
| /// read not the most recently read symbol. |
| /// </summary> |
| function Index: Integer; |
| |
| /// <summary> |
| /// Resets the stream so that the next call to |
| /// <see cref="IIntStream.Index"/> would return marker. |
| /// </summary> |
| /// <remarks> |
| /// The marker will usually be <see cref="IIntStream.Index"/> but |
| /// it doesn't have to be. It's just a marker to indicate what |
| /// state the stream was in. This is essentially calling |
| /// <see cref="IIntStream.Release"/> and <see cref="IIntStream.Seek"/>. |
| /// If there are other markers created after the specified marker, |
| /// this routine must unroll them like a stack. Assumes the state the |
| /// stream was in when this marker was created. |
| /// </remarks> |
| procedure Rewind(const Marker: Integer); overload; |
| |
| /// <summary> |
| /// Rewind to the input position of the last marker. |
| /// </summary> |
| /// <remarks> |
| /// Used currently only after a cyclic DFA and just before starting |
| /// a sem/syn predicate to get the input position back to the start |
| /// of the decision. Do not "pop" the marker off the state. Mark(I) |
| /// and Rewind(I) should balance still. It is like invoking |
| /// Rewind(last marker) but it should not "pop" the marker off. |
| /// It's like Seek(last marker's input position). |
| /// </remarks> |
| procedure Rewind; overload; |
| |
| /// <summary> |
| /// You may want to commit to a backtrack but don't want to force the |
| /// stream to keep bookkeeping objects around for a marker that is |
| /// no longer necessary. This will have the same behavior as |
| /// <see cref="IIntStream.Rewind(Integer)"/> except it releases resources without |
| /// the backward seek. |
| /// </summary> |
| /// <remarks> |
| /// This must throw away resources for all markers back to the marker |
| /// argument. So if you're nested 5 levels of Mark(), and then Release(2) |
| /// you have to release resources for depths 2..5. |
| /// </remarks> |
| procedure Release(const Marker: Integer); |
| |
| /// <summary> |
| /// Set the input cursor to the position indicated by index. This is |
| /// normally used to seek ahead in the input stream. |
| /// </summary> |
| /// <remarks> |
| /// No buffering is required to do this unless you know your stream |
| /// will use seek to move backwards such as when backtracking. |
| /// |
| /// This is different from rewind in its multi-directional requirement |
| /// and in that its argument is strictly an input cursor (index). |
| /// |
| /// For char streams, seeking forward must update the stream state such |
| /// as line number. For seeking backwards, you will be presumably |
| /// backtracking using the |
| /// <see cref="IIntStream.Mark"/>/<see cref="IIntStream.Rewind(Integer)"/> |
| /// mechanism that restores state and so this method does not need to |
| /// update state when seeking backwards. |
| /// |
| /// Currently, this method is only used for efficient backtracking using |
| /// memoization, but in the future it may be used for incremental parsing. |
| /// |
| /// The index is 0..N-1. A seek to position i means that LA(1) will return |
| /// the ith symbol. So, seeking to 0 means LA(1) will return the first |
| /// element in the stream. |
| /// </remarks> |
| procedure Seek(const Index: Integer); |
| |
| /// <summary>Returns the size of the entire stream.</summary> |
| /// <remarks> |
| /// Only makes sense for streams that buffer everything up probably, |
| /// but might be useful to display the entire stream or for testing. |
| /// This value includes a single EOF. |
| /// </remarks> |
| function Size: Integer; |
| |
| { Properties } |
| |
| /// <summary> |
| /// Where are you getting symbols from? Normally, implementations will |
| /// pass the buck all the way to the lexer who can ask its input stream |
| /// for the file name or whatever. |
| /// </summary> |
| property SourceName: String read GetSourceName; |
| end; |
| |
| /// <summary>A source of characters for an ANTLR lexer.</summary> |
| ICharStream = interface(IIntStream) |
| ['{C30EF0DB-F4BD-4CBC-8C8F-828DABB6FF36}'] |
| { Property accessors } |
| function GetLine: Integer; |
| procedure SetLine(const Value: Integer); |
| function GetCharPositionInLine: Integer; |
| procedure SetCharPositionInLine(const Value: Integer); |
| |
| { Methods } |
| |
| /// <summary> |
| /// Get the ith character of lookahead. This is usually the same as |
| /// LA(I). This will be used for labels in the generated lexer code. |
| /// I'd prefer to return a char here type-wise, but it's probably |
| /// better to be 32-bit clean and be consistent with LA. |
| /// </summary> |
| function LT(const I: Integer): Integer; |
| |
| /// <summary> |
| /// This is primarily a useful interface for action code (just make sure |
| /// actions don't use this on streams that don't support it). |
| /// For infinite streams, you don't need this. |
| /// </summary> |
| function Substring(const Start, Stop: Integer): String; |
| |
| { Properties } |
| |
| /// <summary> |
| /// The current line in the character stream (ANTLR tracks the |
| /// line information automatically). To support rewinding character |
| /// streams, we are able to [re-]set the line. |
| /// </summary> |
| property Line: Integer read GetLine write SetLine; |
| |
| /// <summary> |
| /// The index of the character relative to the beginning of the |
| /// line (0..N-1). To support rewinding character streams, we are |
| /// able to [re-]set the character position. |
| /// </summary> |
| property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine; |
| end; |
| |
| /// <summary> |
| /// Common contract for tokens: exposes the token type, line, character |
| /// position in line, channel, token index and text as read/write properties. |
| /// </summary> |
| IToken = interface(IANTLRInterface) |
| ['{73BF129C-2F45-4C68-838E-BF5D3536AC6D}'] |
| { Property accessors } |
| function GetTokenType: Integer; |
| procedure SetTokenType(const Value: Integer); |
| function GetLine: Integer; |
| procedure SetLine(const Value: Integer); |
| function GetCharPositionInLine: Integer; |
| procedure SetCharPositionInLine(const Value: Integer); |
| function GetChannel: Integer; |
| procedure SetChannel(const Value: Integer); |
| function GetTokenIndex: Integer; |
| procedure SetTokenIndex(const Value: Integer); |
| function GetText: String; |
| procedure SetText(const Value: String); |
| |
| { Properties } |
| /// <summary>The integer type of this token</summary> |
| property TokenType: Integer read GetTokenType write SetTokenType; |
| |
| /// <summary>The line number on which this token was matched; line=1..N</summary> |
| property Line: Integer read GetLine write SetLine; |
| |
| /// <summary> |
| /// The index of the first character relative to the beginning of the line 0..N-1 |
| /// </summary> |
| property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine; |
| |
| /// <summary>The channel number assigned to this token</summary> |
| property Channel: Integer read GetChannel write SetChannel; |
| |
| /// <summary> |
| /// An index from 0..N-1 of the token object in the input stream |
| /// </summary> |
| /// <remarks> |
| /// This must be valid in order to use the ANTLRWorks debugger. |
| /// </remarks> |
| property TokenIndex: Integer read GetTokenIndex write SetTokenIndex; |
| |
| /// <summary>The text of the token</summary> |
| /// <remarks> |
| /// When setting the text, it might be a NOP such as for the CommonToken, |
| /// which doesn't have string pointers, just indexes into a char buffer. |
| /// </remarks> |
| property Text: String read GetText write SetText; |
| end; |
| |
| /// <summary> |
| /// A source of tokens must provide a sequence of tokens via NextToken() |
| /// and also must reveal its source of characters; CommonToken's text is |
| /// computed from a CharStream; it only stores indices into the char stream. |
| /// |
| /// Errors from the lexer are never passed to the parser. Either you want |
| /// to keep going or you do not upon token recognition error. If you do not |
| /// want to continue lexing then you do not want to continue parsing. Just |
| /// throw an exception not under RecognitionException and Delphi will naturally |
| /// toss you all the way out of the recognizers. If you want to continue |
| /// lexing then you should not throw an exception to the parser--it has already |
| /// requested a token. Keep lexing until you get a valid one. Just report |
| /// errors and keep going, looking for a valid token. |
| /// </summary> |
| ITokenSource = interface(IANTLRInterface) |
| ['{2C71FAD0-AEEE-417D-B576-4059F7C4CEB4}'] |
| { Property accessors } |
| function GetSourceName: String; |
| |
| { Methods } |
| |
| /// <summary> |
| /// Returns a Token object from the input stream (usually a CharStream). |
| /// Does not fail/return upon lexing error; just keeps chewing on the |
| /// characters until it gets a good one; errors are not passed through |
| /// to the parser. |
| /// </summary> |
| function NextToken: IToken; |
| |
| { Properties } |
| |
| /// <summary> |
| /// Where are you getting tokens from? Normally the implementation will |
| /// simply ask the lexer's input stream. |
| /// </summary> |
| property SourceName: String read GetSourceName; |
| end; |
| |
| /// <summary>A stream of tokens accessing tokens from a TokenSource.</summary> |
| ITokenStream = interface(IIntStream) |
| ['{59E5B39D-31A6-496D-9FA9-AC75CC584B68}'] |
| { Property accessors } |
| function GetTokenSource: ITokenSource; |
| procedure SetTokenSource(const Value: ITokenSource); |
| |
| { Methods } |
| |
| /// <summary> |
| /// Get Token at current input pointer + K ahead (where K=1 is next |
| /// Token). |
| /// K < 0 indicates tokens in the past. So -1 is previous token and -2 is |
| /// two tokens ago. LT(0) is undefined. For K>=N, return Token.EOFToken. |
| /// Return nil for LT(0) and any index that results in an absolute address |
| /// that is negative. |
| /// </summary> |
| function LT(const K: Integer): IToken; |
| |
| /// <summary> |
| /// Get a token at an absolute index I; 0..N-1. This is really only |
| /// needed for profiling and debugging and token stream rewriting. |
| /// If you don't want to buffer up tokens, then this method makes no |
| /// sense for you. Naturally you can't use the rewrite stream feature. |
| /// I believe DebugTokenStream can easily be altered to not use |
| /// this method, removing the dependency. |
| /// </summary> |
| function Get(const I: Integer): IToken; |
| |
| /// <summary>Return the text of all tokens from start to stop, inclusive. |
| /// If the stream does not buffer all the tokens then it can just |
| /// return ''. Users should not access $ruleLabel.text in |
| /// an action of course in that case. |
| /// </summary> |
| function ToString(const Start, Stop: Integer): String; overload; |
| |
| /// <summary>Because the user is not required to use a token with an index stored |
| /// in it, we must provide a means for two token objects themselves to |
| /// indicate the start/end location. Most often this will just delegate |
| /// to the other ToString(Integer,Integer). This is also parallel with |
| /// the TreeNodeStream.ToString(Object,Object). |
| /// </summary> |
| function ToString(const Start, Stop: IToken): String; overload; |
| |
| { Properties } |
| /// <summary>The token source this stream reads from (readable and writable).</summary> |
| property TokenSource: ITokenSource read GetTokenSource write SetTokenSource; |
| end; |
| |
| /// <summary> |
| /// Captures the complete state of a character stream: the index P, |
| /// the line and the character position in line. |
| /// |
| /// When walking ahead with cyclic DFA for syntactic predicates, we |
| /// need to record the state of the input stream (char index, line, |
| /// etc.) so that we can rewind the state after scanning ahead. |
| /// </summary> |
| ICharStreamState = interface(IANTLRInterface) |
| ['{62D2A1CD-ED3A-4C95-A366-AB8F2E54060B}'] |
| { Property accessors } |
| function GetP: Integer; |
| procedure SetP(const Value: Integer); |
| function GetLine: Integer; |
| procedure SetLine(const Value: Integer); |
| function GetCharPositionInLine: Integer; |
| procedure SetCharPositionInLine(const Value: Integer); |
| |
| { Properties } |
| /// <summary>Index into the char stream of the next lookahead char</summary> |
| property P: Integer read GetP write SetP; |
| |
| /// <summary>What line number is the scanner at before processing buffer[P]?</summary> |
| property Line: Integer read GetLine write SetLine; |
| |
| /// <summary>What char position 0..N-1 in line is the scanner at before processing buffer[P]?</summary> |
| property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine; |
| end; |
| |
| /// <summary> |
| /// A pretty quick <see cref="ICharStream"/> that uses a character array |
| /// directly as its underlying source. |
| /// </summary> |
| IANTLRStringStream = interface(ICharStream) |
| ['{2FA24299-FF97-4AB6-8CA6-5D3DA13C4AB2}'] |
| { Methods } |
| |
| /// <summary> |
| /// Resets the stream so that it is in the same state it was in |
| /// when the object was created, *except* that the data array is not |
| /// touched. |
| /// </summary> |
| procedure Reset; |
| |
| end; |
| |
| /// <summary> |
| /// A character stream - an <see cref="ICharStream"/> - that loads |
| /// and caches the contents of its underlying file fully during |
| /// object construction. |
| /// </summary> |
| /// <remarks> |
| /// This looks very much like an ANTLReaderStream or an ANTLRInputStream |
| /// but it is a special case. Since we know the exact size of the file to |
| /// load, we can avoid lots of data copying and buffer resizing. |
| /// </remarks> |
| IANTLRFileStream = interface(IANTLRStringStream) |
| ['{2B0145DB-2DAA-48A0-8316-B47A69EDDD1A}'] |
| { Methods } |
| |
| /// <summary> |
| /// Loads and buffers the specified file to be used as this |
| /// ANTLRFileStream's source. |
| /// </summary> |
| /// <param name="FileName">File to load</param> |
| /// <param name="Encoding">Encoding to apply to file</param> |
| procedure Load(const FileName: String; const Encoding: TEncoding); |
| end; |
| |
| /// <summary> |
| /// A stripped-down version of org.antlr.misc.BitSet that is just |
| /// good enough to handle runtime requirements such as FOLLOW sets |
| /// for automatic error recovery. |
| /// </summary> |
| IBitSet = interface(IANTLRInterface) |
| ['{F2045045-FC46-4779-A65D-56C65D257A8E}'] |
| { Property accessors } |
| function GetIsNil: Boolean; |
| |
| { Methods } |
| |
| /// <summary>Return "this or A" in a new set</summary> |
| function BitSetOr(const A: IBitSet): IBitSet; |
| |
| /// <summary>Or this element into this set (grow as necessary to accommodate)</summary> |
| procedure Add(const El: Integer); |
| |
| /// <summary>Grows the set to a larger number of bits.</summary> |
| /// <param name="Bit">Element that must fit in set |
| /// </param> |
| procedure GrowToInclude(const Bit: Integer); |
| |
| procedure OrInPlace(const A: IBitSet); |
| function Size: Integer; |
| function Member(const El: Integer): Boolean; |
| |
| /// <summary>Remove this element from this set</summary> |
| procedure Remove(const El: Integer); |
| |
| function NumBits: Integer; |
| |
| /// <summary>Return how much space is being used by the bits array, not |
| /// how many actually have member bits on. |
| /// </summary> |
| function LengthInLongWords: Integer; |
| |
| function ToArray: TIntegerArray; |
| function ToPackedArray: TUInt64Array; |
| |
| function ToString: String; overload; |
| function ToString(const TokenNames: TStringArray): String; overload; |
| function Equals(Obj: TObject): Boolean; |
| |
| { Properties } |
| property IsNil: Boolean read GetIsNil; |
| end; |
| /// <summary>Dynamic array of IBitSet references.</summary> |
| TBitSetArray = array of IBitSet; |
| |
| /// <summary> |
| /// The set of fields needed by an abstract recognizer to recognize input |
| /// and recover from errors. |
| /// </summary> |
| /// <remarks> |
| /// As a separate state object, it can be shared among multiple grammars; |
| /// e.g., when one grammar imports another. |
| /// These fields are publicly visible but the actual state pointer per |
| /// parser is protected. |
| /// </remarks> |
| IRecognizerSharedState = interface(IANTLRInterface) |
| ['{6CB6E17A-0B01-4AA7-8D49-5742A3CB8901}'] |
| { Property accessors } |
| function GetFollowing: TBitSetArray; |
| procedure SetFollowing(const Value: TBitSetArray); |
| function GetFollowingStackPointer: Integer; |
| procedure SetFollowingStackPointer(const Value: Integer); |
| function GetErrorRecovery: Boolean; |
| procedure SetErrorRecovery(const Value: Boolean); |
| function GetLastErrorIndex: Integer; |
| procedure SetLastErrorIndex(const Value: Integer); |
| function GetFailed: Boolean; |
| procedure SetFailed(const Value: Boolean); |
| function GetSyntaxErrors: Integer; |
| procedure SetSyntaxErrors(const Value: Integer); |
| function GetBacktracking: Integer; |
| procedure SetBacktracking(const Value: Integer); |
| function GetRuleMemo: TDictionaryArray<Integer, Integer>; |
| function GetRuleMemoCount: Integer; |
| procedure SetRuleMemoCount(const Value: Integer); |
| function GetToken: IToken; |
| procedure SetToken(const Value: IToken); |
| function GetTokenStartCharIndex: Integer; |
| procedure SetTokenStartCharIndex(const Value: Integer); |
| function GetTokenStartLine: Integer; |
| procedure SetTokenStartLine(const Value: Integer); |
| function GetTokenStartCharPositionInLine: Integer; |
| procedure SetTokenStartCharPositionInLine(const Value: Integer); |
| function GetChannel: Integer; |
| procedure SetChannel(const Value: Integer); |
| function GetTokenType: Integer; |
| procedure SetTokenType(const Value: Integer); |
| function GetText: String; |
| procedure SetText(const Value: String); |
| |
| { Properties } |
| |
| /// <summary> |
| /// Tracks the set of token types that can follow any rule invocation. |
| /// Stack grows upwards. When it hits the max, it grows 2x in size |
| /// and keeps going. |
| /// </summary> |
| property Following: TBitSetArray read GetFollowing write SetFollowing; |
| property FollowingStackPointer: Integer read GetFollowingStackPointer write SetFollowingStackPointer; |
| |
| /// <summary> |
| /// This is true when we see an error and before having successfully |
| /// matched a token. Prevents generation of more than one error message |
| /// per error. |
| /// </summary> |
| property ErrorRecovery: Boolean read GetErrorRecovery write SetErrorRecovery; |
| |
| /// <summary> |
| /// The index into the input stream where the last error occurred. |
| /// </summary> |
| /// <remarks> |
| /// This is used to prevent infinite loops where an error is found |
| /// but no token is consumed during recovery...another error is found, |
| /// ad nauseam. This is a failsafe mechanism to guarantee that at least |
| /// one token/tree node is consumed for two errors. |
| /// </remarks> |
| property LastErrorIndex: Integer read GetLastErrorIndex write SetLastErrorIndex; |
| |
| /// <summary> |
| /// In lieu of a return value, this indicates that a rule or token |
| /// has failed to match. Reset to false upon valid token match. |
| /// </summary> |
| property Failed: Boolean read GetFailed write SetFailed; |
| |
| /// <summary> |
| /// Did the recognizer encounter a syntax error? Track how many. |
| /// </summary> |
| property SyntaxErrors: Integer read GetSyntaxErrors write SetSyntaxErrors; |
| |
| /// <summary> |
| /// If 0, no backtracking is going on. Safe to exec actions etc... |
| /// If >0 then it's the level of backtracking. |
| /// </summary> |
| property Backtracking: Integer read GetBacktracking write SetBacktracking; |
| |
| /// <summary> |
| /// An array[size num rules] of Map<Integer,Integer> that tracks |
| /// the stop token index for each rule. |
| /// </summary> |
| /// <remarks> |
| /// RuleMemo[RuleIndex] is the memoization table for RuleIndex. |
| /// For key RuleStartIndex, you get back the stop token for |
| /// associated rule or MEMO_RULE_FAILED. |
| /// |
| /// This is only used if rule memoization is on (which it is by default). |
| /// </remarks> |
| property RuleMemo: TDictionaryArray<Integer, Integer> read GetRuleMemo; |
| property RuleMemoCount: Integer read GetRuleMemoCount write SetRuleMemoCount; |
| |
| // Lexer Specific Members |
| // LEXER FIELDS (must be in same state object to avoid casting |
| // constantly in generated code and Lexer object) :( |
| |
| /// <summary> |
| /// Token object normally returned by NextToken() after matching lexer rules. |
| /// </summary> |
| /// <remarks> |
| /// The goal of all lexer rules/methods is to create a token object. |
| /// This is an instance variable as multiple rules may collaborate to |
| /// create a single token. NextToken will return this object after |
| /// matching lexer rule(s). If you subclass to allow multiple token |
| /// emissions, then set this to the last token to be matched or |
| /// something non-nil so that the auto token emit mechanism will not |
| /// emit another token. |
| /// </remarks> |
| property Token: IToken read GetToken write SetToken; |
| |
| /// <summary> |
| /// What character index in the stream did the current token start at? |
| /// </summary> |
| /// <remarks> |
| /// Needed, for example, to get the text for current token. Set at |
| /// the start of NextToken. |
| /// </remarks> |
| property TokenStartCharIndex: Integer read GetTokenStartCharIndex write SetTokenStartCharIndex; |
| |
| /// <summary> |
| /// The line on which the first character of the token resides |
| /// </summary> |
| property TokenStartLine: Integer read GetTokenStartLine write SetTokenStartLine; |
| |
| /// <summary>The character position of first character within the line</summary> |
| property TokenStartCharPositionInLine: Integer read GetTokenStartCharPositionInLine write SetTokenStartCharPositionInLine; |
| |
| /// <summary>The channel number for the current token</summary> |
| property Channel: Integer read GetChannel write SetChannel; |
| |
| /// <summary>The token type for the current token</summary> |
| property TokenType: Integer read GetTokenType write SetTokenType; |
| |
| /// <summary> |
| /// You can set the text for the current token to override what is in |
| /// the input char buffer. Call SetText() or assign this property. |
| /// </summary> |
| property Text: String read GetText write SetText; |
| end; |
| |
| /// <summary> |
| /// An IToken that additionally exposes a start index, a stop index and |
| /// the ICharStream it is associated with, plus a ToString method. |
| /// </summary> |
| ICommonToken = interface(IToken) |
| ['{06B1B0C3-2A0D-477A-AE30-414F51ACE8A0}'] |
| { Property accessors } |
| function GetStartIndex: Integer; |
| procedure SetStartIndex(const Value: Integer); |
| function GetStopIndex: Integer; |
| procedure SetStopIndex(const Value: Integer); |
| function GetInputStream: ICharStream; |
| procedure SetInputStream(const Value: ICharStream); |
| |
| { Methods } |
| function ToString: String; |
| |
| { Properties } |
| /// <summary>NOTE(review): presumably the index of the token's first character in the input stream; confirm against the implementation.</summary> |
| property StartIndex: Integer read GetStartIndex write SetStartIndex; |
| /// <summary>NOTE(review): presumably the index of the token's last character in the input stream; confirm against the implementation.</summary> |
| property StopIndex: Integer read GetStopIndex write SetStopIndex; |
| /// <summary>The character stream associated with this token.</summary> |
| property InputStream: ICharStream read GetInputStream write SetInputStream; |
| end; |
| |
| /// <summary> |
| /// A Token object like we'd use in ANTLR 2.x; has an actual string created |
| /// and associated with this object. These objects are needed for imaginary |
| /// tree nodes that have payload objects. We need to create a Token object |
| /// that has a string; the tree node will point at this token. CommonToken |
| /// has indexes into a char stream and hence cannot be used to introduce |
| /// new strings. |
| /// </summary> |
| /// <remarks> |
| /// NOTE(review): unlike the other interfaces in this unit, no GUID is |
| /// declared here, and the accessors/properties already declared by IToken |
| /// are redeclared (only InputStream is new). Confirm both are intentional. |
| /// </remarks> |
| IClassicToken = interface(IToken) |
| { Property accessors } |
| function GetTokenType: Integer; |
| procedure SetTokenType(const Value: Integer); |
| function GetLine: Integer; |
| procedure SetLine(const Value: Integer); |
| function GetCharPositionInLine: Integer; |
| procedure SetCharPositionInLine(const Value: Integer); |
| function GetChannel: Integer; |
| procedure SetChannel(const Value: Integer); |
| function GetTokenIndex: Integer; |
| procedure SetTokenIndex(const Value: Integer); |
| function GetText: String; |
| procedure SetText(const Value: String); |
| function GetInputStream: ICharStream; |
| procedure SetInputStream(const Value: ICharStream); |
| |
| { Properties } |
| property TokenType: Integer read GetTokenType write SetTokenType; |
| property Line: Integer read GetLine write SetLine; |
| property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine; |
| property Channel: Integer read GetChannel write SetChannel; |
| property TokenIndex: Integer read GetTokenIndex write SetTokenIndex; |
| property Text: String read GetText write SetText; |
| property InputStream: ICharStream read GetInputStream write SetInputStream; |
| end; |
| |
| /// <summary> |
| /// A generic recognizer that can handle recognizers generated from |
| /// lexer, parser, and tree grammars. This is all the parsing |
| /// support code essentially; most of it is error recovery stuff and |
| /// backtracking. |
| /// </summary> |
| IBaseRecognizer = interface(IANTLRObject) |
| ['{90813CE2-614B-4773-A26E-936E7DE7E9E9}'] |
| { Property accessors } |
| function GetInput: IIntStream; |
| function GetBacktrackingLevel: Integer; |
| function GetState: IRecognizerSharedState; |
| function GetNumberOfSyntaxErrors: Integer; |
| function GetGrammarFileName: String; |
| function GetSourceName: String; |
| function GetTokenNames: TStringArray; |
| |
| { Methods } |
| procedure BeginBacktrack(const Level: Integer); |
| procedure EndBacktrack(const Level: Integer; const Successful: Boolean); |
| |
| /// <summary>Reset the parser's state. Subclasses must rewind the input stream.</summary> |
| procedure Reset; |
| |
| /// <summary> |
| /// Match current input symbol against ttype. Attempt |
| /// single token insertion or deletion error recovery. If |
| /// that fails, throw EMismatchedTokenException. |
| /// </summary> |
| /// <remarks> |
| /// To turn off single token insertion or deletion error |
| /// recovery, override MismatchRecover() and have it call |
| /// plain Mismatch(), which does not recover. Then any error |
| /// in a rule will cause an exception and immediate exit from |
| /// rule. Rule would recover by resynchronizing to the set of |
| /// symbols that can follow rule ref. |
| /// </remarks> |
| function Match(const Input: IIntStream; const TokenType: Integer; |
| const Follow: IBitSet): IANTLRInterface; |
| |
| function MismatchIsUnwantedToken(const Input: IIntStream; |
| const TokenType: Integer): Boolean; |
| |
| function MismatchIsMissingToken(const Input: IIntStream; |
| const Follow: IBitSet): Boolean; |
| |
| /// <summary>A hook to listen in on the token consumption during error recovery. |
| /// The DebugParser subclasses this to fire events to the listener. |
| /// </summary> |
| procedure BeginResync; |
| procedure EndResync; |
| |
| /// <summary> |
| /// Report a recognition problem. |
| /// </summary> |
| /// <remarks> |
| /// This method sets errorRecovery to indicate the parser is recovering |
| /// not parsing. Once in recovery mode, no errors are generated. |
| /// To get out of recovery mode, the parser must successfully Match |
| /// a token (after a resync). So it will go: |
| /// |
| /// 1. error occurs |
| /// 2. enter recovery mode, report error |
| /// 3. consume until token found in resynch set |
| /// 4. try to resume parsing |
| /// 5. next Match() will reset errorRecovery mode |
| /// |
| /// If you override, make sure to update syntaxErrors if you care about that. |
| /// </remarks> |
| procedure ReportError(const E: ERecognitionException); |
| |
| /// <summary> Match the wildcard: in a symbol</summary> |
| procedure MatchAny(const Input: IIntStream); |
| |
| procedure DisplayRecognitionError(const TokenNames: TStringArray; |
| const E: ERecognitionException); |
| |
| /// <summary> |
| /// What error message should be generated for the various exception types? |
| /// |
| /// Not very object-oriented code, but I like having all error message generation |
| /// within one method rather than spread among all of the exception classes. This |
| /// also makes it much easier for the exception handling because the exception |
| /// classes do not have to have pointers back to this object to access utility |
| /// routines and so on. Also, changing the message for an exception type would be |
|     /// difficult because you would have to subclass the exception, but then somehow get |
| /// ANTLR to make those kinds of exception objects instead of the default. |
| /// |
| /// This looks weird, but trust me--it makes the most sense in terms of flexibility. |
| /// |
| /// For grammar debugging, you will want to override this to add more information |
| /// such as the stack frame with GetRuleInvocationStack(e, this.GetType().Fullname) |
| /// and, for no viable alts, the decision description and state etc... |
| /// |
| /// Override this to change the message generated for one or more exception types. |
| /// </summary> |
| function GetErrorMessage(const E: ERecognitionException; |
| const TokenNames: TStringArray): String; |
| |
| /// <summary> |
| /// What is the error header, normally line/character position information? |
| /// </summary> |
| function GetErrorHeader(const E: ERecognitionException): String; |
| |
| /// <summary> |
| /// How should a token be displayed in an error message? The default |
| /// is to display just the text, but during development you might |
| /// want to have a lot of information spit out. Override in that case |
| /// to use t.ToString() (which, for CommonToken, dumps everything about |
| /// the token). This is better than forcing you to override a method in |
| /// your token objects because you don't have to go modify your lexer |
| /// so that it creates a new type. |
| /// </summary> |
| function GetTokenErrorDisplay(const T: IToken): String; |
| |
| /// <summary> |
| /// Override this method to change where error messages go |
| /// </summary> |
| procedure EmitErrorMessage(const Msg: String); |
| |
| /// <summary> |
| /// Recover from an error found on the input stream. This is |
| /// for NoViableAlt and mismatched symbol exceptions. If you enable |
| /// single token insertion and deletion, this will usually not |
| /// handle mismatched symbol exceptions but there could be a mismatched |
| /// token that the Match() routine could not recover from. |
| /// </summary> |
| procedure Recover(const Input: IIntStream; const RE: ERecognitionException); |
| |
| // Not currently used |
| function RecoverFromMismatchedSet(const Input: IIntStream; |
| const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface; |
| |
| procedure ConsumeUntil(const Input: IIntStream; const TokenType: Integer); overload; |
| |
| /// <summary>Consume tokens until one matches the given token set </summary> |
| procedure ConsumeUntil(const Input: IIntStream; const BitSet: IBitSet); overload; |
| |
| /// <summary> |
| /// Returns List <String> of the rules in your parser instance |
| /// leading up to a call to this method. You could override if |
| /// you want more details such as the file/line info of where |
| /// in the parser source code a rule is invoked. |
| /// </summary> |
| /// <remarks> |
| /// NOT IMPLEMENTED IN THE DELPHI VERSION YET |
| /// This is very useful for error messages and for context-sensitive |
| /// error recovery. |
| /// </remarks> |
| //function GetRuleInvocationStack: IList<IANTLRInterface>; overload; |
| |
| /// <summary> |
| /// A more general version of GetRuleInvocationStack where you can |
|     /// pass in, for example, a RecognitionException to get its rule |
| /// stack trace. This routine is shared with all recognizers, hence, |
| /// static. |
| /// |
| /// TODO: move to a utility class or something; weird having lexer call this |
| /// </summary> |
| /// <remarks> |
| /// NOT IMPLEMENTED IN THE DELPHI VERSION YET |
| /// </remarks> |
| //function GetRuleInvocationStack(const E: Exception; |
| // const RecognizerClassName: String): IList<IANTLRInterface>; overload; |
| |
| /// <summary>A convenience method for use most often with template rewrites. |
| /// Convert a List<Token> to List<String> |
| /// </summary> |
| function ToStrings(const Tokens: IList<IToken>): IList<String>; |
| |
| /// <summary> |
| /// Given a rule number and a start token index number, return |
| /// MEMO_RULE_UNKNOWN if the rule has not parsed input starting from |
| /// start index. If this rule has parsed input starting from the |
| /// start index before, then return where the rule stopped parsing. |
| /// It returns the index of the last token matched by the rule. |
| /// </summary> |
| /// <remarks> |
| /// For now we use a hashtable and just the slow Object-based one. |
| /// Later, we can make a special one for ints and also one that |
| /// tosses out data after we commit past input position i. |
| /// </remarks> |
| function GetRuleMemoization(const RuleIndex, RuleStartIndex: Integer): Integer; |
| |
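|     /// <summary> |
|     /// Has this rule already parsed input at the current index in the |
|     /// input stream? Return the stop token index or MEMO_RULE_UNKNOWN. |
|     /// If we attempted but failed to parse properly before, return |
|     /// MEMO_RULE_FAILED. |
|     /// |
|     /// This method has a side-effect: if we have seen this input for |
|     /// this rule and successfully parsed before, then seek ahead to |
|     /// 1 past the stop token matched for this rule last time. |
|     /// </summary> |
|     /// <remarks> |
|     /// NOTE(review): the declared result type is Boolean, so the stop index and |
|     /// MEMO_RULE_* values described above cannot be returned directly. |
|     /// Presumably True means a memoized outcome exists for this position - |
|     /// confirm against the implementation. |
|     /// </remarks> |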
| function AlreadyParsedRule(const Input: IIntStream; |
| const RuleIndex: Integer): Boolean; |
| |
| /// <summary> |
| /// Record whether or not this rule parsed the input at this position |
| /// successfully. Use a standard hashtable for now. |
| /// </summary> |
| procedure Memoize(const Input: IIntStream; const RuleIndex, |
| RuleStartIndex: Integer); |
| |
| /// <summary> |
| /// Return how many rule/input-index pairs there are in total. |
| /// TODO: this includes synpreds. :( |
| /// </summary> |
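|     /// <returns>The total number of rule/input-index pairs.</returns> |
|     /// <remarks>The "Chace" spelling in the method name is part of the declared interface.</remarks> |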
| function GetRuleMemoizationChaceSize: Integer; |
| |
| procedure TraceIn(const RuleName: String; const RuleIndex: Integer; |
| const InputSymbol: String); |
| procedure TraceOut(const RuleName: String; const RuleIndex: Integer; |
| const InputSymbol: String); |
| |
| { Properties } |
| property Input: IIntStream read GetInput; |
| property BacktrackingLevel: Integer read GetBacktrackingLevel; |
| property State: IRecognizerSharedState read GetState; |
| |
| /// <summary> |
|     /// Get number of recognition errors (lexer, parser, tree parser). Each |
|     /// recognizer tracks its own number, so parser and lexer each have a |
|     /// separate count. Does not count the spurious errors found between |
|     /// an error and the next valid token match. |
| /// |
| /// See also ReportError() |
| /// </summary> |
| property NumberOfSyntaxErrors: Integer read GetNumberOfSyntaxErrors; |
| |
| /// <summary> |
| /// For debugging and other purposes, might want the grammar name. |
| /// Have ANTLR generate an implementation for this property. |
| /// </summary> |
|     /// <returns>The grammar file name</returns> |
| property GrammarFileName: String read GetGrammarFileName; |
| |
| /// <summary> |
| /// For debugging and other purposes, might want the source name. |
| /// Have ANTLR provide a hook for this property. |
| /// </summary> |
| /// <returns>The source name</returns> |
| property SourceName: String read GetSourceName; |
| |
| /// <summary> |
| /// Used to print out token names like ID during debugging and |
| /// error reporting. The generated parsers implement a method |
| /// that overrides this to point to their string[] tokenNames. |
| /// </summary> |
| property TokenNames: TStringArray read GetTokenNames; |
| end; |
| |
| /// <summary> |
| /// The most common stream of tokens is one where every token is buffered up |
| /// and tokens are prefiltered for a certain channel (the parser will only |
| /// see these tokens and cannot change the filter channel number during the |
| /// parse). |
| /// |
| /// TODO: how to access the full token stream? How to track all tokens matched per rule? |
| /// </summary> |
|   ICommonTokenStream = interface(ITokenStream) |
|     // NOTE(review): no GUID is declared here, unlike the neighbouring |
|     // interfaces (IDFA, ILexer, IParser, ...), so this interface cannot be |
|     // queried for directly with Supports or the "as" operator - confirm that |
|     // this is intentional. |
|     { Methods } |
| |
|     /// <summary> |
|     /// A simple filter mechanism whereby you can tell this token stream |
|     /// to force all tokens of type TType to be on Channel. |
|     /// </summary> |
|     /// |
|     /// <remarks> |
|     /// For example, |
|     /// when interpreting, we cannot exec actions so we need to tell |
|     /// the stream to force all WS and NEWLINE to be a different, ignored |
|     /// channel. |
|     /// </remarks> |
|     procedure SetTokenTypeChannel(const TType, Channel: Integer); |
| |
|     /// <summary> |
|     /// NOTE(review): presumably tells the stream to drop every token whose |
|     /// type is TType - confirm against the implementation. |
|     /// </summary> |
|     procedure DiscardTokenType(const TType: Integer); |
| |
|     /// <summary> |
|     /// Boolean switch. NOTE(review): presumably controls whether tokens that |
|     /// are not on the filtered channel are thrown away - confirm. |
|     /// </summary> |
|     procedure DiscardOffChannelTokens(const Discard: Boolean); |
| |
|     /// <summary> |
|     /// Token-list accessors: one overload takes no arguments, the other a |
|     /// Start/Stop index pair. NOTE(review): whether Stop is inclusive is not |
|     /// visible from the declaration - confirm. |
|     /// </summary> |
|     function GetTokens: IList<IToken>; overload; |
|     function GetTokens(const Start, Stop: Integer): IList<IToken>; overload; |
| |
|     /// <summary>Given a start and stop index, return a List of all tokens in |
|     ///  the token type BitSet.  Return null if no tokens were found.  This |
|     ///  method looks at both on and off channel tokens. |
|     /// </summary> |
|     function GetTokens(const Start, Stop: Integer; |
|       const Types: IBitSet): IList<IToken>; overload; |
| |
|     /// <summary> |
|     /// Same Start/Stop parameters as the IBitSet overload, with the token |
|     /// types supplied as a list of integers. NOTE(review): presumably filters |
|     /// the same way as the IBitSet overload - confirm. |
|     /// </summary> |
|     function GetTokens(const Start, Stop: Integer; |
|       const Types: IList<Integer>): IList<IToken>; overload; |
| |
|     /// <summary> |
|     /// Same Start/Stop parameters, with a single token type. NOTE(review): |
|     /// presumably filters the same way as the IBitSet overload - confirm. |
|     /// </summary> |
|     function GetTokens(const Start, Stop, |
|       TokenType: Integer): IList<IToken>; overload; |
| |
|     /// <summary> |
|     /// NOTE(review): presumably returns the stream to its initial state; |
|     /// exactly what is cleared is not visible from the declaration - confirm. |
|     /// </summary> |
|     procedure Reset; |
|   end; |
| |
|   // Forward declaration: the handler type below takes an IDFA parameter, and |
|   // the full IDFA interface in turn exposes a property of that handler type. |
|   IDFA = interface; |
| |
|   /// <summary> |
|   /// Method-pointer type ("of Object") for a callback that receives the DFA, |
|   /// an Integer S and the input stream, and returns an Integer. |
|   /// NOTE(review): presumably S is the special state number and the result |
|   /// is the next state - confirm against the DFA implementation. |
|   /// </summary> |
|   TSpecialStateTransitionHandler = function(const DFA: IDFA; S: Integer; |
|     const Input: IIntStream): Integer of Object; |
| |
| /// <summary> |
| /// A DFA implemented as a set of transition tables. |
| /// </summary> |
| /// <remarks> |
| /// <para> |
| /// Any state that has a semantic predicate edge is special; those states are |
| /// generated with if-then-else structures in a SpecialStateTransition() |
| /// which is generated by cyclicDFA template. |
| /// </para> |
| /// <para> |
| /// There are at most 32767 states (16-bit signed short). Could get away with byte |
| /// sometimes but would have to generate different types and the simulation code too. |
| /// </para> |
| /// <para> |
| /// As a point of reference, the Tokens rule DFA for the lexer in the Java grammar |
| /// sample has approximately 326 states. |
| /// </para> |
| /// </remarks> |
|   IDFA = interface(IANTLRInterface) |
|     ['{36312B59-B718-48EF-A0EC-4529DE70F4C2}'] |
|     { Property accessors } |
|     // Getter/setter pair backing the SpecialStateTransitionHandler property. |
|     function GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler; |
|     procedure SetSpecialStateTransitionHandler(const Value: TSpecialStateTransitionHandler); |
| |
|     { Methods } |
| |
|     /// <summary> |
|     /// From the input stream, predict what alternative will succeed using this |
|     /// DFA (representing the covering regular approximation to the underlying CFL). |
|     /// </summary> |
|     /// <param name="Input">Input stream</param> |
|     /// <returns>Return an alternative number 1..N.  Throw an exception upon error.</returns> |
|     function Predict(const Input: IIntStream): Integer; |
| |
|     /// <summary> |
|     /// A hook for debugging interface |
|     /// </summary> |
|     /// <param name="NVAE">The no-viable-alternative exception handed to the hook</param> |
|     procedure Error(const NVAE: ENoViableAltException); |
| |
|     /// <summary> |
|     /// Takes an Integer S and the input stream and returns an Integer. |
|     /// NOTE(review): presumably dispatches to the callback stored in |
|     /// SpecialStateTransitionHandler - confirm against the implementation. |
|     /// </summary> |
|     function SpecialStateTransition(const S: Integer; const Input: IIntStream): Integer; |
| |
|     /// <summary> |
|     /// Returns a string describing this DFA. NOTE(review): presumably the |
|     /// grammar decision text used in error reports - confirm. |
|     /// </summary> |
|     function Description: String; |
| |
|     /// <summary> |
|     /// Takes a State and a Symbol (both Integer) and returns an Integer. |
|     /// NOTE(review): meaning of the result is not visible here - confirm. |
|     /// </summary> |
|     function SpecialTransition(const State, Symbol: Integer): Integer; |
| |
|     { Properties } |
|     // Read/write access to the special-state callback of type |
|     // TSpecialStateTransitionHandler. |
|     property SpecialStateTransitionHandler: TSpecialStateTransitionHandler read GetSpecialStateTransitionHandler write SetSpecialStateTransitionHandler; |
|   end; |
| |
| /// <summary> |
|   /// A lexer is a recognizer that draws input symbols from a character stream. |
|   /// Lexer grammars result in a subclass of this object. A Lexer object |
| /// uses simplified Match() and error recovery mechanisms in the interest |
| /// of speed. |
| /// </summary> |
|   ILexer = interface(IBaseRecognizer) |
|     ['{331AAB49-E7CD-40E7-AEF5-427F7D6577AD}'] |
|     { Property accessors } |
|     // Accessors backing the CharStream, Line, CharPositionInLine, CharIndex |
|     // and Text properties declared at the end of this interface. |
|     function GetCharStream: ICharStream; |
|     procedure SetCharStream(const Value: ICharStream); |
|     function GetLine: Integer; |
|     function GetCharPositionInLine: Integer; |
|     function GetCharIndex: Integer; |
|     function GetText: String; |
|     procedure SetText(const Value: String); |
| |
|     { Methods } |
| |
|     /// <summary> |
|     /// Return a token from this source; i.e., Match a token on the char stream. |
|     /// </summary> |
|     function NextToken: IToken; |
| |
|     /// <summary> |
|     /// Instruct the lexer to skip creating a token for current lexer rule and |
|     /// look for another token.  NextToken() knows to keep looking when a lexer |
|     /// rule finishes with token set to SKIP_TOKEN.  Recall that if token==null |
|     /// at end of any token rule, it creates one for you and emits it. |
|     /// </summary> |
|     procedure Skip; |
| |
|     /// <summary>This is the lexer entry point that sets instance var 'token' </summary> |
|     procedure DoTokens; |
| |
|     /// <summary> |
|     /// Currently does not support multiple emits per nextToken invocation |
|     /// for efficiency reasons.  Subclass and override this method and |
|     /// NextToken (to push tokens into a list and pull from that list rather |
|     /// than a single variable as this implementation does). |
|     /// </summary> |
|     procedure Emit(const Token: IToken); overload; |
| |
|     /// <summary> |
|     /// The standard method called to automatically emit a token at the |
|     /// outermost lexical rule.  The token object should point into the |
|     /// char buffer start..stop.  If there is a text override in 'text', |
|     /// use that to set the token's text. |
|     /// </summary> |
|     /// <remarks><para>Override this method to emit custom Token objects.</para> |
|     /// <para>If you are building trees, then you should also override |
|     /// Parser or TreeParser.GetMissingSymbol().</para> |
|     ///</remarks> |
|     function Emit: IToken; overload; |
| |
|     /// <summary> |
|     /// Matching primitives taking a string, a single Integer C, nothing, or |
|     /// an Integer pair A, B. NOTE(review): presumably they match a literal |
|     /// string, one character code, any character, and a character in the |
|     /// range A..B against the char stream - confirm, including whether the |
|     /// range is inclusive. |
|     /// </summary> |
|     procedure Match(const S: String); overload; |
|     procedure Match(const C: Integer); overload; |
|     procedure MatchAny; |
|     procedure MatchRange(const A, B: Integer); |
| |
|     /// <summary> |
|     /// Lexers can normally Match any char in its vocabulary after matching |
|     /// a token, so do the easy thing and just kill a character and hope |
|     /// it all works out.  You can instead use the rule invocation stack |
|     /// to do sophisticated error recovery if you are in a Fragment rule. |
|     /// </summary> |
|     procedure Recover(const RE: ERecognitionException); |
| |
|     /// <summary> |
|     /// Returns a string for the Integer C. NOTE(review): presumably a |
|     /// printable form of a character code for error messages - confirm. |
|     /// </summary> |
|     function GetCharErrorDisplay(const C: Integer): String; |
| |
|     /// <summary> |
|     /// Tracing hooks that take only a rule name and a rule index. |
|     /// </summary> |
|     procedure TraceIn(const RuleName: String; const RuleIndex: Integer); |
|     procedure TraceOut(const RuleName: String; const RuleIndex: Integer); |
| |
|     { Properties } |
| |
|     /// <summary>Set the char stream and reset the lexer </summary> |
|     property CharStream: ICharStream read GetCharStream write SetCharStream; |
|     // Line and CharPositionInLine are read-only: no setter is declared. |
|     property Line: Integer read GetLine; |
|     property CharPositionInLine: Integer read GetCharPositionInLine; |
| |
|     /// <summary>What is the index of the current character of lookahead? </summary> |
|     property CharIndex: Integer read GetCharIndex; |
| |
|     /// <summary> |
|     /// Gets or sets the 'lexeme' for the current token. |
|     /// </summary> |
|     /// <remarks> |
|     /// <para> |
|     /// The getter returns the text matched so far for the current token or any |
|     /// text override. |
|     /// </para> |
|     /// <para> |
|     /// The setter sets the complete text of this token. It overrides/wipes any |
|     /// previous changes to the text. |
|     /// </para> |
|     /// </remarks> |
|     property Text: String read GetText write SetText; |
|   end; |
| |
| /// <summary>A parser for TokenStreams. Parser grammars result in a subclass |
| /// of this. |
| /// </summary> |
|   IParser = interface(IBaseRecognizer) |
|     ['{7420879A-5D1F-43CA-BD49-2264D7514501}'] |
|     { Property accessors } |
|     // Getter/setter pair backing the TokenStream property. |
|     function GetTokenStream: ITokenStream; |
|     procedure SetTokenStream(const Value: ITokenStream); |
| |
|     { Methods } |
|     /// <summary> |
|     /// Tracing hooks that take only a rule name and a rule index. |
|     /// </summary> |
|     procedure TraceIn(const RuleName: String; const RuleIndex: Integer); |
|     procedure TraceOut(const RuleName: String; const RuleIndex: Integer); |
| |
|     { Properties } |
| |
|     /// <summary>Set the token stream and reset the parser </summary> |
|     property TokenStream: ITokenStream read GetTokenStream write SetTokenStream; |
|   end; |
| |
| /// <summary> |
| /// Rules can return start/stop info as well as possible trees and templates |
| /// </summary> |
|   IRuleReturnScope = interface(IANTLRInterface) |
|     ['{E9870056-BF6D-4CB2-B71C-10B80797C0B4}'] |
|     { Property accessors } |
|     // Start, Stop and Tree each have a getter and a setter; Template has a |
|     // getter only. All values are typed as the generic IANTLRInterface. |
|     function GetStart: IANTLRInterface; |
|     procedure SetStart(const Value: IANTLRInterface); |
|     function GetStop: IANTLRInterface; |
|     procedure SetStop(const Value: IANTLRInterface); |
|     function GetTree: IANTLRInterface; |
|     procedure SetTree(const Value: IANTLRInterface); |
|     function GetTemplate: IANTLRInterface; |
| |
|     { Properties } |
| |
|     /// <summary>Return the start token or tree </summary> |
|     property Start: IANTLRInterface read GetStart write SetStart; |
| |
|     /// <summary>Return the stop token or tree </summary> |
|     property Stop: IANTLRInterface read GetStop write SetStop; |
| |
|     /// <summary>Has a value potentially if output=AST; </summary> |
|     property Tree: IANTLRInterface read GetTree write SetTree; |
| |
|     /// <summary> |
|     /// Has a value potentially if output=template; |
|     /// Don't use StringTemplate type to avoid dependency on ST assembly. |
|     /// Read-only through this interface: no setter is declared. |
|     /// </summary> |
|     property Template: IANTLRInterface read GetTemplate; |
|   end; |
| |
| /// <summary> |
| /// Rules that return more than a single value must return an object |
| /// containing all the values. Besides the properties defined in |
| /// RuleLabelScope.PredefinedRulePropertiesScope there may be user-defined |
| /// return values. This class simply defines the minimum properties that |
| /// are always defined and methods to access the others that might be |
| /// available depending on output option such as template and tree. |
| /// |
| /// Note text is not an actual property of the return value, it is computed |
| /// from start and stop using the input stream's ToString() method. I |
| /// could add a ctor to this so that we can pass in and store the input |
| /// stream, but I'm not sure we want to do that. It would seem to be undefined |
| /// to get the .text property anyway if the rule matches tokens from multiple |
| /// input streams. |
| /// |
| /// I do not use getters for fields of objects that are used simply to |
| /// group values such as this aggregate. |
| /// </summary> |
|   IParserRuleReturnScope = interface(IRuleReturnScope) |
|     ['{9FB62050-E23B-4FE4-87D5-2C1EE67AEC3E}'] |
|     // Declares no members of its own: everything reachable through this |
|     // interface is inherited from IRuleReturnScope. Only the GUID is new. |
|   end; |
| |
| /// <summary>Useful for dumping out the input stream after doing some |
| /// augmentation or other manipulations. |
| /// </summary> |
| /// |
| /// <remarks> |
| /// You can insert stuff, Replace, and delete chunks. Note that the |
| /// operations are done lazily--only if you convert the buffer to a |
| /// String. This is very efficient because you are not moving data around |
| /// all the time. As the buffer of tokens is converted to strings, the |
| /// ToString() method(s) check to see if there is an operation at the |
| /// current index. If so, the operation is done and then normal String |
| /// rendering continues on the buffer. This is like having multiple Turing |
| /// machine instruction streams (programs) operating on a single input tape. :) |
| /// |
| /// Since the operations are done lazily at ToString-time, operations do not |
| /// screw up the token index values. That is, an insert operation at token |
| /// index I does not change the index values for tokens I+1..N-1. |
| /// |
| /// Because operations never actually alter the buffer, you may always get |
| /// the original token stream back without undoing anything. Since |
| /// the instructions are queued up, you can easily simulate transactions and |
| /// roll back any changes if there is an error just by removing instructions. |
| /// For example, |
| /// |
| /// var |
| /// Input: ICharStream; |
| /// Lex: ILexer; |
| /// Tokens: ITokenRewriteStream; |
| /// Parser: IParser; |
| /// Input := TANTLRFileStream.Create('input'); |
| /// Lex := TLexer.Create(Input); |
| /// Tokens := TTokenRewriteStream.Create(Lex); |
| /// Parser := TParser.Create(tokens); |
| /// Parser.startRule(); |
| /// |
| /// Then in the rules, you can execute |
| /// var |
| /// t,u: IToken; |
| /// ... |
| /// Input.InsertAfter(t, 'text to put after t'); |
| /// Input.InsertAfter(u, 'text after u'); |
| /// WriteLn(Tokens.ToString()); |
| /// |
| /// Actually, you have to cast the 'input' to a TokenRewriteStream. :( |
| /// |
| /// You can also have multiple "instruction streams" and get multiple |
| /// rewrites from a single pass over the input. Just name the instruction |
| /// streams and use that name again when printing the buffer. This could be |
| /// useful for generating a C file and also its header file--all from the |
| /// same buffer: |
| /// |
| /// Tokens.InsertAfter('pass1', t, 'text to put after t'); |
| /// Tokens.InsertAfter('pass2', u, 'text after u'); |
| /// WriteLn(Tokens.ToString('pass1')); |
| /// WriteLn(Tokens.ToString('pass2')); |
| /// |
| /// If you don't use named rewrite streams, a "default" stream is used as |
| /// the first example shows. |
| /// </remarks> |
|   ITokenRewriteStream = interface(ICommonTokenStream) |
|     ['{7B49CBB6-9395-4781-B616-F201889EEA13}'] |
|     { Methods } |
|     // Most operations below come in two flavours: with a leading ProgramName |
|     // argument and without one. NOTE(review): presumably the overloads |
|     // without ProgramName act on the "default" instruction stream - confirm |
|     // against the implementation. |
| |
|     /// <summary>Rollback overload that takes only the instruction index.</summary> |
|     procedure Rollback(const InstructionIndex: Integer); overload; |
| |
|     /// <summary>Rollback the instruction stream for a program so that |
|     /// the indicated instruction (via instructionIndex) is no |
|     /// longer in the stream.  UNTESTED! |
|     /// </summary> |
|     procedure Rollback(const ProgramName: String; |
|       const InstructionIndex: Integer); overload; |
| |
|     /// <summary>DeleteProgram overload that takes no program name.</summary> |
|     procedure DeleteProgram; overload; |
| |
|     /// <summary>Reset the program so that no instructions exist </summary> |
|     procedure DeleteProgram(const ProgramName: String); overload; |
| |
|     // InsertAfter: the position is given either as a token (T) or as an |
|     // Integer Index, and the text either as an IANTLRInterface or as a String. |
|     procedure InsertAfter(const T: IToken; const Text: IANTLRInterface); overload; |
|     procedure InsertAfter(const Index: Integer; const Text: IANTLRInterface); overload; |
|     procedure InsertAfter(const ProgramName: String; const T: IToken; |
|       const Text: IANTLRInterface); overload; |
|     procedure InsertAfter(const ProgramName: String; const Index: Integer; |
|       const Text: IANTLRInterface); overload; |
|     procedure InsertAfter(const T: IToken; const Text: String); overload; |
|     procedure InsertAfter(const Index: Integer; const Text: String); overload; |
|     procedure InsertAfter(const ProgramName: String; const T: IToken; |
|       const Text: String); overload; |
|     procedure InsertAfter(const ProgramName: String; const Index: Integer; |
|       const Text: String); overload; |
| |
|     // InsertBefore: same eight overload shapes as InsertAfter. |
|     procedure InsertBefore(const T: IToken; const Text: IANTLRInterface); overload; |
|     procedure InsertBefore(const Index: Integer; const Text: IANTLRInterface); overload; |
|     procedure InsertBefore(const ProgramName: String; const T: IToken; |
|       const Text: IANTLRInterface); overload; |
|     procedure InsertBefore(const ProgramName: String; const Index: Integer; |
|       const Text: IANTLRInterface); overload; |
|     procedure InsertBefore(const T: IToken; const Text: String); overload; |
|     procedure InsertBefore(const Index: Integer; const Text: String); overload; |
|     procedure InsertBefore(const ProgramName: String; const T: IToken; |
|       const Text: String); overload; |
|     procedure InsertBefore(const ProgramName: String; const Index: Integer; |
|       const Text: String); overload; |
| |
|     // Replace: the target is a single position (Index / IndexT) or a |
|     // Start..Stop pair, given as Integers or as tokens. The named-program |
|     // overloads exist only in the Start..Stop form. |
|     procedure Replace(const Index: Integer; const Text: IANTLRInterface); overload; |
|     procedure Replace(const Start, Stop: Integer; const Text: IANTLRInterface); overload; |
|     procedure Replace(const IndexT: IToken; const Text: IANTLRInterface); overload; |
|     procedure Replace(const Start, Stop: IToken; const Text: IANTLRInterface); overload; |
|     procedure Replace(const ProgramName: String; const Start, Stop: Integer; |
|       const Text: IANTLRInterface); overload; |
|     procedure Replace(const ProgramName: String; const Start, Stop: IToken; |
|       const Text: IANTLRInterface); overload; |
|     procedure Replace(const Index: Integer; const Text: String); overload; |
|     procedure Replace(const Start, Stop: Integer; const Text: String); overload; |
|     procedure Replace(const IndexT: IToken; const Text: String); overload; |
|     procedure Replace(const Start, Stop: IToken; const Text: String); overload; |
|     procedure Replace(const ProgramName: String; const Start, Stop: Integer; |
|       const Text: String); overload; |
|     procedure Replace(const ProgramName: String; const Start, Stop: IToken; |
|       const Text: String); overload; |
| |
|     // Delete: same target shapes as Replace, with no text argument. |
|     procedure Delete(const Index: Integer); overload; |
|     procedure Delete(const Start, Stop: Integer); overload; |
|     procedure Delete(const IndexT: IToken); overload; |
|     procedure Delete(const Start, Stop: IToken); overload; |
|     procedure Delete(const ProgramName: String; const Start, Stop: Integer); overload; |
|     procedure Delete(const ProgramName: String; const Start, Stop: IToken); overload; |
| |
|     // NOTE(review): presumably the token index touched by the most recent |
|     // rewrite instruction - confirm against the implementation. |
|     function GetLastRewriteTokenIndex: Integer; |
| |
|     // String renderings, each available for the whole stream or for a |
|     // Start..Stop range. NOTE(review): presumably ToOriginalString ignores |
|     // the queued rewrite instructions while ToString applies those of the |
|     // named program - confirm. |
|     function ToOriginalString: String; overload; |
|     function ToOriginalString(const Start, Stop: Integer): String; overload; |
| |
|     function ToString(const ProgramName: String): String; overload; |
|     function ToString(const ProgramName: String; |
|       const Start, Stop: Integer): String; overload; |
| |
|     function ToDebugString: String; overload; |
|     function ToDebugString(const Start, Stop: Integer): String; overload; |
|   end; |
| |
| /// <summary>The root of the ANTLR exception hierarchy.</summary> |
| /// <remarks> |
| /// To avoid English-only error messages and to generally make things |
| /// as flexible as possible, these exceptions are not created with strings, |
| /// but rather the information necessary to generate an error. Then |
| /// the various reporting methods in Parser and Lexer can be overridden |
| /// to generate a localized error message. For example, MismatchedToken |
| /// exceptions are built with the expected token type. |
| /// So, don't expect getMessage() to return anything. |
| /// |
| /// You can access the stack trace, which means that you can compute the |
| /// complete trace of rules from the start symbol. This gives you considerable |
| /// context information with which to generate useful error messages. |
| /// |
| /// ANTLR generates code that throws exceptions upon recognition error and |
| /// also generates code to catch these exceptions in each rule. If you |
| /// want to quit upon first error, you can turn off the automatic error |
| /// handling mechanism using rulecatch action, but you still need to |
| /// override methods mismatch and recoverFromMismatchSet. |
| /// |
| /// In general, the recognition exceptions can track where in a grammar a |
| /// problem occurred and/or what was the expected input. While the parser |
| /// knows its state (such as current input symbol and line info) that |
| /// state can change before the exception is reported so current token index |
| /// is computed and stored at exception time. From this info, you can |
| /// perhaps print an entire line of input not just a single token, for example. |
| /// Better to just say the recognizer had a problem and then let the parser |
| /// figure out a fancy report. |
| /// </remarks> |
|   ERecognitionException = class(Exception) |
|   strict private |
|     // Backing field of the ApproximateLineInfo property. |
|     FApproximateLineInfo: Boolean; |
|   strict protected |
|     // Fields are strict protected, so descendant classes can access them |
|     // directly; outside code uses the public properties below. |
| |
|     /// <summary>What input stream did the error occur in? </summary> |
|     FInput: IIntStream; |
| |
|     /// <summary> |
|     /// What is index of token/char were we looking at when the error occurred? |
|     /// </summary> |
|     FIndex: Integer; |
| |
|     /// <summary> |
|     /// The current Token when an error occurred.  Since not all streams |
|     /// can retrieve the ith Token, we have to track the Token object. |
|     /// </summary> |
|     FToken: IToken; |
| |
|     /// <summary>[Tree parser] Node with the problem.</summary> |
|     FNode: IANTLRInterface; |
| |
|     /// <summary>The current char when an error occurred. For lexers. </summary> |
|     FC: Integer; |
| |
|     /// <summary>Track the line at which the error occurred in case this is |
|     /// generated from a lexer.  We need to track this since the |
|     /// unexpected char doesn't carry the line info. |
|     /// </summary> |
|     FLine: Integer; |
|     // Backing field of the CharPositionInLine property. |
|     FCharPositionInLine: Integer; |
|   strict protected |
|     // NOTE(review): presumably fills the position fields from a tree node |
|     // stream when Input is one - confirm against the implementation. |
|     procedure ExtractInformationFromTreeNodeStream(const Input: IIntStream); |
|     // Getter of the UnexpectedType property; virtual, so descendants may |
|     // override it. |
|     function GetUnexpectedType: Integer; virtual; |
|   public |
|     /// <summary>Used for remote debugger deserialization </summary> |
|     constructor Create; overload; |
|     // Remaining overloads accept a message, an input stream, or both. |
|     // NOTE(review): presumably the stream overloads capture the current |
|     // position/token from AInput at construction time - confirm. |
|     constructor Create(const AMessage: String); overload; |
|     constructor Create(const AInput: IIntStream); overload; |
|     constructor Create(const AMessage: String; const AInput: IIntStream); overload; |
| |
|     /// <summary> |
|     /// If you are parsing a tree node stream, you will encounter some |
|     /// imaginary nodes w/o line/col info.  We now search backwards looking |
|     /// for most recent token with line/col info, but notify getErrorHeader() |
|     /// that info is approximate. |
|     /// </summary> |
|     property ApproximateLineInfo: Boolean read FApproximateLineInfo write FApproximateLineInfo; |
| |
|     /// <summary> |
|     /// Returns the current Token when the error occurred (for parsers |
|     /// although a tree parser might also set the token) |
|     /// </summary> |
|     property Token: IToken read FToken write FToken; |
| |
|     /// <summary> |
|     /// Returns the [tree parser] node where the error occurred (for tree parsers). |
|     /// </summary> |
|     property Node: IANTLRInterface read FNode write FNode; |
| |
|     /// <summary> |
|     /// Returns the line at which the error occurred (for lexers) |
|     /// </summary> |
|     property Line: Integer read FLine write FLine; |
| |
|     /// <summary> |
|     /// Returns the character position in the line when the error |
|     /// occurred (for lexers) |
|     /// </summary> |
|     property CharPositionInLine: Integer read FCharPositionInLine write FCharPositionInLine; |
| |
|     /// <summary>Returns the input stream in which the error occurred</summary> |
|     property Input: IIntStream read FInput write FInput; |
| |
|     /// <summary> |
|     /// Returns the token type or char of the unexpected input element. |
|     /// Read-only; computed by the virtual GetUnexpectedType. |
|     /// </summary> |
|     property UnexpectedType: Integer read GetUnexpectedType; |
| |
|     /// <summary> |
|     /// Returns the current char when the error occurred (for lexers) |
|     /// </summary> |
|     property Character: Integer read FC write FC; |
| |
|     /// <summary> |
|     /// Returns the token/char index in the stream when the error occurred |
|     /// </summary> |
|     property Index: Integer read FIndex write FIndex; |
|   end; |
| |
| /// <summary> |
| /// A mismatched char or Token or tree node. |
| /// </summary> |
|   EMismatchedTokenException = class(ERecognitionException) |
|   strict private |
|     // Backing field of the Expecting property. |
|     FExpecting: Integer; |
|   public |
|     // Takes the expected Integer value and the input stream. |
|     // NOTE(review): presumably AExpecting initialises Expecting - confirm. |
|     constructor Create(const AExpecting: Integer; const AInput: IIntStream); |
| |
|     // Replaces the inherited ToString. |
|     function ToString: String; override; |
| |
|     // Readable and writable from outside the class. |
|     property Expecting: Integer read FExpecting write FExpecting; |
|   end; |
| |
|   /// <summary> |
|   /// Specialisation of EMismatchedTokenException that additionally exposes |
|   /// the read-only UnexpectedToken property and supplies its own ToString. |
|   /// NOTE(review): presumably raised when an extra, unwanted token is found |
|   /// in the input - confirm where it is constructed. |
|   /// </summary> |
|   EUnwantedTokenException = class(EMismatchedTokenException) |
|   strict private |
|     // Getter of the UnexpectedToken property. |
|     function GetUnexpectedToken: IToken; |
|   public |
|     // Read-only: no setter or backing field is declared in this class. |
|     property UnexpectedToken: IToken read GetUnexpectedToken; |
| |
|     function ToString: String; override; |
|   end; |
| |
| /// <summary> |
| /// We were expecting a token but it's not found. The current token |
| /// is actually what we wanted next. Used for tree node errors too. |
| /// </summary> |
|   EMissingTokenException = class(EMismatchedTokenException) |
|   strict private |
|     // Backing field of the Inserted property. |
|     FInserted: IANTLRInterface; |
|     // Getter of the read-only MissingType property. |
|     function GetMissingType: Integer; |
|   public |
|     // Same parameters as the ancestor's constructor plus AInserted. |
|     // NOTE(review): presumably AInserted initialises Inserted - confirm. |
|     constructor Create(const AExpecting: Integer; const AInput: IIntStream; |
|       const AInserted: IANTLRInterface); |
| |
|     function ToString: String; override; |
| |
|     // MissingType is computed by a getter and cannot be written; Inserted is |
|     // a plain read/write field property. |
|     property MissingType: Integer read GetMissingType; |
|     property Inserted: IANTLRInterface read FInserted write FInserted; |
|   end; |
| |
|   /// <summary> |
|   /// Recognition error that carries an expected Integer value (Expecting). |
|   /// Derives directly from ERecognitionException, not from |
|   /// EMismatchedTokenException, although its members have the same shape. |
|   /// NOTE(review): presumably the tree-parser counterpart of a mismatched |
|   /// token - confirm where it is raised. |
|   /// </summary> |
|   EMismatchedTreeNodeException = class(ERecognitionException) |
|   strict private |
|     // Backing field of the Expecting property. |
|     FExpecting: Integer; |
|   public |
|     constructor Create(const AExpecting: Integer; const AInput: IIntStream); |
| |
|     function ToString: String; override; |
| |
|     property Expecting: Integer read FExpecting write FExpecting; |
|   end; |
| |
| /// <summary> |
| /// A recognition error that records a grammar decision description, a |
| /// decision number and a state number. All three are read-only after |
| /// construction. |
| /// </summary> |
| ENoViableAltException = class(ERecognitionException) |
| strict private |
| /// <summary>Backing field for GrammarDecisionDescription</summary> |
| FGrammarDecisionDescription: String; |
| /// <summary>Backing field for DecisionNumber</summary> |
| FDecisionNumber: Integer; |
| /// <summary>Backing field for StateNumber</summary> |
| FStateNumber: Integer; |
| public |
| /// <summary> |
| /// Creates the exception from the decision description, the decision |
| /// and state numbers, and the input stream. |
| /// </summary> |
| constructor Create(const AGrammarDecisionDescription: String; |
| const ADecisionNumber, AStateNumber: Integer; const AInput: IIntStream); |
| |
| /// <summary>Overrides the inherited ToString</summary> |
| function ToString: String; override; |
| |
| property GrammarDecisionDescription: String read FGrammarDecisionDescription; |
| property DecisionNumber: Integer read FDecisionNumber; |
| property StateNumber: Integer read FStateNumber; |
| end; |
| |
| /// <summary> |
| /// A recognition error that records the number of the decision involved. |
| /// Unlike its sibling exception classes it does not override ToString. |
| /// </summary> |
| EEarlyExitException = class(ERecognitionException) |
| strict private |
| /// <summary>Backing field for DecisionNumber</summary> |
| FDecisionNumber: Integer; |
| public |
| /// <summary> |
| /// Creates the exception from the decision number and the input stream. |
| /// </summary> |
| constructor Create(const ADecisionNumber: Integer; const AInput: IIntStream); |
| |
| /// <summary>The decision number (read-only)</summary> |
| property DecisionNumber: Integer read FDecisionNumber; |
| end; |
| |
| /// <summary> |
| /// A recognition error whose expectation is a set (IBitSet) rather than |
| /// a single value. |
| /// </summary> |
| EMismatchedSetException = class(ERecognitionException) |
| strict private |
| /// <summary>Backing field for the Expecting property</summary> |
| FExpecting: IBitSet; |
| public |
| /// <summary> |
| /// Creates the exception from the expected set and the input stream. |
| /// </summary> |
| constructor Create(const AExpecting: IBitSet; const AInput: IIntStream); |
| |
| /// <summary>Overrides the inherited ToString</summary> |
| function ToString: String; override; |
| |
| /// <summary>The set that was expected</summary> |
| property Expecting: IBitSet read FExpecting write FExpecting; |
| end; |
| |
| /// <summary> |
| /// Variant of EMismatchedSetException that only replaces ToString; it adds |
| /// no state of its own and inherits the constructor. |
| /// </summary> |
| EMismatchedNotSetException = class(EMismatchedSetException) |
| |
| public |
| /// <summary>Overrides the inherited ToString</summary> |
| function ToString: String; override; |
| end; |
| |
| /// <summary> |
| /// A recognition error that records a rule name and the text of a |
| /// predicate. |
| /// </summary> |
| EFailedPredicateException = class(ERecognitionException) |
| strict private |
| /// <summary>Backing field for RuleName</summary> |
| FRuleName: String; |
| /// <summary>Backing field for PredicateText</summary> |
| FPredicateText: String; |
| public |
| /// <summary> |
| /// Creates the exception. Note that the input stream comes first here, |
| /// whereas the sibling exception constructors take it last. |
| /// </summary> |
| constructor Create(const AInput: IIntStream; const ARuleName, |
| APredicateText: String); |
| |
| /// <summary>Overrides the inherited ToString</summary> |
| function ToString: String; override; |
| |
| property RuleName: String read FRuleName write FRuleName; |
| property PredicateText: String read FPredicateText write FPredicateText; |
| end; |
| |
| /// <summary> |
| /// A recognition error that records two integer values, A and B. |
| /// Presumably these are the bounds of the expected range - TODO confirm. |
| /// </summary> |
| EMismatchedRangeException = class(ERecognitionException) |
| strict private |
| /// <summary>Backing field for A</summary> |
| FA: Integer; |
| /// <summary>Backing field for B</summary> |
| FB: Integer; |
| public |
| /// <summary> |
| /// Creates the exception from the two values and the input stream. |
| /// </summary> |
| constructor Create(const AA, AB: Integer; const AInput: IIntStream); |
| |
| /// <summary>Overrides the inherited ToString</summary> |
| function ToString: String; override; |
| |
| property A: Integer read FA write FA; |
| property B: Integer read FB write FB; |
| end; |
| |
| type |
| /// <summary> |
| /// Plain holder for three integers (P, Line and CharPositionInLine), |
| /// exposed through the ICharStreamState getter/setter pairs. |
| /// </summary> |
| TCharStreamState = class(TANTLRObject, ICharStreamState) |
| strict private |
| /// <summary>Value returned by GetP and assigned by SetP</summary> |
| FP: Integer; |
| /// <summary>Value returned by GetLine and assigned by SetLine</summary> |
| FLine: Integer; |
| /// <summary>Value behind Get/SetCharPositionInLine</summary> |
| FCharPositionInLine: Integer; |
| protected |
| { ICharStreamState } |
| function GetP: Integer; |
| procedure SetP(const Value: Integer); |
| function GetLine: Integer; |
| procedure SetLine(const Value: Integer); |
| function GetCharPositionInLine: Integer; |
| procedure SetCharPositionInLine(const Value: Integer); |
| end; |
| |
| type |
| /// <summary> |
| /// Character stream over an in-memory character buffer. Implements |
| /// IANTLRStringStream and ICharStream and tracks the current index, line |
| /// and column, plus a list of saved states for Mark/Rewind. |
| /// </summary> |
| TANTLRStringStream = class(TANTLRObject, IANTLRStringStream, ICharStream) |
| private |
| /// <summary>Pointer to the character data being read</summary> |
| FData: PChar; |
| |
| /// <summary> |
| /// Presumably True when this object allocated FData itself and must free |
| /// it in Destroy - TODO confirm in the implementation. |
| /// </summary> |
| FOwnsData: Boolean; |
| |
| /// <summary>How many characters are actually in the buffer?</summary> |
| FN: Integer; |
| |
| /// <summary>Current line number within the input (1..n)</summary> |
| FLine: Integer; |
| |
| /// <summary>Index in our array for the next char (0..n-1)</summary> |
| FP: Integer; |
| |
| /// <summary> |
| /// The index of the character relative to the beginning of the |
| /// line (0..n-1) |
| /// </summary> |
| FCharPositionInLine: Integer; |
| |
| /// <summary> |
| /// Tracks the depth of nested <see cref="IIntStream.Mark"/> calls |
| /// </summary> |
| FMarkDepth: Integer; |
| |
| /// <summary> |
| /// A list of CharStreamState objects that tracks the stream state |
| /// (i.e. line, charPositionInLine, and p) that can change as you |
| /// move through the input stream. Indexed from 1..markDepth. |
| /// A null is kept at index 0. Created upon the first call to Mark(). |
| /// </summary> |
| FMarkers: IList<ICharStreamState>; |
| |
| /// <summary> |
| /// Track the last Mark() call result value for use in Rewind(). |
| /// </summary> |
| FLastMarker: Integer; |
| |
| /// <summary> |
| /// What is the name or source of this char stream? |
| /// </summary> |
| FName: String; |
| protected |
| { IIntStream } |
| function GetSourceName: String; virtual; |
| |
| procedure Consume; virtual; |
| function LA(I: Integer): Integer; virtual; |
| function LAChar(I: Integer): Char; |
| function Index: Integer; |
| function Size: Integer; |
| function Mark: Integer; virtual; |
| procedure Rewind(const Marker: Integer); overload; virtual; |
| procedure Rewind; overload; virtual; |
| procedure Release(const Marker: Integer); virtual; |
| procedure Seek(const Index: Integer); virtual; |
| |
| /// <summary> |
| /// Reads through the virtual GetSourceName; writes go straight to FName. |
| /// </summary> |
| property SourceName: String read GetSourceName write FName; |
| protected |
| { ICharStream } |
| function GetLine: Integer; virtual; |
| procedure SetLine(const Value: Integer); virtual; |
| function GetCharPositionInLine: Integer; virtual; |
| procedure SetCharPositionInLine(const Value: Integer); virtual; |
| function LT(const I: Integer): Integer; virtual; |
| function Substring(const Start, Stop: Integer): String; virtual; |
| protected |
| { IANTLRStringStream } |
| procedure Reset; virtual; |
| public |
| /// <summary> |
| /// Parameterless constructor; it receives no character data. |
| /// </summary> |
| constructor Create; overload; |
| |
| /// <summary> |
| /// Initializes a new instance of the ANTLRStringStream class for the |
| /// specified string. This copies data from the string to a local |
| /// character array |
| /// </summary> |
| constructor Create(const AInput: String); overload; |
| |
| /// <summary> |
| /// Initializes a new instance of the ANTLRStringStream class for the |
| /// specified character array. This is the preferred constructor as |
| /// no data is copied |
| /// </summary> |
| constructor Create(const AData: PChar; |
| const ANumberOfActualCharsInArray: Integer); overload; |
| |
| destructor Destroy; override; |
| end; |
| |
| /// <summary> |
| /// A TANTLRStringStream created from a file name, with an optional |
| /// explicit text encoding. Overrides GetSourceName. |
| /// </summary> |
| TANTLRFileStream = class(TANTLRStringStream, IANTLRFileStream) |
| strict private |
| /// <summary>Fully qualified name of the stream's underlying file</summary> |
| FFileName: String; |
| protected |
| { IIntStream } |
| /// <summary> |
| /// Overrides the inherited source name; presumably returns FFileName - |
| /// TODO confirm in the implementation. |
| /// </summary> |
| function GetSourceName: String; override; |
| protected |
| { IANTLRFileStream } |
| |
| /// <summary> |
| /// Loads the named file using the given encoding. Virtual, so |
| /// descendants can replace how the file is read. |
| /// </summary> |
| procedure Load(const FileName: String; const Encoding: TEncoding); virtual; |
| public |
| /// <summary> |
| /// Initializes a new instance of the ANTLRFileStream class for the |
| /// specified file name |
| /// </summary> |
| constructor Create(const AFileName: String); overload; |
| |
| /// <summary> |
| /// Initializes a new instance of the ANTLRFileStream class for the |
| /// specified file name and encoding |
| /// </summary> |
| constructor Create(const AFileName: String; const AEncoding: TEncoding); overload; |
| end; |
| |
| /// <summary> |
| /// Set of non-negative integers stored as bits in an array of 64-bit |
| /// words. Implements IBitSet and ICloneable. |
| /// </summary> |
| TBitSet = class(TANTLRObject, IBitSet, ICloneable) |
| strict private |
| const |
| BITS = 64; // number of bits per UInt64 word |
| LOG_BITS = 6; // 1 shl 6 = 64 |
| |
| ///<summary> We will often need to do a mod operator (i mod nbits). |
| /// It turns out that, for powers of two, this mod operation is the |
| /// same as <![CDATA[(I and (nbits-1))]]>. Since mod is slow, we use a precomputed |
| /// mod mask to do the mod instead. |
| /// </summary> |
| MOD_MASK = BITS - 1; |
| strict private |
| /// <summary>The actual data bits </summary> |
| FBits: TUInt64Array; |
| strict private |
| /// <summary> |
| /// Static helpers. Presumably these map a bit number to its word index, |
| /// to its mask within that word, and an element to the number of words |
| /// needed to hold it - TODO confirm in the implementation. |
| /// </summary> |
| class function WordNumber(const Bit: Integer): Integer; static; |
| class function BitMask(const BitNumber: Integer): UInt64; static; |
| class function NumWordsToHold(const El: Integer): Integer; static; |
| protected |
| { ICloneable } |
| function Clone: IANTLRInterface; virtual; |
| protected |
| { IBitSet } |
| function GetIsNil: Boolean; virtual; |
| function BitSetOr(const A: IBitSet): IBitSet; virtual; |
| procedure Add(const El: Integer); virtual; |
| procedure GrowToInclude(const Bit: Integer); virtual; |
| procedure OrInPlace(const A: IBitSet); virtual; |
| function Size: Integer; virtual; |
| function Member(const El: Integer): Boolean; virtual; |
| procedure Remove(const El: Integer); virtual; |
| function NumBits: Integer; virtual; |
| function LengthInLongWords: Integer; virtual; |
| function ToArray: TIntegerArray; virtual; |
| function ToPackedArray: TUInt64Array; virtual; |
| /// <summary> |
| /// ToString overload taking an array of token names. Declared with |
| /// reintroduce so it coexists with the parameterless ToString below. |
| /// </summary> |
| function ToString(const TokenNames: TStringArray): String; reintroduce; overload; virtual; |
| public |
| /// <summary>Construct a bitset of size one word (64 bits) </summary> |
| constructor Create; overload; |
| |
| /// <summary>Construction from a static array of ulongs </summary> |
| constructor Create(const ABits: array of UInt64); overload; |
| |
| /// <summary>Construction from a list of integers </summary> |
| constructor Create(const AItems: IList<Integer>); overload; |
| |
| /// <summary>Construct a bitset given the size</summary> |
| /// <param name="ANBits">The size of the bitset in bits</param> |
| constructor Create(const ANBits: Integer); overload; |
| |
| /// <summary> |
| /// Factory overloads returning a bitset for one to four given elements. |
| /// </summary> |
| class function BitSetOf(const El: Integer): IBitSet; overload; static; |
| class function BitSetOf(const A, B: Integer): IBitSet; overload; static; |
| class function BitSetOf(const A, B, C: Integer): IBitSet; overload; static; |
| class function BitSetOf(const A, B, C, D: Integer): IBitSet; overload; static; |
| |
| function ToString: String; overload; override; |
| function Equals(Obj: TObject): Boolean; override; |
| end; |
| |
| /// <summary> |
| /// Holds the recognizer state fields declared below and exposes them |
| /// through the IRecognizerSharedState getter/setter pairs. |
| /// </summary> |
| TRecognizerSharedState = class(TANTLRObject, IRecognizerSharedState) |
| strict private |
| FFollowing: TBitSetArray; |
| FFollowingStackPointer: Integer; |
| FErrorRecovery: Boolean; |
| FLastErrorIndex: Integer; |
| FFailed: Boolean; |
| FSyntaxErrors: Integer; |
| FBacktracking: Integer; |
| FRuleMemo: TDictionaryArray<Integer, Integer>; |
| FToken: IToken; |
| FTokenStartCharIndex: Integer; |
| FTokenStartLine: Integer; |
| FTokenStartCharPositionInLine: Integer; |
| FChannel: Integer; |
| FTokenType: Integer; |
| FText: String; |
| protected |
| { IRecognizerSharedState } |
| function GetFollowing: TBitSetArray; |
| procedure SetFollowing(const Value: TBitSetArray); |
| function GetFollowingStackPointer: Integer; |
| procedure SetFollowingStackPointer(const Value: Integer); |
| function GetErrorRecovery: Boolean; |
| procedure SetErrorRecovery(const Value: Boolean); |
| function GetLastErrorIndex: Integer; |
| procedure SetLastErrorIndex(const Value: Integer); |
| function GetFailed: Boolean; |
| procedure SetFailed(const Value: Boolean); |
| function GetSyntaxErrors: Integer; |
| procedure SetSyntaxErrors(const Value: Integer); |
| function GetBacktracking: Integer; |
| procedure SetBacktracking(const Value: Integer); |
| // FRuleMemo has a getter only. The RuleMemoCount pair below has no field |
| // of its own; presumably it reads and sets the length of FRuleMemo - |
| // TODO confirm in the implementation. |
| function GetRuleMemo: TDictionaryArray<Integer, Integer>; |
| function GetRuleMemoCount: Integer; |
| procedure SetRuleMemoCount(const Value: Integer); |
| function GetToken: IToken; |
| procedure SetToken(const Value: IToken); |
| function GetTokenStartCharIndex: Integer; |
| procedure SetTokenStartCharIndex(const Value: Integer); |
| function GetTokenStartLine: Integer; |
| procedure SetTokenStartLine(const Value: Integer); |
| function GetTokenStartCharPositionInLine: Integer; |
| procedure SetTokenStartCharPositionInLine(const Value: Integer); |
| function GetChannel: Integer; |
| procedure SetChannel(const Value: Integer); |
| function GetTokenType: Integer; |
| procedure SetTokenType(const Value: Integer); |
| function GetText: String; |
| procedure SetText(const Value: String); |
| public |
| constructor Create; |
| end; |
| |
| /// <summary> |
| /// Token implementation that records type, line, column, channel, index |
| /// and the start/stop positions within an ICharStream. Implements |
| /// ICommonToken and IToken. |
| /// </summary> |
| TCommonToken = class(TANTLRObject, ICommonToken, IToken) |
| strict protected |
| FTokenType: Integer; |
| FLine: Integer; |
| FCharPositionInLine: Integer; |
| FChannel: Integer; |
| /// <summary>The character stream this token refers to</summary> |
| FInput: ICharStream; |
| |
| /// <summary>We need to be able to change the text once in a while. If |
| /// this is non-null, then getText should return this. Note that |
| /// start/stop are not affected by changing this. |
| /// </summary> |
| FText: String; |
| |
| /// <summary>What token number is this from 0..n-1 tokens; < 0 implies invalid index </summary> |
| FIndex: Integer; |
| |
| /// <summary>The char position into the input buffer where this token starts </summary> |
| FStart: Integer; |
| |
| /// <summary>The char position into the input buffer where this token stops </summary> |
| FStop: Integer; |
| protected |
| { IToken } |
| function GetTokenType: Integer; virtual; |
| procedure SetTokenType(const Value: Integer); virtual; |
| function GetLine: Integer; virtual; |
| procedure SetLine(const Value: Integer); virtual; |
| function GetCharPositionInLine: Integer; virtual; |
| procedure SetCharPositionInLine(const Value: Integer); virtual; |
| function GetChannel: Integer; virtual; |
| procedure SetChannel(const Value: Integer); virtual; |
| function GetTokenIndex: Integer; virtual; |
| procedure SetTokenIndex(const Value: Integer); virtual; |
| function GetText: String; virtual; |
| procedure SetText(const Value: String); virtual; |
| protected |
| { ICommonToken } |
| function GetStartIndex: Integer; |
| procedure SetStartIndex(const Value: Integer); |
| function GetStopIndex: Integer; |
| procedure SetStopIndex(const Value: Integer); |
| function GetInputStream: ICharStream; |
| procedure SetInputStream(const Value: ICharStream); |
| protected |
| /// <summary> |
| /// Parameterless constructor. Declared protected, unlike the public |
| /// overloads below. |
| /// </summary> |
| constructor Create; overload; |
| public |
| /// <summary>Creates a token of the given type</summary> |
| constructor Create(const ATokenType: Integer); overload; |
| /// <summary> |
| /// Creates a token from an input stream, type, channel and start/stop |
| /// positions. |
| /// </summary> |
| constructor Create(const AInput: ICharStream; const ATokenType, AChannel, |
| AStart, AStop: Integer); overload; |
| /// <summary>Creates a token of the given type with explicit text</summary> |
| constructor Create(const ATokenType: Integer; const AText: String); overload; |
| /// <summary>Creates a token from an existing token</summary> |
| constructor Create(const AOldToken: IToken); overload; |
| |
| function ToString: String; override; |
| end; |
| |
| /// <summary> |
| /// Token implementation that stores its own text, type, line, column, |
| /// channel and index. Implements IClassicToken and IToken. Unlike |
| /// TCommonToken it declares no input-stream or start/stop fields. |
| /// </summary> |
| TClassicToken = class(TANTLRObject, IClassicToken, IToken) |
| strict private |
| FText: String; |
| FTokenType: Integer; |
| FLine: Integer; |
| FCharPositionInLine: Integer; |
| FChannel: Integer; |
| |
| /// <summary>What token number is this from 0..n-1 tokens </summary> |
| FIndex: Integer; |
| protected |
| { IClassicToken } |
| function GetTokenType: Integer; virtual; |
| procedure SetTokenType(const Value: Integer); virtual; |
| function GetLine: Integer; virtual; |
| procedure SetLine(const Value: Integer); virtual; |
| function GetCharPositionInLine: Integer; virtual; |
| procedure SetCharPositionInLine(const Value: Integer); virtual; |
| function GetChannel: Integer; virtual; |
| procedure SetChannel(const Value: Integer); virtual; |
| function GetTokenIndex: Integer; virtual; |
| procedure SetTokenIndex(const Value: Integer); virtual; |
| function GetText: String; virtual; |
| procedure SetText(const Value: String); virtual; |
| // NOTE(review): no field in this class backs the input stream pair below; |
| // what these two methods do must be checked in the implementation. |
| function GetInputStream: ICharStream; virtual; |
| procedure SetInputStream(const Value: ICharStream); virtual; |
| public |
| /// <summary>Creates a token of the given type</summary> |
| constructor Create(const ATokenType: Integer); overload; |
| /// <summary>Creates a token from an existing token</summary> |
| constructor Create(const AOldToken: IToken); overload; |
| /// <summary>Creates a token of the given type with explicit text</summary> |
| constructor Create(const ATokenType: Integer; const AText: String); overload; |
| /// <summary> |
| /// Creates a token of the given type with explicit text and channel |
| /// </summary> |
| constructor Create(const ATokenType: Integer; const AText: String; |
| const AChannel: Integer); overload; |
| |
| function ToString: String; override; |
| end; |
| |
| /// <summary> |
| /// Sealed holder for token-related constants and three shared token |
| /// instances. It declares no instance members. |
| /// </summary> |
| TToken = class sealed |
| public |
| const |
| EOR_TOKEN_TYPE = 1; |
| |
| /// <summary>imaginary tree navigation type; traverse "get child" link </summary> |
| DOWN = 2; |
| |
| /// <summary>imaginary tree navigation type; finish with a child list </summary> |
| UP = 3; |
| |
| /// <summary>First token type after the reserved ones above (UP + 1 = 4)</summary> |
| MIN_TOKEN_TYPE = UP + 1; |
| /// <summary>Integer value of cscEOF, which is declared elsewhere</summary> |
| EOF = Integer(cscEOF); |
| INVALID_TOKEN_TYPE = 0; |
| |
| /// <summary> |
| /// All tokens go to the parser (unless skip() is called in that rule) |
| /// on a particular "channel". The parser tunes to a particular channel |
| /// so that whitespace etc... can go to the parser on a "hidden" channel. |
| /// </summary> |
| DEFAULT_CHANNEL = 0; |
| |
| /// <summary> |
| /// Anything on a different channel than DEFAULT_CHANNEL is not parsed by the parser. |
| /// </summary> |
| HIDDEN_CHANNEL = 99; |
| public |
| class var |
| EOF_TOKEN: IToken; |
| INVALID_TOKEN: IToken; |
| /// <summary> |
| /// In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR |
| /// will avoid creating a token for this symbol and try to fetch another. |
| /// </summary> |
| SKIP_TOKEN: IToken; |
| private |
| /// <summary> |
| /// Presumably assigns the class variables above; where it is called from |
| /// is not visible here - TODO confirm. |
| /// </summary> |
| class procedure Initialize; static; |
| end; |
| |
| /// <summary> |
| /// Global constants |
| /// </summary> |
| TConstants = class sealed |
| public |
| const |
| /// <summary>Version string of the runtime</summary> |
| VERSION = '3.1b1'; |
| |
| // Moved to version 2 for v3.1: added grammar name to enter/exit Rule |
| DEBUG_PROTOCOL_VERSION = '2'; |
| |
| /// <summary> |
| /// Directory name; presumably used by ANTLRWorks tooling - TODO confirm. |
| /// </summary> |
| ANTLRWORKS_DIR = 'antlrworks'; |
| end; |
| |
| /// <summary> |
| /// Abstract base class implementing IBaseRecognizer. It declares the |
| /// matching, error reporting, error recovery and rule memoization methods |
| /// and holds the shared state in FState. Descendants must implement |
| /// GetInput and GetSourceName, which are abstract here. |
| /// </summary> |
| TBaseRecognizer = class abstract(TANTLRObject, IBaseRecognizer) |
| public |
| const |
| MEMO_RULE_FAILED = -2; |
| MEMO_RULE_UNKNOWN = -1; |
| INITIAL_FOLLOW_STACK_SIZE = 100; |
| NEXT_TOKEN_RULE_NAME = 'nextToken'; |
| // copies from Token object for convenience in actions |
| DEFAULT_TOKEN_CHANNEL = TToken.DEFAULT_CHANNEL; |
| HIDDEN = TToken.HIDDEN_CHANNEL; |
| strict protected |
| /// <summary> |
| /// An externalized representation of the - shareable - internal state of |
| /// this lexer, parser or tree parser. |
| /// </summary> |
| /// <remarks> |
| /// The state of a lexer, parser, or tree parser are collected into |
| /// external state objects so that the state can be shared. This sharing |
| /// is needed to have one grammar import others and share same error |
| /// variables and other state variables. It's a kind of explicit multiple |
| /// inheritance via delegation of methods and shared state. |
| /// </remarks> |
| FState: IRecognizerSharedState; |
| |
| /// <summary>Read-only access to FState for descendants</summary> |
| property State: IRecognizerSharedState read FState; |
| strict protected |
| /// <summary> |
| /// Match needs to return the current input symbol, which gets put |
| /// into the label for the associated token ref; e.g., x=ID. Token |
| /// and tree parsers need to return different objects. Rather than test |
| /// for input stream type or change the IntStream interface, I use |
| /// a simple method to ask the recognizer to tell me what the current |
| /// input symbol is. |
| /// </summary> |
| /// <remarks>This is ignored for lexers.</remarks> |
| function GetCurrentInputSymbol(const Input: IIntStream): IANTLRInterface; virtual; |
| |
| /// <summary> |
| /// Factor out what to do upon token mismatch so tree parsers can behave |
| /// differently. Override and call MismatchRecover(input, ttype, follow) |
| /// to get single token insertion and deletion. Use this to turn off |
| /// single token insertion and deletion. Override mismatchRecover |
| /// to call this instead. |
| /// </summary> |
| procedure Mismatch(const Input: IIntStream; const TokenType: Integer; |
| const Follow: IBitSet); virtual; |
| |
| /// <summary> |
| /// Attempt to Recover from a single missing or extra token. |
| /// </summary> |
| /// <remarks> |
| /// EXTRA TOKEN |
| /// |
| /// LA(1) is not what we are looking for. If LA(2) has the right token, |
| /// however, then assume LA(1) is some extra spurious token. Delete it |
| /// and LA(2) as if we were doing a normal Match(), which advances the |
| /// input. |
| /// |
| /// MISSING TOKEN |
| /// |
| /// If current token is consistent with what could come after |
| /// ttype then it is ok to "insert" the missing token, else throw |
| /// exception. For example, Input "i=(3;" is clearly missing the |
| /// ')'. When the parser returns from the nested call to expr, it |
| /// will have call chain: |
| /// |
| /// stat -> expr -> atom |
| /// |
| /// and it will be trying to Match the ')' at this point in the |
| /// derivation: |
| /// |
| /// => ID '=' '(' INT ')' ('+' atom)* ';' |
| /// ^ |
| /// Match() will see that ';' doesn't Match ')' and report a |
| /// mismatched token error. To Recover, it sees that LA(1)==';' |
| /// is in the set of tokens that can follow the ')' token |
| /// reference in rule atom. It can assume that you forgot the ')'. |
| /// </remarks> |
| function RecoverFromMismatchedToken(const Input: IIntStream; |
| const TokenType: Integer; const Follow: IBitSet): IANTLRInterface; virtual; |
| |
| /// <summary> |
| /// Conjure up a missing token during error recovery. |
| /// </summary> |
| /// <remarks> |
| /// The recognizer attempts to recover from single missing |
| /// symbols. But, actions might refer to that missing symbol. |
| /// For example, x=ID {f($x);}. The action clearly assumes |
| /// that there has been an identifier matched previously and that |
| /// $x points at that token. If that token is missing, but |
| /// the next token in the stream is what we want we assume that |
| /// this token is missing and we keep going. Because we |
| /// have to return some token to replace the missing token, |
| /// we have to conjure one up. This method gives the user control |
| /// over the tokens returned for missing tokens. Mostly, |
| /// you will want to create something special for identifier |
| /// tokens. For literals such as '{' and ',', the default |
| /// action in the parser or tree parser works. It simply creates |
| /// a CommonToken of the appropriate type. The text will be the token. |
| /// If you change what tokens must be created by the lexer, |
| /// override this method to create the appropriate tokens. |
| /// </remarks> |
| function GetMissingSymbol(const Input: IIntStream; |
| const E: ERecognitionException; const ExpectedTokenType: Integer; |
| const Follow: IBitSet): IANTLRInterface; virtual; |
| |
| /// <summary> |
| /// Push a rule's follow set using our own hardcoded stack |
| /// </summary> |
| /// <param name="FSet">The follow set to push</param> |
| procedure PushFollow(const FSet: IBitSet); |
| |
| /// <summary>Compute the context-sensitive FOLLOW set for current rule. |
| /// This is set of token types that can follow a specific rule |
| /// reference given a specific call chain. You get the set of |
| /// viable tokens that can possibly come next (lookahead depth 1) |
| /// given the current call chain. Contrast this with the |
| /// definition of plain FOLLOW for rule r: |
| /// |
| /// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} |
| /// |
| /// where x in T* and alpha, beta in V*; T is set of terminals and |
| /// V is the set of terminals and nonterminals. In other words, |
| /// FOLLOW(r) is the set of all tokens that can possibly follow |
| /// references to r in *any* sentential form (context). At |
| /// runtime, however, we know precisely which context applies as |
| /// we have the call chain. We may compute the exact (rather |
| /// than covering superset) set of following tokens. |
| /// |
| /// For example, consider grammar: |
| /// |
| /// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF} |
| /// | "return" expr '.' |
| /// ; |
| /// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'} |
| /// atom : INT // FOLLOW(atom)=={'+',')',';','.'} |
| /// | '(' expr ')' |
| /// ; |
| /// |
| /// The FOLLOW sets are all inclusive whereas context-sensitive |
| /// FOLLOW sets are precisely what could follow a rule reference. |
| /// For input "i=(3);", here is the derivation: |
| /// |
| /// stat => ID '=' expr ';' |
| /// => ID '=' atom ('+' atom)* ';' |
| /// => ID '=' '(' expr ')' ('+' atom)* ';' |
| /// => ID '=' '(' atom ')' ('+' atom)* ';' |
| /// => ID '=' '(' INT ')' ('+' atom)* ';' |
| /// => ID '=' '(' INT ')' ';' |
| /// |
| /// At the "3" token, you'd have a call chain of |
| /// |
| /// stat -> expr -> atom -> expr -> atom |
| /// |
| /// What can follow that specific nested ref to atom? Exactly ')' |
| /// as you can see by looking at the derivation of this specific |
| /// input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}. |
| /// |
| /// You want the exact viable token set when recovering from a |
| /// token mismatch. Upon token mismatch, if LA(1) is member of |
| /// the viable next token set, then you know there is most likely |
| /// a missing token in the input stream. "Insert" one by just not |
| /// throwing an exception. |
| /// </summary> |
| function ComputeContextSensitiveRuleFOLLOW: IBitSet; virtual; |
| |
| (* Compute the error recovery set for the current rule. During |
| * rule invocation, the parser pushes the set of tokens that can |
| * follow that rule reference on the stack; this amounts to |
| * computing FIRST of what follows the rule reference in the |
| * enclosing rule. This local follow set only includes tokens |
| * from within the rule; i.e., the FIRST computation done by |
| * ANTLR stops at the end of a rule. |
| * |
| * EXAMPLE |
| * |
| * When you find a "no viable alt exception", the input is not |
| * consistent with any of the alternatives for rule r. The best |
| * thing to do is to consume tokens until you see something that |
| * can legally follow a call to r *or* any rule that called r. |
| * You don't want the exact set of viable next tokens because the |
| * input might just be missing a token--you might consume the |
| * rest of the input looking for one of the missing tokens. |
| * |
| * Consider grammar: |
| * |
| * a : '[' b ']' |
| * | '(' b ')' |
| * ; |
| * b : c '^' INT ; |
| * c : ID |
| * | INT |
| * ; |
| * |
| * At each rule invocation, the set of tokens that could follow |
| * that rule is pushed on a stack. Here are the various "local" |
| * follow sets: |
| * |
| * FOLLOW(b1_in_a) = FIRST(']') = ']' |
| * FOLLOW(b2_in_a) = FIRST(')') = ')' |
| * FOLLOW(c_in_b) = FIRST('^') = '^' |
| * |
| * Upon erroneous input "[]", the call chain is |
| * |
| * a -> b -> c |
| * |
| * and, hence, the follow context stack is: |
| * |
| * depth local follow set after call to rule |
| * 0 <EOF> a (from main()) |
| * 1 ']' b |
| * 2 '^' c |
| * |
| * Notice that ')' is not included, because b would have to have |
| * been called from a different context in rule a for ')' to be |
| * included. |
| * |
| * For error recovery, we cannot consider FOLLOW(c) |
| * (context-sensitive or otherwise). We need the combined set of |
| * all context-sensitive FOLLOW sets--the set of all tokens that |
| * could follow any reference in the call chain. We need to |
| * resync to one of those tokens. Note that FOLLOW(c)='^' and if |
| * we resync'd to that token, we'd consume until EOF. We need to |
| * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. |
| * In this case, for input "[]", LA(1) is in this set so we would |
| * not consume anything and after printing an error rule c would |
| * return normally. It would not find the required '^' though. |
| * At this point, it gets a mismatched token error and throws an |
| * exception (since LA(1) is not in the viable following token |
| * set). The rule exception handler tries to Recover, but finds |
| * the same recovery set and doesn't consume anything. Rule b |
| * exits normally returning to rule a. Now it finds the ']' (and |
| * with the successful Match exits errorRecovery mode). |
| * |
| * So, you can see that the parser walks up the call chain looking |
| * for the token that was a member of the recovery set. |
| * |
| * Errors are not generated in errorRecovery mode. |
| * |
| * ANTLR's error recovery mechanism is based upon original ideas: |
| * |
| * "Algorithms + Data Structures = Programs" by Niklaus Wirth |
| * |
| * and |
| * |
| * "A note on error recovery in recursive descent parsers": |
| * http://portal.acm.org/citation.cfm?id=947902.947905 |
| * |
| * Later, Josef Grosch had some good ideas: |
| * |
| * "Efficient and Comfortable Error Recovery in Recursive Descent |
| * Parsers": |
| * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip |
| * |
| * Like Grosch I implemented local FOLLOW sets that are combined |
| * at run-time upon error to avoid overhead during parsing. |
| *) |
| function ComputeErrorRecoverySet: IBitSet; virtual; |
| |
| /// <summary> |
| /// Shared helper, presumably used by both Compute* functions above; the |
| /// Exact flag presumably selects the context-sensitive (exact) variant - |
| /// TODO confirm in the implementation. |
| /// </summary> |
| function CombineFollows(const Exact: Boolean): IBitSet; |
| protected |
| { IBaseRecognizer } |
| function GetInput: IIntStream; virtual; abstract; |
| function GetBacktrackingLevel: Integer; |
| function GetState: IRecognizerSharedState; |
| function GetNumberOfSyntaxErrors: Integer; |
| function GetGrammarFileName: String; virtual; |
| function GetSourceName: String; virtual; abstract; |
| function GetTokenNames: TStringArray; virtual; |
| |
| procedure BeginBacktrack(const Level: Integer); virtual; |
| procedure EndBacktrack(const Level: Integer; const Successful: Boolean); virtual; |
| procedure Reset; virtual; |
| function Match(const Input: IIntStream; const TokenType: Integer; |
| const Follow: IBitSet): IANTLRInterface; virtual; |
| function MismatchIsUnwantedToken(const Input: IIntStream; |
| const TokenType: Integer): Boolean; |
| function MismatchIsMissingToken(const Input: IIntStream; |
| const Follow: IBitSet): Boolean; |
| procedure BeginResync; virtual; |
| procedure EndResync; virtual; |
| procedure ReportError(const E: ERecognitionException); virtual; |
| procedure MatchAny(const Input: IIntStream); virtual; |
| procedure DisplayRecognitionError(const TokenNames: TStringArray; |
| const E: ERecognitionException); virtual; |
| function GetErrorMessage(const E: ERecognitionException; |
| const TokenNames: TStringArray): String; virtual; |
| function GetErrorHeader(const E: ERecognitionException): String; virtual; |
| function GetTokenErrorDisplay(const T: IToken): String; virtual; |
| procedure EmitErrorMessage(const Msg: String); virtual; |
| procedure Recover(const Input: IIntStream; const RE: ERecognitionException); virtual; |
| function RecoverFromMismatchedSet(const Input: IIntStream; |
| const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface; virtual; |
| procedure ConsumeUntil(const Input: IIntStream; const TokenType: Integer); overload; virtual; |
| procedure ConsumeUntil(const Input: IIntStream; const BitSet: IBitSet); overload; virtual; |
| //function GetRuleInvocationStack: IList<IANTLRInterface>; overload; virtual; |
| //function GetRuleInvocationStack(const E: Exception; |
| // const RecognizerClassName: String): IList<IANTLRInterface>; overload; |
| function ToStrings(const Tokens: IList<IToken>): IList<String>; virtual; |
| function GetRuleMemoization(const RuleIndex, RuleStartIndex: Integer): Integer; virtual; |
| function AlreadyParsedRule(const Input: IIntStream; |
| const RuleIndex: Integer): Boolean; virtual; |
| procedure Memoize(const Input: IIntStream; const RuleIndex, |
| RuleStartIndex: Integer); virtual; |
| // NOTE(review): "Chace" looks like a misspelling of "Cache". The name is |
| // left as declared because callers, and presumably the IBaseRecognizer |
| // interface, depend on it. |
| function GetRuleMemoizationChaceSize: Integer; |
| |
| procedure TraceIn(const RuleName: String; const RuleIndex: Integer; |
| const InputSymbol: String); virtual; |
| procedure TraceOut(const RuleName: String; const RuleIndex: Integer; |
| const InputSymbol: String); virtual; |
| |
| /// <summary>Read-only; delegates to the abstract GetInput</summary> |
| property Input: IIntStream read GetInput; |
| public |
| /// <summary>Parameterless constructor</summary> |
| constructor Create; overload; |
| /// <summary> |
| /// Creates the recognizer with an existing shared state object. |
| /// </summary> |
| constructor Create(const AState: IRecognizerSharedState); overload; |
| end; |
| |
| /// <summary>Token stream that records the tokens pulled from an ITokenSource |
| /// in a list and exposes them through ITokenStream / ICommonTokenStream. |
| /// </summary> |
| TCommonTokenStream = class(TANTLRObject, ICommonTokenStream, ITokenStream) |
| strict private |
| /// <summary>The source the tokens are pulled from.</summary> |
| FTokenSource: ITokenSource; |
| |
| /// <summary>Record every single token pulled from the source so we can reproduce |
| /// chunks of it later. |
| /// </summary> |
| FTokens: IList<IToken>; |
| |
| /// <summary><![CDATA[Map<tokentype, channel>]]> to override some Tokens' channel numbers </summary> |
| FChannelOverrideMap: IDictionary<Integer, Integer>; |
| |
| /// <summary><![CDATA[Set<tokentype>;]]> discard any tokens with this type </summary> |
| FDiscardSet: IHashList<Integer, Integer>; |
| |
| /// <summary>Skip tokens on any channel but this one; this is how we skip whitespace... </summary> |
| FChannel: Integer; |
| |
| /// <summary>By default, track all incoming tokens </summary> |
| FDiscardOffChannelTokens: Boolean; |
| |
| /// <summary>Track the last Mark() call result value for use in Rewind().</summary> |
| FLastMarker: Integer; |
| |
| /// <summary> |
| /// The index into the tokens list of the current token (next token |
| /// to consume). p==-1 indicates that the tokens list is empty |
| /// </summary> |
| FP: Integer; |
| strict protected |
| /// <summary>Load all tokens from the token source and put in tokens. |
| /// This is done upon first LT request because you might want to |
| /// set some token type / channel overrides before filling buffer. |
| /// </summary> |
| procedure FillBuffer; virtual; |
| |
| /// <summary>Look backwards K on-channel tokens.</summary> |
| function LB(const K: Integer): IToken; virtual; |
| |
| /// <summary>Given a starting index, return the index of the first on-channel |
| /// token. |
| /// </summary> |
| function SkipOffTokenChannels(const I: Integer): Integer; virtual; |
| /// <summary>Counterpart of SkipOffTokenChannels; presumably scans towards |
| /// lower indexes (implementation not shown here - confirm). |
| /// </summary> |
| function SkipOffTokenChannelsReverse(const I: Integer): Integer; virtual; |
| protected |
| { IIntStream } |
| function GetSourceName: String; virtual; |
| |
| procedure Consume; virtual; |
| function LA(I: Integer): Integer; virtual; |
| function LAChar(I: Integer): Char; |
| function Mark: Integer; virtual; |
| function Index: Integer; virtual; |
| procedure Rewind(const Marker: Integer); overload; virtual; |
| procedure Rewind; overload; virtual; |
| procedure Release(const Marker: Integer); virtual; |
| procedure Seek(const Index: Integer); virtual; |
| function Size: Integer; virtual; |
| protected |
| { ITokenStream } |
| function GetTokenSource: ITokenSource; virtual; |
| procedure SetTokenSource(const Value: ITokenSource); virtual; |
| |
| function LT(const K: Integer): IToken; virtual; |
| function Get(const I: Integer): IToken; virtual; |
| // Both ToString overloads below are declared "reintroduce": they hide, |
| // rather than override, the inherited ToString. |
| function ToString(const Start, Stop: Integer): String; reintroduce; overload; virtual; |
| function ToString(const Start, Stop: IToken): String; reintroduce; overload; virtual; |
| protected |
| { ICommonTokenStream } |
| procedure SetTokenTypeChannel(const TType, Channel: Integer); |
| procedure DiscardTokenType(const TType: Integer); |
| procedure DiscardOffChannelTokens(const Discard: Boolean); |
| // GetTokens overloads: all tokens, a Start..Stop range, or a range |
| // restricted by a bit set, a list of types or a single token type. |
| function GetTokens: IList<IToken>; overload; |
| function GetTokens(const Start, Stop: Integer): IList<IToken>; overload; |
| function GetTokens(const Start, Stop: Integer; |
| const Types: IBitSet): IList<IToken>; overload; |
| function GetTokens(const Start, Stop: Integer; |
| const Types: IList<Integer>): IList<IToken>; overload; |
| function GetTokens(const Start, Stop, |
| TokenType: Integer): IList<IToken>; overload; |
| procedure Reset; virtual; |
| public |
| // Constructors accept either a generic token source or a lexer, each |
| // optionally combined with the channel to read tokens from. |
| constructor Create; overload; |
| constructor Create(const ATokenSource: ITokenSource); overload; |
| constructor Create(const ATokenSource: ITokenSource; |
| const AChannel: Integer); overload; |
| constructor Create(const ALexer: ILexer); overload; |
| constructor Create(const ALexer: ILexer; |
| const AChannel: Integer); overload; |
| |
| function ToString: String; overload; override; |
| end; |
| |
| /// <summary>Abstract base class for DFAs. Holds the tables (EOT, EOF, Min, |
| /// Max, Accept, Special, Transition), the decision number and a reference |
| /// to the recognizer, and implements IDFA. |
| /// </summary> |
| TDFA = class abstract(TANTLRObject, IDFA) |
| strict private |
| FSpecialStateTransitionHandler: TSpecialStateTransitionHandler; |
| FEOT: TSmallintArray; |
| FEOF: TSmallintArray; |
| FMin: TCharArray; |
| FMax: TCharArray; |
| FAccept: TSmallintArray; |
| FSpecial: TSmallintArray; |
| FTransition: TSmallintMatrix; |
| FDecisionNumber: Integer; |
| // Kept as an untyped pointer instead of an interface reference; presumably |
| // a non-counted reference to avoid a cycle with the recognizer - confirm |
| // in GetRecognizer / SetRecognizer. |
| FRecognizer: Pointer; { IBaseRecognizer } |
| function GetRecognizer: IBaseRecognizer; |
| procedure SetRecognizer(const Value: IBaseRecognizer); |
| strict protected |
| procedure NoViableAlt(const S: Integer; const Input: IIntStream); |
| |
| // Table and state accessors for descendants. |
| property Recognizer: IBaseRecognizer read GetRecognizer write SetRecognizer; |
| property DecisionNumber: Integer read FDecisionNumber write FDecisionNumber; |
| property EOT: TSmallintArray read FEOT write FEOT; |
| property EOF: TSmallintArray read FEOF write FEOF; |
| property Min: TCharArray read FMin write FMin; |
| property Max: TCharArray read FMax write FMax; |
| property Accept: TSmallintArray read FAccept write FAccept; |
| property Special: TSmallintArray read FSpecial write FSpecial; |
| property Transition: TSmallintMatrix read FTransition write FTransition; |
| protected |
| { IDFA } |
| function GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler; |
| procedure SetSpecialStateTransitionHandler(const Value: TSpecialStateTransitionHandler); |
| |
| function Predict(const Input: IIntStream): Integer; |
| procedure Error(const NVAE: ENoViableAltException); virtual; |
| function SpecialStateTransition(const S: Integer; |
| const Input: IIntStream): Integer; virtual; |
| function Description: String; virtual; |
| function SpecialTransition(const State, Symbol: Integer): Integer; |
| public |
| // Static helpers that turn encoded strings into the table types used above. |
| class function UnpackEncodedString(const EncodedString: String): TSmallintArray; static; |
| class function UnpackEncodedStringArray(const EncodedStrings: TStringArray): TSmallintMatrix; overload; static; |
| class function UnpackEncodedStringArray(const EncodedStrings: array of String): TSmallintMatrix; overload; static; |
| class function UnpackEncodedStringToUnsignedChars(const EncodedString: String): TCharArray; static; |
| end; |
| |
| /// <summary>Abstract base class for lexers: a TBaseRecognizer that reads from |
| /// an ICharStream and implements ILexer and ITokenSource. Descendants must |
| /// implement the abstract DoTokens method. |
| /// </summary> |
| TLexer = class abstract(TBaseRecognizer, ILexer, ITokenSource) |
| strict private |
| const |
| // Ordinal value of the cscEOF character-stream constant. |
| TOKEN_dot_EOF = Ord(cscEOF); |
| strict private |
| /// <summary>Where is the lexer drawing characters from? </summary> |
| FInput: ICharStream; |
| protected |
| { IBaseRecognizer } |
| function GetSourceName: String; override; |
| function GetInput: IIntStream; override; |
| procedure Reset; override; |
| procedure ReportError(const E: ERecognitionException); override; |
| function GetErrorMessage(const E: ERecognitionException; |
| const TokenNames: TStringArray): String; override; |
| protected |
| { ILexer } |
| function GetCharStream: ICharStream; virtual; |
| procedure SetCharStream(const Value: ICharStream); virtual; |
| function GetLine: Integer; virtual; |
| function GetCharPositionInLine: Integer; virtual; |
| function GetCharIndex: Integer; virtual; |
| function GetText: String; virtual; |
| procedure SetText(const Value: String); virtual; |
| |
| function NextToken: IToken; virtual; |
| procedure Skip; |
| procedure DoTokens; virtual; abstract; |
| procedure Emit(const Token: IToken); overload; virtual; |
| function Emit: IToken; overload; virtual; |
| // The members below are declared "reintroduce": they hide the inherited |
| // members of the same name instead of overriding them. |
| procedure Match(const S: String); reintroduce; overload; virtual; |
| procedure Match(const C: Integer); reintroduce; overload; virtual; |
| procedure MatchAny; reintroduce; overload; virtual; |
| procedure MatchRange(const A, B: Integer); virtual; |
| procedure Recover(const RE: ERecognitionException); reintroduce; overload; virtual; |
| function GetCharErrorDisplay(const C: Integer): String; |
| procedure TraceIn(const RuleName: String; const RuleIndex: Integer); reintroduce; overload; virtual; |
| procedure TraceOut(const RuleName: String; const RuleIndex: Integer); reintroduce; overload; virtual; |
| strict protected |
| // Typed view of the input as a character stream for descendants. |
| property Input: ICharStream read FInput; |
| property CharIndex: Integer read GetCharIndex; |
| property Text: String read GetText write SetText; |
| public |
| constructor Create; overload; |
| constructor Create(const AInput: ICharStream); overload; |
| constructor Create(const AInput: ICharStream; |
| const AState: IRecognizerSharedState); overload; |
| end; |
| |
| /// <summary>Base class for parsers: a TBaseRecognizer that reads from an |
| /// ITokenStream and implements IParser. |
| /// </summary> |
| TParser = class(TBaseRecognizer, IParser) |
| strict private |
| // The token stream this parser reads from. |
| FInput: ITokenStream; |
| protected |
| // Typed, read-only view of the input for descendants. |
| property Input: ITokenStream read FInput; |
| protected |
| { IBaseRecognizer } |
| procedure Reset; override; |
| function GetCurrentInputSymbol(const Input: IIntStream): IANTLRInterface; override; |
| function GetMissingSymbol(const Input: IIntStream; |
| const E: ERecognitionException; const ExpectedTokenType: Integer; |
| const Follow: IBitSet): IANTLRInterface; override; |
| function GetSourceName: String; override; |
| function GetInput: IIntStream; override; |
| protected |
| { IParser } |
| function GetTokenStream: ITokenStream; virtual; |
| procedure SetTokenStream(const Value: ITokenStream); virtual; |
| |
| // Two-argument trace overloads; "reintroduce" hides the inherited versions. |
| procedure TraceIn(const RuleName: String; const RuleIndex: Integer); reintroduce; overload; |
| procedure TraceOut(const RuleName: String; const RuleIndex: Integer); reintroduce; overload; |
| public |
| constructor Create(const AInput: ITokenStream); overload; |
| constructor Create(const AInput: ITokenStream; |
| const AState: IRecognizerSharedState); overload; |
| end; |
| |
| /// <summary>Base implementation of IRuleReturnScope. It declares no fields of |
| /// its own; every accessor is virtual so descendants can supply storage. |
| /// </summary> |
| TRuleReturnScope = class(TANTLRObject, IRuleReturnScope) |
| protected |
| { IRuleReturnScope } |
| function GetStart: IANTLRInterface; virtual; |
| procedure SetStart(const Value: IANTLRInterface); virtual; |
| function GetStop: IANTLRInterface; virtual; |
| procedure SetStop(const Value: IANTLRInterface); virtual; |
| function GetTree: IANTLRInterface; virtual; |
| procedure SetTree(const Value: IANTLRInterface); virtual; |
| function GetTemplate: IANTLRInterface; virtual; |
| end; |
| |
| /// <summary>Rule return scope for parsers: overrides the Start / Stop |
| /// accessors of TRuleReturnScope and keeps both values as IToken fields. |
| /// </summary> |
| TParserRuleReturnScope = class(TRuleReturnScope, IParserRuleReturnScope) |
| strict private |
| FStart: IToken; |
| FStop: IToken; |
| protected |
| { IRuleReturnScope } |
| function GetStart: IANTLRInterface; override; |
| procedure SetStart(const Value: IANTLRInterface); override; |
| function GetStop: IANTLRInterface; override; |
| procedure SetStop(const Value: IANTLRInterface); override; |
| end; |
| |
| /// <summary>A TCommonTokenStream that additionally keeps named lists |
| /// ("programs") of rewrite operations (insert-before, replace, delete) |
| /// addressed by token index, and implements ITokenRewriteStream. |
| /// </summary> |
| TTokenRewriteStream = class(TCommonTokenStream, ITokenRewriteStream) |
| public |
| const |
| DEFAULT_PROGRAM_NAME = 'default'; |
| PROGRAM_INIT_SIZE = 100; |
| MIN_TOKEN_INDEX = 0; |
| strict protected |
| // Define the rewrite operation hierarchy |
| type |
| IRewriteOperation = interface(IANTLRInterface) |
| ['{285A54ED-58FF-44B1-A268-2686476D4419}'] |
| { Property accessors } |
| function GetInstructionIndex: Integer; |
| procedure SetInstructionIndex(const Value: Integer); |
| function GetIndex: Integer; |
| procedure SetIndex(const Value: Integer); |
| function GetText: IANTLRInterface; |
| procedure SetText(const Value: IANTLRInterface); |
| function GetParent: ITokenRewriteStream; |
| procedure SetParent(const Value: ITokenRewriteStream); |
| |
| { Methods } |
| |
| /// <summary>Execute the rewrite operation by possibly adding to the buffer. |
| /// Return the index of the next token to operate on. |
| /// </summary> |
| function Execute(const Buf: TStringBuilder): Integer; |
| |
| { Properties } |
| property InstructionIndex: Integer read GetInstructionIndex write SetInstructionIndex; |
| property Index: Integer read GetIndex write SetIndex; |
| property Text: IANTLRInterface read GetText write SetText; |
| property Parent: ITokenRewriteStream read GetParent write SetParent; |
| end; |
| |
| /// <summary>Common base class of all rewrite operations.</summary> |
| TRewriteOperation = class(TANTLRObject, IRewriteOperation) |
| strict private |
| // What index into rewrites List are we? |
| FInstructionIndex: Integer; |
| // Token buffer index |
| FIndex: Integer; |
| FText: IANTLRInterface; |
| // Kept as an untyped pointer rather than an interface reference; |
| // presumably a non-counted back-reference to the owning stream - confirm |
| // in GetParent / SetParent. |
| FParent: Pointer; {ITokenRewriteStream;} |
| protected |
| { IRewriteOperation } |
| function GetInstructionIndex: Integer; |
| procedure SetInstructionIndex(const Value: Integer); |
| function GetIndex: Integer; |
| procedure SetIndex(const Value: Integer); |
| function GetText: IANTLRInterface; |
| procedure SetText(const Value: IANTLRInterface); |
| function GetParent: ITokenRewriteStream; |
| procedure SetParent(const Value: ITokenRewriteStream); |
| |
| function Execute(const Buf: TStringBuilder): Integer; virtual; |
| protected |
| constructor Create(const AIndex: Integer; const AText: IANTLRInterface; |
| const AParent: ITokenRewriteStream); |
| |
| property Index: Integer read FIndex write FIndex; |
| property Text: IANTLRInterface read FText write FText; |
| property Parent: ITokenRewriteStream read GetParent write SetParent; |
| public |
| function ToString: String; override; |
| end; |
| |
| IInsertBeforeOp = interface(IRewriteOperation) |
| ['{BFB732E2-BE6A-4691-AE3B-5C8013DE924E}'] |
| end; |
| |
| TInsertBeforeOp = class(TRewriteOperation, IInsertBeforeOp) |
| protected |
| { IRewriteOperation } |
| function Execute(const Buf: TStringBuilder): Integer; override; |
| end; |
| |
| /// <summary>I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp |
| /// instructions. |
| /// </summary> |
| IReplaceOp = interface(IRewriteOperation) |
| ['{630C434A-99EA-4589-A65D-64A7B3DAC407}'] |
| { Property accessors } |
| function GetLastIndex: Integer; |
| procedure SetLastIndex(const Value: Integer); |
| |
| { Properties } |
| property LastIndex: Integer read GetLastIndex write SetLastIndex; |
| end; |
| |
| TReplaceOp = class(TRewriteOperation, IReplaceOp) |
| private |
| FLastIndex: Integer; |
| protected |
| { IRewriteOperation } |
| function Execute(const Buf: TStringBuilder): Integer; override; |
| protected |
| { IReplaceOp } |
| function GetLastIndex: Integer; |
| procedure SetLastIndex(const Value: Integer); |
| public |
| constructor Create(const AStart, AStop: Integer; |
| const AText: IANTLRInterface; const AParent: ITokenRewriteStream); |
| |
| function ToString: String; override; |
| end; |
| |
| IDeleteOp = interface(IRewriteOperation) |
| ['{C39345BC-F170-4C3A-A989-65E6B9F0712B}'] |
| end; |
| |
| // NOTE(review): TDeleteOp derives from TReplaceOp but does not list |
| // IDeleteOp, so a delete operation is only identifiable as an IReplaceOp - |
| // confirm this is intended. |
| TDeleteOp = class(TReplaceOp) |
| public |
| function ToString: String; override; |
| end; |
| strict private |
| type |
| // Comparer for rewrite operations; the ordering is defined by Compare in |
| // the implementation section. |
| TRewriteOpComparer<T: IRewriteOperation> = class(TComparer<T>) |
| public |
| function Compare(const Left, Right: T): Integer; override; |
| end; |
| strict private |
| /// <summary>You may have multiple, named streams of rewrite operations. |
| /// I'm calling these things "programs." |
| /// Maps String (name) -> rewrite (IList) |
| /// </summary> |
| FPrograms: IDictionary<String, IList<IRewriteOperation>>; |
| |
| /// <summary>Map String (program name) -> Integer index </summary> |
| FLastRewriteTokenIndexes: IDictionary<String, Integer>; |
| strict private |
| function InitializeProgram(const Name: String): IList<IRewriteOperation>; |
| protected |
| { ITokenRewriteStream } |
| procedure Rollback(const InstructionIndex: Integer); overload; virtual; |
| procedure Rollback(const ProgramName: String; |
| const InstructionIndex: Integer); overload; virtual; |
| |
| procedure DeleteProgram; overload; virtual; |
| procedure DeleteProgram(const ProgramName: String); overload; virtual; |
| |
| // InsertAfter / InsertBefore / Replace come in two families: the virtual |
| // overloads take the text as IANTLRInterface, the non-virtual ones as String. |
| procedure InsertAfter(const T: IToken; const Text: IANTLRInterface); overload; virtual; |
| procedure InsertAfter(const Index: Integer; const Text: IANTLRInterface); overload; virtual; |
| procedure InsertAfter(const ProgramName: String; const T: IToken; |
| const Text: IANTLRInterface); overload; virtual; |
| procedure InsertAfter(const ProgramName: String; const Index: Integer; |
| const Text: IANTLRInterface); overload; virtual; |
| procedure InsertAfter(const T: IToken; const Text: String); overload; |
| procedure InsertAfter(const Index: Integer; const Text: String); overload; |
| procedure InsertAfter(const ProgramName: String; const T: IToken; |
| const Text: String); overload; |
| procedure InsertAfter(const ProgramName: String; const Index: Integer; |
| const Text: String); overload; |
| |
| procedure InsertBefore(const T: IToken; const Text: IANTLRInterface); overload; virtual; |
| procedure InsertBefore(const Index: Integer; const Text: IANTLRInterface); overload; virtual; |
| procedure InsertBefore(const ProgramName: String; const T: IToken; |
| const Text: IANTLRInterface); overload; virtual; |
| procedure InsertBefore(const ProgramName: String; const Index: Integer; |
| const Text: IANTLRInterface); overload; virtual; |
| procedure InsertBefore(const T: IToken; const Text: String); overload; |
| procedure InsertBefore(const Index: Integer; const Text: String); overload; |
| procedure InsertBefore(const ProgramName: String; const T: IToken; |
| const Text: String); overload; |
| procedure InsertBefore(const ProgramName: String; const Index: Integer; |
| const Text: String); overload; |
| |
| procedure Replace(const Index: Integer; const Text: IANTLRInterface); overload; virtual; |
| procedure Replace(const Start, Stop: Integer; const Text: IANTLRInterface); overload; virtual; |
| procedure Replace(const IndexT: IToken; const Text: IANTLRInterface); overload; virtual; |
| procedure Replace(const Start, Stop: IToken; const Text: IANTLRInterface); overload; virtual; |
| procedure Replace(const ProgramName: String; const Start, Stop: Integer; |
| const Text: IANTLRInterface); overload; virtual; |
| procedure Replace(const ProgramName: String; const Start, Stop: IToken; |
| const Text: IANTLRInterface); overload; virtual; |
| procedure Replace(const Index: Integer; const Text: String); overload; |
| procedure Replace(const Start, Stop: Integer; const Text: String); overload; |
| procedure Replace(const IndexT: IToken; const Text: String); overload; |
| procedure Replace(const Start, Stop: IToken; const Text: String); overload; |
| procedure Replace(const ProgramName: String; const Start, Stop: Integer; |
| const Text: String); overload; |
| procedure Replace(const ProgramName: String; const Start, Stop: IToken; |
| const Text: String); overload; |
| |
| procedure Delete(const Index: Integer); overload; virtual; |
| procedure Delete(const Start, Stop: Integer); overload; virtual; |
| procedure Delete(const IndexT: IToken); overload; virtual; |
| procedure Delete(const Start, Stop: IToken); overload; virtual; |
| procedure Delete(const ProgramName: String; const Start, Stop: Integer); overload; virtual; |
| procedure Delete(const ProgramName: String; const Start, Stop: IToken); overload; virtual; |
| |
| function GetLastRewriteTokenIndex: Integer; overload; virtual; |
| |
| function ToOriginalString: String; overload; virtual; |
| function ToOriginalString(const Start, Stop: Integer): String; overload; virtual; |
| |
| function ToString(const ProgramName: String): String; overload; virtual; |
| function ToString(const ProgramName: String; |
| const Start, Stop: Integer): String; overload; virtual; |
| |
| function ToDebugString: String; overload; virtual; |
| function ToDebugString(const Start, Stop: Integer): String; overload; virtual; |
| protected |
| { ITokenStream } |
| function ToString(const Start, Stop: Integer): String; overload; override; |
| strict protected |
| procedure Init; virtual; |
| function GetProgram(const Name: String): IList<IRewriteOperation>; virtual; |
| function GetLastRewriteTokenIndex(const ProgramName: String): Integer; overload; virtual; |
| procedure SetLastRewriteTokenIndex(const ProgramName: String; const I: Integer); overload; virtual; |
| |
| /// <summary> |
| /// Return a map from token index to operation. |
| /// </summary> |
| /// <remarks>We need to combine operations and report invalid operations (like |
| /// overlapping replaces that are not completely nested). Inserts to |
| /// same index need to be combined etc... Here are the cases: |
| /// |
| /// I.i.u I.j.v leave alone, nonoverlapping |
| /// I.i.u I.i.v combine: Iivu |
| /// |
| /// R.i-j.u R.x-y.v | i-j in x-y delete first R |
| /// R.i-j.u R.i-j.v delete first R |
| /// R.i-j.u R.x-y.v | x-y in i-j ERROR |
| /// R.i-j.u R.x-y.v | boundaries overlap ERROR |
| /// |
| /// I.i.u R.x-y.v | i in x-y delete I |
| /// I.i.u R.x-y.v | i not in x-y leave alone, nonoverlapping |
| /// R.x-y.v I.i.u | i in x-y ERROR |
| /// R.x-y.v I.x.u R.x-y.uv (combine, delete I) |
| /// R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping |
| /// |
| /// I.i.u = insert u before op @ index i |
| /// R.x-y.u = replace x-y indexed tokens with u |
| /// |
| /// First we need to examine replaces. For any replace op: |
| /// |
| /// 1. wipe out any insertions before op within that range. |
| /// 2. Drop any replace op before that is contained completely within |
| /// that range. |
| /// 3. Throw exception upon boundary overlap with any previous replace. |
| /// |
| /// Then we can deal with inserts: |
| /// |
| /// 1. for any inserts to same index, combine even if not adjacent. |
| /// 2. for any prior replace with same left boundary, combine this |
| /// insert with replace and delete this replace. |
| /// 3. throw exception if index in same range as previous replace |
| /// |
| /// Don't actually delete; make op null in list. Easier to walk list. |
| /// Later we can throw as we add to index -> op map. |
| /// |
| /// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the |
| /// inserted stuff would be before the replace range. But, if you |
| /// add tokens in front of a method body '{' and then delete the method |
| /// body, I think the stuff before the '{' you added should disappear too. |
| /// </remarks> |
| function ReduceToSingleOperationPerIndex( |
| const Rewrites: IList<IRewriteOperation>): IDictionary<Integer, IRewriteOperation>; |
| |
| /// <summary> |
| /// Get all operations of a particular kind; the kind is given as an |
| /// interface GUID. |
| /// </summary> |
| function GetKindOfOps(const Rewrites: IList<IRewriteOperation>; |
| const Kind: TGUID): IList<IRewriteOperation>; overload; |
| /// <summary> |
| /// Get all operations before an index of a particular kind |
| /// </summary> |
| function GetKindOfOps(const Rewrites: IList<IRewriteOperation>; |
| const Kind: TGUID; const Before: Integer): IList<IRewriteOperation>; overload; |
| |
| function CatOpText(const A, B: IANTLRInterface): IANTLRInterface; |
| public |
| // Same constructor set as TCommonTokenStream. |
| constructor Create; overload; |
| constructor Create(const ATokenSource: ITokenSource); overload; |
| constructor Create(const ATokenSource: ITokenSource; |
| const AChannel: Integer); overload; |
| constructor Create(const ALexer: ILexer); overload; |
| constructor Create(const ALexer: ILexer; |
| const AChannel: Integer); overload; |
| |
| function ToString: String; overload; override; |
| end; |
| |
| { These functions return X or, if X = nil, an empty default instance. |
| Overloaded for IToken and IRuleReturnScope. } |
| function Def(const X: IToken): IToken; overload; |
| function Def(const X: IRuleReturnScope): IRuleReturnScope; overload; |
| |
| implementation |
| |
| uses |
| StrUtils, |
| Math, |
| Antlr.Runtime.Tree; |
| |
| { ERecognitionException } |
| |
| /// <summary>Creates the exception with an empty message and no input stream.</summary> |
| constructor ERecognitionException.Create; |
| const |
|   NoMessage = ''; |
| begin |
|   Create(NoMessage, nil); |
| end; |
| |
| /// <summary>Creates the exception with the given message and no input stream.</summary> |
| constructor ERecognitionException.Create(const AMessage: String); |
| var |
|   NoInput: IIntStream; |
| begin |
|   NoInput := nil; |
|   Create(AMessage, NoInput); |
| end; |
| |
| /// <summary>Creates the exception for the given input stream with an empty message.</summary> |
| constructor ERecognitionException.Create(const AInput: IIntStream); |
| const |
|   NoMessage = ''; |
| begin |
|   Create(NoMessage, AInput); |
| end; |
| |
| /// <summary>Creates the exception and captures the current position of the |
| /// input stream: its index and, depending on the kind of stream, the |
| /// offending token / node / character plus line and column. |
| /// </summary> |
| /// <param name="AMessage">Exception message.</param> |
| /// <param name="AInput">Stream that was being read; may be nil, in which |
| /// case no position information is recorded.</param> |
| constructor ERecognitionException.Create(const AMessage: String; |
|   const AInput: IIntStream); |
| var |
|   TokenStream: ITokenStream; |
|   CharStream: ICharStream; |
| begin |
|   inherited Create(AMessage); |
|   FInput := AInput; |
| |
|   // The parameterless and message-only constructors pass nil here; there is |
|   // nothing to inspect then, and dereferencing nil would raise an access |
|   // violation. |
|   if (AInput = nil) then |
|     Exit; |
| |
|   FIndex := AInput.Index; |
| |
|   if Supports(AInput, ITokenStream, TokenStream) then |
|   begin |
|     FToken := TokenStream.LT(1); |
|     // Guard against a stream that has no lookahead token to offer. |
|     if Assigned(FToken) then |
|     begin |
|       FLine := FToken.Line; |
|       FCharPositionInLine := FToken.CharPositionInLine; |
|     end; |
|   end; |
| |
|   if Supports(AInput, ITreeNodeStream) then |
|     ExtractInformationFromTreeNodeStream(AInput) |
|   else |
|   begin |
|     // Every non-tree stream records the next symbol; character streams also |
|     // provide line / column information. |
|     FC := AInput.LA(1); |
|     if Supports(AInput, ICharStream, CharStream) then |
|     begin |
|       FLine := CharStream.Line; |
|       FCharPositionInLine := CharStream.CharPositionInLine; |
|     end; |
|   end; |
| end; |
| |
| /// <summary>Fills FNode, FToken, FLine and FCharPositionInLine from a tree |
| /// node stream. When the current node's token carries no line information |
| /// (Line <= 0), earlier nodes are scanned for the most recent token that |
| /// does, and the result is flagged as approximate. |
| /// </summary> |
| /// <param name="Input">Stream to inspect; must support ITreeNodeStream.</param> |
| procedure ERecognitionException.ExtractInformationFromTreeNodeStream( |
|   const Input: IIntStream); |
| var |
|   Stream: ITreeNodeStream; |
|   TreeAdaptor: ITreeAdaptor; |
|   NodeToken, EarlierToken: IToken; |
|   Offset, Kind: Integer; |
|   Earlier: IANTLRInterface; |
|   AsTree: ITree; |
|   Caption: String; |
|   AsCommonTree: ICommonTree; |
| begin |
|   Stream := Input as ITreeNodeStream; |
|   FNode := Stream.LT(1); |
|   TreeAdaptor := Stream.TreeAdaptor; |
|   NodeToken := TreeAdaptor.GetToken(FNode); |
| |
|   if not Assigned(NodeToken) then |
|   begin |
|     // No token behind the node: use the tree itself or synthesize a token. |
|     if Supports(FNode, ITree, AsTree) then |
|     begin |
|       FLine := AsTree.Line; |
|       FCharPositionInLine := AsTree.CharPositionInLine; |
|       if Supports(FNode, ICommonTree, AsCommonTree) then |
|         FToken := AsCommonTree.Token; |
|     end |
|     else |
|     begin |
|       Kind := TreeAdaptor.GetNodeType(FNode); |
|       Caption := TreeAdaptor.GetNodeText(FNode); |
|       FToken := TCommonToken.Create(Kind, Caption); |
|     end; |
|     Exit; |
|   end; |
| |
|   FToken := NodeToken; |
| |
|   if (NodeToken.Line > 0) then |
|   begin |
|     // node created from real token |
|     FLine := NodeToken.Line; |
|     FCharPositionInLine := NodeToken.CharPositionInLine; |
|     Exit; |
|   end; |
| |
|   // imaginary node; no line/pos info; scan backwards |
|   Offset := -1; |
|   Earlier := Stream.LT(Offset); |
|   while Assigned(Earlier) do |
|   begin |
|     EarlierToken := TreeAdaptor.GetToken(Earlier); |
|     if Assigned(EarlierToken) and (EarlierToken.Line > 0) then |
|     begin |
|       // we found the most recent real line / pos info |
|       FLine := EarlierToken.Line; |
|       FCharPositionInLine := EarlierToken.CharPositionInLine; |
|       FApproximateLineInfo := True; |
|       Exit; |
|     end; |
|     Dec(Offset); |
|     Earlier := Stream.LT(Offset); |
|   end; |
| end; |
| |
| /// <summary>Returns the type of the unexpected symbol: the token type for |
| /// token streams, the node type for tree node streams, otherwise the |
| /// recorded symbol FC. |
| /// </summary> |
| function ERecognitionException.GetUnexpectedType: Integer; |
| var |
|   TreeStream: ITreeNodeStream; |
|   TreeAdaptor: ITreeAdaptor; |
| begin |
|   if Supports(FInput, ITokenStream) then |
|     Exit(FToken.TokenType); |
| |
|   if Supports(FInput, ITreeNodeStream, TreeStream) then |
|   begin |
|     TreeAdaptor := TreeStream.TreeAdaptor; |
|     Exit(TreeAdaptor.GetNodeType(FNode)); |
|   end; |
| |
|   Result := FC; |
| end; |
| |
| { EMismatchedTokenException } |
| |
| /// <summary>Creates the exception for AInput and remembers the token type |
| /// that was expected. |
| /// </summary> |
| constructor EMismatchedTokenException.Create(const AExpecting: Integer; |
|   const AInput: IIntStream); |
| begin |
|   inherited Create(AInput); |
|   Self.FExpecting := AExpecting; |
| end; |
| |
| /// <summary>Returns 'MismatchedTokenException(found!=expected)' using the |
| /// numeric token types. |
| /// </summary> |
| function EMismatchedTokenException.ToString: String; |
| var |
|   Found, Wanted: String; |
| begin |
|   Found := IntToStr(UnexpectedType); |
|   Wanted := IntToStr(Expecting); |
|   Result := 'MismatchedTokenException(' + Found + '!=' + Wanted + ')'; |
| end; |
| |
| { EUnwantedTokenException } |
| |
| /// <summary>Returns the token recorded in FToken.</summary> |
| function EUnwantedTokenException.GetUnexpectedToken: IToken; |
| begin |
|   Exit(FToken); |
| end; |
| |
| /// <summary>Returns 'UnwantedTokenException(found=...)'. The found part is the |
| /// token text, or 'nil' without a token; ', expected N' is appended unless |
| /// the expected type is TToken.INVALID_TOKEN_TYPE. |
| /// </summary> |
| function EUnwantedTokenException.ToString: String; |
| var |
|   Suffix, Found: String; |
| begin |
|   Suffix := ''; |
|   if (Expecting <> TToken.INVALID_TOKEN_TYPE) then |
|     Suffix := ', expected ' + IntToStr(Expecting); |
| |
|   if (Token <> nil) then |
|     Found := Token.Text |
|   else |
|     Found := 'nil'; |
| |
|   Result := 'UnwantedTokenException(found=' + Found + Suffix + ')'; |
| end; |
| |
| { EMissingTokenException } |
| |
| /// <summary>Creates the exception for the expected token type and input, and |
| /// remembers the object that was inserted in place of the missing token. |
| /// </summary> |
| constructor EMissingTokenException.Create(const AExpecting: Integer; |
|   const AInput: IIntStream; const AInserted: IANTLRInterface); |
| begin |
|   inherited Create(AExpecting, AInput); |
|   Self.FInserted := AInserted; |
| end; |
| |
| /// <summary>The missing token type is the expected token type.</summary> |
| function EMissingTokenException.GetMissingType: Integer; |
| begin |
|   Exit(Expecting); |
| end; |
| |
| /// <summary>Describes the exception; mentions the inserted object and the |
| /// token text when they are available. |
| /// </summary> |
| function EMissingTokenException.ToString: String; |
| begin |
|   // Without a token neither of the detailed forms applies. |
|   if not Assigned(FToken) then |
|     Exit('MissingTokenException'); |
| |
|   if Assigned(FInserted) then |
|     Result := 'MissingTokenException(inserted ' + FInserted.ToString |
|       + ' at ' + FToken.Text + ')' |
|   else |
|     Result := 'MissingTokenException(at ' + FToken.Text + ')'; |
| end; |
| |
| { EMismatchedTreeNodeException } |
| |
| /// <summary>Creates the exception for AInput and remembers the node type that |
| /// was expected. |
| /// </summary> |
| constructor EMismatchedTreeNodeException.Create(const AExpecting: Integer; |
|   const AInput: IIntStream); |
| begin |
|   inherited Create(AInput); |
|   Self.FExpecting := AExpecting; |
| end; |
| |
| /// <summary>Returns 'MismatchedTreeNodeException(found!=expected)' using the |
| /// numeric types. |
| /// </summary> |
| function EMismatchedTreeNodeException.ToString: String; |
| var |
|   Found, Wanted: String; |
| begin |
|   Found := IntToStr(UnexpectedType); |
|   Wanted := IntToStr(Expecting); |
|   Result := 'MismatchedTreeNodeException(' + Found + '!=' + Wanted + ')'; |
| end; |
| |
| { ENoViableAltException } |
| |
| /// <summary>Creates the exception for AInput and stores the description of |
| /// the grammar decision together with the decision and state numbers. |
| /// </summary> |
| constructor ENoViableAltException.Create( |
|   const AGrammarDecisionDescription: String; const ADecisionNumber, |
|   AStateNumber: Integer; const AInput: IIntStream); |
| begin |
|   inherited Create(AInput); |
|   FStateNumber := AStateNumber; |
|   FDecisionNumber := ADecisionNumber; |
|   FGrammarDecisionDescription := AGrammarDecisionDescription; |
| end; |
| |
| /// <summary>Returns 'NoViableAltException('X'@[decision])' where X is the |
| /// unexpected symbol: shown as a character for character streams and as a |
| /// number otherwise. |
| /// </summary> |
| function ENoViableAltException.ToString: String; |
| var |
|   Shown: String; |
| begin |
|   if Supports(Input, ICharStream) then |
|     Shown := Char(UnexpectedType) |
|   else |
|     Shown := IntToStr(UnexpectedType); |
| |
|   Result := 'NoViableAltException(''' + Shown + '''@[' |
|     + FGrammarDecisionDescription + '])'; |
| end; |
| |
| { EEarlyExitException } |
| |
| /// <summary>Creates the exception for AInput and stores the decision number.</summary> |
| constructor EEarlyExitException.Create(const ADecisionNumber: Integer; |
|   const AInput: IIntStream); |
| begin |
|   inherited Create(AInput); |
|   Self.FDecisionNumber := ADecisionNumber; |
| end; |
| |
| { EMismatchedSetException } |
| |
| /// <summary>Creates the exception for AInput and stores the set of expected |
| /// types. |
| /// </summary> |
| constructor EMismatchedSetException.Create(const AExpecting: IBitSet; |
|   const AInput: IIntStream); |
| begin |
|   inherited Create(AInput); |
|   Self.FExpecting := AExpecting; |
| end; |
| |
| /// <summary>Returns 'MismatchedSetException(found!=set)' with the numeric |
| /// unexpected type and the string form of the expected set. |
| /// </summary> |
| function EMismatchedSetException.ToString: String; |
| var |
|   Found, Wanted: String; |
| begin |
|   Found := IntToStr(UnexpectedType); |
|   Wanted := Expecting.ToString; |
|   Result := 'MismatchedSetException(' + Found + '!=' + Wanted + ')'; |
| end; |
| |
| { EMismatchedNotSetException } |
| |
| /// <summary>Returns 'MismatchedNotSetException(found!=set)' with the numeric |
| /// unexpected type and the string form of the expected set. |
| /// </summary> |
| function EMismatchedNotSetException.ToString: String; |
| var |
|   Found, Wanted: String; |
| begin |
|   Found := IntToStr(UnexpectedType); |
|   Wanted := Expecting.ToString; |
|   Result := 'MismatchedNotSetException(' + Found + '!=' + Wanted + ')'; |
| end; |
| |
| { EFailedPredicateException } |
| |
| /// <summary>Creates the exception for AInput and stores the rule name and the |
| /// text of the predicate that failed. |
| /// </summary> |
| constructor EFailedPredicateException.Create(const AInput: IIntStream; |
|   const ARuleName, APredicateText: String); |
| begin |
|   inherited Create(AInput); |
|   FPredicateText := APredicateText; |
|   FRuleName := ARuleName; |
| end; |
| |
| /// <summary>Returns 'FailedPredicateException(rule,{predicate}?)'.</summary> |
| function EFailedPredicateException.ToString: String; |
| begin |
|   Result := Format('FailedPredicateException(%s,{%s}?)', |
|     [FRuleName, FPredicateText]); |
| end; |
| |
| { EMismatchedRangeException } |
| |
| /// <summary>Creates the exception for AInput and stores the bounds AA..AB of |
| /// the range that was expected. |
| /// </summary> |
| /// <param name="AA">Lower bound of the expected range.</param> |
| /// <param name="AB">Upper bound of the expected range.</param> |
| /// <param name="AInput">Stream that was being read.</param> |
| constructor EMismatchedRangeException.Create(const AA, AB: Integer; |
|   const AInput: IIntStream); |
| begin |
|   // Pass the parameter, not the field: FInput has not been assigned yet at |
|   // this point, so forwarding it would drop the stream and lose all position |
|   // information. |
|   inherited Create(AInput); |
|   FA := AA; |
|   FB := AB; |
| end; |
| |
| /// <summary>Returns 'MismatchedRangeException(found not in [a,b])' with the |
| /// numeric unexpected type and the range bounds. |
| /// </summary> |
| function EMismatchedRangeException.ToString: String; |
| begin |
|   // Report this class' own name; the text previously named |
|   // MismatchedNotSetException, which made the two exceptions |
|   // indistinguishable in messages. |
|   Result := 'MismatchedRangeException(' + IntToStr(UnexpectedType) |
|     + ' not in [' + IntToStr(FA) + ',' + IntToStr(FB) + '])'; |
| end; |
| |
| { TCharStreamState } |
| |
| /// <summary>Returns the stored character position within the line.</summary> |
| function TCharStreamState.GetCharPositionInLine: Integer; |
| begin |
|   Exit(FCharPositionInLine); |
| end; |
| |
| /// <summary>Returns the stored line number.</summary> |
| function TCharStreamState.GetLine: Integer; |
| begin |
|   Exit(FLine); |
| end; |
| |
| /// <summary>Returns the stored stream index.</summary> |
| function TCharStreamState.GetP: Integer; |
| begin |
|   Exit(FP); |
| end; |
| |
| /// <summary>Stores the character position within the line.</summary> |
| procedure TCharStreamState.SetCharPositionInLine(const Value: Integer); |
| begin |
|   Self.FCharPositionInLine := Value; |
| end; |
| |
| /// <summary>Stores the line number.</summary> |
| procedure TCharStreamState.SetLine(const Value: Integer); |
| begin |
|   Self.FLine := Value; |
| end; |
| |
| /// <summary>Stores the stream index.</summary> |
| procedure TCharStreamState.SetP(const Value: Integer); |
| begin |
|   Self.FP := Value; |
| end; |
| |
| { TANTLRStringStream } |
| |
| /// <summary>Creates a stream over a private copy of AInput. The copy is owned |
| /// by the stream (FOwnsData) and released in Destroy. |
| /// </summary> |
| constructor TANTLRStringStream.Create(const AInput: String); |
| var |
|   ByteCount: Integer; |
| begin |
|   inherited Create; |
|   FLine := 1; |
|   FOwnsData := True; |
|   FN := Length(AInput); |
| |
|   // Nothing to copy for an empty string; FData keeps its initial value. |
|   if (FN <= 0) then |
|     Exit; |
| |
|   ByteCount := FN * SizeOf(Char); |
|   GetMem(FData, ByteCount); |
|   Move(AInput[1], FData^, ByteCount); |
| end; |
| |
| /// <summary>Advances by one character and updates line / column. Does nothing |
| /// once all FN characters have been consumed. |
| /// </summary> |
| procedure TANTLRStringStream.Consume; |
| begin |
|   if (FP >= FN) then |
|     Exit; |
| |
|   if (FData[FP] = #10) then |
|   begin |
|     // A line feed starts a new line at column 0. |
|     Inc(FLine); |
|     FCharPositionInLine := 0; |
|   end |
|   else |
|     Inc(FCharPositionInLine); |
| |
|   Inc(FP); |
| end; |
| |
| /// <summary>Creates a stream directly over the caller's buffer. The buffer is |
| /// not copied and not owned (FOwnsData is False), so Destroy does not free it. |
| /// </summary> |
| constructor TANTLRStringStream.Create(const AData: PChar; |
|   const ANumberOfActualCharsInArray: Integer); |
| begin |
|   inherited Create; |
|   FData := AData; |
|   FN := ANumberOfActualCharsInArray; |
|   FOwnsData := False; |
|   FLine := 1; |
| end; |
| |
| /// <summary>Creates a stream without data; line numbering starts at 1.</summary> |
| constructor TANTLRStringStream.Create; |
| begin |
|   inherited Create; |
|   Self.FLine := 1; |
| end; |
| |
| /// <summary>Frees the character buffer if this stream owns it.</summary> |
| destructor TANTLRStringStream.Destroy; |
| begin |
|   if FOwnsData then |
|     FreeMem(FData); |
|   inherited Destroy; |
| end; |
| |
| /// <summary>Returns the current character position within the line.</summary> |
| function TANTLRStringStream.GetCharPositionInLine: Integer; |
| begin |
|   Exit(FCharPositionInLine); |
| end; |
| |
| /// <summary>Returns the current line number.</summary> |
| function TANTLRStringStream.GetLine: Integer; |
| begin |
|   Exit(FLine); |
| end; |
| |
| /// <summary>Returns the name stored in FName.</summary> |
| function TANTLRStringStream.GetSourceName: String; |
| begin |
|   Exit(FName); |
| end; |
| |
| /// <summary>Returns the index of the next character to be consumed.</summary> |
| function TANTLRStringStream.Index: Integer; |
| begin |
|   Exit(FP); |
| end; |
| |
| /// <summary>Looks ahead (I > 0) or behind (I < 0) relative to the current |
| /// position without consuming anything. |
| /// </summary> |
| /// <returns>The character code at the requested position, cscEOF when the |
| /// position lies outside the data, or 0 for I = 0 (undefined).</returns> |
| function TANTLRStringStream.LA(I: Integer): Integer; |
| var |
|   Target: Integer; |
|   Backwards: Boolean; |
| begin |
|   if (I = 0) then |
|     Exit(0); // undefined |
| |
|   Backwards := (I < 0); |
|   if Backwards then |
|     Inc(I); // e.g., translate LA(-1) to use offset i=0; then data[p+0-1] |
| |
|   Target := FP + I - 1; |
| |
|   // The lower bound is only checked when looking backwards. |
|   if (Backwards and (Target < 0)) or (Target >= FN) then |
|     Exit(Integer(cscEOF)); |
| |
|   Result := Integer(FData[Target]); |
| end; |
| |
| /// <summary>Same as LA, with the result converted to a Char.</summary> |
| function TANTLRStringStream.LAChar(I: Integer): Char; |
| var |
|   Code: Integer; |
| begin |
|   Code := LA(I); |
|   Result := Char(Code); |
| end; |
| |
| /// <summary>Delegates to LA with the same offset.</summary> |
| function TANTLRStringStream.LT(const I: Integer): Integer; |
| begin |
|   Exit(LA(I)); |
| end; |
| |
| /// <summary>Saves the current position, line and column under a new marker |
| /// and returns that marker (the new mark depth). State objects are reused |
| /// from FMarkers when one already exists for the depth. |
| /// </summary> |
| function TANTLRStringStream.Mark: Integer; |
| var |
|   Snapshot: ICharStreamState; |
| begin |
|   if not Assigned(FMarkers) then |
|   begin |
|     FMarkers := TList<ICharStreamState>.Create; |
|     FMarkers.Add(nil); // depth 0 means no backtracking, leave blank |
|   end; |
| |
|   Inc(FMarkDepth); |
|   if (FMarkDepth < FMarkers.Count) then |
|     Snapshot := FMarkers[FMarkDepth] |
|   else |
|   begin |
|     Snapshot := TCharStreamState.Create; |
|     FMarkers.Add(Snapshot); |
|   end; |
| |
|   Snapshot.P := FP; |
|   Snapshot.Line := FLine; |
|   Snapshot.CharPositionInLine := FCharPositionInLine; |
| |
|   FLastMarker := FMarkDepth; |
|   Result := FLastMarker; |
| end; |
| |
| // Releases Marker and every marker made after it: the mark depth drops |
| // to the level just below Marker. |
| procedure TANTLRStringStream.Release(const Marker: Integer); |
| begin |
|   FMarkDepth := Marker - 1; |
| end; |
| |
| // Returns the stream to its initial state: position 0, line 1, column 0 |
| // and no active markers. The character data itself is untouched. |
| procedure TANTLRStringStream.Reset; |
| begin |
| FP := 0; |
| FLine := 1; |
| FCharPositionInLine := 0; |
| FMarkDepth := 0; |
| end; |
| |
| // Restores the position, line and column saved by Mark for the given |
| // marker, then releases that marker. |
| procedure TANTLRStringStream.Rewind(const Marker: Integer); |
| var |
| State: ICharStreamState; |
| begin |
| State := FMarkers[Marker]; |
| // restore stream state |
| // Seek runs first; line and column are then overwritten with the saved |
| // values, so whatever Seek did to them does not matter. |
| Seek(State.P); |
| FLine := State.Line; |
| FCharPositionInLine := State.CharPositionInLine; |
| Release(Marker); |
| end; |
| |
| // Rewinds to the marker most recently returned by Mark. |
| procedure TANTLRStringStream.Rewind; |
| begin |
| Rewind(FLastMarker); |
| end; |
| |
| // Moves the read position to Index. |
| // |
| // Seeking backwards (or to the current position) simply assigns FP and |
| // leaves line/column untouched. Seeking forwards consumes characters one |
| // by one so that line/column stay accurate. |
| // |
| // Consume stops advancing once FP reaches FN, so the forward loop is also |
| // bounded by FN. Without that bound a request past the end of the data |
| // would never terminate; now the position ends at FN instead. |
| procedure TANTLRStringStream.Seek(const Index: Integer); |
| begin |
|   if (Index <= FP) then |
|     FP := Index // just jump; don't update stream state (line, ...) |
|   else |
|   begin |
|     // seek forward, consume until p hits index or the end of the data |
|     while (FP < Index) and (FP < FN) do |
|       Consume; |
|   end; |
| end; |
| |
| // Overrides the current column counter. |
| procedure TANTLRStringStream.SetCharPositionInLine(const Value: Integer); |
| begin |
| FCharPositionInLine := Value; |
| end; |
| |
| // Overrides the current line counter. |
| procedure TANTLRStringStream.SetLine(const Value: Integer); |
| begin |
| FLine := Value; |
| end; |
| |
| // Returns FN, the number of characters in the stream. |
| function TANTLRStringStream.Size: Integer; |
| begin |
| Result := FN; |
| end; |
| |
| // Returns the characters from index Start through Stop (both 0-based and |
| // inclusive). |
| // |
| // The text is copied straight out of the buffer with an explicit length. |
| // FData is a raw character buffer of FN characters with no terminating #0 |
| // (see the GetMem calls that fill it), so it must not be converted to a |
| // string implicitly: that conversion scans for #0 and can read past the |
| // end of the allocation, and it would also cut the text at an embedded #0. |
| // |
| // The requested range is clamped to the available data; an empty or |
| // inverted range yields ''. |
| function TANTLRStringStream.Substring(const Start, Stop: Integer): String; |
| var |
|   First, Last: Integer; |
| begin |
|   First := Max(Start, 0); |
|   Last := Min(Stop, FN - 1); |
|   if (FData <> nil) and (Last >= First) then |
|     SetString(Result, FData + First, Last - First + 1) |
|   else |
|     Result := ''; |
| end; |
| |
| { TANTLRFileStream } |
| |
| // Opens AFileName using TEncoding.Default. |
| constructor TANTLRFileStream.Create(const AFileName: String); |
| begin |
| Create(AFilename,TEncoding.Default); |
| end; |
| |
| // Remembers the file name and loads the file's text with AEncoding. |
| constructor TANTLRFileStream.Create(const AFileName: String; |
| const AEncoding: TEncoding); |
| begin |
| inherited Create; |
| FFileName := AFileName; |
| Load(FFileName, AEncoding); |
| end; |
| |
| // The source name of a file stream is the file name it was created with. |
| function TANTLRFileStream.GetSourceName: String; |
| begin |
| Result := FFileName; |
| end; |
| |
| // Reads the whole of FileName (decoded with Encoding, or with |
| // TEncoding.Default when Encoding is nil) and makes it the stream's data. |
| // |
| // An empty FileName is ignored. The guard tests the FileName parameter, |
| // i.e. the file that is actually opened, not the FFileName field. |
| // |
| // The file is read completely before the previous buffer is released, so |
| // a failed read leaves the stream's existing data, FN and ownership flag |
| // unchanged. FData and FN are only updated together. |
| procedure TANTLRFileStream.Load(const FileName: String; |
|   const Encoding: TEncoding); |
| var |
|   Reader: TStreamReader; |
|   Contents: String; |
| begin |
|   if (FileName = '') then |
|     Exit; |
| |
|   if (Encoding = nil) then |
|     Reader := TStreamReader.Create(FileName, TEncoding.Default) |
|   else |
|     Reader := TStreamReader.Create(FileName, Encoding); |
|   try |
|     Contents := Reader.ReadToEnd; |
|   finally |
|     Reader.Free; |
|   end; |
| |
|   // Drop the old buffer only if this stream allocated it. |
|   if (FOwnsData) then |
|     FreeMem(FData); |
|   // Never carry a stale or borrowed pointer into the owned state below. |
|   FData := nil; |
|   FN := 0; |
|   FOwnsData := True; |
| |
|   if (Length(Contents) > 0) then |
|   begin |
|     GetMem(FData, Length(Contents) * SizeOf(Char)); |
|     Move(Contents[1], FData^, Length(Contents) * SizeOf(Char)); |
|     FN := Length(Contents); |
|   end; |
| end; |
| |
| { TBitSet } |
| |
| // Builds a set containing the single element El, pre-sized to El + 1 bits. |
| class function TBitSet.BitSetOf(const El: Integer): IBitSet; |
| begin |
| Result := TBitSet.Create(El + 1); |
| Result.Add(El); |
| end; |
| |
| // Builds a set containing A and B, pre-sized for the larger of the two. |
| class function TBitSet.BitSetOf(const A, B: Integer): IBitSet; |
| begin |
| Result := TBitSet.Create(Max(A,B) + 1); |
| Result.Add(A); |
| Result.Add(B); |
| end; |
| |
| // Builds a set containing A, B and C. Starts from the default size; Add |
| // grows the set when an element does not fit. |
| class function TBitSet.BitSetOf(const A, B, C: Integer): IBitSet; |
| begin |
| Result := TBitSet.Create; |
| Result.Add(A); |
| Result.Add(B); |
| Result.Add(C); |
| end; |
| |
| // Builds a set containing A, B, C and D. Starts from the default size; |
| // Add grows the set when an element does not fit. |
| class function TBitSet.BitSetOf(const A, B, C, D: Integer): IBitSet; |
| begin |
| Result := TBitSet.Create; |
| Result.Add(A); |
| Result.Add(B); |
| Result.Add(C); |
| Result.Add(D); |
| end; |
| |
| // Adds El to the set, growing the word array first when El lies beyond |
| // the words currently allocated. |
| procedure TBitSet.Add(const El: Integer); |
| var |
|   WordIdx: Integer; |
| begin |
|   WordIdx := WordNumber(El); |
|   if (Length(FBits) <= WordIdx) then |
|     GrowToInclude(El); |
|   // Switch on El's bit inside its word. |
|   FBits[WordIdx] := BitMask(El) or FBits[WordIdx]; |
| end; |
| |
| // Returns a word with exactly one bit set: the bit that represents |
| // BitNumber inside its 64-bit word (BitNumber masked with MOD_MASK). |
| class function TBitSet.BitMask(const BitNumber: Integer): UInt64; |
| begin |
|   Result := UInt64(1) shl (BitNumber and MOD_MASK); |
| end; |
| |
| // Returns a new set holding the union of this set and A; this set itself |
| // is not modified (the union is built on a clone). |
| function TBitSet.BitSetOr(const A: IBitSet): IBitSet; |
| begin |
| Result := Clone as IBitSet; |
| Result.OrInPlace(A); |
| end; |
| |
| // Returns an independent copy of this set: a new TBitSet whose word array |
| // has the same length and contents as FBits. |
| function TBitSet.Clone: IANTLRInterface; |
| var |
|   Dup: TBitSet; |
| begin |
|   Dup := TBitSet.Create; |
|   // Hand the object to the interface result straight away so it is |
|   // reference-counted from here on. |
|   Result := Dup; |
|   // Copy on a dynamic array yields a new array, not a shared reference. |
|   Dup.FBits := Copy(FBits, 0, Length(FBits)); |
| end; |
| |
| // Creates an empty set sized for BITS bits. |
| constructor TBitSet.Create; |
| begin |
| Create(BITS); |
| end; |
| |
| // Creates a set from a packed array of 64-bit words; the words are copied. |
| constructor TBitSet.Create(const ABits: array of UInt64); |
| begin |
| inherited Create; |
| SetLength(FBits, Length(ABits)); |
| // Guard the Move: indexing [0] on an empty array is not valid. |
| if (Length(ABits) > 0) then |
| Move(ABits[0], FBits[0], Length(ABits) * SizeOf(UInt64)); |
| end; |
| |
| // Creates a set containing every value in AItems. |
| constructor TBitSet.Create(const AItems: IList<Integer>); |
| var |
| V: Integer; |
| begin |
| Create(BITS); |
| for V in AItems do |
| Add(V); |
| end; |
| |
| // Creates an empty set with room for ANBits bits. |
| // |
| // A non-positive ANBits yields a set with no words allocated. It must not |
| // go through the size formula: (ANBits - 1) is then negative, and shr on |
| // a negative Integer is a logical shift that produces a huge positive |
| // word count instead of 0. |
| constructor TBitSet.Create(const ANBits: Integer); |
| begin |
|   inherited Create; |
|   if (ANBits > 0) then |
|     SetLength(FBits, ((ANBits - 1) shr LOG_BITS) + 1) |
|   else |
|     SetLength(FBits, 0); |
| end; |
| |
| // Two bit sets are equal when they contain the same elements, regardless |
| // of how many words each has allocated. Returns False when Obj is nil or |
| // is not a TBitSet. |
| function TBitSet.Equals(Obj: TObject): Boolean; |
| var |
|   OtherSet: TBitSet absolute Obj; // typed view of Obj, valid after the "is" check |
|   I, N: Integer; |
| begin |
|   Result := False; |
|   if (Obj = nil) or (not (Obj is TBitSet)) then |
|     Exit; |
| |
|   N := Min(Length(FBits), Length(OtherSet.FBits)); |
| |
|   // for any bits in common, compare |
|   for I := 0 to N - 1 do |
|   begin |
|     if (FBits[I] <> OtherSet.FBits[I]) then |
|       Exit; |
|   end; |
| |
|   // Make sure any extra bits are off. The words not covered by the loop |
|   // above start at index N (not N + 1), so that is where the check must |
|   // begin. At most one of these two loops has anything to iterate over. |
|   for I := N to Length(FBits) - 1 do |
|   begin |
|     if (FBits[I] <> 0) then |
|       Exit; |
|   end; |
|   for I := N to Length(OtherSet.FBits) - 1 do |
|   begin |
|     if (OtherSet.FBits[I] <> 0) then |
|       Exit; |
|   end; |
| |
|   Result := True; |
| end; |
| |
| // True when the set has no elements, i.e. every allocated word is zero. |
| function TBitSet.GetIsNil: Boolean; |
| var |
|   W: Integer; |
| begin |
|   for W := 0 to Length(FBits) - 1 do |
|   begin |
|     if (FBits[W] <> 0) then |
|       Exit(False); |
|   end; |
|   Result := True; |
| end; |
| |
| // Enlarges the word array so that Bit fits: the new length is the larger |
| // of twice the current length and the number of words Bit requires. |
| procedure TBitSet.GrowToInclude(const Bit: Integer); |
| begin |
|   SetLength(FBits, Max(Length(FBits) shl 1, NumWordsToHold(Bit))); |
| end; |
| |
| // Returns the number of 64-bit words currently allocated for the set. |
| function TBitSet.LengthInLongWords: Integer; |
| begin |
| Result := Length(FBits); |
| end; |
| |
| // True when El is in the set. Negative values, and values beyond the |
| // allocated words, are never members. |
| function TBitSet.Member(const El: Integer): Boolean; |
| var |
|   WordIdx: Integer; |
| begin |
|   Result := False; |
|   if (El < 0) then |
|     Exit; |
| |
|   WordIdx := WordNumber(El); |
|   if (WordIdx < Length(FBits)) then |
|     Result := ((FBits[WordIdx] and BitMask(El)) <> 0); |
| end; |
| |
| // Returns the set's capacity in bits: allocated words times the number |
| // of bits per word (expressed as a shift by LOG_BITS). |
| function TBitSet.NumBits: Integer; |
| begin |
| Result := Length(FBits) shl LOG_BITS; |
| end; |
| |
| // Returns how many words are needed for bit index El to be addressable. |
| class function TBitSet.NumWordsToHold(const El: Integer): Integer; |
| begin |
| Result := (El shr LOG_BITS) + 1; |
| end; |
| |
| // Adds every element of A to this set (in-place union). A nil A is a |
| // no-op. |
| procedure TBitSet.OrInPlace(const A: IBitSet); |
| var |
|   W, OtherLen: Integer; |
|   Other: TUInt64Array; |
| begin |
|   if not Assigned(A) then |
|     Exit; |
| |
|   OtherLen := A.LengthInLongWords; |
|   // If this is smaller than a, grow this first |
|   if (OtherLen > Length(FBits)) then |
|     SetLength(FBits, OtherLen); |
| |
|   Other := A.ToPackedArray; |
|   for W := 0 to Min(Length(FBits), OtherLen) - 1 do |
|     FBits[W] := FBits[W] or Other[W]; |
| end; |
| |
| // Removes El from the set. Elements beyond the allocated words cannot be |
| // members, so nothing happens for them. |
| procedure TBitSet.Remove(const El: Integer); |
| var |
|   WordIdx: Integer; |
| begin |
|   WordIdx := WordNumber(El); |
|   if (WordIdx >= Length(FBits)) then |
|     Exit; |
|   // Clear El's bit and keep every other bit of the word. |
|   FBits[WordIdx] := FBits[WordIdx] and (not BitMask(El)); |
| end; |
| |
| // Returns the number of elements in the set, i.e. the total count of |
| // 1-bits across all words. |
| function TBitSet.Size: Integer; |
| var |
|   WordIdx: Integer; |
|   Remaining: UInt64; |
| begin |
|   Result := 0; |
|   for WordIdx := 0 to Length(FBits) - 1 do |
|   begin |
|     // Shift the word out bit by bit, counting the low bit each time; the |
|     // loop ends as soon as no set bits are left. |
|     Remaining := FBits[WordIdx]; |
|     while (Remaining <> 0) do |
|     begin |
|       if ((Remaining and UInt64(1)) <> 0) then |
|         Inc(Result); |
|       Remaining := Remaining shr 1; |
|     end; |
|   end; |
| end; |
| |
| // Returns the elements of the set as an array in ascending order. |
| function TBitSet.ToArray: TIntegerArray; |
| var |
|   Candidate, Next: Integer; |
| begin |
|   SetLength(Result, Size); |
|   Next := 0; |
|   // Test every bit index the allocated words can represent. |
|   for Candidate := 0 to (Length(FBits) shl LOG_BITS) - 1 do |
|   begin |
|     if not Member(Candidate) then |
|       Continue; |
|     Result[Next] := Candidate; |
|     Inc(Next); |
|   end; |
| end; |
| |
| // Returns the underlying array of 64-bit words. |
| // NOTE(review): this hands out FBits itself rather than a copy, so a |
| // caller that writes into the result would presumably alter the set - |
| // confirm before relying on either behaviour. |
| function TBitSet.ToPackedArray: TUInt64Array; |
| begin |
| Result := FBits; |
| end; |
| |
| // Formats the set using element numbers (no token names). |
| function TBitSet.ToString: String; |
| begin |
| Result := ToString(nil); |
| end; |
| |
| // Formats the set as '{a,b,c}'. Each element is shown as TokenNames[El] |
| // when a TokenNames array is supplied, otherwise as its number. |
| function TBitSet.ToString(const TokenNames: TStringArray): String; |
| var |
|   Text: TStringBuilder; |
|   El: Integer; |
|   NeedSeparator: Boolean; |
| begin |
|   NeedSeparator := False; |
|   Text := TStringBuilder.Create; |
|   try |
|     Text.Append('{'); |
|     for El := 0 to (Length(FBits) shl LOG_BITS) - 1 do |
|     begin |
|       if not Member(El) then |
|         Continue; |
| |
|       // A comma goes before every element except the first one printed. |
|       if NeedSeparator then |
|         Text.Append(','); |
|       if Assigned(TokenNames) then |
|         Text.Append(TokenNames[El]) |
|       else |
|         Text.Append(El); |
|       NeedSeparator := True; |
|     end; |
|     Text.Append('}'); |
|     Result := Text.ToString; |
|   finally |
|     Text.Free; |
|   end; |
| end; |
| |
| // Returns the index of the 64-bit word that holds the given bit. |
| class function TBitSet.WordNumber(const Bit: Integer): Integer; |
| begin |
| Result := Bit shr LOG_BITS; // Bit / BITS |
| end; |
| |
| { TRecognizerSharedState } |
| |
| // Sets up a fresh shared state: a follow stack of |
| // TBaseRecognizer.INITIAL_FOLLOW_STACK_SIZE empty bit sets with an empty |
| // stack pointer (-1), and -1 for the last error index and for the token |
| // start index. |
| constructor TRecognizerSharedState.Create; |
| var |
|   Slot: Integer; |
| begin |
|   inherited; |
|   SetLength(FFollowing, TBaseRecognizer.INITIAL_FOLLOW_STACK_SIZE); |
|   for Slot := Low(FFollowing) to High(FFollowing) do |
|     FFollowing[Slot] := TBitSet.Create; |
|   FFollowingStackPointer := -1; |
|   FLastErrorIndex := -1; |
|   FTokenStartCharIndex := -1; |
| end; |
| |
| // Property getters for the shared recognizer state. Each one returns the |
| // backing field unchanged, except GetRuleMemoCount, which reports the |
| // length of the FRuleMemo array. |
| |
| // Current backtracking value (TBaseRecognizer.Match treats > 0 as |
| // "backtracking in progress"). |
| function TRecognizerSharedState.GetBacktracking: Integer; |
| begin |
| Result := FBacktracking; |
| end; |
| |
| // Channel value stored in the state. |
| function TRecognizerSharedState.GetChannel: Integer; |
| begin |
| Result := FChannel; |
| end; |
| |
| // Error-recovery flag. |
| function TRecognizerSharedState.GetErrorRecovery: Boolean; |
| begin |
| Result := FErrorRecovery; |
| end; |
| |
| // Failed flag. |
| function TRecognizerSharedState.GetFailed: Boolean; |
| begin |
| Result := FFailed; |
| end; |
| |
| // The follow-set stack array. |
| function TRecognizerSharedState.GetFollowing: TBitSetArray; |
| begin |
| Result := FFollowing; |
| end; |
| |
| // Index of the top of the follow-set stack (-1 after construction). |
| function TRecognizerSharedState.GetFollowingStackPointer: Integer; |
| begin |
| Result := FFollowingStackPointer; |
| end; |
| |
| // Last recorded error index (-1 after construction). |
| function TRecognizerSharedState.GetLastErrorIndex: Integer; |
| begin |
| Result := FLastErrorIndex; |
| end; |
| |
| // The rule memoization array (indexed by rule index in TBaseRecognizer). |
| function TRecognizerSharedState.GetRuleMemo: TDictionaryArray<Integer, Integer>; |
| begin |
| Result := FRuleMemo; |
| end; |
| |
| // Number of slots in the rule memoization array. |
| function TRecognizerSharedState.GetRuleMemoCount: Integer; |
| begin |
| Result := Length(FRuleMemo); |
| end; |
| |
| // Syntax error counter. |
| function TRecognizerSharedState.GetSyntaxErrors: Integer; |
| begin |
| Result := FSyntaxErrors; |
| end; |
| |
| // Text stored in the state. |
| function TRecognizerSharedState.GetText: String; |
| begin |
| Result := FText; |
| end; |
| |
| // Token stored in the state. |
| function TRecognizerSharedState.GetToken: IToken; |
| begin |
| Result := FToken; |
| end; |
| |
| // Token start character index (-1 after construction). |
| function TRecognizerSharedState.GetTokenStartCharIndex: Integer; |
| begin |
| Result := FTokenStartCharIndex; |
| end; |
| |
| // Column at which the token starts. |
| function TRecognizerSharedState.GetTokenStartCharPositionInLine: Integer; |
| begin |
| Result := FTokenStartCharPositionInLine; |
| end; |
| |
| // Line on which the token starts. |
| function TRecognizerSharedState.GetTokenStartLine: Integer; |
| begin |
| Result := FTokenStartLine; |
| end; |
| |
| // Token type stored in the state. |
| function TRecognizerSharedState.GetTokenType: Integer; |
| begin |
| Result := FTokenType; |
| end; |
| |
| // Property setters for the shared recognizer state. Each one stores Value |
| // in the backing field, except SetRuleMemoCount, which resizes the |
| // FRuleMemo array. |
| |
| // Sets the backtracking value. |
| procedure TRecognizerSharedState.SetBacktracking(const Value: Integer); |
| begin |
| FBacktracking := Value; |
| end; |
| |
| // Sets the channel value. |
| procedure TRecognizerSharedState.SetChannel(const Value: Integer); |
| begin |
| FChannel := Value; |
| end; |
| |
| // Sets the error-recovery flag. |
| procedure TRecognizerSharedState.SetErrorRecovery(const Value: Boolean); |
| begin |
| FErrorRecovery := Value; |
| end; |
| |
| // Sets the failed flag. |
| procedure TRecognizerSharedState.SetFailed(const Value: Boolean); |
| begin |
| FFailed := Value; |
| end; |
| |
| // Replaces the follow-set stack array. |
| procedure TRecognizerSharedState.SetFollowing(const Value: TBitSetArray); |
| begin |
| FFollowing := Value; |
| end; |
| |
| // Sets the index of the top of the follow-set stack. |
| procedure TRecognizerSharedState.SetFollowingStackPointer(const Value: Integer); |
| begin |
| FFollowingStackPointer := Value; |
| end; |
| |
| // Sets the last recorded error index. |
| procedure TRecognizerSharedState.SetLastErrorIndex(const Value: Integer); |
| begin |
| FLastErrorIndex := Value; |
| end; |
| |
| // Resizes the rule memoization array to Value slots. |
| procedure TRecognizerSharedState.SetRuleMemoCount(const Value: Integer); |
| begin |
| SetLength(FRuleMemo, Value); |
| end; |
| |
| // Sets the syntax error counter. |
| procedure TRecognizerSharedState.SetSyntaxErrors(const Value: Integer); |
| begin |
| FSyntaxErrors := Value; |
| end; |
| |
| // Sets the text stored in the state. |
| procedure TRecognizerSharedState.SetText(const Value: String); |
| begin |
| FText := Value; |
| end; |
| |
| // Sets the token stored in the state. |
| procedure TRecognizerSharedState.SetToken(const Value: IToken); |
| begin |
| FToken := Value; |
| end; |
| |
| // Sets the token start character index. |
| procedure TRecognizerSharedState.SetTokenStartCharIndex(const Value: Integer); |
| begin |
| FTokenStartCharIndex := Value; |
| end; |
| |
| // Sets the column at which the token starts. |
| procedure TRecognizerSharedState.SetTokenStartCharPositionInLine( |
| const Value: Integer); |
| begin |
| FTokenStartCharPositionInLine := Value; |
| end; |
| |
| // Sets the line on which the token starts. |
| procedure TRecognizerSharedState.SetTokenStartLine(const Value: Integer); |
| begin |
| FTokenStartLine := Value; |
| end; |
| |
| // Sets the token type stored in the state. |
| procedure TRecognizerSharedState.SetTokenType(const Value: Integer); |
| begin |
| FTokenType := Value; |
| end; |
| |
| { TCommonToken } |
| |
| // Creates a token on the default channel with column and token index |
| // both set to -1. |
| constructor TCommonToken.Create; |
| begin |
| inherited; |
| FChannel := TToken.DEFAULT_CHANNEL; |
| FCharPositionInLine := -1; |
| FIndex := -1; |
| end; |
| |
| // Creates a token of the given type with the defaults above. |
| constructor TCommonToken.Create(const ATokenType: Integer); |
| begin |
| Create; |
| FTokenType := ATokenType; |
| end; |
| |
| // Creates a token whose text is the AStart..AStop range of AInput; the |
| // text itself is fetched on demand by GetText. |
| constructor TCommonToken.Create(const AInput: ICharStream; const ATokenType, |
| AChannel, AStart, AStop: Integer); |
| begin |
| Create; |
| FInput := AInput; |
| FTokenType := ATokenType; |
| FChannel := AChannel; |
| FStart := AStart; |
| FStop := AStop; |
| end; |
| |
| // Creates a token of the given type carrying explicit text. |
| constructor TCommonToken.Create(const ATokenType: Integer; const AText: String); |
| begin |
| Create; |
| FTokenType := ATokenType; |
| // Create has already selected the default channel; this repeats it. |
| FChannel := TToken.DEFAULT_CHANNEL; |
| FText := AText; |
| end; |
| |
| // Returns the token's channel. |
| function TCommonToken.GetChannel: Integer; |
| begin |
| Result := FChannel; |
| end; |
| |
| // Returns the token's column (-1 until set). |
| function TCommonToken.GetCharPositionInLine: Integer; |
| begin |
| Result := FCharPositionInLine; |
| end; |
| |
| // Returns the character stream the token refers to (may be nil). |
| function TCommonToken.GetInputStream: ICharStream; |
| begin |
| Result := FInput; |
| end; |
| |
| // Returns the token's line. |
| function TCommonToken.GetLine: Integer; |
| begin |
| Result := FLine; |
| end; |
| |
| // Returns the start index passed to ICharStream.Substring by GetText. |
| function TCommonToken.GetStartIndex: Integer; |
| begin |
| Result := FStart; |
| end; |
| |
| // Returns the stop index passed to ICharStream.Substring by GetText. |
| function TCommonToken.GetStopIndex: Integer; |
| begin |
| Result := FStop; |
| end; |
| |
| // Returns the token's text: the explicitly assigned text when there is |
| // one, otherwise the FStart..FStop range of the input stream, or '' when |
| // the token has no input stream. |
| function TCommonToken.GetText: String; |
| begin |
|   if (FText <> '') then |
|     Exit(FText); |
| |
|   if (FInput = nil) then |
|     Exit(''); |
| |
|   Result := FInput.Substring(FStart, FStop); |
| end; |
| |
| // Returns the token index (-1 until set). |
| function TCommonToken.GetTokenIndex: Integer; |
| begin |
| Result := FIndex; |
| end; |
| |
| // Returns the token type. |
| function TCommonToken.GetTokenType: Integer; |
| begin |
| Result := FTokenType; |
| end; |
| |
| // Sets the token's channel. |
| procedure TCommonToken.SetChannel(const Value: Integer); |
| begin |
| FChannel := Value; |
| end; |
| |
| // Sets the token's column. |
| procedure TCommonToken.SetCharPositionInLine(const Value: Integer); |
| begin |
| FCharPositionInLine := Value; |
| end; |
| |
| // Sets the character stream the token refers to. |
| procedure TCommonToken.SetInputStream(const Value: ICharStream); |
| begin |
| FInput := Value; |
| end; |
| |
| // Sets the token's line. |
| procedure TCommonToken.SetLine(const Value: Integer); |
| begin |
| FLine := Value; |
| end; |
| |
| // Sets the start index of the token's text in the input stream. |
| procedure TCommonToken.SetStartIndex(const Value: Integer); |
| begin |
| FStart := Value; |
| end; |
| |
| // Sets the stop index of the token's text in the input stream. |
| procedure TCommonToken.SetStopIndex(const Value: Integer); |
| begin |
| FStop := Value; |
| end; |
| |
| // Assigns explicit text to the token. |
| procedure TCommonToken.SetText(const Value: String); |
| begin |
| (* Override the text for this token. The property getter |
| * will return this text rather than pulling from the buffer |
| * (as long as the text is not empty; see GetText). |
| * Note that this does not mean that start/stop indexes are |
| * not valid. It means that the input was converted to a new |
| * string in the token object. |
| *) |
| FText := Value; |
| end; |
| |
| // Sets the token index. |
| procedure TCommonToken.SetTokenIndex(const Value: Integer); |
| begin |
| FIndex := Value; |
| end; |
| |
| // Sets the token type. |
| procedure TCommonToken.SetTokenType(const Value: Integer); |
| begin |
| FTokenType := Value; |
| end; |
| |
| // Debug representation of the token: |
| //   [@index,start:stop='text',<type>,channel=N,line:column] |
| // The channel part appears only for channels above 0; line feeds, |
| // carriage returns and tabs in the text are shown as \n, \r and \t, and |
| // empty text is shown as <no text>. |
| function TCommonToken.ToString: String; |
| var |
|   ChannelPart, Shown: String; |
| begin |
|   ChannelPart := ''; |
|   if (FChannel > 0) then |
|     ChannelPart := ',channel=' + IntToStr(FChannel); |
| |
|   Shown := GetText; |
|   if (Shown = '') then |
|     Shown := '<no text>' |
|   else |
|     Shown := ReplaceStr(ReplaceStr(ReplaceStr(Shown,#10,'\n'),#13,'\r'),#9,'\t'); |
| |
|   Result := Format('[@%d,%d:%d=''%s'',<%d>%s,%d:%d]', |
|     [FIndex,FStart,FStop,Shown,FTokenType,ChannelPart,FLine,FCharPositionInLine]); |
| end; |
| |
| // Copy constructor: takes text, type, line, token index, column and |
| // channel from AOldToken. Start/stop indexes are copied as well when the |
| // source token supports ICommonToken. |
| constructor TCommonToken.Create(const AOldToken: IToken); |
| var |
|   Source: ICommonToken; |
| begin |
|   Create; |
|   FText := AOldToken.Text; |
|   FTokenType := AOldToken.TokenType; |
|   FLine := AOldToken.Line; |
|   FIndex := AOldToken.TokenIndex; |
|   FCharPositionInLine := AOldToken.CharPositionInLine; |
|   FChannel := AOldToken.Channel; |
| |
|   // Only ICommonToken exposes the start/stop indexes. |
|   if not Supports(AOldToken, ICommonToken, Source) then |
|     Exit; |
|   FStart := Source.StartIndex; |
|   FStop := Source.StopIndex; |
| end; |
| |
| { TClassicToken } |
| |
| // Copy constructor: takes text, type, line, column and channel from |
| // AOldToken. The token index is not copied. |
| constructor TClassicToken.Create(const AOldToken: IToken); |
| begin |
| inherited Create; |
| FText := AOldToken.Text; |
| FTokenType := AOldToken.TokenType; |
| FLine := AOldToken.Line; |
| FCharPositionInLine := AOldToken.CharPositionInLine; |
| FChannel := AOldToken.Channel; |
| end; |
| |
| // Creates a token of the given type; all other fields keep their |
| // initial values. |
| constructor TClassicToken.Create(const ATokenType: Integer); |
| begin |
| inherited Create; |
| FTokenType := ATokenType; |
| end; |
| |
| // Creates a token with the given type, text and channel. |
| constructor TClassicToken.Create(const ATokenType: Integer; const AText: String; |
| const AChannel: Integer); |
| begin |
| inherited Create; |
| FTokenType := ATokenType; |
| FText := AText; |
| FChannel := AChannel; |
| end; |
| |
| // Creates a token with the given type and text. |
| constructor TClassicToken.Create(const ATokenType: Integer; |
| const AText: String); |
| begin |
| inherited Create; |
| FTokenType := ATokenType; |
| FText := AText; |
| end; |
| |
| // Returns the token's channel. |
| function TClassicToken.GetChannel: Integer; |
| begin |
| Result := FChannel; |
| end; |
| |
| // Returns the token's column. |
| function TClassicToken.GetCharPositionInLine: Integer; |
| begin |
| Result := FCharPositionInLine; |
| end; |
| |
| // A classic token keeps no input stream; always returns nil. |
| function TClassicToken.GetInputStream: ICharStream; |
| begin |
| // No default implementation |
| Result := nil; |
| end; |
| |
| // Returns the token's line. |
| function TClassicToken.GetLine: Integer; |
| begin |
| Result := FLine; |
| end; |
| |
| // Returns the token's text. |
| function TClassicToken.GetText: String; |
| begin |
| Result := FText; |
| end; |
| |
| // Returns the token index. |
| function TClassicToken.GetTokenIndex: Integer; |
| begin |
| Result := FIndex; |
| end; |
| |
| // Returns the token type. |
| function TClassicToken.GetTokenType: Integer; |
| begin |
| Result := FTokenType; |
| end; |
| |
| // Sets the token's channel. |
| procedure TClassicToken.SetChannel(const Value: Integer); |
| begin |
| FChannel := Value; |
| end; |
| |
| // Sets the token's column. |
| procedure TClassicToken.SetCharPositionInLine(const Value: Integer); |
| begin |
| FCharPositionInLine := Value; |
| end; |
| |
| // A classic token keeps no input stream; the value is ignored. |
| procedure TClassicToken.SetInputStream(const Value: ICharStream); |
| begin |
| // No default implementation |
| end; |
| |
| // Sets the token's line. |
| procedure TClassicToken.SetLine(const Value: Integer); |
| begin |
| FLine := Value; |
| end; |
| |
| // Sets the token's text. |
| procedure TClassicToken.SetText(const Value: String); |
| begin |
| FText := Value; |
| end; |
| |
| // Sets the token index. |
| procedure TClassicToken.SetTokenIndex(const Value: Integer); |
| begin |
| FIndex := Value; |
| end; |
| |
| // Sets the token type. |
| procedure TClassicToken.SetTokenType(const Value: Integer); |
| begin |
| FTokenType := Value; |
| end; |
| |
| // Debug representation of the token: |
| //   [@index,'text',<type>,channel=N,line:column] |
| // The channel part appears only for channels above 0; line feeds, |
| // carriage returns and tabs in the text are shown as \n, \r and \t, and |
| // empty text is shown as <no text>. |
| function TClassicToken.ToString: String; |
| var |
|   ChannelPart, Shown: String; |
| begin |
|   ChannelPart := ''; |
|   if (FChannel > 0) then |
|     ChannelPart := ',channel=' + IntToStr(FChannel); |
| |
|   Shown := FText; |
|   if (Shown = '') then |
|     Shown := '<no text>' |
|   else |
|     Shown := ReplaceStr(ReplaceStr(ReplaceStr(Shown,#10,'\n'),#13,'\r'),#9,'\t'); |
| |
|   Result := Format('[@%d,''%s'',<%d>%s,%d:%d]', |
|     [FIndex,Shown,FTokenType,ChannelPart,FLine,FCharPositionInLine]); |
| end; |
| |
| { TToken } |
| |
| // Creates the shared token instances EOF_TOKEN, INVALID_TOKEN and |
| // SKIP_TOKEN. INVALID_TOKEN and SKIP_TOKEN are separate objects that both |
| // carry INVALID_TOKEN_TYPE. |
| class procedure TToken.Initialize; |
| begin |
| EOF_TOKEN := TCommonToken.Create(EOF); |
| INVALID_TOKEN := TCommonToken.Create(INVALID_TOKEN_TYPE); |
| SKIP_TOKEN := TCommonToken.Create(INVALID_TOKEN_TYPE); |
| end; |
| |
| { TBaseRecognizer } |
| |
| // Creates a recognizer with its own, freshly created shared state. |
| constructor TBaseRecognizer.Create; |
| begin |
| inherited; |
| FState := TRecognizerSharedState.Create; |
| end; |
| |
| // Checks the memoization table for rule RuleIndex at the current input |
| // position. Returns False when nothing is recorded. Otherwise returns |
| // True and replays the recorded outcome: a recorded failure sets the |
| // Failed flag, a recorded success moves the input just past the recorded |
| // stop index. |
| function TBaseRecognizer.AlreadyParsedRule(const Input: IIntStream; |
|   const RuleIndex: Integer): Boolean; |
| var |
|   Recorded: Integer; |
| begin |
|   Recorded := GetRuleMemoization(RuleIndex, Input.Index); |
|   if (Recorded = MEMO_RULE_UNKNOWN) then |
|     Exit(False); |
| |
|   Result := True; |
|   if (Recorded = MEMO_RULE_FAILED) then |
|     FState.Failed := True |
|   else |
|     Input.Seek(Recorded + 1); // jump to one past stop token |
| end; |
| |
| // Hook with an empty body in this class. |
| procedure TBaseRecognizer.BeginBacktrack(const Level: Integer); |
| begin |
| // No default implementation |
| end; |
| |
| // Hook with an empty body in this class. |
| procedure TBaseRecognizer.BeginResync; |
| begin |
| // No default implementation |
| end; |
| |
| // Consumes input until the next symbol is TokenType or end of input. The |
| // stopping symbol itself is left unconsumed. |
| procedure TBaseRecognizer.ConsumeUntil(const Input: IIntStream; |
|   const TokenType: Integer); |
| var |
|   Next: Integer; |
| begin |
|   repeat |
|     Next := Input.LA(1); |
|     if (Next = TToken.EOF) or (Next = TokenType) then |
|       Break; |
|     Input.Consume; |
|   until False; |
| end; |
| |
| // Unions follow sets from the follow stack, walking from the top of the |
| // stack down to the bottom. |
| // |
| // Exact = False: every set on the stack is included. |
| // Exact = True : the walk stops after the first set that does not contain |
| //   the end-of-rule marker (EOR_TOKEN_TYPE); the marker is stripped from |
| //   the result for every level that contains it except the bottom one |
| //   (index 0). |
| function TBaseRecognizer.CombineFollows(const Exact: Boolean): IBitSet; |
| var |
|   Depth: Integer; |
|   Frame: IBitSet; |
| begin |
|   Result := TBitSet.Create; |
|   for Depth := FState.FollowingStackPointer downto 0 do |
|   begin |
|     Frame := FState.Following[Depth]; |
|     Result.OrInPlace(Frame); |
|     if not Exact then |
|       Continue; |
| |
|     // can we see end of rule? if not, quit |
|     if not Frame.Member(TToken.EOR_TOKEN_TYPE) then |
|       Break; |
| |
|     // Only leave EOR in set if at top (start rule); this lets |
|     // us know if have to include follow(start rule); i.e., EOF |
|     if (Depth > 0) then |
|       Result.Remove(TToken.EOR_TOKEN_TYPE); |
|   end; |
| end; |
| |
| // Returns the exact, context-sensitive follow set: CombineFollows with |
| // Exact = True. |
| function TBaseRecognizer.ComputeContextSensitiveRuleFOLLOW: IBitSet; |
| begin |
| Result := CombineFollows(True); |
| end; |
| |
| // Returns the error recovery set: CombineFollows with Exact = False, i.e. |
| // the union of every follow set on the stack. |
| function TBaseRecognizer.ComputeErrorRecoverySet: IBitSet; |
| begin |
| Result := CombineFollows(False); |
| end; |
| |
| // Consumes input until the next symbol is a member of BitSet or end of |
| // input. The stopping symbol itself is left unconsumed. |
| procedure TBaseRecognizer.ConsumeUntil(const Input: IIntStream; |
|   const BitSet: IBitSet); |
| var |
|   Next: Integer; |
| begin |
|   repeat |
|     Next := Input.LA(1); |
|     if (Next = TToken.EOF) or BitSet.Member(Next) then |
|       Break; |
|     Input.Consume; |
|   until False; |
| end; |
| |
| // Creates a recognizer that uses the supplied shared state. When AState |
| // is nil this falls back to the parameterless constructor, which creates |
| // a state of its own. |
| constructor TBaseRecognizer.Create(const AState: IRecognizerSharedState); |
| begin |
| if (AState = nil) then |
| Create |
| else |
| begin |
| inherited Create; |
| FState := AState; |
| end; |
| end; |
| |
| // Reports a recognition error: builds "<header> <message>" from |
| // GetErrorHeader and GetErrorMessage and passes it to EmitErrorMessage. |
| procedure TBaseRecognizer.DisplayRecognitionError( |
| const TokenNames: TStringArray; const E: ERecognitionException); |
| var |
| Hdr, Msg: String; |
| begin |
| Hdr := GetErrorHeader(E); |
| Msg := GetErrorMessage(E, TokenNames); |
| EmitErrorMessage(Hdr + ' ' + Msg); |
| end; |
| |
| // Writes the error message to standard output with WriteLn. |
| procedure TBaseRecognizer.EmitErrorMessage(const Msg: String); |
| begin |
| WriteLn(Msg); |
| end; |
| |
| // Hook with an empty body in this class. |
| procedure TBaseRecognizer.EndBacktrack(const Level: Integer; |
| const Successful: Boolean); |
| begin |
| // No default implementation |
| end; |
| |
| // Hook with an empty body in this class. |
| procedure TBaseRecognizer.EndResync; |
| begin |
| // No default implementation |
| end; |
| |
| // Returns the backtracking value held in the shared state. |
| function TBaseRecognizer.GetBacktrackingLevel: Integer; |
| begin |
| Result := FState.Backtracking; |
| end; |
| |
| // Returns nil in this class. |
| function TBaseRecognizer.GetCurrentInputSymbol( |
| const Input: IIntStream): IANTLRInterface; |
| begin |
| // No default implementation |
| Result := nil; |
| end; |
| |
| // Returns the location prefix for an error message, in the form |
| // "line <line>:<column>". |
| function TBaseRecognizer.GetErrorHeader(const E: ERecognitionException): String; |
| begin |
|   Result := Format('line %d:%d', [E.Line, E.CharPositionInLine]); |
| end; |
| |
| // Builds the human-readable message for a recognition error. |
| // |
| // The text depends on the concrete exception class; an exception that |
| // matches none of the classes below is reported with its own Message. |
| // TokenNames maps a token type to its display name and is indexed by the |
| // expected token type for every class that carries one. |
| function TBaseRecognizer.GetErrorMessage(const E: ERecognitionException; |
|   const TokenNames: TStringArray): String; |
| var |
|   // Typed views of E; each is only valid inside the branch whose "is" |
|   // test has confirmed the class. |
|   UTE: EUnwantedTokenException absolute E; |
|   MTE: EMissingTokenException absolute E; |
|   MMTE: EMismatchedTokenException absolute E; |
|   MTNE: EMismatchedTreeNodeException absolute E; |
|   MSE: EMismatchedSetException absolute E; |
|   FPE: EFailedPredicateException absolute E; |
| |
|   // Display name of an expected token type. EOF is special-cased rather |
|   // than looked up in TokenNames. |
|   function ExpectedName(const Expecting: Integer): String; |
|   begin |
|     if (Expecting = TToken.EOF) then |
|       Result := 'EOF' |
|     else |
|       Result := TokenNames[Expecting]; |
|   end; |
| |
| begin |
|   Result := E.Message; |
|   if (E is EUnwantedTokenException) then |
|   begin |
|     Result := 'extraneous input ' + GetTokenErrorDisplay(UTE.UnexpectedToken) |
|       + ' expecting ' + ExpectedName(UTE.Expecting); |
|   end |
|   else |
|   if (E is EMissingTokenException) then |
|   begin |
|     Result := 'missing ' + ExpectedName(MTE.Expecting) + ' at ' |
|       + GetTokenErrorDisplay(E.Token); |
|   end |
|   else |
|   if (E is EMismatchedTokenException) then |
|   begin |
|     Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token) |
|       + ' expecting ' + ExpectedName(MMTE.Expecting); |
|   end |
|   else |
|   if (E is EMismatchedTreeNodeException) then |
|   begin |
|     // The expected name must end up at the tail of the message; it is |
|     // appended here directly instead of being parked in Result, where the |
|     // next assignment would overwrite it. |
|     Result := 'mismatched tree node: '; |
|     if (MTNE.Node <> nil) and (MTNE.Node.ToString <> '') then |
|       Result := Result + MTNE.Node.ToString; |
|     Result := Result + ' expecting ' + ExpectedName(MTNE.Expecting); |
|   end |
|   else |
|   if (E is ENoViableAltException) then |
|   begin |
|     // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" |
|     // and "(decision="+nvae.decisionNumber+") and |
|     // "state "+nvae.stateNumber |
|     Result := 'no viable alternative at input ' + GetTokenErrorDisplay(E.Token); |
|   end |
|   else |
|   if (E is EEarlyExitException) then |
|   begin |
|     // for development, can add "(decision="+eee.decisionNumber+")" |
|     Result := 'required (...)+ loop did not match anything at input ' |
|       + GetTokenErrorDisplay(E.Token); |
|   end |
|   else |
|   if (E is EMismatchedSetException) then |
|   begin |
|     Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token) |
|       + ' expecting set ' + MSE.Expecting.ToString; |
|   end |
|   else |
|   if (E is EMismatchedNotSetException) then |
|   begin |
|     // NOTE(review): this branch reads the exception through the |
|     // EMismatchedSetException view. That is only sound if |
|     // EMismatchedNotSetException descends from EMismatchedSetException, |
|     // in which case the branch above already handles it and this one is |
|     // never reached - confirm against the class declarations. |
|     Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token) |
|       + ' expecting set ' + MSE.Expecting.ToString; |
|   end |
|   else |
|   if (E is EFailedPredicateException) then |
|   begin |
|     Result := 'rule ' + FPE.RuleName |
|       + ' failed predicate: {' + FPE.PredicateText + '}?'; |
|   end; |
| end; |
| |
| // Returns '' in this class. |
| function TBaseRecognizer.GetGrammarFileName: String; |
| begin |
| // No default implementation |
| Result := ''; |
| end; |
| |
| // Returns nil in this class. |
| function TBaseRecognizer.GetMissingSymbol(const Input: IIntStream; |
| const E: ERecognitionException; const ExpectedTokenType: Integer; |
| const Follow: IBitSet): IANTLRInterface; |
| begin |
| // No default implementation |
| Result := nil; |
| end; |
| |
| // Returns the syntax error counter held in the shared state. |
| function TBaseRecognizer.GetNumberOfSyntaxErrors: Integer; |
| begin |
| Result := FState.SyntaxErrors; |
| end; |
| |
| // Looks up the memoized result for rule RuleIndex started at input index |
| // RuleStartIndex. Returns the stored value, or MEMO_RULE_UNKNOWN when |
| // nothing is recorded. The rule's dictionary is created on first use. |
| function TBaseRecognizer.GetRuleMemoization(const RuleIndex, |
|   RuleStartIndex: Integer): Integer; |
| var |
|   Memo: IDictionary<Integer, Integer>; |
| begin |
|   Memo := FState.RuleMemo[RuleIndex]; |
|   if not Assigned(Memo) then |
|   begin |
|     // First query for this rule: install an empty dictionary. Nothing can |
|     // be recorded in it yet, so the answer is "unknown". |
|     Memo := TDictionary<Integer, Integer>.Create; |
|     FState.RuleMemo[RuleIndex] := Memo; |
|     Exit(MEMO_RULE_UNKNOWN); |
|   end; |
| |
|   if (not Memo.TryGetValue(RuleStartIndex, Result)) then |
|     Result := MEMO_RULE_UNKNOWN; |
| end; |
| |
| // Returns the total number of memoized entries across all rules. |
| function TBaseRecognizer.GetRuleMemoizationChaceSize: Integer; |
| var |
|   AllMemos: TDictionaryArray<Integer, Integer>; |
|   Slot: Integer; |
| begin |
|   Result := 0; |
|   AllMemos := FState.RuleMemo; |
|   for Slot := 0 to Length(AllMemos) - 1 do |
|   begin |
|     // Rules never queried have no dictionary yet. |
|     if Assigned(AllMemos[Slot]) then |
|       Inc(Result, AllMemos[Slot].Count); // how many input indexes are recorded? |
|   end; |
| end; |
| |
| // Returns the shared state object this recognizer works with. |
| function TBaseRecognizer.GetState: IRecognizerSharedState; |
| begin |
| Result := FState; |
| end; |
| |
| // Returns how a token is shown in error messages: its text (or <EOF> / |
| // <type number> when it has no text), with line feeds, carriage returns |
| // and tabs written as \n, \r and \t, wrapped in single quotes. |
| function TBaseRecognizer.GetTokenErrorDisplay(const T: IToken): String; |
| var |
|   Shown: String; |
| begin |
|   Shown := T.Text; |
|   if (Shown = '') then |
|   begin |
|     if (T.TokenType = TToken.EOF) then |
|       Shown := '<EOF>' |
|     else |
|       Shown := '<' + IntToStr(T.TokenType) + '>'; |
|   end; |
| |
|   Shown := ReplaceStr(ReplaceStr(ReplaceStr(Shown,#10,'\n'),#13,'\r'),#9,'\t'); |
|   Result := '''' + Shown + ''''; |
| end; |
| |
| // Returns nil in this class. |
| function TBaseRecognizer.GetTokenNames: TStringArray; |
| begin |
| // no default implementation |
| Result := nil; |
| end; |
| |
| // Matches the current input symbol against TokenType. |
| // |
| // On a match the symbol is consumed and the ErrorRecovery / Failed flags |
| // are cleared. On a mismatch while backtracking, only the Failed flag is |
| // set. Otherwise Mismatch is called, followed by |
| // RecoverFromMismatchedToken. |
| // |
| // Returns the current input symbol as seen on entry, or the result of |
| // RecoverFromMismatchedToken when recovery runs. |
| function TBaseRecognizer.Match(const Input: IIntStream; |
|   const TokenType: Integer; const Follow: IBitSet): IANTLRInterface; |
| begin |
|   Result := GetCurrentInputSymbol(Input); |
| |
|   if (Input.LA(1) = TokenType) then |
|   begin |
|     Input.Consume; |
|     FState.ErrorRecovery := False; |
|     FState.Failed := False; |
|     Exit; |
|   end; |
| |
|   if (FState.Backtracking > 0) then |
|   begin |
|     FState.Failed := True; |
|     Exit; |
|   end; |
| |
|   // NOTE(review): TBaseRecognizer.Mismatch raises in every branch, so the |
|   // recovery call below is only reached if a descendant overrides Mismatch |
|   // without raising - confirm this is the intended flow. |
|   Mismatch(Input, TokenType, Follow); |
|   Result := RecoverFromMismatchedToken(Input, TokenType, Follow); |
| end; |
| |
| // Matches any single input symbol: clears the ErrorRecovery and Failed |
| // flags and consumes one symbol. |
| procedure TBaseRecognizer.MatchAny(const Input: IIntStream); |
| begin |
| FState.ErrorRecovery := False; |
| FState.Failed := False; |
| Input.Consume; |
| end; |
| |
| // Records the outcome of rule RuleIndex started at RuleStartIndex: |
| // MEMO_RULE_FAILED when the Failed flag is set, otherwise the index of |
| // the last symbol consumed (current input index - 1). Nothing is stored |
| // when the rule has no memo dictionary. |
| procedure TBaseRecognizer.Memoize(const Input: IIntStream; const RuleIndex, |
|   RuleStartIndex: Integer); |
| var |
|   Outcome: Integer; |
|   Memo: IDictionary<Integer, Integer>; |
| begin |
|   Memo := FState.RuleMemo[RuleIndex]; |
|   if not Assigned(Memo) then |
|     Exit; |
| |
|   if FState.Failed then |
|     Outcome := MEMO_RULE_FAILED |
|   else |
|     Outcome := Input.Index - 1; |
|   Memo.AddOrSetValue(RuleStartIndex, Outcome); |
| end; |
| |
| // Raises the exception that best describes a failed match. This |
| // procedure always raises: |
| //   EUnwantedTokenException   - the symbol after the current one is the |
| //                               expected type (see MismatchIsUnwantedToken) |
| //   EMissingTokenException    - MismatchIsMissingToken says a token is |
| //                               missing |
| //   EMismatchedTokenException - anything else |
| procedure TBaseRecognizer.Mismatch(const Input: IIntStream; |
|   const TokenType: Integer; const Follow: IBitSet); |
| begin |
|   if MismatchIsUnwantedToken(Input, TokenType) then |
|     raise EUnwantedTokenException.Create(TokenType, Input); |
| |
|   if MismatchIsMissingToken(Input, Follow) then |
|     raise EMissingTokenException.Create(TokenType, Input, nil); |
| |
|   raise EMismatchedTokenException.Create(TokenType, Input); |
| end; |
| |
| // Decides whether a mismatch is best explained by a missing token: True |
| // when the current input symbol (or the end-of-rule marker) is in the set |
| // of symbols that may follow the expected element. Returns False when no |
| // follow set is available. |
| function TBaseRecognizer.MismatchIsMissingToken(const Input: IIntStream; |
|   const Follow: IBitSet): Boolean; |
| var |
|   ContextFollow, Viable: IBitSet; |
| begin |
|   // we have no information about the follow; we can only consume |
|   // a single token and hope for the best |
|   if (Follow = nil) then |
|     Exit(False); |
| |
|   Viable := Follow; |
|   // compute what can follow this grammar element reference |
|   if (Follow.Member(TToken.EOR_TOKEN_TYPE)) then |
|   begin |
|     ContextFollow := ComputeContextSensitiveRuleFOLLOW(); |
|     Viable := Follow.BitSetOr(ContextFollow); |
|     // remove EOR if we're not the start symbol |
|     if (FState.FollowingStackPointer >= 0) then |
|       Viable.Remove(TToken.EOR_TOKEN_TYPE); |
|   end; |
| |
|   // if current token is consistent with what could come after set |
|   // then we know we're missing a token; error recovery is free to |
|   // "insert" the missing token |
| |
|   // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR |
|   // in follow set to indicate that the fall of the start symbol is |
|   // in the set (EOF can follow). |
|   Result := Viable.Member(Input.LA(1)) or Viable.Member(TToken.EOR_TOKEN_TYPE); |
| end; |
| |
| // Returns True when the symbol one past the current one (LA(2)) has the |
| // expected TokenType, i.e. the current symbol could simply be deleted. |
| function TBaseRecognizer.MismatchIsUnwantedToken(const Input: IIntStream; |
| const TokenType: Integer): Boolean; |
| begin |
| Result := (Input.LA(2) = TokenType); |
| end; |
| |
| // Pushes FSet onto the follow-set stack kept in the shared state |
| // (FState.Following / FState.FollowingStackPointer). |
| // The backing array is doubled when the next slot would be out of range. If |
| // doubling still leaves no room (for example the array is empty), it grows |
| // to exactly the size needed so the store below can never go out of bounds. |
| procedure TBaseRecognizer.PushFollow(const FSet: IBitSet); |
| var |
|   F: TBitSetArray; |
|   I, OldLen, NewLen, NextSlot: Integer; |
| begin |
|   NextSlot := FState.FollowingStackPointer + 1; |
|   OldLen := Length(FState.Following); |
|   if (NextSlot >= OldLen) then |
|   begin |
|     NewLen := OldLen * 2; |
|     if (NewLen <= NextSlot) then |
|       NewLen := NextSlot + 1; // doubling 0 (or too small a size) is not enough |
|     // SetLength nil-initialises the new interface slots, no FillChar needed |
|     SetLength(F, NewLen); |
|     for I := 0 to OldLen - 1 do |
|       F[I] := FState.Following[I]; |
|     FState.Following := F; |
|   end; |
|   FState.FollowingStackPointer := NextSlot; |
|   FState.Following[NextSlot] := FSet; |
| end; |
| |
| // Resynchronises the input after a recognition error: remembers the error |
| // position in FState.LastErrorIndex, computes the error recovery set and |
| // consumes input until a symbol from that set is reached. The consumption is |
| // bracketed by BeginResync / EndResync. RE is not used by this implementation. |
| procedure TBaseRecognizer.Recover(const Input: IIntStream; |
| const RE: ERecognitionException); |
| var |
| FollowSet: IBitSet; |
| begin |
| if (FState.LastErrorIndex = Input.Index) then |
| // uh oh, another error at same token index; must be a case |
| // where LT(1) is in the recovery token set so nothing is |
| // consumed; consume a single token so at least to prevent |
| // an infinite loop; this is a failsafe. |
| Input.Consume; |
| FState.LastErrorIndex := Input.Index; |
| FollowSet := ComputeErrorRecoverySet; |
| BeginResync; |
| ConsumeUntil(Input,FollowSet); |
| EndResync; |
| end; |
| |
| // Recovery for a mismatched set. When the mismatch looks like a missing |
| // token, the error E is reported and a conjured symbol of type |
| // INVALID_TOKEN_TYPE is returned; otherwise E is re-raised. |
| function TBaseRecognizer.RecoverFromMismatchedSet(const Input: IIntStream; |
|   const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface; |
| begin |
|   if not MismatchIsMissingToken(Input, Follow) then |
|   begin |
|     // TODO do single token deletion like above for Token mismatch |
|     Result := nil; |
|     raise E; |
|   end; |
| |
|   ReportError(E); |
|   // we don't know how to conjure up a token for sets yet |
|   Result := GetMissingSymbol(Input, E, TToken.INVALID_TOKEN_TYPE, Follow); |
| end; |
| |
| // Tries to recover from a mismatched token without raising. |
| //   1. Single token deletion: if the token after the current one is the |
| //      expected TokenType, the current token is skipped, the error is |
| //      reported, and the expected token is consumed and returned. |
| //   2. Single token insertion: if the current token is consistent with |
| //      Follow, a missing symbol is conjured up via GetMissingSymbol, the |
| //      error is reported and the conjured symbol is returned. |
| //   3. Otherwise EMismatchedTokenException is raised. |
| // The exception objects built for cases 1 and 2 are only handed to |
| // ReportError and never raised, so they are freed here to avoid leaking them. |
| function TBaseRecognizer.RecoverFromMismatchedToken(const Input: IIntStream; |
|   const TokenType: Integer; const Follow: IBitSet): IANTLRInterface; |
| var |
|   E: ERecognitionException; |
| begin |
|   // if next token is what we are looking for then "delete" this token |
|   if MismatchIsUnwantedToken(Input, TokenType) then |
|   begin |
|     // created before consuming so it captures the unwanted token |
|     E := EUnwantedTokenException.Create(TokenType, Input); |
|     try |
|       BeginResync; |
|       Input.Consume; // simply delete extra token |
|       EndResync; |
|       ReportError(E); // report after consuming so AW sees the token in the exception |
|       // we want to return the token we're actually matching |
|       Result := GetCurrentInputSymbol(Input); |
|       Input.Consume; // move past ttype token as if all were ok |
|     finally |
|       E.Free; // never raised, so the RTL will not free it for us |
|     end; |
|   end |
|   else |
|   begin |
|     // can't recover with single token deletion, try insertion |
|     if MismatchIsMissingToken(Input, Follow) then |
|     begin |
|       E := nil; |
|       Result := GetMissingSymbol(Input, E, TokenType, Follow); |
|       E := EMissingTokenException.Create(TokenType, Input, Result); |
|       try |
|         ReportError(E); // report after inserting so AW sees the token in the exception |
|       finally |
|         E.Free; // never raised, so the RTL will not free it for us |
|       end; |
|     end |
|     else |
|     begin |
|       // even that didn't work; must throw the exception |
|       raise EMismatchedTokenException.Create(TokenType, Input); |
|     end; |
|   end; |
| end; |
| |
| // Reports a recognition error unless the recognizer is already in error |
| // recovery mode (an error was reported and no token has been matched |
| // successfully since). Reporting bumps the syntax error counter, enters |
| // error recovery mode and displays the error. |
| procedure TBaseRecognizer.ReportError(const E: ERecognitionException); |
| begin |
|   if FState.ErrorRecovery then |
|     Exit; // spurious follow-up error: neither counted nor displayed |
| |
|   FState.SyntaxErrors := FState.SyntaxErrors + 1; |
|   FState.ErrorRecovery := True; |
|   DisplayRecognitionError(GetTokenNames, E); |
| end; |
| |
| // Resets the shared recognizer state: error recovery bookkeeping, the |
| // backtracking level and every rule memo slot. Does nothing when there is |
| // no shared state object. |
| procedure TBaseRecognizer.Reset; |
| var |
|   Slot: Integer; |
| begin |
|   if not Assigned(FState) then |
|     Exit; // no shared state work to do |
| |
|   // error recovery |
|   FState.FollowingStackPointer := -1; |
|   FState.ErrorRecovery := False; |
|   FState.LastErrorIndex := -1; |
|   FState.Failed := False; |
|   FState.SyntaxErrors := 0; |
| |
|   // backtracking and memoization |
|   FState.Backtracking := 0; |
|   if not Assigned(FState.RuleMemo) then |
|     Exit; |
|   for Slot := 0 to Length(FState.RuleMemo) - 1 do |
|     FState.RuleMemo[Slot] := nil; // wipe cache |
| end; |
| |
| // Maps a list of tokens to a new list holding the Text of each token, in |
| // the same order. Returns nil when Tokens is nil. |
| function TBaseRecognizer.ToStrings(const Tokens: IList<IToken>): IList<String>; |
| var |
|   Item: IToken; |
| begin |
|   Result := nil; |
|   if (Tokens = nil) then |
|     Exit; |
| |
|   Result := TList<String>.Create; |
|   for Item in Tokens do |
|     Result.Add(Item.Text); |
| end; |
| |
| // Writes an "enter <rule> <symbol>" trace record to standard output. |
| // A " failed=True" line is emitted when the shared state is flagged as |
| // failed, and the backtracking level is appended when it is above zero. |
| // RuleIndex is not used. |
| procedure TBaseRecognizer.TraceIn(const RuleName: String; |
|   const RuleIndex: Integer; const InputSymbol: String); |
| begin |
|   Write('enter ', RuleName, ' ', InputSymbol); |
|   if FState.Failed then |
|     WriteLn(' failed=True'); |
|   if (FState.Backtracking > 0) then |
|     Write(' backtracking=', FState.Backtracking); |
|   WriteLn; |
| end; |
| |
| // Writes an "exit <rule> <symbol>" trace record to standard output. |
| // A " failed=True" line is emitted when the shared state is flagged as |
| // failed, and the backtracking level is appended when it is above zero. |
| // RuleIndex is not used. |
| procedure TBaseRecognizer.TraceOut(const RuleName: String; |
|   const RuleIndex: Integer; const InputSymbol: String); |
| begin |
|   Write('exit ', RuleName, ' ', InputSymbol); |
|   if FState.Failed then |
|     WriteLn(' failed=True'); |
|   if (FState.Backtracking > 0) then |
|     Write(' backtracking=', FState.Backtracking); |
|   WriteLn; |
| end; |
| |
| { TCommonTokenStream } |
| |
| // Advances the cursor to the next token on the selected channel. Does |
| // nothing once the cursor is at or past the end of the token buffer. |
| procedure TCommonTokenStream.Consume; |
| begin |
|   if (FP >= FTokens.Count) then |
|     Exit; |
| |
|   Inc(FP); |
|   FP := SkipOffTokenChannels(FP); // leave p on valid token |
| end; |
| |
| // Creates an empty token stream: the cursor is set to -1 (which the other |
| // methods treat as "buffer not filled yet"), the channel to the default |
| // channel, and the token list is created with room for 500 tokens. |
| constructor TCommonTokenStream.Create; |
| begin |
| inherited; |
| FP := -1; |
| FChannel := TToken.DEFAULT_CHANNEL; |
| FTokens := TList<IToken>.Create; |
| FTokens.Capacity := 500; |
| end; |
| |
| // Creates a token stream that pulls its tokens from ATokenSource, using |
| // the defaults set up by the parameterless constructor. |
| constructor TCommonTokenStream.Create(const ATokenSource: ITokenSource); |
| begin |
| Create; |
| FTokenSource := ATokenSource; |
| end; |
| |
| // Sets the flag that makes FillBuffer drop tokens whose channel differs from |
| // the stream's channel instead of buffering them. |
| procedure TCommonTokenStream.DiscardOffChannelTokens(const Discard: Boolean); |
| begin |
| FDiscardOffChannelTokens := Discard; |
| end; |
| |
| // Registers TType as a token type that FillBuffer drops. The discard set is |
| // created lazily on first use; the type is stored as both key and value. |
| procedure TCommonTokenStream.DiscardTokenType(const TType: Integer); |
| begin |
| if (FDiscardSet = nil) then |
| FDiscardSet := THashList<Integer, Integer>.Create; |
| FDiscardSet.Add(TType, TType); |
| end; |
| |
| // Reads every token from the token source up to (not including) EOF or a |
| // nil token and stores the ones that are kept in FTokens. |
| // For each token: |
| //   - a channel override registered for its type is applied first; |
| //   - it is dropped when its type is in the discard set, or when |
| //     off-channel discarding is enabled and its channel is not FChannel; |
| //   - otherwise it gets the next consecutive TokenIndex and is buffered. |
| // Finally the cursor is placed on the first token on FChannel. |
| procedure TCommonTokenStream.FillBuffer; |
| var |
|   NextIndex: Integer; |
|   T: IToken; |
|   Discard: Boolean; |
| begin |
|   NextIndex := 0; |
|   T := FTokenSource.NextToken; |
|   while Assigned(T) and (T.TokenType <> Integer(cscEOF)) do |
|   begin |
|     // is there a channel override for token type? |
|     if Assigned(FChannelOverrideMap) |
|       and FChannelOverrideMap.ContainsKey(T.TokenType) then |
|       T.Channel := FChannelOverrideMap[T.TokenType]; |
| |
|     Discard := |
|       (Assigned(FDiscardSet) and FDiscardSet.ContainsKey(T.TokenType)) |
|       or (FDiscardOffChannelTokens and (T.Channel <> FChannel)); |
| |
|     if (not Discard) then |
|     begin |
|       T.TokenIndex := NextIndex; |
|       FTokens.Add(T); |
|       Inc(NextIndex); |
|     end; |
| |
|     T := FTokenSource.NextToken; |
|   end; |
| |
|   // leave p pointing at first token on channel |
|   FP := 0; |
|   FP := SkipOffTokenChannels(FP); |
| end; |
| |
| // Returns the buffered token at absolute index I. The buffer is not filled |
| // on demand here and no channel filtering is applied. |
| function TCommonTokenStream.Get(const I: Integer): IToken; |
| begin |
| Result := FTokens[I]; |
| end; |
| |
| // Returns the SourceName reported by the underlying token source. |
| function TCommonTokenStream.GetSourceName: String; |
| begin |
| Result := FTokenSource.SourceName; |
| end; |
| |
| // Convenience overload: builds a bit set from the list of token types and |
| // delegates to the IBitSet overload. |
| function TCommonTokenStream.GetTokens(const Start, Stop: Integer; |
| const Types: IList<Integer>): IList<IToken>; |
| begin |
| Result := GetTokens(Start, Stop, TBitSet.Create(Types)); |
| end; |
| |
| // Convenience overload: builds a bit set holding the single TokenType and |
| // delegates to the IBitSet overload. |
| function TCommonTokenStream.GetTokens(const Start, Stop, |
| TokenType: Integer): IList<IToken>; |
| begin |
| Result := GetTokens(Start, Stop, TBitSet.BitSetOf(TokenType)); |
| end; |
| |
| // Returns the buffered tokens with index in Start..Stop (clamped to the |
| // buffer bounds) whose type is a member of Types; a nil Types accepts every |
| // token. The buffer is filled first if necessary. Returns nil when the |
| // clamped range is empty or no token matches. |
| function TCommonTokenStream.GetTokens(const Start, Stop: Integer; |
|   const Types: IBitSet): IList<IToken>; |
| var |
|   I, First, Last: Integer; |
|   T: IToken; |
|   Matches: IList<IToken>; |
| begin |
|   if (FP = -1) then |
|     FillBuffer; |
| |
|   Result := nil; |
|   Last := Min(Stop,FTokens.Count - 1); |
|   First := Max(Start,0); |
|   if (First > Last) then |
|     Exit; |
| |
|   Matches := TList<IToken>.Create; |
|   for I := First to Last do |
|   begin |
|     T := FTokens[I]; |
|     if (Types = nil) or Types.Member(T.TokenType) then |
|       Matches.Add(T); |
|   end; |
| |
|   if (Matches.Count <> 0) then |
|     Result := Matches; |
| end; |
| |
| // Returns the internal token list itself (not a copy), filling the buffer |
| // first if that has not happened yet. |
| function TCommonTokenStream.GetTokens: IList<IToken>; |
| begin |
| if (FP = -1) then |
| FillBuffer; |
| Result := FTokens; |
| end; |
| |
| // Convenience overload: returns the tokens in Start..Stop without any type |
| // filter by passing a nil bit set to the IBitSet overload. |
| function TCommonTokenStream.GetTokens(const Start, |
| Stop: Integer): IList<IToken>; |
| begin |
| Result := GetTokens(Start, Stop, IBitSet(nil)); |
| end; |
| |
| // Returns the token source this stream reads from. |
| function TCommonTokenStream.GetTokenSource: ITokenSource; |
| begin |
| Result := FTokenSource; |
| end; |
| |
| // Returns the current cursor position (FP) in the token buffer; this is -1 |
| // while the buffer has not been filled. |
| function TCommonTokenStream.Index: Integer; |
| begin |
| Result := FP; |
| end; |
| |
| // Returns the token type of the I-th lookahead token, LT(I). |
| // NOTE(review): LT(0) returns nil, so I = 0 would dereference nil here. |
| function TCommonTokenStream.LA(I: Integer): Integer; |
| begin |
| Result := LT(I).TokenType; |
| end; |
| |
| // Returns LA(I) converted to a Char by a plain typecast. |
| function TCommonTokenStream.LAChar(I: Integer): Char; |
| begin |
| Result := Char(LA(I)); |
| end; |
| |
| // Looks backwards K on-channel tokens from the cursor. The buffer is filled |
| // first if necessary. Returns nil when K is 0, when fewer than K tokens |
| // precede the cursor, or when the backwards scan runs off the start of the |
| // buffer. |
| function TCommonTokenStream.LB(const K: Integer): IToken; |
| var |
|   I, N: Integer; |
| begin |
|   if (FP = -1) then |
|     FillBuffer; |
| |
|   Result := nil; |
|   if (K = 0) or ((FP - K) < 0) then |
|     Exit; |
| |
|   // find k good tokens looking backwards, skipping off-channel tokens |
|   I := FP; |
|   for N := 1 to K do |
|     I := SkipOffTokenChannelsReverse(I - 1); // leave p on valid token |
| |
|   if (I >= 0) then |
|     Result := FTokens[I]; |
| end; |
| |
| // Returns the K-th on-channel token relative to the cursor. The buffer is |
| // filled first if necessary. |
| //   K = 0  -> nil |
| //   K < 0  -> looks backwards via LB(-K) |
| //   K > 0  -> the K-th token ahead (K = 1 is the token at the cursor), or |
| //             TToken.EOF_TOKEN when that lies beyond the end of the buffer |
| function TCommonTokenStream.LT(const K: Integer): IToken; |
| var |
|   I, N: Integer; |
| begin |
|   if (FP = -1) then |
|     FillBuffer; |
| |
|   if (K = 0) then |
|     Exit(nil); |
|   if (K < 0) then |
|     Exit(LB(-K)); |
|   if ((FP + K - 1) >= FTokens.Count) then |
|     Exit(TToken.EOF_TOKEN); |
| |
|   // find k good tokens, skipping off-channel tokens (K - 1 steps forward) |
|   I := FP; |
|   for N := 2 to K do |
|     I := SkipOffTokenChannels(I + 1); // leave p on valid token |
| |
|   if (I >= FTokens.Count) then |
|     Result := TToken.EOF_TOKEN |
|   else |
|     Result := FTokens[I]; |
| end; |
| |
| // Remembers the current cursor position as the last marker and returns it. |
| // The buffer is filled first if necessary. |
| function TCommonTokenStream.Mark: Integer; |
| begin |
| if (FP = -1) then |
| FillBuffer; |
| FLastMarker := Index; |
| Result := FLastMarker; |
| end; |
| |
| // Intentionally empty: this stream keeps no per-marker resources. |
| procedure TCommonTokenStream.Release(const Marker: Integer); |
| begin |
| // no resources to release |
| end; |
| |
| // Moves the cursor and the last marker back to index 0. The buffered tokens |
| // are kept. |
| procedure TCommonTokenStream.Reset; |
| begin |
| FP := 0; |
| FLastMarker := 0; |
| end; |
| |
| // Moves the cursor back to the position identified by Marker. |
| procedure TCommonTokenStream.Rewind(const Marker: Integer); |
| begin |
| Seek(Marker); |
| end; |
| |
| // Moves the cursor back to the most recently recorded marker. |
| procedure TCommonTokenStream.Rewind; |
| begin |
| Seek(FLastMarker); |
| end; |
| |
| // Sets the cursor to Index. No bounds or channel checks are performed. |
| procedure TCommonTokenStream.Seek(const Index: Integer); |
| begin |
| FP := Index; |
| end; |
| |
| // Switches to a new token source: clears the buffered tokens, marks the |
| // buffer as unfilled (cursor = -1) and resets the channel to the default. |
| procedure TCommonTokenStream.SetTokenSource(const Value: ITokenSource); |
| begin |
| FTokenSource := Value; |
| FTokens.Clear; |
| FP := -1; |
| FChannel := TToken.DEFAULT_CHANNEL; |
| end; |
| |
| // Registers a channel override for token type TType; FillBuffer applies it |
| // to matching tokens. The override map is created lazily on first use. |
| procedure TCommonTokenStream.SetTokenTypeChannel(const TType, Channel: Integer); |
| begin |
| if (FChannelOverrideMap = nil) then |
| FChannelOverrideMap := TDictionary<Integer, Integer>.Create; |
| FChannelOverrideMap[TType] := Channel; |
| end; |
| |
| // Returns the number of tokens currently buffered. The buffer is not filled |
| // on demand here. |
| function TCommonTokenStream.Size: Integer; |
| begin |
| Result := FTokens.Count; |
| end; |
| |
| // Starting at index I, moves forward past tokens whose channel differs from |
| // FChannel. Returns the index of the first on-channel token at or after I, |
| // or the token count when there is none. |
| function TCommonTokenStream.SkipOffTokenChannels(const I: Integer): Integer; |
| var |
| N: Integer; |
| begin |
| Result := I; |
| N := FTokens.Count; |
| while (Result < N) and (FTokens[Result].Channel <> FChannel) do |
| Inc(Result); |
| end; |
| |
| // Starting at index I, moves backwards past tokens whose channel differs |
| // from FChannel. Returns the index of the first on-channel token at or |
| // before I, or -1 when there is none. |
| function TCommonTokenStream.SkipOffTokenChannelsReverse( |
| const I: Integer): Integer; |
| begin |
| Result := I; |
| while (Result >= 0) and (FTokens[Result].Channel <> FChannel) do |
| Dec(Result); |
| end; |
| |
| // Returns the concatenated text of every buffered token, filling the buffer |
| // first if necessary. |
| function TCommonTokenStream.ToString: String; |
| begin |
| if (FP = -1) then |
| FillBuffer; |
| Result := ToString(0, FTokens.Count - 1); |
| end; |
| |
| // Returns the concatenated text of the buffered tokens with index in |
| // Start..Stop, regardless of channel. Stop is clamped to the last buffered |
| // token. Returns an empty string when Start or Stop is negative. |
| function TCommonTokenStream.ToString(const Start, Stop: Integer): String; |
| var |
|   I, Last: Integer; |
|   Text: TStringBuilder; |
| begin |
|   Result := ''; |
|   if (Start < 0) or (Stop < 0) then |
|     Exit; |
| |
|   if (FP = -1) then |
|     FillBuffer; |
| |
|   Last := Min(Stop, FTokens.Count - 1); |
|   Text := TStringBuilder.Create; |
|   try |
|     for I := Start to Last do |
|       Text.Append(FTokens[I].Text); |
|     Result := Text.ToString; |
|   finally |
|     Text.Free; |
|   end; |
| end; |
| |
| // Returns the concatenated text of the tokens between Start and Stop |
| // (by their TokenIndex), or an empty string when either token is nil. |
| function TCommonTokenStream.ToString(const Start, Stop: IToken): String; |
| begin |
|   Result := ''; |
|   if Assigned(Start) and Assigned(Stop) then |
|     Result := ToString(Start.TokenIndex, Stop.TokenIndex); |
| end; |
| |
| // Creates a token stream over ATokenSource that is tuned to AChannel |
| // instead of the default channel. |
| constructor TCommonTokenStream.Create(const ATokenSource: ITokenSource; |
| const AChannel: Integer); |
| begin |
| Create(ATokenSource); |
| FChannel := AChannel; |
| end; |
| |
| // Creates a token stream that reads from ALexer, queried as ITokenSource. |
| constructor TCommonTokenStream.Create(const ALexer: ILexer); |
| begin |
| Create(ALexer as ITokenSource); |
| end; |
| |
| // Creates a token stream that reads from ALexer (queried as ITokenSource) |
| // and is tuned to AChannel. |
| constructor TCommonTokenStream.Create(const ALexer: ILexer; |
| const AChannel: Integer); |
| begin |
| Create(ALexer as ITokenSource, AChannel); |
| end; |
| |
| { TDFA } |
| |
| // Default description of the decision this DFA implements: 'n/a'. |
| function TDFA.Description: String; |
| begin |
| Result := 'n/a'; |
| end; |
| |
| // Hook called by NoViableAlt just before the exception is raised. The |
| // default implementation does nothing. |
| procedure TDFA.Error(const NVAE: ENoViableAltException); |
| begin |
| // No default implementation |
| end; |
| |
| // Returns the recognizer by casting the stored raw pointer FRecognizer back |
| // to IBaseRecognizer. |
| // NOTE(review): the raw pointer is presumably a non-owning (weak) reference |
| // to avoid a reference cycle - confirm against the field declaration. |
| function TDFA.GetRecognizer: IBaseRecognizer; |
| begin |
| Result := IBaseRecognizer(FRecognizer); |
| end; |
| |
| // Returns the callback that Predict invokes for special states. |
| function TDFA.GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler; |
| begin |
| Result := FSpecialStateTransitionHandler; |
| end; |
| |
| // Handles "no viable alternative" in DFA state S. While backtracking the |
| // recognizer state is merely flagged as failed; otherwise an |
| // ENoViableAltException is created, passed to Error and raised. |
| procedure TDFA.NoViableAlt(const S: Integer; const Input: IIntStream); |
| var |
|   NVAE: ENoViableAltException; |
| begin |
|   if (Recognizer.State.Backtracking > 0) then |
|   begin |
|     Recognizer.State.Failed := True; |
|     Exit; |
|   end; |
| |
|   NVAE := ENoViableAltException.Create(Description, FDecisionNumber, S, Input); |
|   Error(NVAE); |
|   raise NVAE; |
| end; |
| |
| // Runs the DFA over Input, starting in state 0, and returns the predicted |
| // alternative (the FAccept value of the accept state reached). Returns 0 |
| // when NoViableAlt returns without raising (i.e. while backtracking). |
| // The input position is marked on entry and always rewound on exit, so the |
| // prediction itself consumes no input. |
| function TDFA.Predict(const Input: IIntStream): Integer; |
| var |
| Mark, S, SNext, SpecialState: Integer; |
| C: Char; |
| begin |
| Result := 0; |
| Mark := Input.Mark; // remember where decision started in input |
| S := 0; // we always start at s0 |
| try |
| while True do |
| begin |
| // special states delegate the transition to the registered handler |
| SpecialState := FSpecial[S]; |
| if (SpecialState >= 0) then |
| begin |
| S := FSpecialStateTransitionHandler(Self, SpecialState, Input); |
| if (S = -1) then |
| begin |
| NoViableAlt(S, Input); |
| Exit; |
| end; |
| Input.Consume; |
| Continue; |
| end; |
| |
| // accept state reached: report the predicted alternative |
| if (FAccept[S] >= 1) then |
| begin |
| Result := FAccept[S]; |
| Exit; |
| end; |
| |
| // look for a normal char transition |
| C := Char(Input.LA(1)); // -1 == \uFFFF, all tokens fit in 65000 space |
| if (C >= FMin[S]) and (C <= FMax[S]) then |
| begin |
| SNext := FTransition[S,Integer(C) - Integer(FMin[S])]; // move to next state |
| if (SNext < 0) then |
| begin |
| // was in range but not a normal transition |
| // must check EOT, which is like the else clause. |
| // eot[s]>=0 indicates that an EOT edge goes to another |
| // state. |
| if (FEOT[S] >= 0) then // EOT Transition to accept state? |
| begin |
| S := FEOT[S]; |
| Input.Consume; |
| // TODO: I had this as return accept[eot[s]] |
| // which assumed here that the EOT edge always |
| // went to an accept...faster to do this, but |
| // what about predicated edges coming from EOT |
| // target? |
| Continue; |
| end; |
| |
| NoViableAlt(S, Input); |
| Exit; |
| end; |
| S := SNext; |
| Input.Consume; |
| Continue; |
| end; |
| |
| // symbol is outside [FMin[S], FMax[S]]: try the EOT edge, then EOF |
| if (FEOT[S] >= 0) then |
| begin |
| // EOT Transition? |
| S := FEOT[S]; |
| Input.Consume; |
| Continue; |
| end; |
| |
| if (C = Char(TToken.EOF)) and (FEOF[S] >= 0) then |
| begin |
| // EOF Transition to accept state? |
| Result := FAccept[FEOF[S]]; |
| Exit; |
| end; |
| |
| // not in range and not EOF/EOT, must be invalid symbol |
| NoViableAlt(S, Input); |
| Exit; |
| end; |
| finally |
| // undo every Consume performed above, also when an exception escapes |
| Input.Rewind(Mark); |
| end; |
| end; |
| |
| // Stores the recognizer as a raw Pointer, so no interface reference count |
| // is taken by this field. |
| procedure TDFA.SetRecognizer(const Value: IBaseRecognizer); |
| begin |
| FRecognizer := Pointer(Value); |
| end; |
| |
| // Installs the callback that Predict invokes for special states. |
| procedure TDFA.SetSpecialStateTransitionHandler( |
| const Value: TSpecialStateTransitionHandler); |
| begin |
| FSpecialStateTransitionHandler := Value; |
| end; |
| |
| // Default special-state transition: always returns -1. Both parameters are |
| // ignored. |
| function TDFA.SpecialStateTransition(const S: Integer; |
| const Input: IIntStream): Integer; |
| begin |
| // No default implementation |
| Result := -1; |
| end; |
| |
| // Default implementation: always returns 0. Both parameters are ignored. |
| function TDFA.SpecialTransition(const State, Symbol: Integer): Integer; |
| begin |
| Result := 0; |
| end; |
| |
| // Expands a run-length encoded string into an array of Smallint. |
| // The string is read as (count, value) character pairs: each value, cast |
| // to Smallint, is repeated count times in the result. |
| class function TDFA.UnpackEncodedString( |
|   const EncodedString: String): TSmallintArray; |
| var |
|   Pair, PairCount, Rep, Dest, Total: Integer; |
|   Value: Smallint; |
| begin |
|   // pairs start at the odd (1-based) positions 1, 3, 5, ... |
|   PairCount := (Length(EncodedString) + 1) div 2; |
| |
|   // first pass: the sum of all counts is the size of the result |
|   Total := 0; |
|   for Pair := 0 to PairCount - 1 do |
|     Inc(Total, Ord(EncodedString[2 * Pair + 1])); |
|   SetLength(Result, Total); |
| |
|   // second pass: write each value count times |
|   Dest := 0; |
|   for Pair := 0 to PairCount - 1 do |
|   begin |
|     Value := Smallint(EncodedString[2 * Pair + 2]); |
|     for Rep := 1 to Ord(EncodedString[2 * Pair + 1]) do |
|     begin |
|       Result[Dest] := Value; |
|       Inc(Dest); |
|     end; |
|   end; |
| end; |
| |
| // Unpacks every run-length encoded string of the open array with |
| // UnpackEncodedString; row I of the result corresponds to element I. |
| class function TDFA.UnpackEncodedStringArray( |
|   const EncodedStrings: array of String): TSmallintMatrix; |
| var |
|   Row: Integer; |
| begin |
|   SetLength(Result, Length(EncodedStrings)); |
|   for Row := Low(EncodedStrings) to High(EncodedStrings) do |
|     Result[Row] := UnpackEncodedString(EncodedStrings[Row]); |
| end; |
| |
| // Unpacks every run-length encoded string of the dynamic array with |
| // UnpackEncodedString; row I of the result corresponds to element I. |
| class function TDFA.UnpackEncodedStringArray( |
|   const EncodedStrings: TStringArray): TSmallintMatrix; |
| var |
|   Row: Integer; |
| begin |
|   SetLength(Result, Length(EncodedStrings)); |
|   for Row := Low(EncodedStrings) to High(EncodedStrings) do |
|     Result[Row] := UnpackEncodedString(EncodedStrings[Row]); |
| end; |
| |
| // Expands a run-length encoded string into an array of Char. |
| // The string is read as (count, value) character pairs: each value is |
| // repeated count times in the result. |
| class function TDFA.UnpackEncodedStringToUnsignedChars( |
|   const EncodedString: String): TCharArray; |
| var |
|   Pair, PairCount, Rep, Dest, Total: Integer; |
|   Value: Char; |
| begin |
|   // pairs start at the odd (1-based) positions 1, 3, 5, ... |
|   PairCount := (Length(EncodedString) + 1) div 2; |
| |
|   // first pass: the sum of all counts is the size of the result |
|   Total := 0; |
|   for Pair := 0 to PairCount - 1 do |
|     Inc(Total, Ord(EncodedString[2 * Pair + 1])); |
|   SetLength(Result, Total); |
| |
|   // second pass: write each value count times |
|   Dest := 0; |
|   for Pair := 0 to PairCount - 1 do |
|   begin |
|     Value := EncodedString[2 * Pair + 2]; |
|     for Rep := 1 to Ord(EncodedString[2 * Pair + 1]) do |
|     begin |
|       Result[Dest] := Value; |
|       Inc(Dest); |
|     end; |
|   end; |
| end; |
| |
| { TLexer } |
| |
| // Creates a lexer without an input stream; only the inherited constructor |
| // runs. |
| constructor TLexer.Create; |
| begin |
| inherited; |
| end; |
| |
| // Creates a lexer that reads characters from AInput. |
| constructor TLexer.Create(const AInput: ICharStream); |
| begin |
| inherited Create; |
| FInput := AInput; |
| end; |
| |
| // Creates a lexer that reads characters from AInput and passes AState on |
| // to the inherited constructor as its recognizer state. |
| constructor TLexer.Create(const AInput: ICharStream; |
| const AState: IRecognizerSharedState); |
| begin |
| inherited Create(AState); |
| FInput := AInput; |
| end; |
| |
| // Builds a TCommonToken from the current lexer state (token type, channel, |
| // start index up to the character before the current index, start line, |
| // start column and text), stores it as the current token through |
| // Emit(Token) and returns it. |
| function TLexer.Emit: IToken; |
| begin |
| Result := TCommonToken.Create(FInput, FState.TokenType, FState.Channel, |
| FState.TokenStartCharIndex, GetCharIndex - 1); |
| Result.Line := FState.TokenStartLine; |
| Result.Text := FState.Text; |
| Result.CharPositionInLine := FState.TokenStartCharPositionInLine; |
| Emit(Result); |
| end; |
| |
| // Stores Token in the shared state as the token produced by the current |
| // rule invocation; NextToken returns it. |
| procedure TLexer.Emit(const Token: IToken); |
| begin |
| FState.Token := Token; |
| end; |
| |
| // Returns a single-quoted, printable form of character code C for use in |
| // error messages: EOF becomes <EOF>, and line feed, tab and carriage return |
| // are shown as the two-character escapes \n, \t and \r. |
| function TLexer.GetCharErrorDisplay(const C: Integer): String; |
| var |
|   Display: String; |
| begin |
|   if (C = TOKEN_dot_EOF) then // TToken.EOF |
|     Display := '<EOF>' |
|   else if (C = 10) then |
|     Display := '\n' |
|   else if (C = 9) then |
|     Display := '\t' |
|   else if (C = 13) then |
|     Display := '\r' |
|   else |
|     Display := Char(C); |
|   Result := '''' + Display + ''''; |
| end; |
| |
| // Returns the current index of the character input stream. |
| function TLexer.GetCharIndex: Integer; |
| begin |
| Result := FInput.Index; |
| end; |
| |
| // Returns the character position within the current line, as reported by |
| // the input stream. |
| function TLexer.GetCharPositionInLine: Integer; |
| begin |
| Result := FInput.CharPositionInLine; |
| end; |
| |
| // Returns the character stream this lexer reads from. |
| function TLexer.GetCharStream: ICharStream; |
| begin |
| Result := FInput; |
| end; |
| |
| // Builds the character-level error message for E. The exception classes |
| // are tested in a fixed order (token, no-viable-alt, early-exit, not-set, |
| // set, range); anything else is handled by the inherited implementation. |
| function TLexer.GetErrorMessage(const E: ERecognitionException; |
|   const TokenNames: TStringArray): String; |
| begin |
|   if (E is EMismatchedTokenException) then |
|     Exit('mismatched character ' + GetCharErrorDisplay(E.Character) |
|       + ' expecting ' |
|       + GetCharErrorDisplay(EMismatchedTokenException(E).Expecting)); |
| |
|   // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" |
|   // and "(decision="+nvae.decisionNumber+") and |
|   // "state "+nvae.stateNumber |
|   if (E is ENoViableAltException) then |
|     Exit('no viable alternative at character ' |
|       + GetCharErrorDisplay(ENoViableAltException(E).Character)); |
| |
|   // for development, can add "(decision="+eee.decisionNumber+")" |
|   if (E is EEarlyExitException) then |
|     Exit('required (...)+ loop did not match anything at character ' |
|       + GetCharErrorDisplay(EEarlyExitException(E).Character)); |
| |
|   if (E is EMismatchedNotSetException) then |
|     Exit('mismatched character ' |
|       + GetCharErrorDisplay(EMismatchedNotSetException(E).Character) |
|       + ' expecting set ' + EMismatchedNotSetException(E).Expecting.ToString); |
| |
|   if (E is EMismatchedSetException) then |
|     Exit('mismatched character ' |
|       + GetCharErrorDisplay(EMismatchedSetException(E).Character) |
|       + ' expecting set ' + EMismatchedSetException(E).Expecting.ToString); |
| |
|   if (E is EMismatchedRangeException) then |
|     Exit('mismatched character ' |
|       + GetCharErrorDisplay(EMismatchedRangeException(E).Character) |
|       + ' expecting set ' + GetCharErrorDisplay(EMismatchedRangeException(E).A) |
|       + '..' + GetCharErrorDisplay(EMismatchedRangeException(E).B)); |
| |
|   Result := inherited GetErrorMessage(E, TokenNames); |
| end; |
| |
| // Returns the lexer's character stream as an IIntStream. |
| function TLexer.GetInput: IIntStream; |
| begin |
| Result := FInput; |
| end; |
| |
| // Returns the current line number as reported by the input stream. |
| function TLexer.GetLine: Integer; |
| begin |
| Result := FInput.Line; |
| end; |
| |
| // Returns the SourceName reported by the input stream. |
| function TLexer.GetSourceName: String; |
| begin |
| Result := FInput.SourceName; |
| end; |
| |
| // Returns the text of the token being matched: the text override stored in |
| // the shared state when it is non-empty, otherwise the input characters |
| // from the token start up to the character before the current index. |
| function TLexer.GetText: String; |
| begin |
|   if (FState.Text = '') then |
|     Exit(FInput.Substring(FState.TokenStartCharIndex, GetCharIndex - 1)); |
|   Result := FState.Text; |
| end; |
| |
| // Matches the characters of S one by one against the input. |
| // On the first mismatch: while backtracking, the state is flagged as failed |
| // and the method returns; otherwise an EMismatchedTokenException is |
| // created, passed to Recover and raised. Each matched character is consumed |
| // and clears the Failed flag. |
| procedure TLexer.Match(const S: String); |
| var |
|   I, Expected: Integer; |
|   MTE: EMismatchedTokenException; |
| begin |
|   for I := 1 to Length(S) do |
|   begin |
|     Expected := Integer(S[I]); |
|     if (FInput.LA(1) = Expected) then |
|     begin |
|       FInput.Consume; |
|       FState.Failed := False; |
|       Continue; |
|     end; |
| |
|     if (FState.Backtracking > 0) then |
|     begin |
|       FState.Failed := True; |
|       Exit; |
|     end; |
| |
|     MTE := EMismatchedTokenException.Create(Expected, FInput); |
|     Recover(MTE); // don't really recover; just consume in lexer |
|     raise MTE; |
|   end; |
| end; |
| |
| // Matches the single character code C against the next input character. |
| // On success the character is consumed and the Failed flag is cleared. |
| // On mismatch: while backtracking, the state is flagged as failed; |
| // otherwise an EMismatchedTokenException is created, passed to Recover and |
| // raised. |
| procedure TLexer.Match(const C: Integer); |
| var |
|   MTE: EMismatchedTokenException; |
| begin |
|   if (FInput.LA(1) = C) then |
|   begin |
|     FInput.Consume; |
|     FState.Failed := False; |
|     Exit; |
|   end; |
| |
|   if (FState.Backtracking > 0) then |
|   begin |
|     FState.Failed := True; |
|     Exit; |
|   end; |
| |
|   MTE := EMismatchedTokenException.Create(C, FInput); |
|   Recover(MTE); |
|   raise MTE; |
| end; |
| |
| // Matches any character by consuming one character from the input. |
| procedure TLexer.MatchAny; |
| begin |
| FInput.Consume; |
| end; |
| |
| // Matches the next input character against the inclusive range A..B. |
| // On success the character is consumed and the Failed flag is cleared. |
| // On mismatch: while backtracking, the state is flagged as failed; |
| // otherwise an EMismatchedRangeException is created, passed to Recover and |
| // raised. |
| procedure TLexer.MatchRange(const A, B: Integer); |
| var |
|   MRE: EMismatchedRangeException; |
| begin |
|   if (FInput.LA(1) >= A) and (FInput.LA(1) <= B) then |
|   begin |
|     FInput.Consume; |
|     FState.Failed := False; |
|     Exit; |
|   end; |
| |
|   if (FState.Backtracking > 0) then |
|   begin |
|     FState.Failed := True; |
|     Exit; |
|   end; |
| |
|   MRE := EMismatchedRangeException.Create(A, B, FInput); |
|   Recover(MRE); |
|   raise MRE; |
| end; |
| |
| // Returns the next token from the character stream. |
| // Each loop iteration resets the per-token state, returns TToken.EOF_TOKEN |
| // when the input is at EOF, and otherwise runs DoTokens. A rule that |
| // produced no token gets one via Emit; a rule that marked the token as |
| // SKIP_TOKEN makes the loop start over. Recognition errors are reported and |
| // the loop then tries again. |
| function TLexer.NextToken: IToken; |
| begin |
| while True do |
| begin |
| // start of a new token: reset the per-token state |
| FState.Token := nil; |
| FState.Channel := TToken.DEFAULT_CHANNEL; |
| FState.TokenStartCharIndex := FInput.Index; |
| FState.TokenStartCharPositionInLine := FInput.CharPositionInLine; |
| FState.TokenStartLine := Finput.Line; |
| FState.Text := ''; |
| if (FInput.LA(1) = Integer(cscEOF)) then |
| begin |
| Result := TToken.EOF_TOKEN; |
| Exit; |
| end; |
| |
| try |
| DoTokens; |
| if (FState.Token = nil) then |
| Emit |
| else |
| if (FState.Token = TToken.SKIP_TOKEN) then |
| Continue; |
| Exit(FState.Token); |
| except |
| // the more specific handler must come first |
| on NVA: ENoViableAltException do |
| begin |
| ReportError(NVA); |
| Recover(NVA); // throw out current char and try again |
| end; |
| |
| on RE: ERecognitionException do |
| begin |
| ReportError(RE); |
| // Match() routine has already called Recover() |
| end; |
| end; |
| end; |
| end; |
| |
| // Lexer error recovery: simply consumes one character. RE is not used. |
| procedure TLexer.Recover(const RE: ERecognitionException); |
| begin |
| FInput.Consume; |
| end; |
| |
| // Displays the recognition error E. Unlike TBaseRecognizer.ReportError in |
| // this unit, this does not check or update the error-recovery flag or the |
| // syntax error counter. |
| procedure TLexer.ReportError(const E: ERecognitionException); |
| begin |
| DisplayRecognitionError(GetTokenNames, E); |
| end; |
| |
| // Resets the lexer: runs the inherited reset, rewinds the input stream to |
| // position 0 (when there is one) and clears the lexer-specific fields of |
| // the shared state (when there is one). |
| procedure TLexer.Reset; |
| begin |
|   inherited; // reset all recognizer state variables |
| |
|   if Assigned(FInput) then |
|     FInput.Seek(0); // rewind the input |
| |
|   // wack Lexer state variables; nothing to do without a shared state |
|   if Assigned(FState) then |
|   begin |
|     FState.Token := nil; |
|     FState.TokenType := TToken.INVALID_TOKEN_TYPE; |
|     FState.Channel := TToken.DEFAULT_CHANNEL; |
|     FState.TokenStartCharIndex := -1; |
|     FState.TokenStartCharPositionInLine := -1; |
|     FState.TokenStartLine := -1; |
|     FState.Text := ''; |
|   end; |
| end; |
| |
| // Replaces the input stream. FInput is cleared before Reset is called, so |
| // Reset does not seek on the old stream, and the new stream is installed |
| // afterwards, so it is not rewound either. |
| procedure TLexer.SetCharStream(const Value: ICharStream); |
| begin |
| FInput := nil; |
| Reset; |
| FInput := Value; |
| end; |
| |
| // Overrides the text of the token being matched; GetText and Emit use this |
| // value from the shared state. |
| procedure TLexer.SetText(const Value: String); |
| begin |
| FState.Text := Value; |
| end; |
| |
| // Marks the current token as TToken.SKIP_TOKEN, which makes NextToken |
| // discard it and look for another token. |
| procedure TLexer.Skip; |
| begin |
| FState.Token := TToken.SKIP_TOKEN; |
| end; |
| |
| // Traces rule entry. The input symbol passed to the inherited TraceIn is |
| // the next input character followed by " line=<line>:<column>". |
| procedure TLexer.TraceIn(const RuleName: String; const RuleIndex: Integer); |
| var |
| InputSymbol: String; |
| begin |
| InputSymbol := Char(FInput.LT(1)) + ' line=' + IntToStr(GetLine) + ':' |
| + IntToStr(GetCharPositionInLine); |
| inherited TraceIn(RuleName, RuleIndex, InputSymbol); |
| end; |
| |
| // Traces rule exit. The input symbol passed to the inherited TraceOut is |
| // the next input character followed by " line=<line>:<column>". |
| procedure TLexer.TraceOut(const RuleName: String; const RuleIndex: Integer); |
| var |
| InputSymbol: String; |
| begin |
| InputSymbol := Char(FInput.LT(1)) + ' line=' + IntToStr(GetLine) + ':' |
| + IntToStr(GetCharPositionInLine); |
| inherited TraceOut(RuleName, RuleIndex, InputSymbol); |
| end; |
| |
| { TParser } |
| |
| // Creates a parser that reads tokens from AInput. The stream is installed |
| // through SetTokenStream. |
| constructor TParser.Create(const AInput: ITokenStream); |
| begin |
| inherited Create; // highlight that we go to base class to set state object |
| SetTokenStream(AInput); |
| end; |
| |
| // Creates a parser that reads tokens from AInput and uses the given shared |
| // state object. The stream is installed through SetTokenStream. |
| constructor TParser.Create(const AInput: ITokenStream; |
| const AState: IRecognizerSharedState); |
| begin |
| inherited Create(AState); // share the state object with another parser |
| SetTokenStream(AInput); |
| end; |
| |
| // Returns the current lookahead token, LT(1). |
| // Note that the token is read from the parser's own stream (FInput); the |
| // Input parameter is not used. |
| function TParser.GetCurrentInputSymbol( |
| const Input: IIntStream): IANTLRInterface; |
| begin |
| Result := FInput.LT(1) |
| end; |
| |
| // Returns the parser's token stream as an IIntStream. |
| function TParser.GetInput: IIntStream; |
| begin |
| Result := FInput; |
| end; |
| |
| // Conjures up a token of type ExpectedTokenType for single-token-insertion |
| // recovery. Its text is '<missing NAME>' and its line/column are copied |
| // from the current token, or from the previous token when the current one |
| // is EOF. Input, E and Follow are not used; the parser's own stream |
| // (FInput) is consulted. |
| // When the current token is EOF and there is no previous token (LT(-1) is |
| // nil, e.g. EOF is the very first token), the position of the EOF token |
| // itself is used instead of dereferencing nil. |
| function TParser.GetMissingSymbol(const Input: IIntStream; |
|   const E: ERecognitionException; const ExpectedTokenType: Integer; |
|   const Follow: IBitSet): IANTLRInterface; |
| var |
|   TokenText: String; |
|   T: ICommonToken; |
|   Current, Previous: IToken; |
| begin |
|   if (ExpectedTokenType = TToken.EOF) then |
|     TokenText := '<missing EOF>' |
|   else |
|     TokenText := '<missing ' + GetTokenNames[ExpectedTokenType] + '>'; |
|   T := TCommonToken.Create(ExpectedTokenType, TokenText); |
|   Current := FInput.LT(1); |
|   if (Current.TokenType = TToken.EOF) then |
|   begin |
|     // prefer the position of the last real token, if there is one |
|     Previous := FInput.LT(-1); |
|     if Assigned(Previous) then |
|       Current := Previous; |
|   end; |
|   T.Line := Current.Line; |
|   T.CharPositionInLine := Current.CharPositionInLine; |
|   T.Channel := DEFAULT_TOKEN_CHANNEL; |
|   Result := T; |
| end; |
| |
| // Returns the SourceName reported by the token stream. |
| function TParser.GetSourceName: String; |
| begin |
| Result := FInput.SourceName; |
| end; |
| |
| // Returns the token stream this parser reads from. |
| function TParser.GetTokenStream: ITokenStream; |
| begin |
| Result := FInput; |
| end; |
| |
| // Resets the parser: runs the inherited reset and, when a token stream is |
| // attached, rewinds it to position 0. |
| procedure TParser.Reset; |
| begin |
| inherited; // reset all recognizer state variables |
| if Assigned(FInput) then |
| FInput.Seek(0); // rewind the input |
| end; |
| |
| // Replaces the token stream. FInput is cleared before Reset is called, so |
| // Reset does not seek on the old stream, and the new stream is installed |
| // afterwards, so it is not rewound either. |
| procedure TParser.SetTokenStream(const Value: ITokenStream); |
| begin |
| FInput := nil; |
| Reset; |
| FInput := Value; |
| end; |
| |
| // Traces rule entry, using the string form of the current lookahead token |
| // LT(1) as the input symbol. |
| procedure TParser.TraceIn(const RuleName: String; const RuleIndex: Integer); |
| begin |
| inherited TraceIn(RuleName, RuleIndex, FInput.LT(1).ToString); |
| end; |
| |
| // Traces rule exit, using the string form of the current lookahead token |
| // LT(1) as the input symbol. |
| procedure TParser.TraceOut(const RuleName: String; const RuleIndex: Integer); |
| begin |
| inherited TraceOut(RuleName, RuleIndex, FInput.LT(1).ToString); |
| end; |
| |
| { TRuleReturnScope } |
| |
| // Base implementation: there is no start element, so nil is returned. |
| function TRuleReturnScope.GetStart: IANTLRInterface; |
| begin |
| Result := nil; |
| end; |
| |
| // Base implementation: there is no stop element, so nil is returned. |
| function TRuleReturnScope.GetStop: IANTLRInterface; |
| begin |
| Result := nil; |
| end; |
| |
| // Base implementation: there is no template, so nil is returned. |
| function TRuleReturnScope.GetTemplate: IANTLRInterface; |
| begin |
| Result := nil; |
| end; |
| |
| // Base implementation: there is no tree, so nil is returned. |
| function TRuleReturnScope.GetTree: IANTLRInterface; |
| begin |
| Result := nil; |
| end; |
| |
| // Base implementation: the Start property is read-only here, so this |
| // always raises EInvalidOperation. |
| procedure TRuleReturnScope.SetStart(const Value: IANTLRInterface); |
| begin |
| raise EInvalidOperation.Create('Setter has not been defined for this property.'); |
| end; |
| |
| // Base implementation: the Stop property is read-only here, so this |
| // always raises EInvalidOperation. |
| procedure TRuleReturnScope.SetStop(const Value: IANTLRInterface); |
| begin |
| raise EInvalidOperation.Create('Setter has not been defined for this property.'); |
| end; |
| |
| // Base implementation: the Tree property is read-only here, so this |
| // always raises EInvalidOperation. |
| procedure TRuleReturnScope.SetTree(const Value: IANTLRInterface); |
| begin |
| raise EInvalidOperation.Create('Setter has not been defined for this property.'); |
| end; |
| |
| { TParserRuleReturnScope } |
| |
| // Returns the stored start token of the rule. |
| function TParserRuleReturnScope.GetStart: IANTLRInterface; |
| begin |
| Result := FStart; |
| end; |
| |
| // Returns the stored stop token of the rule. |
| function TParserRuleReturnScope.GetStop: IANTLRInterface; |
| begin |
| Result := FStop; |
| end; |
| |
| // Stores Value as the start token; Value is queried for IToken with "as". |
| procedure TParserRuleReturnScope.SetStart(const Value: IANTLRInterface); |
| begin |
| FStart := Value as IToken; |
| end; |
| |
| // Stores Value as the stop token; Value is queried for IToken with "as". |
| procedure TParserRuleReturnScope.SetStop(const Value: IANTLRInterface); |
| begin |
| FStop := Value as IToken; |
| end; |
| |
| { TTokenRewriteStream } |
| |
| // Deletes the token range Start..Stop in the default rewrite program. |
| procedure TTokenRewriteStream.Delete(const Start, Stop: IToken); |
| begin |
| Delete(DEFAULT_PROGRAM_NAME, Start, Stop); |
| end; |
| |
| // Deletes the single token IndexT in the default rewrite program. |
| procedure TTokenRewriteStream.Delete(const IndexT: IToken); |
| begin |
| Delete(DEFAULT_PROGRAM_NAME, IndexT, IndexT); |
| end; |
| |
| // Creates an empty rewrite stream; Init runs after the inherited |
| // constructor. |
| constructor TTokenRewriteStream.Create; |
| begin |
| inherited; |
| Init; |
| end; |
| |
| // Creates a rewrite stream over ATokenSource; Init runs after the inherited |
| // constructor. |
| constructor TTokenRewriteStream.Create(const ATokenSource: ITokenSource); |
| begin |
| inherited Create(ATokenSource); |
| Init; |
| end; |
| |
| // Creates a rewrite stream that reads from ALexer, queried as ITokenSource. |
| constructor TTokenRewriteStream.Create(const ALexer: ILexer); |
| begin |
| Create(ALexer as ITokenSource); |
| end; |
| |
| // Creates a rewrite stream that reads from ALexer (queried as ITokenSource) |
| // and is tuned to AChannel. |
| constructor TTokenRewriteStream.Create(const ALexer: ILexer; |
| const AChannel: Integer); |
| begin |
| Create(ALexer as ITokenSource, AChannel); |
| end; |
| |
| // Concatenates the string forms of A and B (a nil operand contributes an |
| // empty string) and returns the result wrapped in a TANTLRString. |
| function TTokenRewriteStream.CatOpText(const A, B: IANTLRInterface): IANTLRInterface; |
| var |
|   Left, Right: String; |
| begin |
|   Left := ''; |
|   if Assigned(A) then |
|     Left := A.ToString; |
| |
|   Right := ''; |
|   if Assigned(B) then |
|     Right := B.ToString; |
| |
|   Result := TANTLRString.Create(Left + Right); |
| end; |
| |
| // Constructs the stream over ATokenSource on channel AChannel via the |
| // inherited constructor, then calls Init for the rewrite bookkeeping. |
| constructor TTokenRewriteStream.Create(const ATokenSource: ITokenSource; |
|   const AChannel: Integer); |
| begin |
|   inherited Create(ATokenSource, AChannel); |
|   Init; |
| end; |
| |
| // Deletes tokens Start..Stop in program ProgramName. A delete is recorded |
| // as a replace whose replacement text is nil. |
| procedure TTokenRewriteStream.Delete(const ProgramName: String; const Start, |
|   Stop: IToken); |
| begin |
|   Replace(ProgramName, Start, Stop, nil); |
| end; |
| |
| // Deletes token indexes Start..Stop in program ProgramName. A delete is |
| // recorded as a replace whose replacement text is nil. |
| procedure TTokenRewriteStream.Delete(const ProgramName: String; const Start, |
|   Stop: Integer); |
| begin |
|   Replace(ProgramName, Start, Stop, nil); |
| end; |
| |
| // Deletes token indexes Start..Stop in the default rewrite program. |
| procedure TTokenRewriteStream.Delete(const Start, Stop: Integer); |
| begin |
|   // Forward to the program-aware overload using the default program name. |
|   Delete(DEFAULT_PROGRAM_NAME, Start, Stop); |
| end; |
| |
| // Deletes the single token at Index in the default rewrite program. |
| procedure TTokenRewriteStream.Delete(const Index: Integer); |
| begin |
|   // A one-token range: Index is both start and stop. |
|   Delete(DEFAULT_PROGRAM_NAME, Index, Index); |
| end; |
| |
| // Resets program ProgramName by rolling it back to MIN_TOKEN_INDEX. |
| procedure TTokenRewriteStream.DeleteProgram(const ProgramName: String); |
| begin |
|   Rollback(ProgramName, MIN_TOKEN_INDEX); |
| end; |
| |
| // Resets the default rewrite program. |
| procedure TTokenRewriteStream.DeleteProgram; |
| begin |
|   DeleteProgram(DEFAULT_PROGRAM_NAME); |
| end; |
| |
| // Returns the last rewrite token index recorded for the default program. |
| function TTokenRewriteStream.GetLastRewriteTokenIndex: Integer; |
| begin |
|   Result := GetLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME); |
| end; |
| |
| // Returns every operation in Rewrites that supports interface Kind. |
| // Scans the whole list by passing Rewrites.Count as the upper bound. |
| function TTokenRewriteStream.GetKindOfOps( |
|   const Rewrites: IList<IRewriteOperation>; |
|   const Kind: TGUID): IList<IRewriteOperation>; |
| begin |
|   Result := GetKindOfOps(Rewrites, Kind, Rewrites.Count); |
| end; |
| |
| // Returns a new list holding the non-nil operations found at positions |
| // 0 .. Before-1 of Rewrites (clamped to the list size) that support the |
| // interface identified by Kind. Order of the source list is preserved. |
| function TTokenRewriteStream.GetKindOfOps( |
|   const Rewrites: IList<IRewriteOperation>; const Kind: TGUID; |
|   const Before: Integer): IList<IRewriteOperation>; |
| var |
|   Idx, Limit: Integer; |
|   Candidate: IRewriteOperation; |
|   Queried: IInterface; |
| begin |
|   Result := TList<IRewriteOperation>.Create; |
|   // Never look past the end of the list, whatever Before says. |
|   Limit := Min(Before, Rewrites.Count); |
|   for Idx := 0 to Limit - 1 do |
|   begin |
|     Candidate := Rewrites[Idx]; |
|     // Supports is False for nil and otherwise reports QueryInterface success. |
|     if Supports(Candidate, Kind, Queried) then |
|       Result.Add(Candidate); |
|   end; |
| end; |
| |
| // Returns the last rewrite token index recorded for ProgramName, or -1 |
| // when nothing has been recorded for that program. |
| function TTokenRewriteStream.GetLastRewriteTokenIndex( |
|   const ProgramName: String): Integer; |
| var |
|   Found: Boolean; |
| begin |
|   Found := FLastRewriteTokenIndexes.TryGetValue(ProgramName, Result); |
|   if (not Found) then |
|     Result := -1; |
| end; |
| |
| // Returns the instruction list registered under Name. When no list is |
| // registered (or the stored value is nil) a fresh one is created through |
| // InitializeProgram and returned. |
| function TTokenRewriteStream.GetProgram( |
|   const Name: String): IList<IRewriteOperation>; |
| var |
|   InstructionStream: IList<IRewriteOperation>; |
| begin |
|   // Look the program up with TryGetValue instead of the indexed getter so |
|   // an unknown name reaches the lazy-creation branch below rather than |
|   // depending on how the dictionary reacts to a missing key. |
|   if (not FPrograms.TryGetValue(Name, InstructionStream)) then |
|     InstructionStream := nil; |
|   if (InstructionStream = nil) then |
|     InstructionStream := InitializeProgram(Name); |
|   Result := InstructionStream; |
| end; |
| |
| // Inserts Text after token T in program ProgramName, using the token's |
| // TokenIndex as the position. |
| procedure TTokenRewriteStream.InsertAfter(const ProgramName: String; |
|   const T: IToken; const Text: IANTLRInterface); |
| begin |
|   InsertAfter(ProgramName, T.TokenIndex, Text); |
| end; |
| |
| // Creates the rewrite bookkeeping: the program dictionary, pre-populated |
| // with an empty default program of capacity PROGRAM_INIT_SIZE, and the |
| // per-program "last rewrite token index" dictionary. |
| procedure TTokenRewriteStream.Init; |
| var |
|   DefaultProgram: IList<IRewriteOperation>; |
| begin |
|   DefaultProgram := TList<IRewriteOperation>.Create; |
|   DefaultProgram.Capacity := PROGRAM_INIT_SIZE; |
| |
|   FPrograms := TDictionary<String, IList<IRewriteOperation>>.Create; |
|   FPrograms.Add(DEFAULT_PROGRAM_NAME, DefaultProgram); |
| |
|   FLastRewriteTokenIndexes := TDictionary<String, Integer>.Create; |
| end; |
| |
| // Creates an empty instruction list with capacity PROGRAM_INIT_SIZE, |
| // registers it under Name (replacing any list already stored there) and |
| // returns it. |
| function TTokenRewriteStream.InitializeProgram( |
|   const Name: String): IList<IRewriteOperation>; |
| begin |
|   Result := TList<IRewriteOperation>.Create; |
|   Result.Capacity := PROGRAM_INIT_SIZE; |
|   // A brand-new program name is registered with Add; the indexed setter is |
|   // only used to overwrite an existing entry, so this does not rely on the |
|   // setter creating missing keys. |
|   if FPrograms.ContainsKey(Name) then |
|     FPrograms[Name] := Result |
|   else |
|     FPrograms.Add(Name, Result); |
| end; |
| |
| // Inserts Text after token index Index in program ProgramName. |
| procedure TTokenRewriteStream.InsertAfter(const ProgramName: String; |
|   const Index: Integer; const Text: IANTLRInterface); |
| var |
|   NextIndex: Integer; |
| begin |
|   // Inserting after a token is an insert before the following index, |
|   // even when that index lies past the end. |
|   NextIndex := Index + 1; |
|   InsertBefore(ProgramName, NextIndex, Text); |
| end; |
| |
| // Inserts Text after token T in the default rewrite program. |
| procedure TTokenRewriteStream.InsertAfter(const T: IToken; |
|   const Text: IANTLRInterface); |
| begin |
|   InsertAfter(DEFAULT_PROGRAM_NAME, T, Text); |
| end; |
| |
| // Inserts Text after token index Index in the default rewrite program. |
| procedure TTokenRewriteStream.InsertAfter(const Index: Integer; |
|   const Text: IANTLRInterface); |
| begin |
|   InsertAfter(DEFAULT_PROGRAM_NAME, Index, Text); |
| end; |
| |
| // Inserts Text before token index Index in the default rewrite program. |
| procedure TTokenRewriteStream.InsertBefore(const Index: Integer; |
|   const Text: IANTLRInterface); |
| begin |
|   InsertBefore(DEFAULT_PROGRAM_NAME, Index, Text); |
| end; |
| |
| // Inserts Text before token T in program ProgramName, using the token's |
| // TokenIndex as the position. |
| procedure TTokenRewriteStream.InsertBefore(const ProgramName: String; |
|   const T: IToken; const Text: IANTLRInterface); |
| begin |
|   InsertBefore(ProgramName, T.TokenIndex, Text); |
| end; |
| |
| // Appends an insert-before operation for token index Index, carrying |
| // Text, to program ProgramName (the program is created on first use by |
| // GetProgram). |
| procedure TTokenRewriteStream.InsertBefore(const ProgramName: String; |
|   const Index: Integer; const Text: IANTLRInterface); |
| var |
|   Op: IRewriteOperation; |
|   Rewrites: IList<IRewriteOperation>; |
| begin |
|   Op := TInsertBeforeOp.Create(Index, Text, Self); |
|   Rewrites := GetProgram(ProgramName); |
|   // Record the op's position in the instruction list, exactly as Replace |
|   // does. ReduceToSingleOperationPerIndex nils out obsolete inserts via |
|   // InstructionIndex, so leaving it at its default would delete the wrong |
|   // instruction. |
|   Op.InstructionIndex := Rewrites.Count; |
|   Rewrites.Add(Op); |
| end; |
| |
| // Inserts Text before token T in the default rewrite program. |
| procedure TTokenRewriteStream.InsertBefore(const T: IToken; |
|   const Text: IANTLRInterface); |
| begin |
|   InsertBefore(DEFAULT_PROGRAM_NAME, T, Text); |
| end; |
| |
| // Replaces the tokens Start..Stop with Text in the default rewrite |
| // program. |
| procedure TTokenRewriteStream.Replace(const Start, Stop: IToken; |
|   const Text: IANTLRInterface); |
| begin |
|   // Pass the real range; previously Stop was passed for both ends, so only |
|   // the last token of the range was replaced. |
|   Replace(DEFAULT_PROGRAM_NAME, Start, Stop, Text); |
| end; |
| |
| // Replaces the single token IndexT with Text in the default program. |
| procedure TTokenRewriteStream.Replace(const IndexT: IToken; |
|   const Text: IANTLRInterface); |
| begin |
|   // A one-token range: the same token is both start and stop. |
|   Replace(DEFAULT_PROGRAM_NAME, IndexT, IndexT, Text); |
| end; |
| |
| // Appends a replace operation covering token indexes Start..Stop, with |
| // replacement Text, to program ProgramName. |
| // Raises EArgumentOutOfRangeException when the range is reversed, |
| // negative, or Stop is not below the current token count. |
| procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start, |
|   Stop: Integer; const Text: IANTLRInterface); |
| var |
|   NewOp: IRewriteOperation; |
|   ProgramOps: IList<IRewriteOperation>; |
| begin |
|   if (Start > Stop) or (Start < 0) or (Stop < 0) or (Stop >= GetTokens.Count) then |
|   begin |
|     raise EArgumentOutOfRangeException.Create('replace: range invalid: ' |
|       + IntToStr(Start) + '..' + IntToStr(Stop) + '(size=' |
|       + IntToStr(GetTokens.Count) + ')'); |
|   end; |
| |
|   ProgramOps := GetProgram(ProgramName); |
|   NewOp := TReplaceOp.Create(Start, Stop, Text, Self); |
|   // The op remembers its own slot in the instruction list. |
|   NewOp.InstructionIndex := ProgramOps.Count; |
|   ProgramOps.Add(NewOp); |
| end; |
| |
| // Collapses the instruction list Rewrites so that at most one operation |
| // remains per token index, and returns those operations keyed by Op.Index. |
| // Superseded entries are set to nil in Rewrites itself (the list is |
| // modified in place). |
| // |
| // Pass 1 (replaces): for each replace op, earlier inserts whose index lies |
| // inside its range and earlier replaces fully contained in its range are |
| // dropped; an earlier replace that only partially overlaps raises |
| // EArgumentOutOfRangeException. |
| // Pass 2 (inserts): for each insert op, earlier inserts at the same index |
| // are merged into it; an earlier replace starting at the same index absorbs |
| // the insert's text; an insert strictly inside an earlier replace raises |
| // EArgumentOutOfRangeException. |
| // Finally raises Exception if two surviving ops share an index. |
| // |
| // NOTE(review): entries are removed via each op's InstructionIndex, so this |
| // assumes InstructionIndex equals the op's position in Rewrites - confirm |
| // that every code path that appends an op sets it. |
| function TTokenRewriteStream.ReduceToSingleOperationPerIndex( |
| const Rewrites: IList<IRewriteOperation>): IDictionary<Integer, IRewriteOperation>; |
| var |
| I, J: Integer; |
| Op: IRewriteOperation; |
| ROp, PrevROp: IReplaceOp; |
| IOp, PrevIOp: IInsertBeforeOp; |
| Inserts, PrevInserts, PrevReplaces: IList<IRewriteOperation>; |
| Disjoint, Same: Boolean; |
| begin |
| // WALK REPLACES |
| for I := 0 to Rewrites.Count - 1 do |
| begin |
| Op := Rewrites[I]; |
| if (Op = nil) then |
| Continue; |
| // Supports also assigns ROp when the op is a replace |
| if (not Supports(Op, IReplaceOp, ROp)) then |
| Continue; |
| |
| // Wipe prior inserts within range |
| Inserts := GetKindOfOps(Rewrites, IInsertBeforeOp, I); |
| for J := 0 to Inserts.Count - 1 do |
| begin |
| IOp := Inserts[J] as IInsertBeforeOp; |
| if (IOp.Index >= ROp.Index) and (IOp.Index <= ROp.LastIndex) then |
| begin |
| // delete insert as it's a no-op. |
| Rewrites[IOp.InstructionIndex] := nil; |
| end; |
| end; |
| |
| // Drop any prior replaces contained within |
| PrevReplaces := GetKindOfOps(Rewrites, IReplaceOp, I); |
| for J := 0 to PrevReplaces.Count - 1 do |
| begin |
| PrevROp := PrevReplaces[J] as IReplaceOp; |
| if (PrevROp.Index >= ROp.Index) and (PrevROp.LastIndex <= ROp.LastIndex) then |
| begin |
| // delete replace as it's a no-op. |
| Rewrites[PrevROp.InstructionIndex] := nil; |
| // moves on to the next prior replace (inner J loop) |
| Continue; |
| end; |
| // throw exception unless disjoint or identical |
| Disjoint := (PrevROp.LastIndex < ROp.Index) or (PrevROp.Index > ROp.LastIndex); |
| Same := (PrevROp.Index = ROp.Index) and (PrevROp.LastIndex = ROp.LastIndex); |
| if (not Disjoint) and (not Same) then |
| raise EArgumentOutOfRangeException.Create('replace of boundaries of ' |
| + ROp.ToString + ' overlap with previous ' + PrevROp.ToString); |
| end; |
| end; |
| |
| // WALK INSERTS |
| for I := 0 to Rewrites.Count - 1 do |
| begin |
| Op := Rewrites[I]; |
| if (Op = nil) then |
| Continue; |
| // Supports also assigns IOp when the op is an insert |
| if (not Supports(Op, IInsertBeforeOp, IOp)) then |
| Continue; |
| |
| // combine current insert with prior if any at same index |
| PrevInserts := GetKindOfOps(Rewrites, IInsertBeforeOp, I); |
| for J := 0 to PrevInserts.Count - 1 do |
| begin |
| PrevIOp := PrevInserts[J] as IInsertBeforeOp; |
| if (PrevIOp.Index = IOp.Index) then |
| begin |
| // combine objects |
| // convert to strings...we're in process of toString'ing |
| // whole token buffer so no lazy eval issue with any templates |
| // the current (later) insert's text is placed before the prior one's |
| IOp.Text := CatOpText(IOp.Text, PrevIOp.Text); |
| // delete redundant prior insert |
| Rewrites[PrevIOp.InstructionIndex] := nil; |
| end; |
| end; |
| |
| // look for replaces where iop.index is in range; error |
| PrevReplaces := GetKindOfOps(Rewrites, IReplaceOp, I); |
| for J := 0 to PrevReplaces.Count - 1 do |
| begin |
| Rop := PrevReplaces[J] as IReplaceOp; |
| if (IOp.Index = ROp.Index) then |
| begin |
| // the insert's text is prepended to the replace's text |
| ROp.Text := CatOpText(IOp.Text, ROp.Text); |
| Rewrites[I] := nil; // delete current insert |
| // moves on to the next prior replace (inner J loop) |
| Continue; |
| end; |
| if (IOp.Index >= ROp.Index) and (IOp.Index <= ROp.LastIndex) then |
| raise EArgumentOutOfRangeException.Create('insert op ' |
| + IOp.ToString + ' within boundaries of previous ' + ROp.ToString); |
| end; |
| end; |
| |
| // Build the index -> operation map from the surviving (non-nil) ops |
| Result := TDictionary<Integer, IRewriteOperation>.Create; |
| for Op in Rewrites do |
| begin |
| if (Op = nil) then |
| Continue; // ignore deleted ops |
| if (Result.ContainsKey(Op.Index)) then |
| raise Exception.Create('should only be one op per index'); |
| Result.Add(Op.Index, Op); |
| end; |
| end; |
| |
| // Replaces tokens Start..Stop with Text in program ProgramName, using |
| // each token's TokenIndex as the range boundary. |
| procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start, |
|   Stop: IToken; const Text: IANTLRInterface); |
| begin |
|   Replace(ProgramName, Start.TokenIndex, Stop.TokenIndex, Text); |
| end; |
| |
| // Replaces the single token at Index with Text in the default program. |
| procedure TTokenRewriteStream.Replace(const Index: Integer; |
|   const Text: IANTLRInterface); |
| begin |
|   // A one-token range: Index is both start and stop. |
|   Replace(DEFAULT_PROGRAM_NAME, Index, Index, Text); |
| end; |
| |
| // Replaces token indexes Start..Stop with Text in the default program. |
| procedure TTokenRewriteStream.Replace(const Start, Stop: Integer; |
|   const Text: IANTLRInterface); |
| begin |
|   Replace(DEFAULT_PROGRAM_NAME, Start, Stop, Text); |
| end; |
| |
| // Rolls the default rewrite program back to InstructionIndex. |
| procedure TTokenRewriteStream.Rollback(const InstructionIndex: Integer); |
| begin |
|   Rollback(DEFAULT_PROGRAM_NAME, InstructionIndex); |
| end; |
| |
| // Truncates program ProgramName: its instruction list is replaced by the |
| // sub-range that starts at MIN_TOKEN_INDEX and holds |
| // InstructionIndex - MIN_TOKEN_INDEX entries. Does nothing when no |
| // program is registered under ProgramName. |
| procedure TTokenRewriteStream.Rollback(const ProgramName: String; |
|   const InstructionIndex: Integer); |
| var |
|   InstructionStream: IList<IRewriteOperation>; |
| begin |
|   // TryGetValue instead of the indexed getter, so an unknown program name |
|   // reaches the "nothing to do" path rather than depending on how the |
|   // dictionary reacts to a missing key. |
|   if (not FPrograms.TryGetValue(ProgramName, InstructionStream)) then |
|     InstructionStream := nil; |
|   if Assigned(InstructionStream) then |
|     FPrograms[ProgramName] := InstructionStream.GetRange(MIN_TOKEN_INDEX, |
|       InstructionIndex - MIN_TOKEN_INDEX); |
| end; |
| |
| // Records I as the last rewrite token index for program ProgramName, |
| // overwriting any value stored earlier. |
| procedure TTokenRewriteStream.SetLastRewriteTokenIndex( |
|   const ProgramName: String; const I: Integer); |
| begin |
|   // The first value for a program is registered with Add; the indexed |
|   // setter is only used to overwrite, so this does not rely on the setter |
|   // creating missing keys. |
|   if FLastRewriteTokenIndexes.ContainsKey(ProgramName) then |
|     FLastRewriteTokenIndexes[ProgramName] := I |
|   else |
|     FLastRewriteTokenIndexes.Add(ProgramName, I); |
| end; |
| |
| // Debug dump of the whole buffer: indexes MIN_TOKEN_INDEX .. Size - 1. |
| function TTokenRewriteStream.ToDebugString: String; |
| begin |
|   Result := ToDebugString(MIN_TOKEN_INDEX, Size - 1); |
| end; |
| |
| // Returns the concatenated ToString output of the tokens at indexes |
| // Start .. Stop (Stop is clamped to the last token). Returns an empty |
| // string when Start is below MIN_TOKEN_INDEX. |
| function TTokenRewriteStream.ToDebugString(const Start, Stop: Integer): String; |
| var |
|   Buf: TStringBuilder; |
|   I: Integer; |
| begin |
|   Buf := TStringBuilder.Create; |
|   try |
|     if (Start >= MIN_TOKEN_INDEX) then |
|       for I := Start to Min(Stop, GetTokens.Count - 1) do |
|         Buf.Append(Get(I).ToString); |
|     // Hand the accumulated text back; previously the buffer was built and |
|     // then freed without ever being assigned to Result. |
|     Result := Buf.ToString; |
|   finally |
|     Buf.Free; |
|   end; |
| end; |
| |
| // Original (un-rewritten) text of the whole buffer: indexes |
| // MIN_TOKEN_INDEX .. Size - 1. |
| function TTokenRewriteStream.ToOriginalString: String; |
| begin |
|   Result := ToOriginalString(MIN_TOKEN_INDEX, Size - 1); |
| end; |
| |
| // Returns the concatenated Text of the tokens at indexes Start .. Stop |
| // (Stop is clamped to the last token), ignoring all rewrite programs. |
| // Returns an empty string when Start is below MIN_TOKEN_INDEX. |
| function TTokenRewriteStream.ToOriginalString(const Start, |
|   Stop: Integer): String; |
| var |
|   Sb: TStringBuilder; |
|   Idx, LastIdx: Integer; |
| begin |
|   Sb := TStringBuilder.Create; |
|   try |
|     if (Start >= MIN_TOKEN_INDEX) then |
|     begin |
|       LastIdx := Min(Stop, GetTokens.Count - 1); |
|       for Idx := Start to LastIdx do |
|         Sb.Append(Get(Idx).Text); |
|     end; |
|     Result := Sb.ToString; |
|   finally |
|     Sb.Free; |
|   end; |
| end; |
| |
| // Rewritten text of the whole buffer (indexes MIN_TOKEN_INDEX .. Size - 1) |
| // via the (Start, Stop) overload. |
| function TTokenRewriteStream.ToString: String; |
| begin |
|   Result := ToString(MIN_TOKEN_INDEX, Size - 1); |
| end; |
| |
| // Rewritten text of the whole buffer (indexes MIN_TOKEN_INDEX .. Size - 1) |
| // using program ProgramName. |
| function TTokenRewriteStream.ToString(const ProgramName: String): String; |
| begin |
|   Result := ToString(ProgramName, MIN_TOKEN_INDEX, Size - 1); |
| end; |
| |
| // Renders token indexes Start .. Stop after applying rewrite program |
| // ProgramName. Start is clamped to 0 and Stop to the last token index. |
| // When the program is unknown or empty the original token text is |
| // returned. Otherwise the program is first reduced to one operation per |
| // token index (ReduceToSingleOperationPerIndex, which may raise on |
| // conflicting operations) and then executed while walking the tokens. |
| function TTokenRewriteStream.ToString(const ProgramName: String; const Start, |
|   Stop: Integer): String; |
| var |
|   Rewrites: IList<IRewriteOperation>; |
|   I, StartIndex, StopIndex: Integer; |
|   IndexToOp: IDictionary<Integer, IRewriteOperation>; |
|   Buf: TStringBuilder; |
|   Tokens: IList<IToken>; |
|   T: IToken; |
|   Op: IRewriteOperation; |
|   Pair: TPair<Integer, IRewriteOperation>; |
| begin |
|   // TryGetValue instead of the indexed getter, so an unknown program name |
|   // falls through to the "no instructions" path below rather than |
|   // depending on how the dictionary reacts to a missing key. |
|   if (not FPrograms.TryGetValue(ProgramName, Rewrites)) then |
|     Rewrites := nil; |
|   Tokens := GetTokens; |
|   // ensure start/end are in range |
|   StopIndex := Min(Stop, Tokens.Count - 1); |
|   StartIndex := Max(Start, 0); |
| |
|   if (Rewrites = nil) or (Rewrites.Count = 0) then |
|   begin |
|     // no instructions to execute |
|     Result := ToOriginalString(StartIndex, StopIndex); |
|     Exit; |
|   end; |
| |
|   Buf := TStringBuilder.Create; |
|   try |
|     // First, optimize instruction stream |
|     IndexToOp := ReduceToSingleOperationPerIndex(Rewrites); |
| |
|     // Walk buffer, executing instructions and emitting tokens |
|     I := StartIndex; |
|     while (I <= StopIndex) and (I < Tokens.Count) do |
|     begin |
|       if (not IndexToOp.TryGetValue(I, Op)) then |
|         Op := nil; |
|       IndexToOp.Remove(I); // remove so any left have index size-1 |
|       T := Tokens[I]; |
|       if (Op = nil) then |
|       begin |
|         // no operation at that index, just dump token |
|         Buf.Append(T.Text); |
|         Inc(I); // move to next token |
|       end |
|       else |
|         I := Op.Execute(Buf); // execute operation and skip |
|     end; |
| |
|     // include stuff after end if it's last index in buffer |
|     // So, if they did an insertAfter(lastValidIndex, "foo"), include |
|     // foo if end==lastValidIndex. |
|     if (StopIndex = Tokens.Count - 1) then |
|     begin |
|       // Scan any remaining operations after last token |
|       // should be included (they will be inserts). |
|       for Pair in IndexToOp do |
|       begin |
|         // Skip ops without text (e.g. a delete left unexecuted) instead of |
|         // dereferencing a nil Text. |
|         if (Pair.Value.Index >= Tokens.Count - 1) |
|           and Assigned(Pair.Value.Text) then |
|           Buf.Append(Pair.Value.Text.ToString); |
|       end; |
|     end; |
|     Result := Buf.ToString; |
|   finally |
|     Buf.Free; |
|   end; |
| end; |
| |
| // Rewritten text of token indexes Start .. Stop using the default program. |
| function TTokenRewriteStream.ToString(const Start, Stop: Integer): String; |
| begin |
|   Result := ToString(DEFAULT_PROGRAM_NAME, Start, Stop); |
| end; |
| |
| // String convenience overload: wraps Text in a TANTLRString and forwards |
| // to InsertBefore(Index, <interface text>). |
| procedure TTokenRewriteStream.InsertBefore(const Index: Integer; |
|   const Text: String); |
| var |
|   Wrapped: IANTLRString; |
| begin |
|   Wrapped := TANTLRString.Create(Text); |
|   InsertBefore(Index, Wrapped); |
| end; |
| |
| // String convenience overload: wraps Text in a TANTLRString and forwards |
| // to InsertBefore(T, <interface text>). |
| procedure TTokenRewriteStream.InsertBefore(const T: IToken; const Text: String); |
| var |
|   Wrapped: IANTLRString; |
| begin |
|   Wrapped := TANTLRString.Create(Text); |
|   InsertBefore(T, Wrapped); |
| end; |
| |
| // String convenience overload: wraps Text in a TANTLRString and forwards |
| // to InsertBefore(ProgramName, Index, <interface text>). |
| procedure TTokenRewriteStream.InsertBefore(const ProgramName: String; |
|   const Index: Integer; const Text: String); |
| var |
|   Wrapped: IANTLRString; |
| begin |
|   Wrapped := TANTLRString.Create(Text); |
|   InsertBefore(ProgramName, Index, Wrapped); |
| end; |
| |
| // String convenience overload: wraps Text in a TANTLRString and forwards |
| // to InsertBefore(ProgramName, T, <interface text>). |
| procedure TTokenRewriteStream.InsertBefore(const ProgramName: String; |
|   const T: IToken; const Text: String); |
| var |
|   Wrapped: IANTLRString; |
| begin |
|   Wrapped := TANTLRString.Create(Text); |
|   InsertBefore(ProgramName, T, Wrapped); |
| end; |
| |
| // String convenience overload: wraps Text in a TANTLRString and forwards |
| // to InsertAfter(Index, <interface text>). |
| procedure TTokenRewriteStream.InsertAfter(const Index: Integer; |
|   const Text: String); |
| var |
|   Wrapped: IANTLRString; |
| begin |
|   Wrapped := TANTLRString.Create(Text); |
|   InsertAfter(Index, Wrapped); |
| end; |
| |
| // String convenience overload: wraps Text in a TANTLRString and forwards |
| // to InsertAfter(T, <interface text>). |
| procedure TTokenRewriteStream.InsertAfter(const T: IToken; const Text: String); |
| var |
|   Wrapped: IANTLRString; |
| begin |
|   Wrapped := TANTLRString.Create(Text); |
|   InsertAfter(T, Wrapped); |
| end; |
| |
| // String convenience overload: wraps Text in a TANTLRString and forwards |
| // to InsertAfter(ProgramName, Index, <interface text>). |
| procedure TTokenRewriteStream.InsertAfter(const ProgramName: String; |
|   const Index: Integer; const Text: String); |
| var |
|   Wrapped: IANTLRString; |
| begin |
|   Wrapped := TANTLRString.Create(Text); |
|   InsertAfter(ProgramName, Index, Wrapped); |
| end; |
| |
| // String convenience overload: wraps Text in a TANTLRString and forwards |
| // to InsertAfter(ProgramName, T, <interface text>). |
| procedure TTokenRewriteStream.InsertAfter(const ProgramName: String; |
|   const T: IToken; const Text: String); |
| var |
|   Wrapped: IANTLRString; |
| begin |
|   Wrapped := TANTLRString.Create(Text); |
|   InsertAfter(ProgramName, T, Wrapped); |
| end; |
| |
| // String convenience overload: wraps Text in a TANTLRString and forwards |
| // to Replace(IndexT, <interface text>). |
| procedure TTokenRewriteStream.Replace(const IndexT: IToken; const Text: String); |
| var |
|   Wrapped: IANTLRString; |
| begin |
|   Wrapped := TANTLRString.Create(Text); |
|   Replace(IndexT, Wrapped); |
| end; |
| |
| // String convenience overload: wraps Text in a TANTLRString and forwards |
| // to Replace(Start, Stop, <interface text>) for integer indexes. |
| procedure TTokenRewriteStream.Replace(const Start, Stop: Integer; |
|   const Text: String); |
| var |
|   Wrapped: IANTLRString; |
| begin |
|   Wrapped := TANTLRString.Create(Text); |
|   Replace(Start, Stop, Wrapped); |
| end; |
| |
| // String convenience overload: wraps Text in a TANTLRString and forwards |
| // to Replace(Index, <interface text>). |
| procedure TTokenRewriteStream.Replace(const Index: Integer; const Text: String); |
| var |
|   Wrapped: IANTLRString; |
| begin |
|   Wrapped := TANTLRString.Create(Text); |
|   Replace(Index, Wrapped); |
| end; |
| |
| // String convenience overload: wraps Text in a TANTLRString and forwards |
| // to Replace(ProgramName, Start, Stop, <interface text>) for tokens. |
| procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start, |
|   Stop: IToken; const Text: String); |
| var |
|   Wrapped: IANTLRString; |
| begin |
|   Wrapped := TANTLRString.Create(Text); |
|   Replace(ProgramName, Start, Stop, Wrapped); |
| end; |
| |
| // String convenience overload: wraps Text in a TANTLRString and forwards |
| // to Replace(ProgramName, Start, Stop, <interface text>) for integer |
| // indexes. |
| procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start, |
|   Stop: Integer; const Text: String); |
| var |
|   Wrapped: IANTLRString; |
| begin |
|   Wrapped := TANTLRString.Create(Text); |
|   Replace(ProgramName, Start, Stop, Wrapped); |
| end; |
| |
| // String convenience overload: wraps Text in a TANTLRString and forwards |
| // to Replace(Start, Stop, <interface text>) for tokens. |
| procedure TTokenRewriteStream.Replace(const Start, Stop: IToken; |
|   const Text: String); |
| var |
|   Wrapped: IANTLRString; |
| begin |
|   Wrapped := TANTLRString.Create(Text); |
|   Replace(Start, Stop, Wrapped); |
| end; |
| |
| { TTokenRewriteStream.TRewriteOperation } |
| |
| // Creates an operation at token index AIndex carrying AText. |
| // AParent is kept as a raw pointer, i.e. without taking an interface |
| // reference to the owning stream. |
| constructor TTokenRewriteStream.TRewriteOperation.Create(const AIndex: Integer; |
|   const AText: IANTLRInterface; const AParent: ITokenRewriteStream); |
| begin |
|   inherited Create; |
|   FParent := Pointer(AParent); |
|   FText := AText; |
|   FIndex := AIndex; |
| end; |
| |
| // Base implementation: writes nothing to Buf and returns the operation's |
| // own token index. |
| function TTokenRewriteStream.TRewriteOperation.Execute( |
|   const Buf: TStringBuilder): Integer; |
| begin |
|   Result := FIndex; |
| end; |
| |
| // Getter for the token index this operation applies to. |
| function TTokenRewriteStream.TRewriteOperation.GetIndex: Integer; |
| begin |
|   Result := FIndex; |
| end; |
| |
| // Getter for the stored instruction index (FInstructionIndex). |
| function TTokenRewriteStream.TRewriteOperation.GetInstructionIndex: Integer; |
| begin |
|   Result := FInstructionIndex; |
| end; |
| |
| // Returns the owning stream by casting the stored raw pointer back to |
| // ITokenRewriteStream. |
| function TTokenRewriteStream.TRewriteOperation.GetParent: ITokenRewriteStream; |
| begin |
|   Result := ITokenRewriteStream(FParent); |
| end; |
| |
| // Getter for the text object carried by this operation (may be nil). |
| function TTokenRewriteStream.TRewriteOperation.GetText: IANTLRInterface; |
| begin |
|   Result := FText; |
| end; |
| |
| // Setter for the token index this operation applies to. |
| procedure TTokenRewriteStream.TRewriteOperation.SetIndex(const Value: Integer); |
| begin |
|   FIndex := Value; |
| end; |
| |
| // Setter for the stored instruction index (FInstructionIndex). |
| procedure TTokenRewriteStream.TRewriteOperation.SetInstructionIndex( |
|   const Value: Integer); |
| begin |
|   FInstructionIndex := Value; |
| end; |
| |
| // Stores the owning stream as a raw pointer, i.e. without taking an |
| // interface reference to it. |
| procedure TTokenRewriteStream.TRewriteOperation.SetParent( |
|   const Value: ITokenRewriteStream); |
| begin |
|   FParent := Pointer(Value); |
| end; |
| |
| // Setter for the text object carried by this operation. |
| procedure TTokenRewriteStream.TRewriteOperation.SetText( |
|   const Value: IANTLRInterface); |
| begin |
|   FText := Value; |
| end; |
| |
| // Returns a debug description of the form <OpName@Index:"Text">. |
| // OpName is the class name; if it contains a '$', only the part after the |
| // first '$' is used. A nil text is rendered as an empty string. |
| function TTokenRewriteStream.TRewriteOperation.ToString: String; |
| var |
|   OpName, TextStr: String; |
|   DollarPos: Integer; |
| begin |
|   OpName := ClassName; |
|   DollarPos := Pos('$', OpName); // 1-based position, 0 when not present |
|   if (DollarPos > 0) then |
|     // Keep everything after the '$'. The earlier arithmetic started on the |
|     // '$' itself and dropped the final character. |
|     OpName := Copy(OpName, DollarPos + 1, MaxInt); |
| |
|   // Operations may carry no text (Delete records a replace with nil text), |
|   // so guard against dereferencing nil. |
|   if Assigned(FText) then |
|     TextStr := FText.ToString |
|   else |
|     TextStr := ''; |
| |
|   Result := '<' + OpName + '@' + IntToStr(FIndex) + ':"' + TextStr + '">'; |
| end; |
| |
| { TTokenRewriteStream.TRewriteOpComparer<T> } |
| |
| // Orders two operations by their token index: -1 when Left comes first, |
| // 1 when Right comes first, 0 when both have the same index. |
| function TTokenRewriteStream.TRewriteOpComparer<T>.Compare(const Left, |
|   Right: T): Integer; |
| begin |
|   Result := 0; |
|   if (Left.GetIndex < Right.GetIndex) then |
|     Result := -1 |
|   else if (Left.GetIndex > Right.GetIndex) then |
|     Result := 1; |
| end; |
| |
| { TTokenRewriteStream.TInsertBeforeOp } |
| |
| // Emits the inserted text followed by the text of the token at Index, |
| // and returns the index of the next token to process (Index + 1). |
| function TTokenRewriteStream.TInsertBeforeOp.Execute( |
|   const Buf: TStringBuilder): Integer; |
| begin |
|   // inserted text first ... |
|   Buf.Append(Text.ToString); |
|   // ... then the original token it was inserted before |
|   Buf.Append(Parent.Get(Index).Text); |
|   Result := Succ(Index); |
| end; |
| |
| { TTokenRewriteStream.TReplaceOp } |
| |
| // Creates a replace operation for token indexes AStart..AStop. AStart |
| // becomes the operation's Index (via the inherited constructor) and AStop |
| // its LastIndex. |
| constructor TTokenRewriteStream.TReplaceOp.Create(const AStart, AStop: Integer; |
|   const AText: IANTLRInterface; const AParent: ITokenRewriteStream); |
| begin |
|   inherited Create(AStart, AText, AParent); |
|   FLastIndex := AStop; |
| end; |
| |
| // Emits the replacement text, if any (a nil text emits nothing), and |
| // returns the first index after the replaced range (LastIndex + 1). |
| function TTokenRewriteStream.TReplaceOp.Execute( |
|   const Buf: TStringBuilder): Integer; |
| begin |
|   Result := FLastIndex + 1; |
|   if (Text = nil) then |
|     Exit; |
|   Buf.Append(Text.ToString); |
| end; |
| |
| // Getter for the last token index covered by this replace operation. |
| function TTokenRewriteStream.TReplaceOp.GetLastIndex: Integer; |
| begin |
|   Result := FLastIndex; |
| end; |
| |
| // Setter for the last token index covered by this replace operation. |
| procedure TTokenRewriteStream.TReplaceOp.SetLastIndex(const Value: Integer); |
| begin |
|   FLastIndex := Value; |
| end; |
| |
| // Returns a debug description of the form <ReplaceOp@Index..LastIndex:"Text">. |
| // A nil text is rendered as an empty string. |
| function TTokenRewriteStream.TReplaceOp.ToString: String; |
| var |
|   TextStr: String; |
| begin |
|   // Delete records a replace with nil text, and this method is used while |
|   // building overlap error messages, so a nil Text must not be dereferenced. |
|   if Assigned(Text) then |
|     TextStr := Text.ToString |
|   else |
|     TextStr := ''; |
|   Result := '<ReplaceOp@' + IntToStr(Index) + '..' + IntToStr(FLastIndex) |
|     + ':"' + TextStr + '">'; |
| end; |
| |
| { TTokenRewriteStream.TDeleteOp } |
| |
| // Returns a debug description of the form <DeleteOp@Index..LastIndex>. |
| function TTokenRewriteStream.TDeleteOp.ToString: String; |
| var |
|   RangeText: String; |
| begin |
|   RangeText := IntToStr(Index) + '..' + IntToStr(FLastIndex); |
|   Result := '<DeleteOp@' + RangeText + '>'; |
| end; |
| |
| { Utilities } |
| |
| var |
| // Shared fallback instances, created on first use by the Def functions |
| // and returned by them when they are given nil. |
| EmptyToken: IToken = nil; |
| EmptyRuleReturnScope: IRuleReturnScope = nil; |
| |
| // Returns X when it is assigned; otherwise returns the shared EmptyToken, |
| // creating it as a TCommonToken on first use. |
| function Def(const X: IToken): IToken; overload; |
| begin |
|   Result := X; |
|   if Assigned(Result) then |
|     Exit; |
| |
|   // nil input: hand out the lazily created shared fallback |
|   if (EmptyToken = nil) then |
|     EmptyToken := TCommonToken.Create; |
|   Result := EmptyToken; |
| end; |
| |
| // Returns X when it is assigned; otherwise returns the shared |
| // EmptyRuleReturnScope, creating it as a TRuleReturnScope on first use. |
| function Def(const X: IRuleReturnScope): IRuleReturnScope; |
| begin |
|   Result := X; |
|   if Assigned(Result) then |
|     Exit; |
| |
|   // nil input: hand out the lazily created shared fallback |
|   if (EmptyRuleReturnScope = nil) then |
|     EmptyRuleReturnScope := TRuleReturnScope.Create; |
|   Result := EmptyRuleReturnScope; |
| end; |
| |
| initialization |
| // Unit start-up: calls TToken.Initialize once when the unit is initialized. |
| // NOTE(review): presumably sets up shared token state - confirm in TToken. |
| TToken.Initialize; |
| |
| end. |