| // [The "BSD licence"] |
| // Copyright (c) 2006-2007 Kay Roepke 2010 Alan Condit |
| // All rights reserved. |
| // |
| // Redistribution and use in source and binary forms, with or without |
| // modification, are permitted provided that the following conditions |
| // are met: |
| // 1. Redistributions of source code must retain the above copyright |
| // notice, this list of conditions and the following disclaimer. |
| // 2. Redistributions in binary form must reproduce the above copyright |
| // notice, this list of conditions and the following disclaimer in the |
| // documentation and/or other materials provided with the distribution. |
| // 3. The name of the author may not be used to endorse or promote products |
| // derived from this software without specific prior written permission. |
| // |
| // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| #import <ANTLR/antlr.h> |
| #import "ANTLRLexer.h" |
| |
| @implementation ANTLRLexer |
| |
| @synthesize input; |
| @synthesize ruleNestingLevel; |
| #pragma mark Initializer |
| |
/** Create a lexer reading from anInput, backed by a newly allocated
 *  ANTLRRecognizerSharedState.
 *
 *  @param anInput the character stream to tokenize; retained by the lexer.
 *  @return the initialized lexer, or nil when the superclass initializer
 *          returned nil.
 */
- (id) initWithCharStream:(id<ANTLRCharStream>)anInput
{
    // NOTE(review): whether -initWithState: takes ownership of this +1
    // allocation is not visible in this file -- confirm it does not leak.
    ANTLRRecognizerSharedState *freshState = [[ANTLRRecognizerSharedState alloc] init];
    self = [super initWithState:freshState];
    if ( self == nil ) {
        return nil;
    }
    input = [anInput retain];
    ruleNestingLevel = 0;
    // If the shared state already carries a token, point it at this stream.
    if ( state.token != nil ) {
        ANTLRCommonToken *pendingToken = (ANTLRCommonToken *)state.token;
        [pendingToken setInput:anInput];
    }
    return self;
}
| |
/** Create a lexer reading from anInput that uses the caller-supplied
 *  recognizer state.
 *
 *  @param anInput the character stream to tokenize; retained by the lexer.
 *  @param aState  the shared state handed to the superclass initializer.
 *  @return the initialized lexer, or nil when the superclass initializer
 *          returned nil.
 */
- (id) initWithCharStream:(id<ANTLRCharStream>)anInput State:(ANTLRRecognizerSharedState *)aState
{
    self = [super initWithState:aState];
    if ( self == nil ) {
        return nil;
    }
    input = [anInput retain];
    ruleNestingLevel = 0;
    // If the shared state already carries a token, point it at this stream.
    if ( state.token != nil ) {
        ANTLRCommonToken *pendingToken = (ANTLRCommonToken *)state.token;
        [pendingToken setInput:anInput];
    }
    return self;
}
| |
/** Give up ownership of the character stream, then let the superclass
 *  finish tearing the object down.
 */
- (void) dealloc
{
    // Messaging nil is a no-op, so no explicit nil check is required.
    [input release];
    [super dealloc];
}
| |
/** Produce a new lexer of the receiver's class that shares the receiver's
 *  character stream and carries the same rule nesting level.
 *
 *  @param aZone the zone passed through to +allocWithZone:.
 *  @return a new instance created with alloc/init (owned by the caller).
 */
- (id) copyWithZone:(NSZone *)aZone
{
    ANTLRLexer *duplicate = [[[self class] allocWithZone:aZone] init];
    // Only hand over a stream when there is one; assigning goes through the
    // input setter of the new instance.
    if ( input != nil ) {
        duplicate.input = input;
    }
    duplicate.ruleNestingLevel = ruleNestingLevel;
    return duplicate;
}
| |
/** Reset the recognizer: run the superclass reset, rewind the character
 *  stream (when one is set) to index 0, and clear the per-token fields of the
 *  shared state (when one is set).
 */
- (void) reset
{
    [super reset];          // reset all recognizer state variables

    // Rewind the input; a nil stream simply ignores the message.
    [input seek:0];

    if ( state != nil ) {
        // Clear the lexer-specific state variables.
        state.token = nil;
        state.text = nil;
        state.type = ANTLRCommonToken.INVALID_TOKEN_TYPE;
        state.channel = ANTLRCommonToken.DEFAULT_CHANNEL;
        state.tokenStartCharIndex = -1;
        state.tokenStartCharPositionInLine = -1;
        state.tokenStartLine = -1;
    }
}
| |
| // token stuff |
| #pragma mark Tokens |
| |
/** Return the token currently held by the shared recognizer state. */
- (id<ANTLRToken>)getToken
{
    id<ANTLRToken> currentToken = [state getToken];
    return currentToken;
}
| |
/** Store aToken in the shared state unless it already is the current token.
 *  The token is sent -retain before it is stored.
 *
 *  NOTE(review): if the state's token property retains on assignment as
 *  well, this adds a second retain -- confirm against the state class.
 *
 *  @param aToken the token to make current; may be nil.
 */
- (void) setToken: (id<ANTLRToken>) aToken
{
    if ( state.token == aToken ) {
        return;     // nothing to change
    }
    [aToken retain];
    state.token = aToken;
}
| |
| |
/** Return the next token from the character stream.
 *
 *  Each attempt clears the current token and records where the token starts.
 *  At end of input an EOF token positioned at the current index is returned.
 *  Otherwise -mTokens is run; if it left no token, one is emitted
 *  automatically, and if it left the skip token the loop starts over.
 *  Recognition errors are reported and the loop tries again.
 *
 *  This method may be overridden in the generated lexer if we generate a
 *  filtering lexer.
 */
- (id<ANTLRToken>) nextToken
{
    for (;;) {
        // Start every attempt with no token and a fresh token start position.
        [self setToken:nil];
        state.channel = ANTLRCommonToken.DEFAULT_CHANNEL;
        state.tokenStartCharIndex = input.index;
        state.tokenStartCharPositionInLine = input.charPositionInLine;
        state.tokenStartLine = input.line;
        state.text = nil;

        if ( [input LA:1] == ANTLRCharStreamEOF ) {
            ANTLRCommonToken *eofToken = [ANTLRCommonToken newToken:input
                                                              Type:ANTLRTokenTypeEOF
                                                           Channel:ANTLRCommonToken.DEFAULT_CHANNEL
                                                             Start:input.index
                                                              Stop:input.index];
            [eofToken setLine:input.line];
            [eofToken setCharPositionInLine:input.charPositionInLine];
            return eofToken;
        }

        @try {
            [self mTokens];
            if ( state.token == nil ) {
                // The rule did not emit anything itself: emit the default token.
                [self emit];
                return state.token;
            }
            if ( state.token != [ANTLRCommonToken skipToken] ) {
                return state.token;
            }
            // Skip token: leave the @try and go around the loop again.
        }
        @catch (ANTLRNoViableAltException *noViableAlt) {
            [self reportError:noViableAlt];
            [self recover:noViableAlt];
        }
        @catch (ANTLRRecognitionException *recognitionError) {
            // Reported only; no recovery is performed here for this case.
            [self reportError:recognitionError];
        }
    }
}
| |
/** Abstract entry point for matching a token; the generated lexer source is
 *  expected to define it. This base version reports the selector as
 *  unrecognized.
 */
- (void) mTokens
{
    SEL unimplemented = _cmd;
    [self doesNotRecognizeSelector:unimplemented];
}
| |
/** Mark the current token as the skip token, which -nextToken in this class
 *  treats as "discard and keep scanning".
 */
- (void) skip
{
    id skipMarker = [ANTLRCommonToken skipToken];
    state.token = skipMarker;
}
| |
/** Return the character stream this lexer reads from (may be nil). */
- (id<ANTLRCharStream>) input
{
    return self->input;
}
| |
/** Replace the character stream and reset the lexer.
 *
 *  The reset runs while no stream is attached, so neither the old nor the
 *  new stream is rewound by it.
 *
 *  Ownership is balanced for every argument: the new stream is retained
 *  first and the previous one released last. This also covers assigning the
 *  stream that is already set, which previously skipped the release but
 *  still retained, leaking one retain per call.
 *
 *  @param anInput the new character stream; may be nil.
 */
- (void) setInput:(id<ANTLRCharStream>) anInput
{
    id<ANTLRCharStream> previousInput = input;

    // Retain before releasing so that anInput == previousInput stays alive.
    [anInput retain];

    input = nil;            // detach so that -reset does not seek any stream
    [self reset];
    input = anInput;

    [previousInput release];
}
| |
/** Make aToken the token produced by the current -nextToken invocation.
 *
 *  Only a single token per -nextToken call is supported, for efficiency
 *  reasons. To emit several, subclass and override both this method and
 *  -nextToken so that tokens are pushed onto a list and pulled from that
 *  list instead of being kept in a single variable as done here.
 *
 *  @param aToken the token to store in the shared state.
 */
- (void) emit:(id<ANTLRToken>)aToken
{
    id<ANTLRToken> emitted = aToken;
    state.token = emitted;
}
| |
/** The standard method called to automatically emit a token at the
 *  outermost lexical rule.
 *
 *  The token covers the characters from the recorded token start index up to
 *  the character before the current stream index, and carries the recorded
 *  start line and column, the state's type and channel, and the text
 *  returned by -text (which honours a text override). Override this method
 *  to emit custom token objects.
 *
 *  If you are building trees, then you should also override
 *  Parser or TreeParser.getMissingSymbol().
 *
 *  NOTE(review): the token comes from a +new... factory and is then sent an
 *  additional -retain; whether that extra retain is balanced elsewhere is
 *  not visible in this file.
 */
- (void) emit
{
    id<ANTLRToken> builtToken = [ANTLRCommonToken newToken:input
                                                     Type:state.type
                                                  Channel:state.channel
                                                    Start:state.tokenStartCharIndex
                                                     Stop:input.index-1];
    [builtToken setLine:state.tokenStartLine];
    [builtToken setCharPositionInLine:state.tokenStartCharPositionInLine];
    builtToken.text = [self text];
    [builtToken retain];
    [self emit:builtToken];
}
| |
| // matching |
| #pragma mark Matching |
/** Match the characters of aString against the input, one at a time.
 *
 *  Every matching character is consumed and clears state.failed. On the
 *  first mismatch: when backtracking, state.failed is set and the method
 *  returns; otherwise an ANTLRMismatchedTokenException describing the
 *  expected and the actual character is created, -recover: is invoked, and
 *  the exception is thrown. An empty or nil string matches without touching
 *  the input.
 *
 *  @param aString the literal text expected next in the input.
 */
- (void) matchString:(NSString *)aString
{
    // NSUInteger matches -length and -characterAtIndex:, so nothing is
    // truncated on 64-bit targets.
    NSUInteger stringLength = [aString length];
    NSUInteger i;
    for ( i = 0; i < stringLength; i++ ) {
        unichar c = [input LA:1];
        unichar expected = [aString characterAtIndex:i];
        if ( c != expected ) {
            if ( [state getBacktracking] > 0 ) {
                state.failed = YES;
                return;
            }
            ANTLRMismatchedTokenException *mte = [ANTLRMismatchedTokenException newExceptionChar:expected Stream:input];
            mte.c = c;
            [self recover:mte];
            @throw mte;
        }
        [input consume];
        state.failed = NO;
    }
}
| |
/** Match whatever character comes next by consuming it unconditionally. */
- (void) matchAny
{
    id<ANTLRCharStream> stream = input;
    [stream consume];
}
| |
/** Match a single expected character.
 *
 *  On a match the character is consumed and state.failed is cleared. On a
 *  mismatch: when backtracking, state.failed is set and the method returns;
 *  otherwise an ANTLRMismatchedTokenException carrying the expected and the
 *  actual character is created, -recover: is invoked, and the exception is
 *  thrown.
 *
 *  TODO: -LA: returns an int because the generated parser sometimes uses it
 *  to compare lookahead with a token type. Try to change those occurrences
 *  to -LT: if possible (i.e. if ANTLR can be made to generate LA only for
 *  lexer code).
 *
 *  @param aChar the character expected next in the input.
 */
- (void) matchChar:(unichar) aChar
{
    unichar lookahead = [input LA:1];
    if ( lookahead == aChar ) {
        [input consume];
        state.failed = NO;
        return;
    }
    if ( [state getBacktracking] > 0 ) {
        state.failed = YES;
        return;
    }
    ANTLRMismatchedTokenException *mismatch = [ANTLRMismatchedTokenException newExceptionChar:aChar Stream:input];
    mismatch.c = lookahead;
    [self recover:mismatch];
    @throw mismatch;
}
| |
/** Match one character in the inclusive range fromChar..toChar.
 *
 *  On a match the character is consumed and state.failed is cleared. On a
 *  mismatch: when backtracking, state.failed is set and the method returns;
 *  otherwise an ANTLRMismatchedRangeException is created, -recover: is
 *  invoked, and the exception is thrown.
 *
 *  The exception's NSRange is built as (location = fromChar,
 *  length = toChar - fromChar + 1), so that location + length - 1 is the
 *  upper bound -- the form -getErrorMessage:TokenNames: reads it in.
 *  Previously toChar itself was passed as the length, which made the
 *  reported upper bound wrong.
 *
 *  @param fromChar lowest accepted character.
 *  @param toChar   highest accepted character.
 */
- (void) matchRangeFromChar:(unichar)fromChar to:(unichar)toChar
{
    unichar charLA = (unichar)[input LA:1];
    if ( charLA >= fromChar && charLA <= toChar ) {
        [input consume];
        state.failed = NO;
        return;
    }
    if ( [state getBacktracking] > 0 ) {
        state.failed = YES;
        return;
    }
    // An inverted range (fromChar > toChar) contains nothing: length 0.
    NSUInteger rangeLength = 0;
    if ( toChar >= fromChar ) {
        rangeLength = (NSUInteger)(toChar - fromChar) + 1;
    }
    ANTLRMismatchedRangeException *mre = [ANTLRMismatchedRangeException
                                          newException:NSMakeRange((NSUInteger)fromChar, rangeLength)
                                          stream:input];
    mre.c = charLA;
    [self recover:mre];
    @throw mre;
}
| |
| // info |
| #pragma mark Informational |
| |
/** Return the line value reported by the character stream. */
- (NSUInteger) line
{
    NSUInteger currentLine = input.line;
    return currentLine;
}
| |
/** Return the in-line character position reported by the character stream. */
- (NSUInteger) charPositionInLine
{
    NSUInteger column = input.charPositionInLine;
    return column;
}
| |
/** Always returns 0.
 *
 *  NOTE(review): the character stream's own index is not consulted here --
 *  confirm whether this constant is an intentional stub.
 */
- (NSInteger) index
{
    const NSInteger fixedIndex = 0;
    return fixedIndex;
}
| |
/** Return the text of the token being matched.
 *
 *  A text override stored in the shared state wins; otherwise the text is
 *  the slice of the input from the recorded token start index up to (not
 *  including) the current stream index.
 */
- (NSString *) text
{
    NSString *overrideText = state.text;
    if ( overrideText != nil ) {
        return overrideText;
    }
    NSRange matchedRange = NSMakeRange(state.tokenStartCharIndex, input.index-state.tokenStartCharIndex);
    return [input substringWithRange:matchedRange];
}
| |
/** Set a text override for the current token; -text returns it in place of
 *  the matched input while it is non-nil.
 *
 *  @param theText the replacement text, or nil to remove the override.
 */
- (void) setText:(NSString *) theText
{
    NSString *replacement = theText;
    state.text = replacement;
}
| |
| // error handling |
/** Report a recognition error by displaying it together with this
 *  recognizer's token names.
 *
 *  TODO: recovery in the lexer has not been thought through yet. The
 *  intended scheme is to suppress further reports once an error has been
 *  reported and no token has been matched successfully since; that check is
 *  not implemented, so every error is reported.
 *
 *  @param e the recognition exception to report.
 */
- (void) reportError:(ANTLRRecognitionException *)e
{
    id names = [self getTokenNames];
    [self displayRecognitionError:names Exception:e];
}
| |
/** Build a character-oriented message for a lexer recognition error.
 *
 *  The exception classes are tested in a fixed order and the first match
 *  decides the wording. Exceptions of any other class are passed to the
 *  superclass implementation.
 *
 *  Changes from the previous version:
 *  - the superclass fallback now receives the caller's tokenNames (falling
 *    back to -getTokenNames only when it is nil) instead of ignoring it;
 *  - the range message now says "expecting set", like the other
 *    mismatch messages, instead of printing the two parts back to back.
 *
 *  @param e          the recognition exception to describe.
 *  @param tokenNames token names forwarded to the superclass fallback.
 *  @return the message text.
 */
- (NSString *)getErrorMessage:(ANTLRRecognitionException *)e TokenNames:(AMutableArray *)tokenNames
{
    NSString *msg = nil;
    if ( [e isKindOfClass:[ANTLRMismatchedTokenException class]] ) {
        ANTLRMismatchedTokenException *mte = (ANTLRMismatchedTokenException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting \"%@\"",
               [self getCharErrorDisplay:mte.c], [self getCharErrorDisplay:mte.expecting]];
    }
    else if ( [e isKindOfClass:[ANTLRNoViableAltException class]] ) {
        ANTLRNoViableAltException *nvae = (ANTLRNoViableAltException *)e;
        // For development the decision description, decision number and
        // state number of the exception could be appended here.
        msg = [NSString stringWithFormat:@"no viable alternative at character \"%@\"",
               [self getCharErrorDisplay:(nvae.c)]];
    }
    else if ( [e isKindOfClass:[ANTLREarlyExitException class]] ) {
        ANTLREarlyExitException *eee = (ANTLREarlyExitException *)e;
        // For development the decision number could be appended here.
        msg = [NSString stringWithFormat:@"required (...)+ loop did not match anything at character \"%@\"",
               [self getCharErrorDisplay:(eee.c)]];
    }
    else if ( [e isKindOfClass:[ANTLRMismatchedNotSetException class]] ) {
        ANTLRMismatchedNotSetException *mse = (ANTLRMismatchedNotSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting set \"%@\"",
               [self getCharErrorDisplay:(mse.c)], mse.expecting];
    }
    else if ( [e isKindOfClass:[ANTLRMismatchedSetException class]] ) {
        ANTLRMismatchedSetException *mse = (ANTLRMismatchedSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting set \"%@\"",
               [self getCharErrorDisplay:(mse.c)], mse.expecting];
    }
    else if ( [e isKindOfClass:[ANTLRMismatchedRangeException class]] ) {
        ANTLRMismatchedRangeException *mre = (ANTLRMismatchedRangeException *)e;
        // The range is read as NSRange: the upper bound is location+length-1.
        msg = [NSString stringWithFormat:@"mismatched character \"%@\" expecting set \"%@..%@\"",
               [self getCharErrorDisplay:(mre.c)], [self getCharErrorDisplay:(mre.range.location)],
               [self getCharErrorDisplay:(mre.range.location+mre.range.length-1)]];
    }
    else if ( tokenNames != nil ) {
        msg = [super getErrorMessage:e TokenNames:tokenNames];
    }
    else {
        // No names supplied by the caller: use this recognizer's own.
        msg = [super getErrorMessage:e TokenNames:[self getTokenNames]];
    }
    return msg;
}
| |
/** Return a printable form of a character for use in error messages.
 *
 *  ANTLRTokenTypeEOF is shown as "<EOF>"; newline, tab and carriage return
 *  are shown as the escape sequences \n, \t and \r. Every other value is
 *  formatted as a single 16-bit character with %C, so characters above 0xFF
 *  are no longer truncated to one byte as they were with %c and a (char)
 *  cast.
 *
 *  @param c the character value (or ANTLRTokenTypeEOF) to display.
 *  @return the display string.
 */
- (NSString *)getCharErrorDisplay:(NSInteger)c
{
    switch ( c ) {
        case ANTLRTokenTypeEOF :
            return @"<EOF>";
        case '\n' :
            return @"\\n";
        case '\t' :
            return @"\\t";
        case '\r' :
            return @"\\r";
        default:
            return [NSString stringWithFormat:@"%C", (unichar)c];
    }
}
| |
/** Recover from a recognition error by discarding one character.
 *
 *  Lexers can normally match any char in their vocabulary after matching a
 *  token, so do the easy thing: kill a single character and hope it all
 *  works out. You can instead use the rule invocation stack to do
 *  sophisticated error recovery if you are in a fragment rule.
 *
 *  @param re the exception being recovered from (not inspected here).
 */
- (void)recover:(ANTLRRecognitionException *)re
{
    id<ANTLRCharStream> stream = input;
    [stream consume];
}
| |
/** Trace entry into a rule, passing the superclass a description of the
 *  current lookahead symbol and the stream's line and in-line position.
 *
 *  The line and position values are cast to unsigned long and printed with
 *  %lu, because they are NSUInteger-sized (see -line and
 *  -charPositionInLine) and %d reads only an int from the argument list.
 *
 *  NOTE(review): the lookahead from -LT: is still handed to %c unchanged;
 *  its declared return type is not visible in this file.
 *
 *  @param ruleName  name of the rule being entered.
 *  @param ruleIndex index of the rule being entered.
 */
- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%lu:%lu\n",
                             [input LT:1],
                             (unsigned long)input.line,
                             (unsigned long)input.charPositionInLine];
    [super traceIn:ruleName Index:ruleIndex Object:inputSymbol];
}
| |
/** Trace exit from a rule, passing the superclass a description of the
 *  current lookahead symbol and the stream's line and in-line position.
 *
 *  The line and position values are cast to unsigned long and printed with
 *  %lu, because they are NSUInteger-sized (see -line and
 *  -charPositionInLine) and %d reads only an int from the argument list.
 *
 *  NOTE(review): the lookahead from -LT: is still handed to %c unchanged;
 *  its declared return type is not visible in this file.
 *
 *  @param ruleName  name of the rule being left.
 *  @param ruleIndex index of the rule being left.
 */
- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex
{
    NSString *inputSymbol = [NSString stringWithFormat:@"%c line=%lu:%lu\n",
                             [input LT:1],
                             (unsigned long)input.line,
                             (unsigned long)input.charPositionInLine];
    [super traceOut:ruleName Index:ruleIndex Object:inputSymbol];
}
| |
| @end |