| #!/usr/bin/ruby |
| # encoding: utf-8 |
| |
| require 'antlr3/test/functional' |
| |
| class TestBacktracking < ANTLR3::Test::Functional |
| |
| inline_grammar( <<-'END' ) |
| grammar Backtrack; |
| options { |
| language = Ruby; |
| backtrack=true; |
| memoize=true; |
| k=2; |
| } |
| |
| scope Symbols { |
| types; |
| } |
| |
| @members { |
| def is_type_name?(name) |
| @Symbols_stack.reverse_each do |scope| |
| scope.types.include?(name) and return true |
| end |
| return false |
| end |
| |
| def report_error(e) |
| # do nothing |
| end |
| |
| } |
| |
| translation_unit |
| scope Symbols; // entire file is a scope |
| @init { |
| $Symbols::types = Set.new |
| } |
| : external_declaration+ |
| ; |
| |
| /** Either a function definition or any other kind of C decl/def. |
| * The LL(*) analysis algorithm fails to deal with this due to |
| * recursion in the declarator rules. I'm putting in a |
| * manual predicate here so that we don't backtrack over |
| * the entire function. Further, you get a better error |
| * as errors within the function itself don't make it fail |
| * to predict that it's a function. Weird errors previously. |
| * Remember: the goal is to avoid backtrack like the plague |
| * because it makes debugging, actions, and errors harder. |
| * |
| * Note that k=1 results in a much smaller predictor for the |
| * fixed look; k=2 made a few extra thousand lines. ;) |
| * I'll have to optimize that in the future. |
| */ |
| external_declaration |
| options {k=1;} |
| : ( declaration_specifiers? declarator declaration* '{' )=> function_definition |
| | declaration |
| ; |
| |
| function_definition |
| scope Symbols; // put parameters and locals into same scope for now |
| @init { |
| $Symbols::types = set() |
| } |
| : declaration_specifiers? declarator |
| ; |
| |
| declaration |
| scope { |
| is_type_def; |
| } |
| @init { |
| $declaration::is_type_def = false |
| } |
| : 'typedef' declaration_specifiers? {$declaration::is_type_def = true} |
| init_declarator_list ';' // special case, looking for typedef |
| | declaration_specifiers init_declarator_list? ';' |
| ; |
| |
| declaration_specifiers |
| : ( storage_class_specifier |
| | type_specifier |
| | type_qualifier |
| )+ |
| ; |
| |
| init_declarator_list |
| : init_declarator (',' init_declarator)* |
| ; |
| |
| init_declarator |
| : declarator //('=' initializer)? |
| ; |
| |
| storage_class_specifier |
| : 'extern' |
| | 'static' |
| | 'auto' |
| | 'register' |
| ; |
| |
| type_specifier |
| : 'void' |
| | 'char' |
| | 'short' |
| | 'int' |
| | 'long' |
| | 'float' |
| | 'double' |
| | 'signed' |
| | 'unsigned' |
| | type_id |
| ; |
| |
| type_id |
| : { is_type_name?(@input.look(1).text)}? IDENTIFIER |
| ; |
| |
| type_qualifier |
| : 'const' |
| | 'volatile' |
| ; |
| |
| declarator |
| : pointer? direct_declarator |
| | pointer |
| ; |
| |
| direct_declarator |
| : ( IDENTIFIER |
| { |
| if $declaration.length > 0 && $declaration::is_type_def |
| $Symbols::types.add($IDENTIFIER.text) |
| end |
| } |
| | '(' declarator ')' |
| ) |
| declarator_suffix* |
| ; |
| |
| declarator_suffix |
| : /*'[' constant_expression ']' |
| |*/ '[' ']' |
| | '(' ')' |
| ; |
| |
| pointer |
| : '*' type_qualifier+ pointer? |
| | '*' pointer |
| | '*' |
| ; |
| |
| IDENTIFIER |
| : LETTER (LETTER|'0'..'9')* |
| ; |
| |
| fragment |
| LETTER |
| : '$' |
| | 'A'..'Z' |
| | 'a'..'z' |
| | '_' |
| ; |
| |
| CHARACTER_LITERAL |
| : '\'' ( EscapeSequence | ~('\''|'\\') ) '\'' |
| ; |
| |
| STRING_LITERAL |
| : '"' ( EscapeSequence | ~('\\'|'"') )* '"' |
| ; |
| |
| HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ; |
| |
| DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ; |
| |
| OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ; |
| |
| fragment |
| HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ; |
| |
| fragment |
| IntegerTypeSuffix |
| : ('u'|'U')? ('l'|'L') |
| | ('u'|'U') ('l'|'L')? |
| ; |
| |
| FLOATING_POINT_LITERAL |
| : ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix? |
| | '.' ('0'..'9')+ Exponent? FloatTypeSuffix? |
| | ('0'..'9')+ Exponent FloatTypeSuffix? |
| | ('0'..'9')+ Exponent? FloatTypeSuffix |
| ; |
| |
| fragment |
| Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ; |
| |
| fragment |
| FloatTypeSuffix : ('f'|'F'|'d'|'D') ; |
| |
| fragment |
| EscapeSequence |
| : '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\') |
| | OctalEscape |
| ; |
| |
| fragment |
| OctalEscape |
| : '\\' ('0'..'3') ('0'..'7') ('0'..'7') |
| | '\\' ('0'..'7') ('0'..'7') |
| | '\\' ('0'..'7') |
| ; |
| |
| fragment |
| UnicodeEscape |
| : '\\' 'u' HexDigit HexDigit HexDigit HexDigit |
| ; |
| |
| WS : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;} |
| ; |
| |
| COMMENT |
| : '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;} |
| ; |
| |
| LINE_COMMENT |
| : '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;} |
| ; |
| LINE_COMMAND |
| : '#' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;} |
| ; |
| END |
| |
| example "grammar with backtracking and memoization" do |
| lexer = Backtrack::Lexer.new( 'int a;' ) |
| parser = Backtrack::Parser.new lexer |
| events = parser.translation_unit |
| end |
| |
| end |