blob: 3860012bcc2dd8aae66b7548576649c8d9f6e478 [file] [log] [blame]
#!/usr/bin/ruby
# encoding: utf-8
require 'antlr3/test/functional'
class TestBacktracking < ANTLR3::Test::Functional
inline_grammar( <<-'END' )
grammar Backtrack;
options {
language = Ruby;
backtrack=true;
memoize=true;
k=2;
}
scope Symbols {
types;
}
@members {
def is_type_name?(name)
@Symbols_stack.reverse_each do |scope|
scope.types.include?(name) and return true
end
return false
end
def report_error(e)
# do nothing
end
}
translation_unit
scope Symbols; // entire file is a scope
@init {
$Symbols::types = Set.new
}
: external_declaration+
;
/** Either a function definition or any other kind of C decl/def.
* The LL(*) analysis algorithm fails to deal with this due to
* recursion in the declarator rules. I'm putting in a
* manual predicate here so that we don't backtrack over
* the entire function. Further, you get a better error
* as errors within the function itself don't make it fail
* to predict that it's a function. Weird errors previously.
* Remember: the goal is to avoid backtrack like the plague
* because it makes debugging, actions, and errors harder.
*
* Note that k=1 results in a much smaller predictor for the
* fixed look; k=2 made a few extra thousand lines. ;)
* I'll have to optimize that in the future.
*/
external_declaration
options {k=1;}
: ( declaration_specifiers? declarator declaration* '{' )=> function_definition
| declaration
;
function_definition
scope Symbols; // put parameters and locals into same scope for now
@init {
$Symbols::types = set()
}
: declaration_specifiers? declarator
;
declaration
scope {
is_type_def;
}
@init {
$declaration::is_type_def = false
}
: 'typedef' declaration_specifiers? {$declaration::is_type_def = true}
init_declarator_list ';' // special case, looking for typedef
| declaration_specifiers init_declarator_list? ';'
;
declaration_specifiers
: ( storage_class_specifier
| type_specifier
| type_qualifier
)+
;
init_declarator_list
: init_declarator (',' init_declarator)*
;
init_declarator
: declarator //('=' initializer)?
;
storage_class_specifier
: 'extern'
| 'static'
| 'auto'
| 'register'
;
type_specifier
: 'void'
| 'char'
| 'short'
| 'int'
| 'long'
| 'float'
| 'double'
| 'signed'
| 'unsigned'
| type_id
;
type_id
: { is_type_name?(@input.look(1).text)}? IDENTIFIER
;
type_qualifier
: 'const'
| 'volatile'
;
declarator
: pointer? direct_declarator
| pointer
;
direct_declarator
: ( IDENTIFIER
{
if $declaration.length > 0 && $declaration::is_type_def
$Symbols::types.add($IDENTIFIER.text)
end
}
| '(' declarator ')'
)
declarator_suffix*
;
declarator_suffix
: /*'[' constant_expression ']'
|*/ '[' ']'
| '(' ')'
;
pointer
: '*' type_qualifier+ pointer?
| '*' pointer
| '*'
;
IDENTIFIER
: LETTER (LETTER|'0'..'9')*
;
fragment
LETTER
: '$'
| 'A'..'Z'
| 'a'..'z'
| '_'
;
CHARACTER_LITERAL
: '\'' ( EscapeSequence | ~('\''|'\\') ) '\''
;
STRING_LITERAL
: '"' ( EscapeSequence | ~('\\'|'"') )* '"'
;
HEX_LITERAL : '0' ('x'|'X') HexDigit+ IntegerTypeSuffix? ;
DECIMAL_LITERAL : ('0' | '1'..'9' '0'..'9'*) IntegerTypeSuffix? ;
OCTAL_LITERAL : '0' ('0'..'7')+ IntegerTypeSuffix? ;
fragment
HexDigit : ('0'..'9'|'a'..'f'|'A'..'F') ;
fragment
IntegerTypeSuffix
: ('u'|'U')? ('l'|'L')
| ('u'|'U') ('l'|'L')?
;
FLOATING_POINT_LITERAL
: ('0'..'9')+ '.' ('0'..'9')* Exponent? FloatTypeSuffix?
| '.' ('0'..'9')+ Exponent? FloatTypeSuffix?
| ('0'..'9')+ Exponent FloatTypeSuffix?
| ('0'..'9')+ Exponent? FloatTypeSuffix
;
fragment
Exponent : ('e'|'E') ('+'|'-')? ('0'..'9')+ ;
fragment
FloatTypeSuffix : ('f'|'F'|'d'|'D') ;
fragment
EscapeSequence
: '\\' ('b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\')
| OctalEscape
;
fragment
OctalEscape
: '\\' ('0'..'3') ('0'..'7') ('0'..'7')
| '\\' ('0'..'7') ('0'..'7')
| '\\' ('0'..'7')
;
fragment
UnicodeEscape
: '\\' 'u' HexDigit HexDigit HexDigit HexDigit
;
WS : (' '|'\r'|'\t'|'\u000C'|'\n') {$channel=HIDDEN;}
;
COMMENT
: '/*' ( options {greedy=false;} : . )* '*/' {$channel=HIDDEN;}
;
LINE_COMMENT
: '//' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
;
LINE_COMMAND
: '#' ~('\n'|'\r')* '\r'? '\n' {$channel=HIDDEN;}
;
END
example "grammar with backtracking and memoization" do
lexer = Backtrack::Lexer.new( 'int a;' )
parser = Backtrack::Parser.new lexer
events = parser.translation_unit
end
end