| package ANTLR::Runtime::BaseRecognizer; |
| |
| use Readonly; |
| use Carp; |
| |
| use ANTLR::Runtime::RecognizerSharedState; |
| use ANTLR::Runtime::Token; |
| use ANTLR::Runtime::UnwantedTokenException; |
| use ANTLR::Runtime::MissingTokenException; |
| use ANTLR::Runtime::MismatchedTokenException; |
| |
| use Moose; |
| |
| Readonly my $MEMO_RULE_FAILED => -2; |
| sub MEMO_RULE_FAILED { $MEMO_RULE_FAILED } |
| |
| Readonly my $MEMO_RULE_UNKNOWN => -1; |
| sub MEMO_RULE_UNKNOWN { $MEMO_RULE_UNKNOWN } |
| |
| Readonly my $INITIAL_FOLLOW_STACK_SIZE => 100; |
| sub INITIAL_FOLLOW_STACK_SIZE { $INITIAL_FOLLOW_STACK_SIZE } |
| |
| # copies from Token object for convenience in actions |
| Readonly my $DEFAULT_TOKEN_CHANNEL => ANTLR::Runtime::Token->DEFAULT_CHANNEL; |
| sub DEFAULT_TOKEN_CHANNEL { $DEFAULT_TOKEN_CHANNEL } |
| |
| Readonly my $HIDDEN => ANTLR::Runtime::Token->HIDDEN_CHANNEL; |
| sub HIDDEN { $HIDDEN } |
| |
| Readonly my $NEXT_TOKEN_RULE_NAME => 'next_token'; |
| sub NEXT_TOKEN_RULE_NAME { $NEXT_TOKEN_RULE_NAME } |
| |
| # State of a lexer, parser, or tree parser are collected into a state |
| # object so the state can be shared. This sharing is needed to |
| # have one grammar import others and share same error variables |
| # and other state variables. It's a kind of explicit multiple |
| # inheritance via delegation of methods and shared state. |
| has 'state' => ( |
| is => 'rw', |
| isa => 'ANTLR::Runtime::RecognizerSharedState', |
| default => sub { ANTLR::Runtime::RecognizerSharedState->new() }, |
| ); |
| |
| sub reset { |
| my ($self) = @_; |
| |
| if (!defined $self->state) { |
| return; |
| } |
| |
| my $state = $self->state; |
| $state->_fsp(-1); |
| $state->error_recovery(0); |
| $state->last_error_index(-1); |
| $state->failed(0); |
| $state->syntax_errors(0); |
| |
| # wack everything related to backtracking and memoization |
| $state->backtracking(0); |
| # wipe cache |
| $state->rule_memo([]); |
| } |
| |
| sub match { |
| Readonly my $usage => 'void match(IntStream input, int ttype, BitSet follow)'; |
| croak $usage if @_ != 4; |
| my ($self, $input, $ttype, $follow) = @_; |
| |
| my $matched_symbol = $self->get_current_input_symbol($input); |
| if ($input->LA(1) eq $ttype) { |
| $input->consume(); |
| $self->state->error_recovery(0); |
| $self->state->failed(0); |
| return $matched_symbol; |
| } |
| |
| if ($self->state->backtracking > 0) { |
| $self->state->failed(1); |
| return $matched_symbol; |
| } |
| |
| return $self->recover_from_mismatched_token($input, $ttype, $follow); |
| } |
| |
| sub match_any { |
| Readonly my $usage => 'void match_any(IntStream input)'; |
| croak $usage if @_ != 2; |
| my ($self, $input) = @_; |
| |
| $self->state->error_recovery(0); |
| $self->state->failed(0); |
| $input->consume(); |
| } |
| |
| sub mismatch_is_unwanted_token { |
| my ($self, $input, $ttype) = @_; |
| return $input->LA(2) == $ttype; |
| } |
| |
| sub mismatch_is_missing_token { |
| my ($self, $input, $follow) = @_; |
| |
| if (!defined $follow) { |
| return 0; |
| } |
| |
| if ($follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { |
| my $viable_tokens_following_this_rule = $self->compute_context_sensitive_rule_FOLLOW(); |
| $follow = $follow->or($viable_tokens_following_this_rule); |
| if ($self->state->_fsp >= 0) { |
| $follow->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE); |
| } |
| } |
| |
| if ($follow->member($input->LA(1)) || $follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { |
| return 1; |
| } |
| return 0; |
| } |
| |
| sub mismatch { |
| Readonly my $usage => 'void mismatch(IntStream input, int ttype, BitSet follow)'; |
| croak $usage if @_ != 4; |
| my ($self, $input, $ttype, $follow) = @_; |
| |
| if ($self->mismatch_is_unwanted_token($input, $ttype)) { |
| ANTLR::Runtime::UnwantedTokenException->new({ |
| expecting => $ttype, |
| input => $input |
| })->throw(); |
| } |
| elsif ($self->mismatch_is_missing_token($input, $follow)) { |
| ANTLR::Runtime::MissingTokenException->new({ |
| expecting => $ttype, |
| input => $input |
| })->throw(); |
| } |
| else { |
| ANTLR::Runtime::MismatchedTokenException->new({ |
| expecting => $ttype, |
| input => $input |
| })->throw(); |
| } |
| } |
| |
| sub report_error { |
| Readonly my $usage => 'void report_error(RecognitionException e)'; |
| croak $usage if @_ != 2; |
| my ($self, $e) = @_; |
| |
| if ($self->state->error_recovery) { |
| return; |
| } |
| $self->state->syntax_errors($self->state->syntax_errors + 1); |
| $self->state->error_recovery(1); |
| |
| $self->display_recognition_error($self->get_token_names(), $e); |
| return; |
| } |
| |
| sub display_recognition_error { |
| Readonly my $usage => 'void display_recognition_error(String[] token_names, RecognitionException e)'; |
| croak $usage if @_ != 3; |
| my ($self, $token_names, $e) = @_; |
| |
| my $hdr = $self->get_error_header($e); |
| my $msg = $self->get_error_message($e, $token_names); |
| $self->emit_error_message("$hdr $msg"); |
| } |
| |
| sub get_error_message { |
| Readonly my $usage => 'String get_error_message(RecognitionException e, String[] token_names)'; |
| croak $usage if @_ != 3; |
| my ($self, $e, $token_names) = @_; |
| |
| if ($e->isa('ANTLR::Runtime::MismatchedTokenException')) { |
| my $token_name; |
| if ($e->get_expecting == ANTLR::Runtime::Token->EOF) { |
| $token_name = 'EOF'; |
| } else { |
| $token_name = $token_names->[$e->get_expecting]; |
| } |
| |
| return 'mismatched input ' . $self->get_token_error_display($e->get_token) |
| . ' expecting ' . $token_name; |
| } elsif ($e->isa('ANTLR::Runtime::MismatchedTreeNodeException')) { |
| my $token_name; |
| if ($e->get_expecting == ANTLR::Runtime::Token->EOF) { |
| $token_name = 'EOF'; |
| } else { |
| $token_name = $token_names->[$e->get_expecting]; |
| } |
| |
| return 'mismatched tree node: ' . $e->node |
| . ' expecting ' . $token_name; |
| } elsif ($e->isa('ANTLR::Runtime::NoViableAltException')) { |
| return 'no viable alternative at input ' . $self->get_token_error_display($e->get_token); |
| } elsif ($e->isa('ANTLR::Runtime::EarlyExitException')) { |
| return 'required (...)+ loop did not match anything at input ' |
| . get_token_error_display($e->get_token); |
| } elsif ($e->isa('ANTLR::Runtime::MismatchedSetException')) { |
| return 'mismatched input ' . $self->get_token_error_display($e->get_token) |
| . ' expecting set ' . $e->get_expecting; |
| } elsif ($e->isa('ANTLR::Runtime::MismatchedNotSetException')) { |
| return 'mismatched input ' . $self->get_token_error_display($e->get_token) |
| . ' expecting set ' . $e->get_expecting; |
| } elsif ($e->isa('ANTLR::Runtime::FailedPredicateException')) { |
| return 'rule ' . $e->rule_name . ' failed predicate: {' |
| . $e->predicate_text . '}?'; |
| } else { |
| return undef; |
| } |
| } |
| |
| sub get_number_of_syntax_errors { |
| my ($self) = @_; |
| return $self->state->syntax_errors; |
| } |
| |
| sub get_error_header { |
| Readonly my $usage => 'String get_error_header(RecognitionException e)'; |
| croak $usage if @_ != 2; |
| my ($self, $e) = @_; |
| |
| my $line = $e->get_line(); |
| my $col = $e->get_char_position_in_line(); |
| |
| return "line $line:$col"; |
| } |
| |
| sub get_token_error_display { |
| Readonly my $usage => 'String get_token_error_display(Token t)'; |
| croak $usage if @_ != 2; |
| my ($self, $t) = @_; |
| |
| my $s = $t->get_text(); |
| if (!defined $s) { |
| if ($t->get_type() == ANTLR::Runtime::Token->EOF) { |
| $s = '<EOF>'; |
| } else { |
| my $ttype = $t->get_type(); |
| $s = "<$ttype>"; |
| } |
| } |
| |
| $s =~ s/\n/\\\\n/g; |
| $s =~ s/\r/\\\\r/g; |
| $s =~ s/\t/\\\\t/g; |
| |
| return "'$s'"; |
| } |
| |
| sub emit_error_message { |
| Readonly my $usage => 'void emit_error_message(String msg)'; |
| croak $usage if @_ != 2; |
| my ($self, $msg) = @_; |
| |
| print STDERR $msg, "\n"; |
| } |
| |
| sub recover { |
| Readonly my $usage => 'void recover(IntStream input, RecognitionException re)'; |
| croak $usage if @_ != 3; |
| my ($self, $input, $re) = @_; |
| |
| if ($self->state->last_error_index == $input->index()) { |
| # uh oh, another error at same token index; must be a case |
| # where LT(1) is in the recovery token set so nothing is |
| # consumed; consume a single token so at least to prevent |
| # an infinite loop; this is a failsafe. |
| $input->consume(); |
| } |
| |
| $self->state->last_error_index($input->index()); |
| my $follow_set = $self->compute_error_recovery_set(); |
| $self->begin_resync(); |
| $self->consume_until($input, $follow_set); |
| $self->end_resync(); |
| } |
| |
| sub begin_resync { |
| } |
| |
| sub end_resync { |
| } |
| |
| sub compute_error_recovery_set { |
| Readonly my $usage => 'void compute_error_recovery_set()'; |
| croak $usage if @_ != 1; |
| my ($self) = @_; |
| |
| $self->combine_follows(0); |
| } |
| |
| sub compute_context_sensitive_rule_FOLLOW { |
| Readonly my $usage => 'void compute_context_sensitive_rule_FOLLOW()'; |
| croak $usage if @_ != 1; |
| my ($self) = @_; |
| |
| $self->combine_follows(1); |
| } |
| |
| sub combine_follows { |
| Readonly my $usage => 'BitSet combine_follows(boolean exact)'; |
| croak $usage if @_ != 2; |
| my ($self, $exact) = @_; |
| |
| my $top = $self->state->_fsp; |
| my $follow_set = ANTLR::Runtime::BitSet->new(); |
| |
| foreach my $local_follow_set (reverse @{$self->state->following}) { |
| $follow_set |= $local_follow_set; |
| if ($exact && $local_follow_set->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { |
| last; |
| } |
| } |
| #$follow_set->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE); |
| return $follow_set; |
| } |
| |
| sub recover_from_mismatched_token { |
| Readonly my $usage => 'void recover_from_mismatched_token(IntStream input, int ttype, BitSet follow)'; |
| croak $usage if @_ != 4; |
| my ($self, $input, $ttype, $follow) = @_; |
| |
| if ($self->mismatch_is_unwanted_token($input, $ttype)) { |
| my $ex = ANTLR::Runtime::UnwantedTokenException->new({ |
| expecting => $ttype, |
| input => $input |
| }); |
| |
| $self->begin_resync(); |
| $input->consume(); |
| $self->end_resync(); |
| $self->report_error($ex); |
| |
| my $matched_symbol = $self->get_current_input_symbol($input); |
| $input->consume(); |
| return $matched_symbol; |
| } |
| |
| if ($self->mismatch_is_missing_token($input, $follow)) { |
| my $inserted = $self->get_missing_symbol({ |
| input => $input, |
| expected_token_type => $ttype, |
| follow => $follow, |
| }); |
| my $ex = ANTLR::Runtime::MissingTokenException({ |
| expecting => $ttype, |
| input => $input, |
| inserted => $inserted, |
| }); |
| $self->report_error($ex); |
| return $inserted; |
| } |
| |
| ANTLR::Runtime::MismatchedTokenException->new({ |
| expecting => $ttype, |
| input => $input, |
| })->throw(); |
| } |
| |
| sub recover_from_mismatched_set { |
| Readonly my $usage => 'void recover_from_mismatched_set(IntStream input, RecognitionException e, BitSet follow)'; |
| croak $usage if @_ != 4; |
| my ($self, $input, $e, $follow) = @_; |
| |
| if ($self->mismatch_is_missing_token($input, $follow)) { |
| $self->report_error($e); |
| return $self->get_missing_symbol({ |
| input => $input, |
| exception => $e, |
| expected_token_type => ANTLR::Runtime::Token->INVALID_TOKEN_TYPE, |
| follow => $follow, |
| }); |
| } |
| |
| $e->throw(); |
| } |
| |
| sub recover_from_mismatched_element { |
| Readonly my $usage => 'boolean recover_from_mismatched_element(IntStream input, RecognitionException e, BitSet follow)'; |
| croak $usage if @_ != 4; |
| my ($self, $input, $e, $follow) = @_; |
| |
| return 0 if (!defined $follow); |
| |
| if ($follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { |
| my $viable_tokens_following_this_rule = $self->compute_context_sensitive_rule_FOLLOW(); |
| $follow |= $viable_tokens_following_this_rule; |
| $follow->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE); |
| } |
| |
| if ($follow->member($input->LA(1))) { |
| $self->report_error($e); |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| sub get_current_input_symbol { |
| my ($self, $input) = @_; |
| return undef; |
| } |
| |
| sub get_missing_symbol { |
| my ($self, $arg_ref) = @_; |
| my $input = $arg_ref->{input}; |
| my $exception = $arg_ref->{exception}; |
| my $expected_token_type = $arg_ref->{expected_token_type}; |
| my $follow = $arg_ref->{follow}; |
| |
| return undef; |
| } |
| |
| sub consume_until { |
| Readonly my $usage => 'void consume_until(IntStream input, (int token_type | BitSet set))'; |
| croak $usage if @_ != 3; |
| |
| if ($_[2]->isa('ANTLR::Runtime::BitSet')) { |
| my ($self, $input, $set) = @_; |
| |
| my $ttype = $input->LA(1); |
| while ($ttype != ANTLR::Runtime::Token->EOF && !$set->member($ttype)) { |
| $input->consume(); |
| $ttype = $input->LA(1); |
| } |
| } else { |
| my ($self, $input, $token_type) = @_; |
| |
| my $ttype = $input->LA(1); |
| while ($ttype != ANTLR::Runtime::Token->EOF && $ttype != $token_type) { |
| $input->consume(); |
| $ttype = $input->LA(1); |
| } |
| } |
| } |
| |
| sub push_follow { |
| Readonly my $usage => 'void push_follow(BitSet fset)'; |
| croak $usage if @_ != 2; |
| my ($self, $fset) = @_; |
| |
| push @{$self->state->following}, $fset; |
| $self->state->_fsp($self->state->_fsp + 1); |
| } |
| |
| sub get_rule_invocation_stack { |
| Readonly my $usage => 'List get_rule_invocation_stack()'; |
| croak $usage if @_ != 1; |
| my ($self) = @_; |
| |
| my $rules = []; |
| for (my $i = 0; ; ++$i) { |
| my @frame = caller $i; |
| last if !@frame; |
| |
| my ($package, $filename, $line, $subroutine) = @frame; |
| |
| if ($package =~ /^ANTLR::Runtime::/) { |
| next; |
| } |
| |
| if ($subroutine eq NEXT_TOKEN_RULE_NAME) { |
| next; |
| } |
| |
| if ($package ne ref $self) { |
| next; |
| } |
| |
| push @{$rules}, $subroutine; |
| } |
| } |
| |
| sub get_backtracking_level { |
| Readonly my $usage => 'int get_backtracking_level()'; |
| croak $usage if @_ != 1; |
| my ($self) = @_; |
| |
| return $self->state->backtracking; |
| } |
| |
| sub set_backtracking_level { |
| my ($self, $n) = @_; |
| $self->state->backtracking($n); |
| } |
| |
| sub failed { |
| my ($self) = @_; |
| return $self->state->failed; |
| } |
| |
| sub get_token_names { |
| return undef; |
| } |
| |
| sub get_grammar_file_name { |
| return undef; |
| } |
| |
| sub to_strings { |
| Readonly my $usage => 'List to_strings(List tokens)'; |
| croak $usage if @_ != 2; |
| my ($self, $tokens) = @_; |
| |
| if (!defined $tokens) { |
| return undef; |
| } |
| |
| return map { $_->get_text() } @{$tokens}; |
| } |
| |
| sub get_rule_memoization { |
| Readonly my $usage => 'int get_rule_memoization(int rule_index, int rule_start_index)'; |
| croak $usage if @_ != 3; |
| my ($self, $rule_index, $rule_start_index) = @_; |
| |
| if (!defined $self->rule_memo->[$rule_index]) { |
| $self->rule_memo->[$rule_index] = {}; |
| } |
| |
| my $stop_index = $self->state->rule_memo->[$rule_index]->{$rule_start_index}; |
| if (!defined $stop_index) { |
| return $self->MEMO_RULE_UNKNOWN; |
| } |
| return $stop_index; |
| } |
| |
| sub alredy_parsed_rule { |
| Readonly my $usage => 'boolean alredy_parsed_rule(IntStream input, int rule_index)'; |
| croak $usage if @_ != 3; |
| my ($self, $input, $rule_index) = @_; |
| |
| my $stop_index = $self->get_rule_memoization($rule_index, $input->index()); |
| if ($stop_index == $self->MEMO_RULE_UNKNOWN) { |
| return 0; |
| } |
| |
| if ($stop_index == $self->MEMO_RULE_FAILED) { |
| $self->state->failed(1); |
| } else { |
| $input->seek($stop_index + 1); |
| } |
| return 1; |
| } |
| |
| sub memoize { |
| Readonly my $usage => 'void memoize(IntStream input, int rule_index, int rule_start_index)'; |
| croak $usage if @_ != 4; |
| my ($self, $input, $rule_index, $rule_start_index) = @_; |
| |
| my $stop_token_index = $self->state->failed ? $self->MEMO_RULE_FAILED : $input->index() - 1; |
| if (defined $self->state->rule_memo->[$rule_index]) { |
| $self->state->rule_memo->[$rule_index]->{$rule_start_index} = $stop_token_index; |
| } |
| } |
| |
| sub get_rule_memoization_cache_size { |
| Readonly my $usage => 'int get_rule_memoization_cache_size()'; |
| croak $usage if @_ != 1; |
| my ($self) = @_; |
| |
| my $n = 0; |
| foreach my $m (@{$self->state->rule_memo}) { |
| $n += keys %{$m} if defined $m; |
| } |
| |
| return $n; |
| } |
| |
| sub trace_in { |
| Readonly my $usage => 'void trace_in(String rule_name, int rule_index, input_symbol)'; |
| croak $usage if @_ != 4; |
| my ($self, $rule_name, $rule_index, $input_symbol) = @_; |
| |
| print "enter $rule_name $input_symbol"; |
| if ($self->state->failed) { |
| print ' failed=', $self->state->failed; |
| } |
| if ($self->state->backtracking > 0) { |
| print ' backtracking=', $self->state->backtracking; |
| } |
| print "\n"; |
| } |
| |
| sub trace_out { |
| Readonly my $usage => 'void trace_out(String rule_name, int rule_index, input_symbol)'; |
| croak $usage if @_ != 4; |
| my ($self, $rule_name, $rule_index, $input_symbol) = @_; |
| |
| print "exit $rule_name $input_symbol"; |
| if ($self->state->failed) { |
| print ' failed=', $self->state->failed; |
| } |
| if ($self->state->backtracking > 0) { |
| print ' backtracking=', $self->state->backtracking; |
| } |
| print "\n"; |
| } |
| |
| no Moose; |
| __PACKAGE__->meta->make_immutable(); |
| 1; |
| __END__ |
| |
| =head1 NAME |
| |
| ANTLR::Runtime::BaseRecognizer |
| |
| =head1 DESCRIPTION |
| |
| A generic recognizer that can handle recognizers generated from |
| lexer, parser, and tree grammars. This is all the parsing |
| support code essentially; most of it is error recovery stuff and |
| backtracking. |