| package ANTLR::Runtime::Lexer; |
| |
| use English qw( -no_match_vars ); |
| use Readonly; |
| use Carp; |
| use Switch; |
| |
| use ANTLR::Runtime::Token; |
| use ANTLR::Runtime::CommonToken; |
| use ANTLR::Runtime::CharStream; |
| use ANTLR::Runtime::MismatchedTokenException; |
| |
| use Moose; |
| |
| extends 'ANTLR::Runtime::BaseRecognizer'; |
| with 'ANTLR::Runtime::TokenSource'; |
| |
| has 'input' => ( |
| is => 'rw', |
| does => 'ANTLR::Runtime::CharStream', |
| ); |
| |
| sub reset { |
| my ($self) = @_; |
| |
| # reset all recognizer state variables |
| $self->SUPER::reset(); |
| |
| # wack Lexer state variables |
| if (defined $self->input) { |
| # rewind the input |
| $self->input->seek(0); |
| } |
| |
| if (defined $self->state) { |
| $self->state->token(undef); |
| $self->state->type(ANTLR::Runtime::Token->INVALID_TOKEN_TYPE); |
| $self->state->channel(ANTLR::Runtime::Token->DEFAULT_CHANNEL); |
| $self->state->token_start_char_index(-1); |
| $self->state->token_start_char_position_in_line(-1); |
| $self->state->start_line(-1); |
| $self->state->text(undef); |
| } |
| } |
| |
| # Return a token from this source; i.e., match a token on the char |
| # stream. |
| sub next_token { |
| my ($self) = @_; |
| |
| while (1) { |
| $self->state->token(undef); |
| $self->state->channel(ANTLR::Runtime::Token->DEFAULT_CHANNEL); |
| $self->state->token_start_char_index($self->input->index()); |
| $self->state->token_start_char_position_in_line($self->input->get_char_position_in_line()); |
| $self->state->token_start_line($self->input->get_line()); |
| $self->state->text(undef); |
| |
| if ($self->input->LA(1) eq ANTLR::Runtime::CharStream->EOF) { |
| return ANTLR::Runtime::Token->EOF_TOKEN; |
| } |
| |
| my $rv; |
| my $op = ''; |
| eval { |
| $self->m_tokens(); |
| if (!defined $self->state->token) { |
| $self->emit(); |
| } |
| elsif ($self->state->token == ANTLR::Runtime::Token->SKIP_TOKEN) { |
| $op = 'next'; |
| return; |
| } |
| $op = 'return'; |
| $rv = $self->state->token; |
| }; |
| return $rv if $op eq 'return'; |
| next if $op eq 'next'; |
| |
| if ($EVAL_ERROR) { |
| my $exception = $EVAL_ERROR; |
| if ($exception->isa('ANTLR::Runtime::RecognitionException')) { |
| $self->report_error($exception); |
| $self->recover($exception); |
| } else { |
| croak $exception; |
| } |
| } |
| } |
| } |
| |
| # Instruct the lexer to skip creating a token for current lexer rule |
| # and look for another token. nextToken() knows to keep looking when |
| # a lexer rule finishes with token set to SKIP_TOKEN. Recall that |
| # if token==null at end of any token rule, it creates one for you |
| # and emits it. |
| sub skip { |
| my ($self) = @_; |
| |
| $self->state->token(ANTLR::Runtime::Token->SKIP_TOKEN); |
| return; |
| } |
| |
| # This is the lexer entry point that sets instance var 'token' |
| sub m_tokens { |
| croak "Unimplemented"; |
| } |
| |
| # Set the char stream and reset the lexer |
| sub set_char_stream { |
| my ($self, $input) = @_; |
| |
| $self->input(undef); |
| $self->reset(); |
| $self->input($input); |
| } |
| |
| sub get_char_stream { |
| my ($self) = @_; |
| return $self->input; |
| } |
| |
| sub get_source_name { |
| my ($self) = @_; |
| return $self->input->get_source_name(); |
| } |
| |
| sub emit { |
| if (@_ == 1) { |
| my ($self) = @_; |
| # The standard method called to automatically emit a token at the |
| # outermost lexical rule. The token object should point into the |
| # char buffer start..stop. If there is a text override in 'text', |
| # use that to set the token's text. Override this method to emit |
| # custom Token objects. |
| my $t = ANTLR::Runtime::CommonToken->new({ |
| input => $self->input, |
| type => $self->state->type, |
| channel => $self->state->channel, |
| start => $self->state->token_start_char_index, |
| stop => $self->get_char_index() - 1 |
| }); |
| |
| $t->set_line($self->state->token_start_line); |
| $t->set_text($self->state->text); |
| $t->set_char_position_in_line($self->state->token_start_char_position_in_line); |
| $self->emit($t); |
| return $t; |
| } elsif (@_ == 2) { |
| my ($self, $token) = @_; |
| # Currently does not support multiple emits per nextToken invocation |
| # for efficiency reasons. Subclass and override this method and |
| # nextToken (to push tokens into a list and pull from that list rather |
| # than a single variable as this implementation does). |
| $self->state->token($token); |
| } |
| } |
| |
| sub match { |
| my ($self, $s) = @_; |
| |
| foreach my $c (split //, $s) { |
| if ($self->input->LA(1) ne $c) { |
| if ($self->state->backtracking > 0) { |
| $self->state->failed(1); |
| return; |
| } |
| my $mte = ANTLR::Runtime::MismatchedTokenException->new({ |
| expecting => $c, |
| input => $self->input |
| }); |
| $self->recover($mte); |
| croak $mte; |
| } |
| $self->input->consume(); |
| $self->state->failed(0); |
| } |
| } |
| |
| sub match_any { |
| my ($self) = @_; |
| |
| $self->input->consume(); |
| } |
| |
| sub match_range { |
| my ($self, $a, $b) = @_; |
| |
| if ($self->input->LA(1) lt $a || $self->input->LA(1) gt $b) { |
| if ($self->state->backtracking > 0) { |
| $self->state->failed(1); |
| return; |
| } |
| |
| my $mre = ANTLR::Runtime::MismatchedRangeException($a, $b, $self->input); |
| $self->recover($mre); |
| croak $mre; |
| } |
| |
| $self->input->consume(); |
| $self->state->failed(0); |
| } |
| |
| sub get_line { |
| my ($self) = @_; |
| |
| return $self->input->get_line(); |
| } |
| |
| sub get_char_position_in_line { |
| my ($self) = @_; |
| |
| return $self->input->get_char_position_in_line(); |
| } |
| |
| # What is the index of the current character of lookahead? |
| sub get_char_index { |
| my ($self) = @_; |
| |
| return $self->input->index(); |
| } |
| |
| # Return the text matched so far for the current token or any |
| # text override. |
| sub get_text { |
| my ($self) = @_; |
| |
| if (defined $self->state->text) { |
| return $self->state->text; |
| } |
| return $self->input->substring($self->state->token_start_char_index, $self->get_char_index() - 1); |
| } |
| |
| # Set the complete text of this token; it wipes any previous |
| # changes to the text. |
| sub set_text { |
| my ($self, $text) = @_; |
| |
| $self->state->text($text); |
| } |
| |
| sub report_error { |
| Readonly my $usage => 'void report_error(RecognitionException e)'; |
| croak $usage if @_ != 2; |
| my ($self, $e) = @_; |
| |
| $self->display_recognition_error($self->get_token_names(), $e); |
| } |
| |
| sub get_error_message { |
| my ($self, $e, $token_names) = @_; |
| |
| my $msg; |
| if ($e->isa('ANTLR::Runtime::MismatchedTokenException')) { |
| $msg = 'mismatched character ' |
| . $self->get_char_error_display($e->get_c()) |
| . ' expecting ' |
| . $self->get_char_error_display($e->expecting); |
| } elsif ($e->isa('ANTLR::Runtime::NoViableAltException')) { |
| $msg = 'no viable alternative at character ' . $self->get_char_error_display($e->get_c()); |
| } elsif ($e->isa('ANTLR::Runtime::EarlyExitException')) { |
| $msg = 'required (...)+ loop did not match anything at character ' |
| . $self->get_char_error_display($e->get_c()); |
| } elsif ($e->isa('ANTLR::Runtime::MismatchedSetException')) { |
| $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c()) |
| . ' expecting set ' . $e->expecting; |
| } elsif ($e->isa('ANTLR::Runtime::MismatchedNotSetException')) { |
| $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c()) |
| . ' expecting set ' . $e->expecting; |
| } elsif ($e->isa('ANTLR::Runtime::MismatchedRangeException')) { |
| $msg = 'mismatched character ' . $self->get_char_error_display($e->get_c()) |
| . ' expecting set ' . $self->get_char_error_display($e->a) |
| . '..' . $self->get_char_error_display($e->b); |
| } else { |
| $msg = $self->SUPER::get_error_message($e, $token_names); |
| } |
| return $msg; |
| } |
| |
| sub get_char_error_display { |
| my ($self, $c) = @_; |
| |
| my $s; |
| if ($c eq ANTLR::Runtime::Token->EOF) { |
| $s = '<EOF>'; |
| } elsif ($c eq "\n") { |
| $s = '\n'; |
| } elsif ($c eq "\t") { |
| $s = '\t'; |
| } elsif ($c eq "\r") { |
| $s = '\r'; |
| } else { |
| $s = $c; |
| } |
| |
| return "'$s'"; |
| } |
| |
| # Lexers can normally match any char in it's vocabulary after matching |
| # a token, so do the easy thing and just kill a character and hope |
| # it all works out. You can instead use the rule invocation stack |
| # to do sophisticated error recovery if you are in a fragment rule. |
| sub recover { |
| my ($self, $re) = @_; |
| |
| $self->input->consume(); |
| } |
| |
| sub trace_in { |
| my ($self, $rule_name, $rule_index) = @_; |
| |
| my $input_symbol = $self->input->LT(1) . ' line=' . $self->get_line() . ':' . $self->get_char_position_in_line(); |
| $self->SUPER::trace_in($rule_name, $rule_index, $input_symbol); |
| } |
| |
| sub trace_out { |
| my ($self, $rule_name, $rule_index) = @_; |
| |
| my $input_symbol = $self->input->LT(1) . ' line=' . $self->get_line() . ':' . $self->get_char_position_in_line(); |
| $self->SUPER::trace_out($rule_name, $rule_index, $input_symbol); |
| } |
| |
| no Moose; |
| __PACKAGE__->meta->make_immutable(); |
| 1; |