blob: 5acbaec50e784534a006ab240b56505ca516ca7b [file] [log] [blame]
package ANTLR::Runtime::Lexer;
use English qw( -no_match_vars );
use Readonly;
use Carp;
use Switch;
use ANTLR::Runtime::Token;
use ANTLR::Runtime::CommonToken;
use ANTLR::Runtime::CharStream;
use ANTLR::Runtime::MismatchedTokenException;
use Moose;
extends 'ANTLR::Runtime::BaseRecognizer';
with 'ANTLR::Runtime::TokenSource';
has 'input' => (
is => 'rw',
does => 'ANTLR::Runtime::CharStream',
);
sub reset {
my ($self) = @_;
# reset all recognizer state variables
$self->SUPER::reset();
# wack Lexer state variables
if (defined $self->input) {
# rewind the input
$self->input->seek(0);
}
if (defined $self->state) {
$self->state->token(undef);
$self->state->type(ANTLR::Runtime::Token->INVALID_TOKEN_TYPE);
$self->state->channel(ANTLR::Runtime::Token->DEFAULT_CHANNEL);
$self->state->token_start_char_index(-1);
$self->state->token_start_char_position_in_line(-1);
$self->state->start_line(-1);
$self->state->text(undef);
}
}
# Return a token from this source; i.e., match a token on the char
# stream.
sub next_token {
my ($self) = @_;
while (1) {
$self->state->token(undef);
$self->state->channel(ANTLR::Runtime::Token->DEFAULT_CHANNEL);
$self->state->token_start_char_index($self->input->index());
$self->state->token_start_char_position_in_line($self->input->get_char_position_in_line());
$self->state->token_start_line($self->input->get_line());
$self->state->text(undef);
if ($self->input->LA(1) eq ANTLR::Runtime::CharStream->EOF) {
return ANTLR::Runtime::Token->EOF_TOKEN;
}
my $rv;
my $op = '';
eval {
$self->m_tokens();
if (!defined $self->state->token) {
$self->emit();
}
elsif ($self->state->token == ANTLR::Runtime::Token->SKIP_TOKEN) {
$op = 'next';
return;
}
$op = 'return';
$rv = $self->state->token;
};
return $rv if $op eq 'return';
next if $op eq 'next';
if ($EVAL_ERROR) {
my $exception = $EVAL_ERROR;
if ($exception->isa('ANTLR::Runtime::RecognitionException')) {
$self->report_error($exception);
$self->recover($exception);
} else {
croak $exception;
}
}
}
}
# Instruct the lexer to skip creating a token for current lexer rule
# and look for another token. nextToken() knows to keep looking when
# a lexer rule finishes with token set to SKIP_TOKEN. Recall that
# if token==null at end of any token rule, it creates one for you
# and emits it.
sub skip {
my ($self) = @_;
$self->state->token(ANTLR::Runtime::Token->SKIP_TOKEN);
return;
}
# This is the lexer entry point that sets instance var 'token'
sub m_tokens {
croak "Unimplemented";
}
# Set the char stream and reset the lexer
sub set_char_stream {
my ($self, $input) = @_;
$self->input(undef);
$self->reset();
$self->input($input);
}
sub get_char_stream {
my ($self) = @_;
return $self->input;
}
sub get_source_name {
my ($self) = @_;
return $self->input->get_source_name();
}
sub emit {
if (@_ == 1) {
my ($self) = @_;
# The standard method called to automatically emit a token at the
# outermost lexical rule. The token object should point into the
# char buffer start..stop. If there is a text override in 'text',
# use that to set the token's text. Override this method to emit
# custom Token objects.
my $t = ANTLR::Runtime::CommonToken->new({
input => $self->input,
type => $self->state->type,
channel => $self->state->channel,
start => $self->state->token_start_char_index,
stop => $self->get_char_index() - 1
});
$t->set_line($self->state->token_start_line);
$t->set_text($self->state->text);
$t->set_char_position_in_line($self->state->token_start_char_position_in_line);
$self->emit($t);
return $t;
} elsif (@_ == 2) {
my ($self, $token) = @_;
# Currently does not support multiple emits per nextToken invocation
# for efficiency reasons. Subclass and override this method and
# nextToken (to push tokens into a list and pull from that list rather
# than a single variable as this implementation does).
$self->state->token($token);
}
}
sub match {
my ($self, $s) = @_;
foreach my $c (split //, $s) {
if ($self->input->LA(1) ne $c) {
if ($self->state->backtracking > 0) {
$self->state->failed(1);
return;
}
my $mte = ANTLR::Runtime::MismatchedTokenException->new({
expecting => $c,
input => $self->input
});
$self->recover($mte);
croak $mte;
}
$self->input->consume();
$self->state->failed(0);
}
}
sub match_any {
my ($self) = @_;
$self->input->consume();
}
sub match_range {
my ($self, $a, $b) = @_;
if ($self->input->LA(1) lt $a || $self->input->LA(1) gt $b) {
if ($self->state->backtracking > 0) {
$self->state->failed(1);
return;
}
my $mre = ANTLR::Runtime::MismatchedRangeException($a, $b, $self->input);
$self->recover($mre);
croak $mre;
}
$self->input->consume();
$self->state->failed(0);
}
sub get_line {
my ($self) = @_;
return $self->input->get_line();
}
sub get_char_position_in_line {
my ($self) = @_;
return $self->input->get_char_position_in_line();
}
# What is the index of the current character of lookahead?
sub get_char_index {
my ($self) = @_;
return $self->input->index();
}
# Return the text matched so far for the current token or any
# text override.
sub get_text {
my ($self) = @_;
if (defined $self->state->text) {
return $self->state->text;
}
return $self->input->substring($self->state->token_start_char_index, $self->get_char_index() - 1);
}
# Set the complete text of this token; it wipes any previous
# changes to the text.
sub set_text {
my ($self, $text) = @_;
$self->state->text($text);
}
sub report_error {
Readonly my $usage => 'void report_error(RecognitionException e)';
croak $usage if @_ != 2;
my ($self, $e) = @_;
$self->display_recognition_error($self->get_token_names(), $e);
}
sub get_error_message {
my ($self, $e, $token_names) = @_;
my $msg;
if ($e->isa('ANTLR::Runtime::MismatchedTokenException')) {
$msg = 'mismatched character '
. $self->get_char_error_display($e->get_c())
. ' expecting '
. $self->get_char_error_display($e->expecting);
} elsif ($e->isa('ANTLR::Runtime::NoViableAltException')) {
$msg = 'no viable alternative at character ' . $self->get_char_error_display($e->get_c());
} elsif ($e->isa('ANTLR::Runtime::EarlyExitException')) {
$msg = 'required (...)+ loop did not match anything at character '
. $self->get_char_error_display($e->get_c());
} elsif ($e->isa('ANTLR::Runtime::MismatchedSetException')) {
$msg = 'mismatched character ' . $self->get_char_error_display($e->get_c())
. ' expecting set ' . $e->expecting;
} elsif ($e->isa('ANTLR::Runtime::MismatchedNotSetException')) {
$msg = 'mismatched character ' . $self->get_char_error_display($e->get_c())
. ' expecting set ' . $e->expecting;
} elsif ($e->isa('ANTLR::Runtime::MismatchedRangeException')) {
$msg = 'mismatched character ' . $self->get_char_error_display($e->get_c())
. ' expecting set ' . $self->get_char_error_display($e->a)
. '..' . $self->get_char_error_display($e->b);
} else {
$msg = $self->SUPER::get_error_message($e, $token_names);
}
return $msg;
}
sub get_char_error_display {
my ($self, $c) = @_;
my $s;
if ($c eq ANTLR::Runtime::Token->EOF) {
$s = '<EOF>';
} elsif ($c eq "\n") {
$s = '\n';
} elsif ($c eq "\t") {
$s = '\t';
} elsif ($c eq "\r") {
$s = '\r';
} else {
$s = $c;
}
return "'$s'";
}
# Lexers can normally match any char in it's vocabulary after matching
# a token, so do the easy thing and just kill a character and hope
# it all works out. You can instead use the rule invocation stack
# to do sophisticated error recovery if you are in a fragment rule.
sub recover {
my ($self, $re) = @_;
$self->input->consume();
}
sub trace_in {
my ($self, $rule_name, $rule_index) = @_;
my $input_symbol = $self->input->LT(1) . ' line=' . $self->get_line() . ':' . $self->get_char_position_in_line();
$self->SUPER::trace_in($rule_name, $rule_index, $input_symbol);
}
sub trace_out {
my ($self, $rule_name, $rule_index) = @_;
my $input_symbol = $self->input->LT(1) . ' line=' . $self->get_line() . ':' . $self->get_char_position_in_line();
$self->SUPER::trace_out($rule_name, $rule_index, $input_symbol);
}
no Moose;
__PACKAGE__->meta->make_immutable();
1;