/*
 * Copyright 2016 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| |
| package com.google.turbine.parse; |
| |
| import static com.google.common.truth.Truth.assertThat; |
| |
| import com.google.common.escape.SourceCodeEscapers; |
| import com.google.turbine.diag.SourceFile; |
| import java.util.ArrayList; |
| import java.util.List; |
| import org.junit.Test; |
| import org.junit.runner.RunWith; |
| import org.junit.runners.JUnit4; |
| |
| @RunWith(JUnit4.class) |
| public class LexerTest { |
| |
| @Test |
| public void testSimple() { |
| assertThat(lex("\nasd dsa\n")).containsExactly("IDENT(asd)", "IDENT(dsa)", "EOF"); |
| } |
| |
| @Test |
| public void testOperator() { |
| assertThat(lex("\nasd++asd\n")).containsExactly("IDENT(asd)", "INCR", "IDENT(asd)", "EOF"); |
| } |
| |
| @Test |
| public void unterminated() { |
| assertThat(lex("/* foo")).containsExactly("EOF"); |
| assertThat(lex("\" foo")).containsExactly("EOF"); |
| } |
| |
| @Test |
| public void boolLiteral() { |
| lexerComparisonTest("0b0101__01010"); |
| assertThat(lex("1 + 0b1000100101")) |
| .containsExactly("INT_LITERAL(1)", "PLUS", "INT_LITERAL(0b1000100101)", "EOF"); |
| } |
| |
| @Test |
| public void octalLiteral() { |
| assertThat(lex("1 + 01234567")) |
| .containsExactly("INT_LITERAL(1)", "PLUS", "INT_LITERAL(01234567)", "EOF"); |
| } |
| |
| @Test |
| public void testLiteral() { |
| assertThat(lex("0L")).containsExactly("LONG_LITERAL(0L)", "EOF"); |
| assertThat(lex("0")).containsExactly("INT_LITERAL(0)", "EOF"); |
| assertThat(lex("0x7fff_ffff")).containsExactly("INT_LITERAL(0x7fff_ffff)", "EOF"); |
| assertThat(lex("0177_7777_7777")).containsExactly("INT_LITERAL(0177_7777_7777)", "EOF"); |
| assertThat(lex("0b0111_1111_1111_1111_1111_1111_1111_1111")) |
| .containsExactly("INT_LITERAL(0b0111_1111_1111_1111_1111_1111_1111_1111)", "EOF"); |
| assertThat(lex("0x8000_0000")).containsExactly("INT_LITERAL(0x8000_0000)", "EOF"); |
| assertThat(lex("0200_0000_0000")).containsExactly("INT_LITERAL(0200_0000_0000)", "EOF"); |
| assertThat(lex("0b1000_0000_0000_0000_0000_0000_0000_0000")) |
| .containsExactly("INT_LITERAL(0b1000_0000_0000_0000_0000_0000_0000_0000)", "EOF"); |
| assertThat(lex("0xffff_ffff")).containsExactly("INT_LITERAL(0xffff_ffff)", "EOF"); |
| assertThat(lex("0377_7777_7777")).containsExactly("INT_LITERAL(0377_7777_7777)", "EOF"); |
| assertThat(lex("0b1111_1111_1111_1111_1111_1111_1111_1111")) |
| .containsExactly("INT_LITERAL(0b1111_1111_1111_1111_1111_1111_1111_1111)", "EOF"); |
| } |
| |
| @Test |
| public void testLong() { |
| assertThat(lex("1l")).containsExactly("LONG_LITERAL(1l)", "EOF"); |
| assertThat(lex("9223372036854775807L")) |
| .containsExactly("LONG_LITERAL(9223372036854775807L)", "EOF"); |
| assertThat(lex("-9223372036854775808L")) |
| .containsExactly("MINUS", "LONG_LITERAL(9223372036854775808L)", "EOF"); |
| assertThat(lex("0x7fff_ffff_ffff_ffffL")) |
| .containsExactly("LONG_LITERAL(0x7fff_ffff_ffff_ffffL)", "EOF"); |
| assertThat(lex("07_7777_7777_7777_7777_7777L")) |
| .containsExactly("LONG_LITERAL(07_7777_7777_7777_7777_7777L)", "EOF"); |
| assertThat( |
| lex( |
| "0b0111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L")) |
| .containsExactly( |
| "LONG_LITERAL(0b0111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L)", |
| "EOF"); |
| assertThat(lex("0x8000_0000_0000_0000L")) |
| .containsExactly("LONG_LITERAL(0x8000_0000_0000_0000L)", "EOF"); |
| assertThat(lex("010_0000_0000_0000_0000_0000L")) |
| .containsExactly("LONG_LITERAL(010_0000_0000_0000_0000_0000L)", "EOF"); |
| assertThat( |
| lex( |
| "0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000L")) |
| .containsExactly( |
| "LONG_LITERAL(0b1000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000L)", |
| "EOF"); |
| assertThat(lex("0xffff_ffff_ffff_ffffL")) |
| .containsExactly("LONG_LITERAL(0xffff_ffff_ffff_ffffL)", "EOF"); |
| assertThat(lex("017_7777_7777_7777_7777_7777L")) |
| .containsExactly("LONG_LITERAL(017_7777_7777_7777_7777_7777L)", "EOF"); |
| assertThat( |
| lex( |
| "0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L")) |
| .containsExactly( |
| "LONG_LITERAL(0b1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111L)", |
| "EOF"); |
| } |
| |
| @Test |
| public void testDoubleLiteral() { |
| assertThat(lex("1D")).containsExactly("DOUBLE_LITERAL(1D)", "EOF"); |
| assertThat(lex("123d")).containsExactly("DOUBLE_LITERAL(123d)", "EOF"); |
| assertThat(lex("1.7976931348623157e308")) |
| .containsExactly("DOUBLE_LITERAL(1.7976931348623157e308)", "EOF"); |
| assertThat(lex("4.9e-324")).containsExactly("DOUBLE_LITERAL(4.9e-324)", "EOF"); |
| } |
| |
| @Test |
| public void testFloatLiteral() { |
| assertThat(lex("1F")).containsExactly("FLOAT_LITERAL(1F)", "EOF"); |
| assertThat(lex("123f")).containsExactly("FLOAT_LITERAL(123f)", "EOF"); |
| assertThat(lex("3.4028235e38f")).containsExactly("FLOAT_LITERAL(3.4028235e38f)", "EOF"); |
| assertThat(lex("1.40e-45f")).containsExactly("FLOAT_LITERAL(1.40e-45f)", "EOF"); |
| } |
| |
| @Test |
| public void testComment() { |
| assertThat(lex("a//comment\nb //comment")).containsExactly("IDENT(a)", "IDENT(b)", "EOF"); |
| assertThat(lex("a/*comment*/\nb /*comment**/c/*asd*/")) |
| .containsExactly("IDENT(a)", "IDENT(b)", "IDENT(c)", "EOF"); |
| } |
| |
| @Test |
| public void testStringLiteral() { |
| assertThat(lex("\"asd\" \"\\n\"")) |
| .containsExactly("STRING_LITERAL(asd)", "STRING_LITERAL(\\n)", "EOF"); |
| } |
| |
| @Test |
| public void charLiteral() { |
| assertThat(lex("'a' '\\t' '\\r'")) |
| .containsExactly("CHAR_LITERAL(a)", "CHAR_LITERAL(\\t)", "CHAR_LITERAL(\\r)", "EOF"); |
| } |
| |
| @Test |
| public void negativeInt() { |
| assertThat(lex("(int)-1")) |
| .containsExactly("LPAREN", "INT", "RPAREN", "MINUS", "INT_LITERAL(1)", "EOF"); |
| } |
| |
| @Test |
| public void importStmt() { |
| assertThat(lex("import com.google.Foo;")) |
| .containsExactly( |
| "IMPORT", "IDENT(com)", "DOT", "IDENT(google)", "DOT", "IDENT(Foo)", "SEMI", "EOF"); |
| } |
| |
| @Test |
| public void annotation() { |
| assertThat(lex("@GwtCompatible(serializable = true, emulated = true)")) |
| .containsExactly( |
| "AT", |
| "IDENT(GwtCompatible)", |
| "LPAREN", |
| "IDENT(serializable)", |
| "ASSIGN", |
| "TRUE", |
| "COMMA", |
| "IDENT(emulated)", |
| "ASSIGN", |
| "TRUE", |
| "RPAREN", |
| "EOF"); |
| } |
| |
| @Test |
| public void operators() { |
| assertThat( |
| lex( |
| "= > < ! ~ ? : ->\n" |
| + "== >= <= != && || ++ --\n" |
| + "+ - * / & | ^ % << >> >>>\n" |
| + "+= -= *= /= &= |= ^= %= <<= >>= >>>=")) |
| .containsExactly( |
| "ASSIGN", "GT", "LT", "NOT", "TILDE", "COND", "COLON", "ARROW", "EQ", "GTE", "LTE", |
| "NOTEQ", "ANDAND", "OROR", "INCR", "DECR", "PLUS", "MINUS", "MULT", "DIV", "AND", "OR", |
| "XOR", "MOD", "LTLT", "GTGT", "GTGTGT", "PLUSEQ", "MINUSEQ", "MULTEQ", "DIVEQ", "ANDEQ", |
| "OREQ", "XOREQ", "MODEQ", "LTLTE", "GTGTE", "GTGTGTE", "EOF"); |
| } |
| |
| @Test |
| public void keywords() { |
| assertThat( |
| lex( |
| " abstract continue for new switch\n" |
| + " assert default if package synchronized\n" |
| + " boolean do goto private this\n" |
| + " break double implements protected throw\n" |
| + " byte else import public throws\n" |
| + " case enum instanceof return transient\n" |
| + " catch extends int short try\n" |
| + " char final interface static void\n" |
| + " class finally long strictfp volatile\n" |
| + " const float native super while\n" |
| + "= > < ! ~ ? : ->\n")) |
| .containsExactly( |
| "ABSTRACT", |
| "CONTINUE", |
| "FOR", |
| "NEW", |
| "SWITCH", |
| "ASSERT", |
| "DEFAULT", |
| "IF", |
| "PACKAGE", |
| "SYNCHRONIZED", |
| "BOOLEAN", |
| "DO", |
| "GOTO", |
| "PRIVATE", |
| "THIS", |
| "BREAK", |
| "DOUBLE", |
| "IMPLEMENTS", |
| "PROTECTED", |
| "THROW", |
| "BYTE", |
| "ELSE", |
| "IMPORT", |
| "PUBLIC", |
| "THROWS", |
| "CASE", |
| "ENUM", |
| "INSTANCEOF", |
| "RETURN", |
| "TRANSIENT", |
| "CATCH", |
| "EXTENDS", |
| "INT", |
| "SHORT", |
| "TRY", |
| "CHAR", |
| "FINAL", |
| "INTERFACE", |
| "STATIC", |
| "VOID", |
| "CLASS", |
| "FINALLY", |
| "LONG", |
| "STRICTFP", |
| "VOLATILE", |
| "CONST", |
| "FLOAT", |
| "NATIVE", |
| "SUPER", |
| "WHILE", |
| "ASSIGN", |
| "GT", |
| "LT", |
| "NOT", |
| "TILDE", |
| "COND", |
| "COLON", |
| "ARROW", |
| "EOF"); |
| } |
| |
| @Test |
| public void hexFloat() { |
| lexerComparisonTest("0x1.0p31"); |
| lexerComparisonTest("0x1p31"); |
| } |
| |
| @Test |
| public void zeroFloat() { |
| lexerComparisonTest("0f"); |
| } |
| |
| @Test |
| public void escape() { |
| lexerComparisonTest("'\\b'"); |
| lexerComparisonTest("'\\0'"); |
| lexerComparisonTest("'\\01'"); |
| lexerComparisonTest("'\\001'"); |
| } |
| |
| @Test |
| public void floatLiteral() { |
| lexerComparisonTest(".123321f"); |
| lexerComparisonTest(".123321F"); |
| lexerComparisonTest(".123321d"); |
| lexerComparisonTest(".123321D"); |
| lexerComparisonTest("0.0e+1f"); |
| lexerComparisonTest("0.0e-1f"); |
| lexerComparisonTest(".123321"); |
| } |
| |
| @Test |
| public void digitsUnderscore() { |
| lexerComparisonTest("123__123______3"); |
| } |
| |
| @Test |
| public void moreOperators() { |
| lexerComparisonTest("* / %"); |
| } |
| |
| @Test |
| public void unusualKeywords() { |
| lexerComparisonTest("const goto assert"); |
| } |
| |
| @Test |
| public void specialCharLiteral() { |
| lexerComparisonTest("'\\013'"); |
| } |
| |
| @Test |
| public void stringEscape() { |
| lexerComparisonTest("\"asd\\\"dsa\""); |
| } |
| |
| @Test |
| public void blockCommentEndingSlash() { |
| lexerComparisonTest("foo /*/*/ bar"); |
| } |
| |
| private void lexerComparisonTest(String s) { |
| assertThat(lex(s)).containsExactlyElementsIn(JavacLexer.javacLex(s)); |
| } |
| |
| public static List<String> lex(String input) { |
| Lexer lexer = new StreamLexer(new UnicodeEscapePreprocessor(new SourceFile(null, input))); |
| List<String> tokens = new ArrayList<>(); |
| Token token; |
| do { |
| token = lexer.next(); |
| String tokenString; |
| switch (token) { |
| case IDENT: |
| case INT_LITERAL: |
| case LONG_LITERAL: |
| case FLOAT_LITERAL: |
| case DOUBLE_LITERAL: |
| tokenString = String.format("%s(%s)", token.name(), lexer.stringValue()); |
| break; |
| case CHAR_LITERAL: |
| case STRING_LITERAL: |
| tokenString = |
| String.format( |
| "%s(%s)", |
| token.name(), SourceCodeEscapers.javaCharEscaper().escape(lexer.stringValue())); |
| break; |
| default: |
| tokenString = token.name(); |
| break; |
| } |
| tokens.add(tokenString); |
| } while (token != Token.EOF); |
| return tokens; |
| } |
| } |