blob: afa99a900d2e2e99987ac2379e9209261e9c21c8 [file] [log] [blame]
/*
* Copyright 2000-2013 JetBrains s.r.o.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.jetbrains.python;
import com.jetbrains.python.fixtures.PyLexerTestCase;
import com.jetbrains.python.lexer.PythonIndentingLexer;
/**
* @author yole
*/
public class PythonLexerTest extends PyLexerTestCase {
public void testSimpleExpression() {
doTest("a=1", "Py:IDENTIFIER", "Py:EQ", "Py:INTEGER_LITERAL", "Py:STATEMENT_BREAK");
}
public void testMergeSpaces() {
doTest("a = 1", "Py:IDENTIFIER", "Py:SPACE", "Py:EQ", "Py:SPACE", "Py:INTEGER_LITERAL", "Py:STATEMENT_BREAK");
}
public void testLineBreakInBraces() {
doTest("[a,\n b]", "Py:LBRACKET", "Py:IDENTIFIER", "Py:COMMA", "Py:LINE_BREAK", "Py:IDENTIFIER", "Py:RBRACKET", "Py:STATEMENT_BREAK");
}
public void testLineBreakInBraces2() {
doTest("x=[a,\n b]", "Py:IDENTIFIER", "Py:EQ", "Py:LBRACKET", "Py:IDENTIFIER", "Py:COMMA", "Py:LINE_BREAK",
"Py:IDENTIFIER", "Py:RBRACKET", "Py:STATEMENT_BREAK");
}
public void testLineBreakInBracesAfterComment() {
doTest("x=[a, #c\n b]",
"Py:IDENTIFIER", "Py:EQ", "Py:LBRACKET", "Py:IDENTIFIER", "Py:COMMA", "Py:SPACE", "Py:END_OF_LINE_COMMENT",
"Py:LINE_BREAK", "Py:IDENTIFIER", "Py:RBRACKET", "Py:STATEMENT_BREAK");
}
public void testBraceAfterIndent() {
doTest("x=\n [a,\n b]",
"Py:IDENTIFIER", "Py:EQ", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:LBRACKET", "Py:IDENTIFIER", "Py:COMMA", "Py:LINE_BREAK", "Py:IDENTIFIER", "Py:RBRACKET", "Py:STATEMENT_BREAK");
}
public void testBackslash() {
doTest("a=\\\nb",
"Py:IDENTIFIER", "Py:EQ", "Py:SPACE", "Py:LINE_BREAK", "Py:IDENTIFIER", "Py:STATEMENT_BREAK");
}
public void testBackslashAfterSpace() {
doTest("a = \\\n b",
"Py:IDENTIFIER", "Py:SPACE", "Py:EQ", "Py:SPACE", "Py:LINE_BREAK", "Py:IDENTIFIER", "Py:STATEMENT_BREAK");
}
public void testMultipleBackslashes() {
doTest("[\\\n\\\n]",
"Py:LBRACKET", "Py:SPACE", "Py:LINE_BREAK", "Py:SPACE", "Py:LINE_BREAK", "Py:RBRACKET", "Py:STATEMENT_BREAK");
}
public void testIndent() {
doTest("if a:\n b",
"Py:IF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:IDENTIFIER", "Py:STATEMENT_BREAK");
}
public void testMultiLineIndent() {
doTest("if a:\n b\n c",
"Py:IF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:IDENTIFIER", "Py:STATEMENT_BREAK", "Py:LINE_BREAK", "Py:IDENTIFIER", "Py:STATEMENT_BREAK");
}
public void testDedent() {
doTest("if a:\n b\nc",
"Py:IF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:IDENTIFIER", "Py:STATEMENT_BREAK", "Py:DEDENT", "Py:LINE_BREAK",
"Py:IDENTIFIER", "Py:STATEMENT_BREAK");
}
public void testMultiDedent() {
doTest("if a:\n b\n c\nd",
"Py:IF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK", "Py:INDENT",
"Py:IDENTIFIER", "Py:STATEMENT_BREAK", "Py:LINE_BREAK", "Py:INDENT", "Py:IDENTIFIER", "Py:STATEMENT_BREAK", "Py:DEDENT",
"Py:DEDENT", "Py:LINE_BREAK", "Py:IDENTIFIER", "Py:STATEMENT_BREAK");
}
public void testEmptyLine() {
doTest("if a:\n b\n \n c",
"Py:IF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK", "Py:INDENT",
"Py:IDENTIFIER", "Py:STATEMENT_BREAK", "Py:LINE_BREAK", "Py:IDENTIFIER", "Py:STATEMENT_BREAK");
}
public void testEndOfLineSpace() {
doTest("if a:\n b\n c \n \n d",
"Py:IF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:IDENTIFIER", "Py:STATEMENT_BREAK", "Py:LINE_BREAK", "Py:INDENT", "Py:IDENTIFIER", "Py:STATEMENT_BREAK",
"Py:LINE_BREAK", "Py:IDENTIFIER", "Py:STATEMENT_BREAK");
}
public void testComment() {
doTest("if a:\n b\n #comment\nc",
"Py:IF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:IDENTIFIER", "Py:STATEMENT_BREAK", "Py:LINE_BREAK", "Py:END_OF_LINE_COMMENT", "Py:DEDENT", "Py:LINE_BREAK",
"Py:IDENTIFIER", "Py:STATEMENT_BREAK");
}
public void testIndentedComment() {
doTest("#a\n #b\n#c",
"Py:END_OF_LINE_COMMENT", "Py:LINE_BREAK", "Py:END_OF_LINE_COMMENT", "Py:LINE_BREAK", "Py:END_OF_LINE_COMMENT", "Py:STATEMENT_BREAK");
}
public void testIndentedCommentAndCode() {
doTest("if a:\n #b\n c",
"Py:IF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:END_OF_LINE_COMMENT", "Py:LINE_BREAK", "Py:IDENTIFIER", "Py:STATEMENT_BREAK");
}
public void testWithNotKeyword() {
doTest("with=as", "Py:IDENTIFIER", "Py:EQ", "Py:IDENTIFIER", "Py:STATEMENT_BREAK");
}
public void testBytesLiteral() {
doTest("a=b'ABC'", "Py:IDENTIFIER", "Py:EQ", "Py:SINGLE_QUOTED_STRING", "Py:STATEMENT_BREAK");
}
public void testOctalLiteral() {
doTest("0o123", "Py:INTEGER_LITERAL", "Py:STATEMENT_BREAK");
}
public void testBinaryLiteral() {
doTest("0b0101", "Py:INTEGER_LITERAL", "Py:STATEMENT_BREAK");
}
private static final String THREE_QUOTES = "\"\"\"";
public void testLongString() {
doTest(THREE_QUOTES + THREE_QUOTES + "\nb=" + THREE_QUOTES + THREE_QUOTES,
"Py:DOCSTRING", "Py:STATEMENT_BREAK", "Py:LINE_BREAK", "Py:IDENTIFIER", "Py:EQ", "Py:TRIPLE_QUOTED_STRING", "Py:STATEMENT_BREAK");
}
public void testLongStringEscaped() {
doTest("''' \\'''foo\\''' ''';", "Py:DOCSTRING", "Py:SEMICOLON", "Py:STATEMENT_BREAK");
}
public void testLongStringWithLineBreak() {
doTest(THREE_QUOTES + "\\\na\n\n" + THREE_QUOTES + ";", "Py:DOCSTRING", "Py:SEMICOLON", "Py:STATEMENT_BREAK");
}
public void _testWithKeyword() throws Exception {
// processing of 'from __future__ import' is now done on parser level, so a pure lexer test won't handle
// this correctly
doTest("from __future__ import with_statement\nwith x as y", new String[] { "Py:FROM_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:SPACE", "Py:IMPORT_KEYWORD", "Py:SPACE", "Py:IDENTIFIER",
"Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:WITH_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:SPACE", "Py:AS_KEYWORD", "Py:SPACE", "Py:IDENTIFIER" });
}
public void testBackslashBeforeEmptyLine() {
doTest("x=\"a\" + \\\n \"b\" \\\n\nprint x", "Py:IDENTIFIER", "Py:EQ", "Py:SINGLE_QUOTED_STRING", "Py:SPACE", "Py:PLUS", "Py:SPACE", "Py:LINE_BREAK",
"Py:SINGLE_QUOTED_STRING", "Py:SPACE", "Py:LINE_BREAK", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:IDENTIFIER" , "Py:SPACE", "Py:IDENTIFIER", "Py:STATEMENT_BREAK");
}
public void testBackslashFuction() {
doTest("def test():\n" +
" print \\\n" +
"\n" +
"def test2():\n" +
" pass\n" +
"\n" +
"print 'hello'", "Py:DEF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:LPAR", "Py:RPAR", "Py:COLON", "Py:STATEMENT_BREAK",
"Py:LINE_BREAK", "Py:INDENT", "Py:IDENTIFIER", "Py:SPACE", "Py:LINE_BREAK", "Py:STATEMENT_BREAK", "Py:DEDENT", "Py:LINE_BREAK",
"Py:DEF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:LPAR", "Py:RPAR", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:PASS_KEYWORD", "Py:STATEMENT_BREAK", "Py:DEDENT", "Py:LINE_BREAK", "Py:IDENTIFIER", "Py:SPACE",
"Py:SINGLE_QUOTED_STRING", "Py:STATEMENT_BREAK");
}
public void testIncompleteTripleQuotedString() { // PY-1768
doTest("tmp = '''abc\nd", "Py:IDENTIFIER", "Py:SPACE", "Py:EQ", "Py:SPACE", "Py:TRIPLE_QUOTED_STRING", "Py:STATEMENT_BREAK");
}
public void testEscapedClosingTripleApos() { // PY-1777
doTest("a=''' foo '\\''' bar '''", "Py:IDENTIFIER", "Py:EQ", "Py:TRIPLE_QUOTED_STRING", "Py:STATEMENT_BREAK");
}
public void testEscapedClosingTripleQuote() { // PY-1777
doTest("a="+THREE_QUOTES + " foo \"\\" + THREE_QUOTES + " bar " + THREE_QUOTES, "Py:IDENTIFIER", "Py:EQ", "Py:TRIPLE_QUOTED_STRING", "Py:STATEMENT_BREAK");
}
public void testEOFDocstring() { // PY-4169
doTest(THREE_QUOTES + " foo \"\\" + THREE_QUOTES + " bar " + THREE_QUOTES,"Py:DOCSTRING", "Py:STATEMENT_BREAK");
}
public void testOddNumberOfQuotes() { // PY-2802
doTest("'''foo''''", "Py:TRIPLE_QUOTED_STRING", "Py:SINGLE_QUOTED_STRING", "Py:STATEMENT_BREAK");
}
public void testDedentBeforeComment() { // PY-2209 & friends
doTest("class UserProfile:\n" +
" pass\n" +
"\n" +
"#noinspection PyUnusedLocal\n" +
"def foo(sender):\n" +
" pass",
"Py:CLASS_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:PASS_KEYWORD", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:DEDENT", "Py:END_OF_LINE_COMMENT", "Py:LINE_BREAK",
"Py:DEF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:LPAR", "Py:IDENTIFIER", "Py:RPAR", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:PASS_KEYWORD", "Py:STATEMENT_BREAK");
}
public void testDedentAfterComment() { // PY-2137
doTest("def foo():\n" +
" pass\n" +
" #comment\n",
"Py:DEF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:LPAR", "Py:RPAR", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:PASS_KEYWORD", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:END_OF_LINE_COMMENT", "Py:DEDENT", "Py:LINE_BREAK", "Py:STATEMENT_BREAK");
}
public void testIndentAtStartOfFile() { // PY-4941
doTest(" a", "Py:SPACE", "Py:INDENT", "Py:IDENTIFIER", "Py:STATEMENT_BREAK");
}
// PY-3067
public void testErrorOpenParInExpr() {
doTest("def f():\n" +
" (\n" +
"\n" +
"def g():\n" +
" pass\n",
"Py:DEF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:LPAR", "Py:RPAR", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:LPAR", "Py:LINE_BREAK",
"Py:STATEMENT_BREAK", "Py:DEDENT", "Py:LINE_BREAK", // Error recovery
"Py:DEF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:LPAR", "Py:RPAR", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:PASS_KEYWORD", "Py:STATEMENT_BREAK", "Py:DEDENT", "Py:LINE_BREAK",
"Py:STATEMENT_BREAK");
}
// PY-3067
public void testErrorOpenParInExprBeforeComment() {
doTest("def f():\n" +
" (\n" +
"#comment\n" +
"def g():\n" +
" pass\n",
"Py:DEF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:LPAR", "Py:RPAR", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:LPAR", "Py:LINE_BREAK",
"Py:END_OF_LINE_COMMENT", "Py:LINE_BREAK",
"Py:STATEMENT_BREAK", "Py:DEDENT", "Py:LINE_BREAK", // Error recovery
"Py:DEF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:LPAR", "Py:RPAR", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:PASS_KEYWORD", "Py:STATEMENT_BREAK", "Py:DEDENT", "Py:LINE_BREAK",
"Py:STATEMENT_BREAK");
}
// PY-7255
public void testDocstringInDict() {
doTest("d = {\n" +
" 'foo':\n" +
" 'bar'\n" +
" 'baz'\n" +
"}",
"Py:IDENTIFIER", "Py:SPACE", "Py:EQ", "Py:SPACE", "Py:LBRACE", "Py:LINE_BREAK",
"Py:SINGLE_QUOTED_STRING", "Py:COLON", "Py:LINE_BREAK", "Py:SINGLE_QUOTED_STRING", "Py:LINE_BREAK",
"Py:SINGLE_QUOTED_STRING", "Py:LINE_BREAK", "Py:RBRACE", "Py:STATEMENT_BREAK");
}
// PY-3067
public void testErrorOpenParInMethod() {
doTest("class C:\n" +
" def f(self):\n" +
" (\n" +
"\n" +
" def g(self):\n" +
" pass\n",
"Py:CLASS_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:DEF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:LPAR", "Py:IDENTIFIER", "Py:RPAR", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:LPAR", "Py:LINE_BREAK",
"Py:STATEMENT_BREAK", "Py:DEDENT", "Py:LINE_BREAK", // Error recovery
"Py:DEF_KEYWORD", "Py:SPACE", "Py:IDENTIFIER", "Py:LPAR", "Py:IDENTIFIER", "Py:RPAR", "Py:COLON", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:INDENT", "Py:PASS_KEYWORD", "Py:STATEMENT_BREAK", "Py:DEDENT", "Py:DEDENT", "Py:LINE_BREAK",
"Py:STATEMENT_BREAK");
}
// PY-6722
public void testBackslashAndEmptyLineInsideSquareBrackets() {
doTest("xs = [ \\\n" +
"\n" +
"]\n" +
"print(xs)\n",
"Py:IDENTIFIER", "Py:SPACE", "Py:EQ", "Py:SPACE", "Py:LBRACKET", "Py:SPACE", "Py:LINE_BREAK",
"Py:LINE_BREAK",
"Py:RBRACKET", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:IDENTIFIER", "Py:LPAR", "Py:IDENTIFIER", "Py:RPAR", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:STATEMENT_BREAK");
}
// PY-6722
public void testSingleBackslashLineInsideSquareBrackets() {
doTest("xs = [\n" +
"\\\n" +
"]\n",
"Py:IDENTIFIER", "Py:SPACE", "Py:EQ", "Py:SPACE", "Py:LBRACKET", "Py:LINE_BREAK",
"Py:SPACE", "Py:LINE_BREAK",
"Py:RBRACKET", "Py:STATEMENT_BREAK", "Py:LINE_BREAK",
"Py:STATEMENT_BREAK");
}
private static void doTest(String text, String... expectedTokens) {
PyLexerTestCase.doLexerTest(text, new PythonIndentingLexer(), expectedTokens);
}
}