Implement enough of a lexer and parser for MLIR to parse extfunc's without
arguments.
PiperOrigin-RevId: 201706570
diff --git a/include/mlir/Parser.h b/include/mlir/Parser.h
new file mode 100644
index 0000000..cb5f1c0
--- /dev/null
+++ b/include/mlir/Parser.h
@@ -0,0 +1,38 @@
+//===- Parser.h - MLIR Parser Library Interface -----------------*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file contains the interface to the MLIR parser library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_PARSER_H
+#define MLIR_PARSER_H
+
+namespace llvm {
+ class SourceMgr;
+}
+
+namespace mlir {
+class Module;
+
+/// Parse the main file of the given SourceMgr. Returns a new Module that the
+/// caller owns, or null (after emitting diagnostics) if the input is invalid.
+Module *parseSourceFile(llvm::SourceMgr &sourceMgr);
+
+} // end namespace mlir
+
+#endif // MLIR_PARSER_H
diff --git a/lib/Parser/Lexer.cpp b/lib/Parser/Lexer.cpp
new file mode 100644
index 0000000..5958658
--- /dev/null
+++ b/lib/Parser/Lexer.cpp
@@ -0,0 +1,137 @@
+//===- Lexer.cpp - MLIR Lexer Implementation ------------------------------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file implements the lexer for the MLIR textual form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Lexer.h"
+#include "llvm/Support/SourceMgr.h"
+using namespace mlir;
+using llvm::SMLoc;
+using llvm::SourceMgr;
+
+Lexer::Lexer(llvm::SourceMgr &sourceMgr) : sourceMgr(sourceMgr) {
+ auto bufferID = sourceMgr.getMainFileID();
+ curBuffer = sourceMgr.getMemoryBuffer(bufferID)->getBuffer();
+ curPtr = curBuffer.begin();
+}
+
+/// emitError - Emit an error message and return a Token::error token.
+Token Lexer::emitError(const char *loc, const Twine &message) {
+ // TODO(clattner): If/when we want to implement a -verify mode, this will need
+ // to package up errors into SMDiagnostic and report them.
+ sourceMgr.PrintMessage(SMLoc::getFromPointer(loc), SourceMgr::DK_Error,
+ message);
+ return formToken(Token::error, loc);
+}
+
+/// Lex and return the next token, skipping whitespace and ';' comments.
+Token Lexer::lexToken() {
+  const char *tokStart = curPtr;
+  switch (*curPtr++) {
+  default:
+    // Handle bare identifiers. Cast first: isalpha on a negative char is UB.
+    if (isalpha(static_cast<unsigned char>(curPtr[-1])))
+      return lexBareIdentifierOrKeyword(tokStart);
+
+    // Unknown character, emit an error.
+    return emitError(tokStart, "unexpected character");
+
+  case 0:
+    // This may either be a nul character in the source file or may be the EOF
+    // marker that llvm::MemoryBuffer guarantees will be there.
+    if (curPtr-1 == curBuffer.end())
+      return formToken(Token::eof, tokStart);
+
+    LLVM_FALLTHROUGH;
+  case ' ':
+  case '\t':
+  case '\n':
+  case '\r':
+    // Ignore whitespace (embedded nuls fall through to here as well).
+    return lexToken();
+
+  case '(': return formToken(Token::l_paren, tokStart);
+  case ')': return formToken(Token::r_paren, tokStart);
+  case '<': return formToken(Token::less, tokStart);
+  case '>': return formToken(Token::greater, tokStart);
+
+  case ';': return lexComment();
+  case '@': return lexAtIdentifier(tokStart);
+  }
+}
+
+/// Lex a comment line, starting with a semicolon.
+///
+/// TODO: add a regex for comments here and to the spec.
+///
+Token Lexer::lexComment() {
+ while (true) {
+ switch (*curPtr++) {
+ case '\n':
+ case '\r':
+ // Newline is end of comment.
+ return lexToken();
+ case 0:
+ // If this is the end of the buffer, end the comment.
+ if (curPtr-1 == curBuffer.end()) {
+ --curPtr;
+ return lexToken();
+ }
+ LLVM_FALLTHROUGH;
+ default:
+ // Skip over other characters.
+ break;
+ }
+ }
+}
+
+/// Lex a bare identifier or keyword that starts with a letter.
+///
+///   bare-id ::= letter (letter|digit)*
+///
+Token Lexer::lexBareIdentifierOrKeyword(const char *tokStart) {
+  // Match the rest of the identifier regex: [0-9a-zA-Z]*. The cast keeps
+  // bytes >= 0x80 from reaching isalnum as negative values, which is UB.
+  while (isalnum(static_cast<unsigned char>(*curPtr)))
+    ++curPtr;
+
+  // Check to see if this identifier is a keyword.
+  StringRef spelling(tokStart, curPtr-tokStart);
+  Token::TokenKind kind = llvm::StringSwitch<Token::TokenKind>(spelling)
+    .Case("cfgfunc", Token::kw_cfgfunc)
+    .Case("extfunc", Token::kw_extfunc)
+    .Case("mlfunc", Token::kw_mlfunc)
+    .Default(Token::bare_identifier);
+
+  return Token(kind, spelling);
+}
+
+/// Lex an '@foo' identifier.
+///
+///   function-id ::= `@` bare-id
+///
+Token Lexer::lexAtIdentifier(const char *tokStart) {
+  // These always start with a letter. Cast to unsigned char before calling
+  // the ctype predicates: a negative char argument is undefined behavior.
+  if (!isalpha(static_cast<unsigned char>(*curPtr++)))
+    return emitError(curPtr-1, "expected letter in @ identifier");
+  while (isalnum(static_cast<unsigned char>(*curPtr)))
+    ++curPtr;
+  return formToken(Token::at_identifier, tokStart);
+}
diff --git a/lib/Parser/Lexer.h b/lib/Parser/Lexer.h
new file mode 100644
index 0000000..5886c5c
--- /dev/null
+++ b/lib/Parser/Lexer.h
@@ -0,0 +1,65 @@
+//===- Lexer.h - MLIR Lexer Interface ---------------------------*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file declares the MLIR Lexer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_LIB_PARSER_LEXER_H
+#define MLIR_LIB_PARSER_LEXER_H
+
+#include "Token.h"
+
+namespace llvm {
+ class SourceMgr;
+}
+
+namespace mlir {
+
+/// This class breaks up the current file into a token stream.
+class Lexer {
+ llvm::SourceMgr &sourceMgr;
+
+ StringRef curBuffer;
+ const char *curPtr;
+
+ Lexer(const Lexer&) = delete;
+ void operator=(const Lexer&) = delete;
+public:
+ explicit Lexer(llvm::SourceMgr &sourceMgr);
+
+ llvm::SourceMgr &getSourceMgr() { return sourceMgr; }
+
+ Token lexToken();
+
+private:
+ // Helpers.
+ Token formToken(Token::TokenKind kind, const char *tokStart) {
+ return Token(kind, StringRef(tokStart, curPtr-tokStart));
+ }
+
+ Token emitError(const char *loc, const Twine &message);
+
+ // Lexer implementation methods.
+ Token lexComment();
+ Token lexBareIdentifierOrKeyword(const char *tokStart);
+ Token lexAtIdentifier(const char *tokStart);
+};
+
+} // end namespace mlir
+
+#endif // MLIR_LIB_PARSER_LEXER_H
diff --git a/lib/Parser/Parser.cpp b/lib/Parser/Parser.cpp
new file mode 100644
index 0000000..abad611
--- /dev/null
+++ b/lib/Parser/Parser.cpp
@@ -0,0 +1,186 @@
+//===- Parser.cpp - MLIR Parser Implementation ----------------------------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file implements the parser for the MLIR textual form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Parser.h"
+#include "Lexer.h"
+#include "mlir/IR/Module.h"
+#include "llvm/Support/SourceMgr.h"
+using namespace mlir;
+using llvm::SourceMgr;
+
+namespace {
+/// Simple enum to make code read better. Failure is "true" in a boolean
+/// context.
+enum ParseResult {
+ ParseSuccess,
+ ParseFailure
+};
+
+/// Main parser implementation.
+class Parser {
+public:
+  explicit Parser(llvm::SourceMgr &sourceMgr)
+      : lex(sourceMgr), curToken(lex.lexToken()) {
+    module.reset(new Module());
+  }
+
+  Module *parseModule();
+private:
+  // State. `lex` must stay declared before `curToken`: the constructor primes
+  // curToken from lex, and members are initialized in declaration order.
+  Lexer lex;
+
+  // This is the next token that hasn't been consumed yet.
+  Token curToken;
+
+  // This is the result module we are parsing into.
+  std::unique_ptr<Module> module;
+
+  // Helper methods.
+  /// Emit an error and return failure.
+  ParseResult emitError(const Twine &message);
+
+  /// Advance the current lexer onto the next token.
+  void consumeToken() {
+    assert(curToken.isNot(Token::eof, Token::error) &&
+           "shouldn't advance past EOF or errors");
+    curToken = lex.lexToken();
+  }
+
+  /// Advance the current lexer onto the next token, asserting what the expected
+  /// current token is.  This is preferred to the above method because it leads
+  /// to more self-documenting code with better checking.
+  void consumeToken(Token::TokenKind kind) {
+    assert(curToken.is(kind) && "consumed an unexpected token");
+    consumeToken();
+  }
+
+  // Type parsing.
+
+  // Top level entity parsing.
+  ParseResult parseFunctionSignature(StringRef &name);
+  ParseResult parseExtFunc();
+};
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Helper methods.
+//===----------------------------------------------------------------------===//
+
+ParseResult Parser::emitError(const Twine &message) {
+ // TODO(clattner): If/when we want to implement a -verify mode, this will need
+ // to package up errors into SMDiagnostic and report them.
+ lex.getSourceMgr().PrintMessage(curToken.getLoc(), SourceMgr::DK_Error,
+ message);
+ return ParseFailure;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Type Parsing
+//===----------------------------------------------------------------------===//
+
+// ... TODO
+
+//===----------------------------------------------------------------------===//
+// Top-level entity parsing.
+//===----------------------------------------------------------------------===//
+
+/// Parse a function signature, starting with a name and including the parameter
+/// list.
+///
+/// argument-list ::= type (`,` type)* | /*empty*/
+/// function-signature ::= function-id `(` argument-list `)` (`->` type-list)?
+///
+ParseResult Parser::parseFunctionSignature(StringRef &name) {
+ if (curToken.isNot(Token::at_identifier))
+ return emitError("expected a function identifier like '@foo'");
+
+ name = curToken.getSpelling().drop_front();
+ consumeToken(Token::at_identifier);
+
+ if (curToken.isNot(Token::l_paren))
+ return emitError("expected '(' in function signature");
+ consumeToken(Token::l_paren);
+
+ // TODO: This should actually parse the full grammar here.
+
+ if (curToken.isNot(Token::r_paren))
+ return emitError("expected ')' in function signature");
+ consumeToken(Token::r_paren);
+
+ return ParseSuccess;
+}
+
+
+/// External function declarations.
+///
+/// ext-func ::= `extfunc` function-signature
+///
+ParseResult Parser::parseExtFunc() {
+ consumeToken(Token::kw_extfunc);
+
+ StringRef name;
+ if (parseFunctionSignature(name))
+ return ParseFailure;
+
+
+ // Okay, the external function definition was parsed correctly.
+ module->functionList.push_back(new Function(name));
+ return ParseSuccess;
+}
+
+
+/// This is the top-level module parser.
+Module *Parser::parseModule() {
+ while (1) {
+ switch (curToken.getKind()) {
+ default:
+ emitError("expected a top level entity");
+ return nullptr;
+
+ // If we got to the end of the file, then we're done.
+ case Token::eof:
+ return module.release();
+
+ // If we got an error token, then the lexer already emitted an error, just
+ // stop. Someday we could introduce error recovery if there was demand for
+ // it.
+ case Token::error:
+ return nullptr;
+
+ case Token::kw_extfunc:
+ if (parseExtFunc())
+ return nullptr;
+ break;
+
+ // TODO: cfgfunc, mlfunc, affine entity declarations, etc.
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+
+/// This parses the file specified by the indicated SourceMgr and returns an
+/// MLIR module if it was valid. If not, it emits diagnostics and returns null.
+Module *mlir::parseSourceFile(llvm::SourceMgr &sourceMgr) {
+ return Parser(sourceMgr).parseModule();
+}
diff --git a/lib/Parser/Token.cpp b/lib/Parser/Token.cpp
new file mode 100644
index 0000000..551bd1e
--- /dev/null
+++ b/lib/Parser/Token.cpp
@@ -0,0 +1,37 @@
+//===- Token.cpp - MLIR Token Implementation ------------------------------===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+//
+// This file implements the Token class for the MLIR textual form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Token.h"
+using namespace mlir;
+using llvm::SMLoc;
+using llvm::SMRange;
+
+SMLoc Token::getLoc() const {
+ return SMLoc::getFromPointer(spelling.data());
+}
+
+SMLoc Token::getEndLoc() const {
+ return SMLoc::getFromPointer(spelling.data() + spelling.size());
+}
+
+SMRange Token::getLocRange() const {
+ return SMRange(getLoc(), getEndLoc());
+}
diff --git a/lib/Parser/Token.h b/lib/Parser/Token.h
new file mode 100644
index 0000000..03c967e
--- /dev/null
+++ b/lib/Parser/Token.h
@@ -0,0 +1,98 @@
+//===- Token.h - MLIR Token Interface ---------------------------*- C++ -*-===//
+//
+// Copyright 2019 The MLIR Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#ifndef MLIR_LIB_PARSER_TOKEN_H
+#define MLIR_LIB_PARSER_TOKEN_H
+
+#include "mlir/Support/LLVM.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/SMLoc.h"
+
+namespace mlir {
+
+/// This represents a token in the MLIR syntax.
+class Token {
+public:
+ enum TokenKind {
+ // Markers
+ eof, error,
+
+ // Identifiers.
+ bare_identifier, // foo
+ at_identifier, // @foo
+ // TODO: @@foo, etc.
+
+ // Punctuation.
+ l_paren, r_paren, // ( )
+ less, greater, // < >
+ // TODO: More punctuation.
+
+ // Keywords.
+ kw_cfgfunc,
+ kw_extfunc,
+ kw_mlfunc,
+ // TODO: More keywords.
+ };
+
+ Token(TokenKind kind, StringRef spelling)
+ : kind(kind), spelling(spelling) {}
+
+ // Return the bytes that make up this token.
+ StringRef getSpelling() const { return spelling; }
+
+ // Token classification.
+ TokenKind getKind() const { return kind; }
+ bool is(TokenKind K) const { return kind == K; }
+
+ bool isAny(TokenKind k1, TokenKind k2) const {
+ return is(k1) || is(k2);
+ }
+
+ /// Return true if this token is one of the specified kinds.
+ template <typename ...T>
+ bool isAny(TokenKind k1, TokenKind k2, TokenKind k3, T... others) const {
+ if (is(k1))
+ return true;
+ return isAny(k2, k3, others...);
+ }
+
+ bool isNot(TokenKind k) const { return kind != k; }
+
+ /// Return true if this token isn't one of the specified kinds.
+ template <typename ...T>
+ bool isNot(TokenKind k1, TokenKind k2, T... others) const {
+ return !isAny(k1, k2, others...);
+ }
+
+
+ /// Location processing.
+ llvm::SMLoc getLoc() const;
+ llvm::SMLoc getEndLoc() const;
+ llvm::SMRange getLocRange() const;
+
+private:
+ /// Discriminator that indicates the sort of token this is.
+ TokenKind kind;
+
+ /// A reference to the entire token contents; this is always a pointer into
+ /// a memory buffer owned by the source manager.
+ StringRef spelling;
+};
+
+} // end namespace mlir
+
+#endif // MLIR_LIB_PARSER_TOKEN_H
diff --git a/test/IR/check-help-output.mlir b/test/IR/check-help-output.mlir
index 3a62414..617ae78 100644
--- a/test/IR/check-help-output.mlir
+++ b/test/IR/check-help-output.mlir
@@ -1,15 +1,7 @@
-// TODO(andydavis) Resolve relative path issue w.r.t invoking mlir-opt in RUN
-// statements (perhaps through using lit config substitutions).
-//
-// RUN: %S/../../mlir-opt --help | FileCheck --check-prefix=CHECKHELP %s
-// RUN: %S/../../mlir-opt %s -o - | FileCheck %s
-//
-// CHECKHELP: OVERVIEW: MLIR modular optimizer driver
+; TODO(andydavis) Resolve relative path issue w.r.t invoking mlir-opt in RUN
+; statements (perhaps through using lit config substitutions).
+;
+; RUN: %S/../../mlir-opt --help | FileCheck %s
+;
+; CHECK: OVERVIEW: MLIR modular optimizer driver
-
-// Right now the input is completely ignored.
-extfunc @foo()
-extfunc @bar()
-
-// CHECK: extfunc @foo()
-// CHECK: extfunc @bar()
diff --git a/test/IR/parser.mlir b/test/IR/parser.mlir
new file mode 100644
index 0000000..21b6a04
--- /dev/null
+++ b/test/IR/parser.mlir
@@ -0,0 +1,15 @@
+; TODO(andydavis) Resolve relative path issue w.r.t invoking mlir-opt in RUN
+; statements (perhaps through using lit config substitutions).
+;
+; RUN: %S/../../mlir-opt %s -o - | FileCheck %s
+
+
+; CHECK: extfunc @foo()
+extfunc @foo()
+
+; CHECK: extfunc @bar()
+extfunc @bar()
+
+; CHECK: extfunc @baz()
+extfunc @baz()
+
diff --git a/tools/mlir-opt/mlir-opt.cpp b/tools/mlir-opt/mlir-opt.cpp
index dee86ed..b5a548d 100644
--- a/tools/mlir-opt/mlir-opt.cpp
+++ b/tools/mlir-opt/mlir-opt.cpp
@@ -22,7 +22,9 @@
//===----------------------------------------------------------------------===//
#include "mlir/IR/Module.h"
+#include "mlir/Parser.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/ToolOutputFile.h"
@@ -56,13 +58,27 @@
cl::ParseCommandLineOptions(argc, argv, "MLIR modular optimizer driver\n");
- // Instantiate an IR object.
- Module m;
- m.functionList.push_back(new Function("foo"));
- m.functionList.push_back(new Function("bar"));
+ // Set up the input file.
+ auto fileOrErr = MemoryBuffer::getFileOrSTDIN(inputFilename);
+ if (std::error_code error = fileOrErr.getError()) {
+ llvm::errs() << argv[0] << ": could not open input file '" << inputFilename
+ << "': " << error.message() << "\n";
+ return 1;
+ }
+
+ // Tell sourceMgr about this buffer, which is what the parser will pick up.
+ SourceMgr sourceMgr;
+ sourceMgr.AddNewSourceBuffer(std::move(*fileOrErr), SMLoc());
+
+ // Parse the input file and emit any errors.
+ std::unique_ptr<Module> module(parseSourceFile(sourceMgr));
+ if (!module) return 1;
// Print the output.
auto output = getOutputStream();
- m.print(output->os());
+ module->print(output->os());
output->keep();
+
+ // Success.
+ return 0;
}