blob: 7e3076fca5be55678e91589094c71bc6662c949b [file] [log] [blame]
//===- Parser.cpp - MLIR Parser Implementation ----------------------------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements the parser for the MLIR textual form.
//
//===----------------------------------------------------------------------===//
#include "mlir/Parser.h"
#include "Lexer.h"
#include "mlir/IR/Module.h"
#include "mlir/IR/Types.h"
#include "llvm/Support/SourceMgr.h"
using namespace mlir;
using llvm::SourceMgr;
namespace {
/// Outcome of a parsing routine, used in place of a bare bool so that call
/// sites read naturally. ParseFailure is the nonzero enumerator, so a
/// statement such as `if (parseFoo())` tests for failure.
enum ParseResult { ParseSuccess = 0, ParseFailure = 1 };
/// Hand-written parser for the MLIR textual form. It reads tokens from a
/// Lexer, keeps exactly one token of lookahead in `curToken`, and populates a
/// Module that parseModule() hands back to the caller.
class Parser {
public:
  /// Create a parser reading from `sourceMgr`. The lookahead token is primed
  /// with the first token produced by the lexer and an empty result module is
  /// allocated.
  Parser(llvm::SourceMgr &sourceMgr, MLIRContext *context)
      : context(context), lex(sourceMgr), curToken(lex.lexToken()),
        module(new Module()) {}

  /// Top-level entry point; see the out-of-line definition.
  Module *parseModule();

private:
  // ---- State ----

  /// Context handed to the type factory functions.
  MLIRContext *const context;

  /// The lexer for the source file being parsed.
  Lexer lex;

  /// The next token that has not been consumed yet.
  Token curToken;

  /// The module that parsed entities are added to.
  std::unique_ptr<Module> module;

private:
  // ---- Helper methods ----

  /// Emit an error and return failure.
  ParseResult emitError(const Twine &message);

  /// Replace the lookahead token with the next token from the lexer. Must not
  /// be called when the lookahead is an EOF or error token.
  void consumeToken() {
    assert(curToken.isNot(Token::eof, Token::error) &&
           "shouldn't advance past EOF or errors");
    curToken = lex.lexToken();
  }

  /// Same as consumeToken(), but additionally asserts that the token being
  /// consumed has the given kind. Prefer this overload: it documents what the
  /// caller expects and checks it.
  void consumeToken(Token::TokenKind kind) {
    assert(curToken.is(kind) && "consumed an unexpected token");
    consumeToken();
  }

  /// Consume the lookahead token and return true when it has the given kind;
  /// otherwise leave it in place and return false.
  bool consumeIf(Token::TokenKind kind) {
    if (curToken.is(kind)) {
      consumeToken(kind);
      return true;
    }
    return false;
  }

  ParseResult parseCommaSeparatedList(
      Token::TokenKind rightToken,
      const std::function<ParseResult()> &parseElement,
      bool allowEmptyList = true);

  // Parsing methods come in two flavours: those that return a non-null pointer
  // on success, and those that return a ParseResult to signal failure. The
  // latter deliver their results through by-reference arguments.

  // ---- Type parsing ----
  PrimitiveType *parsePrimitiveType();
  Type *parseElementType();
  VectorType *parseVectorType();
  ParseResult parseDimensionListRanked(SmallVectorImpl<int> &dimensions);
  Type *parseTensorType();
  Type *parseMemRefType();
  Type *parseFunctionType();
  Type *parseType();
  ParseResult parseTypeList(SmallVectorImpl<Type*> &elements);

  // ---- Top level entity parsing ----
  ParseResult parseFunctionSignature(StringRef &name, FunctionType *&type);
  ParseResult parseExtFunc();
};
} // end anonymous namespace
//===----------------------------------------------------------------------===//
// Helper methods.
//===----------------------------------------------------------------------===//
/// Report `message` as an error at the location of the current token and
/// return ParseFailure, so callers can write `return emitError(...)`.
/// Nothing is printed when the current token is an error token.
ParseResult Parser::emitError(const Twine &message) {
  // A parse error triggered by a lexer error has already been reported by the
  // lexer, so only print when the current token is not an error token.
  if (!curToken.is(Token::error)) {
    // TODO(clattner): If/when we want to implement a -verify mode, this will
    // need to package up errors into SMDiagnostic and report them.
    lex.getSourceMgr().PrintMessage(curToken.getLoc(), SourceMgr::DK_Error,
                                    message);
  }
  return ParseFailure;
}
/// Parse a comma-separated sequence of elements closed by `rightToken`,
/// invoking `parseElement` once per element. The closing token is consumed.
/// An empty list is accepted only when `allowEmptyList` is true.
///
///   abstract-list ::= rightToken                  // if allowEmptyList == true
///   abstract-list ::= element (',' element)* rightToken
///
/// Returns ParseFailure as soon as `parseElement` fails or the list is
/// malformed.
ParseResult Parser::parseCommaSeparatedList(
    Token::TokenKind rightToken,
    const std::function<ParseResult()> &parseElement, bool allowEmptyList) {
  // Seeing the terminator right away means the list is empty.
  if (curToken.is(rightToken)) {
    if (allowEmptyList) {
      consumeToken(rightToken);
      return ParseSuccess;
    }
    return emitError("expected list element");
  }

  // Parse the first element, then one more element after every comma.
  do {
    if (parseElement())
      return ParseFailure;
  } while (consumeIf(Token::comma));

  // The list has to be closed by the terminator.
  // NOTE(review): the diagnostic names ')' even though `rightToken` can be any
  // token kind.
  if (consumeIf(rightToken))
    return ParseSuccess;
  return emitError("expected ',' or ')'");
}
//===----------------------------------------------------------------------===//
// Type Parsing
//===----------------------------------------------------------------------===//
/// Parse one of the built-in primitive types named by a keyword token.
///
///   primitive-type
///      ::= `f16` | `bf16` | `f32` | `f64`      // Floating point
///        | `i1` | `i8` | `i16` | `i32` | `i64` // Sized integers
///        | `int`
///
/// On a match the keyword is consumed and the corresponding type is returned.
/// Any other token produces an "expected type" error and a null result.
PrimitiveType *Parser::parsePrimitiveType() {
  const auto kind = curToken.getKind();
  switch (kind) {
  // Floating point types.
  case Token::kw_bf16:
    consumeToken(kind);
    return Type::getBF16(context);
  case Token::kw_f16:
    consumeToken(kind);
    return Type::getF16(context);
  case Token::kw_f32:
    consumeToken(kind);
    return Type::getF32(context);
  case Token::kw_f64:
    consumeToken(kind);
    return Type::getF64(context);

  // Sized integer types.
  case Token::kw_i1:
    consumeToken(kind);
    return Type::getI1(context);
  case Token::kw_i8:
    consumeToken(kind);
    return Type::getI8(context);
  case Token::kw_i16:
    consumeToken(kind);
    return Type::getI16(context);
  case Token::kw_i32:
    consumeToken(kind);
    return Type::getI32(context);
  case Token::kw_i64:
    consumeToken(kind);
    return Type::getI64(context);

  case Token::kw_int:
    consumeToken(kind);
    return Type::getInt(context);

  default:
    emitError("expected type");
    return nullptr;
  }
}
/// Parse the element type of a tensor or memref type: a vector type when the
/// current token is the `vector` keyword, a primitive type otherwise.
///
///   element-type ::= primitive-type | vector-type
///
Type *Parser::parseElementType() {
  if (curToken.isNot(Token::kw_vector))
    return parsePrimitiveType();
  return parseVectorType();
}
/// Parse a vector type. The current token must be the `vector` keyword.
///
///   vector-type ::= `vector` `<` const-dimension-list primitive-type `>`
///   const-dimension-list ::= (integer-literal `x`)+
///
/// Returns null after emitting a diagnostic when the type is malformed.
VectorType *Parser::parseVectorType() {
  consumeToken(Token::kw_vector);

  if (!consumeIf(Token::less)) {
    emitError("expected '<' in vector type");
    return nullptr;
  }

  // At least one dimension is required.
  if (curToken.isNot(Token::integer)) {
    emitError("expected dimension size in vector type");
    return nullptr;
  }

  SmallVector<unsigned, 4> dimensions;
  // The check above guarantees the first iteration sees an integer token.
  do {
    // Reject integer literals that do not yield an unsigned value.
    const auto parsedSize = curToken.getUnsignedIntegerValue();
    if (!parsedSize.hasValue()) {
      emitError("invalid dimension in vector type");
      return nullptr;
    }
    dimensions.push_back(parsedSize.getValue());
    consumeToken(Token::integer);

    // The separator is lexed as an identifier: either a lone 'x' or an 'x'
    // glued to what follows, e.g. 'xbf32'.
    const bool startsWithX = curToken.is(Token::bare_identifier) &&
                             curToken.getSpelling()[0] == 'x';
    if (!startsWithX) {
      emitError("expected 'x' in vector dimension list");
      return nullptr;
    }

    // When more text is glued to the 'x', make the lexer resume right after
    // the 'x' so that the rest is lexed as its own token.
    if (curToken.getSpelling().size() != 1)
      lex.resetPointer(curToken.getSpelling().data() + 1);

    // Consume the 'x'.
    consumeToken(Token::bare_identifier);
  } while (curToken.is(Token::integer));

  // Parse the element type.
  auto *elementType = parsePrimitiveType();
  if (!elementType)
    return nullptr;

  if (!consumeIf(Token::greater)) {
    emitError("expected '>' in vector type");
    return nullptr;
  }

  return VectorType::get(dimensions, elementType);
}
/// Parse the dimension list of a tensor or memref type, appending one entry
/// per dimension to `dimensions`. A `?` dimension is recorded as -1.
///
///   dimension-list-ranked ::= (dimension `x`)*
///   dimension ::= `?` | integer-literal
///
/// Returns ParseFailure after emitting a diagnostic on malformed input.
ParseResult Parser::parseDimensionListRanked(SmallVectorImpl<int> &dimensions) {
  while (curToken.isAny(Token::integer, Token::question)) {
    if (curToken.is(Token::question)) {
      // Unknown dimension.
      consumeToken(Token::question);
      dimensions.push_back(-1);
    } else {
      // The literal must yield an unsigned value that is still non-negative
      // once converted to int.
      const auto parsedSize = curToken.getUnsignedIntegerValue();
      if (!parsedSize.hasValue())
        return emitError("invalid dimension");
      const int size = static_cast<int>(parsedSize.getValue());
      if (size < 0)
        return emitError("invalid dimension");
      dimensions.push_back(size);
      consumeToken(Token::integer);
    }

    // The separator is lexed as an identifier: either a lone 'x' or an 'x'
    // glued to what follows, e.g. 'xbf32'.
    const bool startsWithX = curToken.is(Token::bare_identifier) &&
                             curToken.getSpelling()[0] == 'x';
    if (!startsWithX)
      return emitError("expected 'x' in dimension list");

    // When more text is glued to the 'x', make the lexer resume right after
    // the 'x' so that the rest is lexed as its own token.
    if (curToken.getSpelling().size() != 1)
      lex.resetPointer(curToken.getSpelling().data() + 1);

    // Consume the 'x'.
    consumeToken(Token::bare_identifier);
  }

  return ParseSuccess;
}
/// Parse a tensor type. The current token must be the `tensor` keyword.
///
///   tensor-type ::= `tensor` `<` dimension-list element-type `>`
///   dimension-list ::= dimension-list-ranked | `??`
///
/// Returns null after emitting a diagnostic when the type is malformed. The
/// parsed shape and element type are currently discarded; see the FIXME at
/// the end of the function.
Type *Parser::parseTensorType() {
  consumeToken(Token::kw_tensor);

  if (!consumeIf(Token::less)) {
    emitError("expected '<' in tensor type");
    return nullptr;
  }

  // `??` marks an unranked tensor; otherwise a ranked dimension list follows.
  SmallVector<int, 4> dimensions;
  const bool isUnranked = consumeIf(Token::questionquestion);
  if (!isUnranked && parseDimensionListRanked(dimensions))
    return nullptr;
  (void)isUnranked; // Not used until tensor types have an IR representation.

  // Parse the element type.
  auto *elementType = parseElementType();
  if (!elementType)
    return nullptr;

  if (!consumeIf(Token::greater)) {
    emitError("expected '>' in tensor type");
    return nullptr;
  }

  // FIXME: Add an IR representation for tensor types.
  return Type::getI1(context);
}
/// Parse a memref type. The current token must be the `memref` keyword.
///
///   memref-type ::= `memref` `<` dimension-list-ranked element-type
///                   (`,` semi-affine-map-composition)? (`,` memory-space)? `>`
///
///   semi-affine-map-composition ::= (semi-affine-map `,` )* semi-affine-map
///   memory-space ::= integer-literal /* | TODO: address-space-id */
///
/// Only the dimension list and element type are parsed so far; the optional
/// map composition and memory space are not handled yet. Returns null after
/// emitting a diagnostic when the type is malformed.
Type *Parser::parseMemRefType() {
  consumeToken(Token::kw_memref);

  if (!consumeIf(Token::less)) {
    emitError("expected '<' in memref type");
    return nullptr;
  }

  SmallVector<int, 4> shape;
  if (parseDimensionListRanked(shape))
    return nullptr;

  // Parse the element type.
  auto *elementType = parseElementType();
  if (!elementType)
    return nullptr;

  // TODO: Parse semi-affine-map-composition.
  // TODO: Parse memory-space.

  if (!consumeIf(Token::greater)) {
    emitError("expected '>' in memref type");
    return nullptr;
  }

  // FIXME: Add an IR representation for memref types.
  return Type::getI1(context);
}
/// Parse a function type. The current token must be the opening `(` of the
/// argument list.
///
///   function-type ::= type-list-parens `->` type-list
///
/// Returns null after a diagnostic has been emitted when the type is
/// malformed.
Type *Parser::parseFunctionType() {
  assert(curToken.is(Token::l_paren));

  // Argument types.
  SmallVector<Type*, 4> inputs;
  if (parseTypeList(inputs))
    return nullptr;

  if (!consumeIf(Token::arrow)) {
    emitError("expected '->' in function type");
    return nullptr;
  }

  // Result types.
  SmallVector<Type*, 4> outputs;
  if (parseTypeList(outputs))
    return nullptr;

  return FunctionType::get(inputs, outputs, context);
}
/// Parse any type, dispatching on the kind of the current token. Tokens that
/// do not start a memref, tensor, vector or function type are handed to the
/// primitive type parser, which also reports the error for non-type tokens.
///
///   type ::= primitive-type
///          | vector-type
///          | tensor-type
///          | memref-type
///          | function-type
///   element-type ::= primitive-type | vector-type
///
Type *Parser::parseType() {
  const auto kind = curToken.getKind();
  if (kind == Token::kw_memref)
    return parseMemRefType();
  if (kind == Token::kw_tensor)
    return parseTensorType();
  if (kind == Token::kw_vector)
    return parseVectorType();
  if (kind == Token::l_paren)
    return parseFunctionType();
  return parsePrimitiveType();
}
/// Parse a "type list", which is a singular type, or a parenthesized list of
/// types. Every successfully parsed type is appended to `elements`.
///
///   type-list ::= type-list-parens | type
///   type-list-parens ::= `(` `)`
///                      | `(` type (`,` type)* `)`
///
/// Returns ParseFailure when any type fails to parse or the parenthesized
/// list is malformed. On failure `elements` may hold the types parsed before
/// the failing one, but a null pointer is never appended.
ParseResult Parser::parseTypeList(SmallVectorImpl<Type*> &elements) {
  // Parse one type and record it only when parsing succeeded, so that the
  // output list never contains null entries.
  auto parseElt = [&]() -> ParseResult {
    auto *elt = parseType();
    if (!elt)
      return ParseFailure;
    elements.push_back(elt);
    return ParseSuccess;
  };

  // Without an opening paren, this must be a singular type.
  if (!consumeIf(Token::l_paren))
    return parseElt();

  // Otherwise parse the (possibly empty) list up to the closing paren.
  return parseCommaSeparatedList(Token::r_paren, parseElt);
}
//===----------------------------------------------------------------------===//
// Top-level entity parsing.
//===----------------------------------------------------------------------===//
/// Parse a function signature: the function name followed by its parameter
/// list and an optional result list.
///
///   argument-list ::= type (`,` type)* | /*empty*/
///   function-signature ::= function-id `(` argument-list `)` (`->` type-list)?
///
/// On success `name` receives the identifier spelling with its first
/// character dropped and `type` receives the function type built from the
/// parsed argument and result types. On failure a diagnostic has been emitted
/// and ParseFailure is returned.
ParseResult Parser::parseFunctionSignature(StringRef &name,
                                           FunctionType *&type) {
  if (curToken.isNot(Token::at_identifier))
    return emitError("expected a function identifier like '@foo'");

  // Record the name without its leading character before moving on.
  name = curToken.getSpelling().drop_front();
  consumeToken(Token::at_identifier);

  // The argument list must be parenthesized; parseTypeList consumes the '('.
  if (curToken.isNot(Token::l_paren))
    return emitError("expected '(' in function signature");

  SmallVector<Type*, 4> inputs;
  if (parseTypeList(inputs))
    return ParseFailure;

  // Parse the return type if present.
  SmallVector<Type*, 4> outputs;
  if (consumeIf(Token::arrow) && parseTypeList(outputs))
    return ParseFailure;

  type = FunctionType::get(inputs, outputs, context);
  return ParseSuccess;
}
/// Parse an external function declaration and add the resulting function to
/// the module. The current token must be the `extfunc` keyword.
///
///   ext-func ::= `extfunc` function-signature
///
ParseResult Parser::parseExtFunc() {
  consumeToken(Token::kw_extfunc);

  StringRef funcName;
  FunctionType *funcType = nullptr;
  if (parseFunctionSignature(funcName, funcType))
    return ParseFailure;

  // The signature parsed correctly; record the declaration in the module.
  auto *function = new Function(funcName, funcType);
  module->functionList.push_back(function);
  return ParseSuccess;
}
/// Top-level module parser: parse entities until the end of the file. Returns
/// the populated module, releasing it from the parser, or null when an error
/// was encountered.
Module *Parser::parseModule() {
  while (true) {
    switch (curToken.getKind()) {
    case Token::kw_extfunc:
      if (parseExtFunc())
        return nullptr;
      break;

    // TODO: cfgfunc, mlfunc, affine entity declarations, etc.

    // Reaching the end of the file means parsing is complete.
    case Token::eof:
      return module.release();

    // The lexer already emitted an error for an error token, so just stop.
    // Someday we could introduce error recovery if there was demand for it.
    case Token::error:
      return nullptr;

    default:
      emitError("expected a top level entity");
      return nullptr;
    }
  }
}
//===----------------------------------------------------------------------===//
/// Parse the input held by `sourceMgr` and return the resulting MLIR module.
/// When the input is invalid, diagnostics are emitted and null is returned.
Module *mlir::parseSourceFile(llvm::SourceMgr &sourceMgr,
                              MLIRContext *context) {
  Parser parser(sourceMgr, context);
  return parser.parseModule();
}