blob: 755a2c20d70522e365861d954401b69a75d4ff4d [file] [log] [blame]
//===- GPUDialect.cpp - MLIR Dialect for GPU Kernels implementation -------===//
//
// Copyright 2019 The MLIR Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================
//
// This file implements the GPU kernel-related dialect and its operations.
//
//===----------------------------------------------------------------------===//
#include "mlir/GPU/GPUDialect.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/Module.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/StandardTypes.h"
using namespace mlir;
using namespace mlir::gpu;
/// Returns the textual name of this dialect, "gpu".
StringRef GPUDialect::getDialectName() {
  return StringRef("gpu");
}
/// Constructs the GPU dialect inside `context` under the name returned by
/// getDialectName() and registers its operations: LaunchOp and LaunchFuncOp,
/// followed by whatever operation list "mlir/GPU/GPUOps.cpp.inc" expands to
/// when GET_OP_LIST is defined.
GPUDialect::GPUDialect(MLIRContext *context)
: Dialect(getDialectName(), context) {
addOperations<LaunchOp, LaunchFuncOp,
#define GET_OP_LIST
#include "mlir/GPU/GPUOps.cpp.inc"
>();
}
#define GET_OP_CLASSES
#include "mlir/GPU/GPUOps.cpp.inc"
//===----------------------------------------------------------------------===//
// LaunchOp
//===----------------------------------------------------------------------===//
/// Returns the types of `values`, one entry per value and in the same order.
static SmallVector<Type, 4> getValueTypes(ArrayRef<Value *> values) {
  SmallVector<Type, 4> result;
  result.reserve(values.size());
  for (unsigned idx = 0, count = values.size(); idx < count; ++idx)
    result.push_back(values[idx]->getType());
  return result;
}
/// Populates `result` with the state of a LaunchOp.
///
/// The six grid/block size values become the leading operands, followed by
/// the data `operands`. A single region is added whose entry block takes
/// kNumConfigRegionAttributes arguments of `index` type followed by one
/// argument per data operand, typed like that operand.
void LaunchOp::build(Builder *builder, OperationState *result, Value *gridSizeX,
                     Value *gridSizeY, Value *gridSizeZ, Value *blockSizeX,
                     Value *blockSizeY, Value *blockSizeZ,
                     ArrayRef<Value *> operands) {
  // Launch configuration first, data operands afterwards.
  result->addOperands(
      {gridSizeX, gridSizeY, gridSizeZ, blockSizeX, blockSizeY, blockSizeZ});
  result->addOperands(operands);

  // Argument types of the kernel body's entry block.
  Type indexType = builder->getIndexType();
  std::vector<Type> configArgTypes(kNumConfigRegionAttributes, indexType);
  SmallVector<Type, 4> dataArgTypes = getValueTypes(operands);

  // Build the entry block and hand it to the freshly added region.
  Block *entryBlock = new Block();
  entryBlock->addArguments(configArgTypes);
  entryBlock->addArguments(dataArgTypes);
  result->addRegion()->push_back(entryBlock);
}
/// Returns the kernel body, i.e. region number 0 of this operation.
Region &LaunchOp::getBody() {
  Operation *op = getOperation();
  return op->getRegion(0);
}
/// Returns the block identifiers: arguments 0, 1 and 2 of the body's first
/// block, as (x, y, z). The body must contain at least one block.
KernelDim3 LaunchOp::getBlockIds() {
  Region &body = getBody();
  assert(!body.getBlocks().empty() && "Function body must not be empty.");
  auto regionArgs = body.getBlocks().front().getArguments();
  return KernelDim3{regionArgs[0], regionArgs[1], regionArgs[2]};
}
/// Returns the thread identifiers: arguments 3, 4 and 5 of the body's first
/// block, as (x, y, z). The body must contain at least one block.
KernelDim3 LaunchOp::getThreadIds() {
  Region &body = getBody();
  assert(!body.getBlocks().empty() && "Function body must not be empty.");
  auto regionArgs = body.getBlocks().front().getArguments();
  return KernelDim3{regionArgs[3], regionArgs[4], regionArgs[5]};
}
/// Returns the grid sizes as seen inside the body: arguments 6, 7 and 8 of
/// the body's first block, as (x, y, z). The body must contain at least one
/// block.
KernelDim3 LaunchOp::getGridSize() {
  Region &body = getBody();
  assert(!body.getBlocks().empty() && "Function body must not be empty.");
  auto regionArgs = body.getBlocks().front().getArguments();
  return KernelDim3{regionArgs[6], regionArgs[7], regionArgs[8]};
}
/// Returns the block sizes as seen inside the body: arguments 9, 10 and 11 of
/// the body's first block, as (x, y, z). The body must contain at least one
/// block.
KernelDim3 LaunchOp::getBlockSize() {
  Region &body = getBody();
  assert(!body.getBlocks().empty() && "Function body must not be empty.");
  auto regionArgs = body.getBlocks().front().getArguments();
  return KernelDim3{regionArgs[9], regionArgs[10], regionArgs[11]};
}
/// Returns the range of operands that follow the first kNumConfigOperands
/// (launch configuration) operands.
Operation::operand_range LaunchOp::getKernelOperandValues() {
  Operation *op = getOperation();
  auto firstDataOperand = op->operand_begin() + kNumConfigOperands;
  return {firstDataOperand, op->operand_end()};
}
/// Appends to `out` the type of every operand past the first
/// kNumConfigOperands ones. Existing contents of `out` are preserved.
void LaunchOp::getKernelOperandTypes(SmallVectorImpl<Type> &out) {
  out.reserve(getNumOperands() - kNumConfigOperands + out.size());
  unsigned idx = kNumConfigOperands;
  while (idx < getNumOperands()) {
    out.push_back(getOperand(idx)->getType());
    ++idx;
  }
}
/// Returns operands 0, 1 and 2 of the operation, i.e. the grid sizes passed
/// to the launch, as (x, y, z).
KernelDim3 LaunchOp::getGridSizeOperandValues() {
  Value *sizeX = getOperand(0);
  Value *sizeY = getOperand(1);
  Value *sizeZ = getOperand(2);
  return KernelDim3{sizeX, sizeY, sizeZ};
}
/// Returns operands 3, 4 and 5 of the operation, i.e. the block sizes passed
/// to the launch, as (x, y, z).
KernelDim3 LaunchOp::getBlockSizeOperandValues() {
  Value *sizeX = getOperand(3);
  Value *sizeY = getOperand(4);
  Value *sizeZ = getOperand(5);
  return KernelDim3{sizeX, sizeY, sizeZ};
}
/// Verifies the LaunchOp: when the body region is non-empty, its entry block
/// must have the expected number of arguments. An empty body is accepted.
LogicalResult LaunchOp::verify() {
  Region &body = getBody();
  if (body.empty())
    return success();

  // The launch takes kNumConfigOperands leading operands (grid/block sizes)
  // and exposes kNumConfigRegionAttributes region arguments (block/thread
  // identifiers and grid/block sizes) ahead of the data arguments.
  // NOTE(review): the expected count below equals
  // kNumConfigRegionAttributes + <number of data operands> only if
  // kNumConfigRegionAttributes == 2 * kNumConfigOperands -- confirm against
  // the constants' definitions.
  Block &entryBlock = body.front();
  const bool countMatches =
      entryBlock.getNumArguments() == kNumConfigOperands + getNumOperands();
  if (!countMatches)
    return emitError("unexpected number of region arguments");
  return success();
}
// Pretty-prints one grid or block size assignment in the form
//   (%iter-x, %iter-y, %iter-z) in
//   (%size-x = %ssa-use, %size-y = %ssa-use, %size-z = %ssa-use)
// where the %iter-* values come from `ids`, the %size-* values from `size`
// (both correspond to body region arguments) and the %ssa-use values are the
// first three entries of `operands`.
static void printSizeAssignment(OpAsmPrinter *p, KernelDim3 size,
                                ArrayRef<Value *> operands, KernelDim3 ids) {
  OpAsmPrinter &printer = *p;

  // Identifier triple.
  printer << '(' << *ids.x;
  printer << ", " << *ids.y;
  printer << ", " << *ids.z;
  printer << ") in (";

  // Size assignments, one per dimension.
  printer << *size.x << " = " << *operands[0] << ", ";
  printer << *size.y << " = " << *operands[1] << ", ";
  printer << *size.z << " = " << *operands[2] << ')';
}
/// Prints the LaunchOp in its custom form: the operation name, the `blocks`
/// and `threads` size assignments, an optional data-argument remapping, the
/// list of data operand types, the body region (without its entry block
/// arguments) and finally the optional attribute dictionary.
void LaunchOp::print(OpAsmPrinter *p) {
  // Snapshot the operands so they can be sliced as an ArrayRef.
  SmallVector<Value *, 12> allOperands(operand_begin(), operand_end());
  ArrayRef<Value *> operandsRef(allOperands);

  // Launch configuration: grid sizes use operands [0, 3), block sizes use
  // operands [3, 6).
  *p << getOperationName() << ' ' << getBlocksKeyword();
  printSizeAssignment(p, getGridSize(), operandsRef.take_front(3),
                      getBlockIds());
  *p << ' ' << getThreadsKeyword();
  printSizeAssignment(p, getBlockSize(), operandsRef.slice(3, 3),
                      getThreadIds());

  // Everything past the configuration operands is a data operand.
  ArrayRef<Value *> dataOperands = operandsRef.drop_front(kNumConfigOperands);
  const unsigned numData = dataOperands.size();
  const bool hasData = !dataOperands.empty();

  // Data argument remapping: `%region_arg = %operand` pairs.
  if (!getBody().empty() && hasData) {
    *p << ' ' << getArgsKeyword() << '(';
    Block &entryBlock = getBody().front();
    for (unsigned idx = 0; idx < numData; ++idx) {
      if (idx != 0)
        *p << ", ";
      *p << *entryBlock.getArgument(kNumConfigRegionAttributes + idx) << " = "
         << *dataOperands[idx];
    }
    *p << ") ";
  }

  // Types of the data operands.
  if (hasData) {
    *p << ": ";
    for (unsigned idx = 0; idx < numData; ++idx) {
      if (idx != 0)
        *p << ", ";
      *p << dataOperands[idx]->getType();
    }
  }

  p->printRegion(getBody(), /*printEntryBlockArgs=*/false);
  p->printOptionalAttrDict(getAttrs());
}
// Parses one size assignment segment (used for both blocks and threads). The
// segment has the form
//   (%region_arg, %region_arg, %region_arg) in
//   (%region_arg = %operand, %region_arg = %operand, %region_arg = %operand)
// where %region_arg are percent-identifiers for the region arguments to be
// introduced further (SSA defs), and %operand are percent-identifiers for the
// SSA value uses. The first triple is stored into `indices`; for each
// assignment the left-hand side goes into `regionSizes` and the right-hand
// side into `sizes`. Parsing stops at the first failure.
static ParseResult
parseSizeAssignment(OpAsmParser *parser,
                    MutableArrayRef<OpAsmParser::OperandType> sizes,
                    MutableArrayRef<OpAsmParser::OperandType> regionSizes,
                    MutableArrayRef<OpAsmParser::OperandType> indices) {
  // `(` %id `,` %id `,` %id `)`
  if (parser->parseLParen())
    return failure();
  for (int dim = 0; dim < 3; ++dim) {
    if (dim != 0 && parser->parseComma())
      return failure();
    if (parser->parseRegionArgument(indices[dim]))
      return failure();
  }
  if (parser->parseRParen())
    return failure();

  // `in` `(`
  if (parser->parseKeyword("in") || parser->parseLParen())
    return failure();

  // %size `=` %operand, three times, comma-separated.
  for (int dim = 0; dim < 3; ++dim) {
    if (dim != 0 && parser->parseComma())
      return failure();
    if (parser->parseRegionArgument(regionSizes[dim]))
      return failure();
    if (parser->parseEqual())
      return failure();
    if (parser->parseOperand(sizes[dim]))
      return failure();
  }

  // Closing `)`.
  return parser->parseRParen();
}
// Parses a Launch operation and fills `result` with its operands, body region
// and attributes. Returns failure as soon as any parsing step fails.
// operation ::= `gpu.launch` `blocks` `(` ssa-id-list `)` `in` ssa-reassignment
// `threads` `(` ssa-id-list `)` `in` ssa-reassignment
// (`args` ssa-reassignment `:` type-list)?
// region attr-dict?
// ssa-reassignment ::= `(` ssa-id `=` ssa-use (`,` ssa-id `=` ssa-use)* `)`
ParseResult LaunchOp::parse(OpAsmParser *parser, OperationState *result) {
// Sizes of the grid and block.
SmallVector<OpAsmParser::OperandType, kNumConfigOperands> sizes(
kNumConfigOperands);
MutableArrayRef<OpAsmParser::OperandType> sizesRef(sizes);
// Actual (data) operands passed to the kernel.
SmallVector<OpAsmParser::OperandType, 4> dataOperands;
// Region arguments to be created.
SmallVector<OpAsmParser::OperandType, 16> regionArgs(
kNumConfigRegionAttributes);
MutableArrayRef<OpAsmParser::OperandType> regionArgsRef(regionArgs);
// Parse the size assignment segments: the first segment assigns grid sizes
// and defines values for block identifiers; the second segment assigns block
// sizes and defines values for thread identifiers. In the region argument
// list, identifiers precede sizes, and block-related values precede
// thread-related values: slots [0, 3) hold block ids, [3, 6) thread ids,
// [6, 9) grid sizes and [9, 12) block sizes. Once both segments are parsed,
// the six size operands are resolved with the `index` type.
if (parser->parseKeyword(getBlocksKeyword().data()) ||
parseSizeAssignment(parser, sizesRef.take_front(3),
regionArgsRef.slice(6, 3),
regionArgsRef.slice(0, 3)) ||
parser->parseKeyword(getThreadsKeyword().data()) ||
parseSizeAssignment(parser, sizesRef.drop_front(3),
regionArgsRef.slice(9, 3),
regionArgsRef.slice(3, 3)) ||
parser->resolveOperands(sizes, parser->getBuilder().getIndexType(),
result->operands))
return failure();
// If kernel argument renaming segment is present, parse it. When present,
// the segment should have at least one element. If this segment is present,
// so is the trailing type list. Parse it as well and use the parsed types
// to resolve the operands passed to the kernel arguments.
SmallVector<Type, 4> dataTypes;
if (!parser->parseOptionalKeyword(getArgsKeyword().data())) {
llvm::SMLoc argsLoc;
// Slots for the mandatory first `%region_arg = %operand` pair.
regionArgs.push_back({});
dataOperands.push_back({});
if (parser->getCurrentLocation(&argsLoc) || parser->parseLParen() ||
parser->parseRegionArgument(regionArgs.back()) ||
parser->parseEqual() || parser->parseOperand(dataOperands.back()))
return failure();
// Each further comma introduces one more pair.
while (!parser->parseOptionalComma()) {
regionArgs.push_back({});
dataOperands.push_back({});
if (parser->parseRegionArgument(regionArgs.back()) ||
parser->parseEqual() || parser->parseOperand(dataOperands.back()))
return failure();
}
if (parser->parseRParen() || parser->parseColonTypeList(dataTypes) ||
parser->resolveOperands(dataOperands, dataTypes, argsLoc,
result->operands))
return failure();
}
// Introduce the body region and parse it. The region has
// kNumConfigRegionAttributes leading arguments that correspond to
// block/thread identifiers and grid/block sizes, all of the `index` type.
// They are followed by the actual kernel arguments, so the `index` types are
// inserted in front of the parsed data types.
Type index = parser->getBuilder().getIndexType();
dataTypes.insert(dataTypes.begin(), kNumConfigRegionAttributes, index);
Region *body = result->addRegion();
return failure(parser->parseRegion(*body, regionArgs, dataTypes) ||
parser->parseOptionalAttributeDict(result->attributes));
}
//===----------------------------------------------------------------------===//
// LaunchFuncOp
//===----------------------------------------------------------------------===//
/// Populates `result` with the state of a LaunchFuncOp: the six grid/block
/// size values become the leading operands, `kernelOperands` follow, and a
/// function attribute referring to `kernelFunc` is attached under the name
/// returned by getKernelAttrName().
void LaunchFuncOp::build(Builder *builder, OperationState *result,
                         Function *kernelFunc, Value *gridSizeX,
                         Value *gridSizeY, Value *gridSizeZ, Value *blockSizeX,
                         Value *blockSizeY, Value *blockSizeZ,
                         ArrayRef<Value *> kernelOperands) {
  // Launch configuration first, data operands afterwards.
  result->addOperands(
      {gridSizeX, gridSizeY, gridSizeZ, blockSizeX, blockSizeY, blockSizeZ});
  result->addOperands(kernelOperands);

  // Record which function is launched.
  auto kernelRef = builder->getFunctionAttr(kernelFunc);
  result->addAttribute(getKernelAttrName(), kernelRef);
}
/// Convenience overload taking the grid and block sizes as KernelDim3
/// triples; unpacks them and forwards to the overload that takes six
/// individual size values.
void LaunchFuncOp::build(Builder *builder, OperationState *result,
                         Function *kernelFunc, KernelDim3 gridSize,
                         KernelDim3 blockSize,
                         ArrayRef<Value *> kernelOperands) {
  build(builder, result, kernelFunc,
        /*gridSizeX=*/gridSize.x, /*gridSizeY=*/gridSize.y,
        /*gridSizeZ=*/gridSize.z,
        /*blockSizeX=*/blockSize.x, /*blockSizeY=*/blockSize.y,
        /*blockSizeZ=*/blockSize.z, kernelOperands);
}
/// Returns the value of the FunctionAttr stored under getKernelAttrName().
/// The attribute is not checked for presence here.
StringRef LaunchFuncOp::kernel() {
  auto kernelRef = getAttrOfType<FunctionAttr>(getKernelAttrName());
  return kernelRef.getValue();
}
unsigned LaunchFuncOp::getNumKernelOperands() {
return getNumOperands() - kNumConfigOperands;
}
/// Returns the `i`-th kernel operand, i.e. the operand at position
/// `i + kNumConfigOperands` of the underlying operation.
Value *LaunchFuncOp::getKernelOperand(unsigned i) {
  Operation *op = getOperation();
  return op->getOperand(i + kNumConfigOperands);
}
/// Verifies the LaunchFuncOp. The checks, in order:
///   1. the attribute named by getKernelAttrName() exists and is a
///      FunctionAttr;
///   2. the operation is nested in a function that belongs to a module;
///   3. the module contains a function with the referenced name;
///   4. that function carries the unit attribute named by
///      GPUDialect::getKernelFuncAttrName();
///   5. the number of kernel operands equals the number of function
///      arguments;
///   6. each kernel operand has the type of the matching function argument.
/// Returns success if all checks pass, otherwise emits a diagnostic and
/// returns failure.
LogicalResult LaunchFuncOp::verify() {
  auto kernelAttr = this->getAttr(getKernelAttrName());
  if (!kernelAttr)
    return emitOpError("attribute 'kernel' must be specified");
  if (!kernelAttr.isa<FunctionAttr>())
    return emitOpError("attribute 'kernel' must be a function");

  // Resolving the kernel requires walking up to the enclosing module. Guard
  // both steps so that an operation or function that is not attached yet
  // yields a diagnostic instead of a null pointer dereference.
  auto *parentFunc = getOperation()->getFunction();
  auto *module = parentFunc ? parentFunc->getModule() : nullptr;
  if (!module)
    return emitOpError(
        "must be nested in a function that belongs to a module");

  Function *kernelFunc = module->getNamedFunction(kernel());
  if (!kernelFunc)
    return emitError() << "kernel function '" << kernelAttr << "' is undefined";

  if (!kernelFunc->getAttrOfType<mlir::UnitAttr>(
          GPUDialect::getKernelFuncAttrName())) {
    return emitError("kernel function is missing the '")
           << GPUDialect::getKernelFuncAttrName() << "' attribute";
  }

  unsigned numKernelFuncArgs = kernelFunc->getNumArguments();
  if (getNumKernelOperands() != numKernelFuncArgs) {
    return emitOpError("got ")
           << getNumKernelOperands() << " kernel operands but expected "
           << numKernelFuncArgs;
  }

  // Operand i must have exactly the type of function argument i.
  for (unsigned i = 0; i < numKernelFuncArgs; ++i) {
    if (getKernelOperand(i)->getType() !=
        kernelFunc->getArgument(i)->getType()) {
      return emitOpError("type of function argument ")
             << i << " does not match";
    }
  }
  return success();
}