Move GPU::LaunchOp to ODS. NFC.
Move the definition of the GPU launch opreation from hand-rolled C++ code to
ODS framework. This only does the moves, a follow-up is necessary to clean up
users of custom functions that could be auto-generated by ODS.
PiperOrigin-RevId: 284261856
Change-Id: I3bc93035fc9364c2992ea90129001697fca70cf0
diff --git a/third_party/mlir/g3doc/Dialects/GPU.md b/third_party/mlir/g3doc/Dialects/GPU.md
index d34ce18..bcb677d 100644
--- a/third_party/mlir/g3doc/Dialects/GPU.md
+++ b/third_party/mlir/g3doc/Dialects/GPU.md
@@ -69,77 +69,6 @@
%gDimZ = "gpu.grid_dim"() {dimension = "z"} : () -> (index)
```
-### `gpu.launch`
-
-Launch a kernel on the specified grid of thread blocks. The body of the kernel
-is defined by the single region that this operation contains. The operation
-takes at least six operands, with first three operands being grid sizes along
-x,y,z dimensions, the following three arguments being block sizes along x,y,z
-dimension, and the remaining operands are arguments of the kernel. When a
-lower-dimensional kernel is required, unused sizes must be explicitly set to
-`1`.
-
-The body region has at least _twelve_ arguments, grouped as follows:
-
-- three arguments that contain block identifiers along x,y,z dimensions;
-- three arguments that contain thread identifiers along x,y,z dimensions;
-- operands of the `gpu.launch` operation as is, including six leading operands
- for grid and block sizes.
-
-Operations inside the body region, and any operations in the nested regions, are
-_not_ allowed to use values defined outside the _body_ region, as if this region
-was a function. If necessary, values must be passed as kernel arguments into the
-body region. Nested regions inside the kernel body are allowed to use values
-defined in their ancestor regions as long as they don't cross the kernel body
-region boundary.
-
-Syntax:
-
-``` {.ebnf}
-operation ::= `gpu.launch` `block` `(` ssa-id-list `)` `in` ssa-reassignment
- `threads` `(` ssa-id-list `)` `in` ssa-reassignment
- (`args` ssa-reassignment `:` type-list)?
- region attr-dict?
-ssa-reassignment ::= `(` ssa-id `=` ssa-use (`,` ssa-id `=` ssa-use)* `)`
-```
-
-Example:
-
-```mlir {.mlir}
-gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %0, %sz_by = %1, %sz_bz = %2)
- threads(%tx, %ty, %tz) in (%sz_tx = %3, %sz_ty = %4, %sz_tz = %5)
- args(%arg0 = %6, %arg1 = 7) : f32, memref<?xf32, 1> {
- // Block and thread identifiers, as well as block/grid sizes are
- // immediately usable inside body region.
- "some_op"(%bx, %tx) : (index, index) -> ()
- %42 = load %arg1[%bx] : memref<?xf32, 1>
-}
-
-// Generic syntax explains how the pretty syntax maps to the IR structure.
-"gpu.launch"(%cst, %cst, %c1, // Grid sizes.
- %cst, %c1, %c1, // Block sizes.
- %arg0, %arg1) // Actual arguments.
- {/*attributes*/}
- // All sizes and identifiers have "index" size.
- : (index, index, index, index, index, index, f32, memref<?xf32, 1>) -> () {
-// The operation passes block and thread identifiers, followed by grid and block
-// sizes, followed by actual arguments to the entry block of the region.
-^bb0(%bx : index, %by : index, %bz : index,
- %tx : index, %ty : index, %tz : index,
- %num_bx : index, %num_by : index, %num_bz : index,
- %num_tx : index, %num_ty : index, %num_tz : index,
- %arg0 : f32, %arg1 : memref<?xf32, 1>):
- "some_op"(%bx, %tx) : (index, index) -> ()
- %3 = "std.load"(%arg1, %bx) : (memref<?xf32, 1>, index) -> f32
-}
-```
-
-Rationale: using operation/block arguments gives analyses a clear way of
-understanding that a value has additional semantics (e.g., we will need to know
-what value corresponds to threadIdx.x for coalescing). We can recover these
-properties by analyzing the operations producing values, but it is easier just
-to have that information by construction.
-
### `gpu.launch_func`
Launch a kernel function on the specified grid of thread blocks. `gpu.launch`
diff --git a/third_party/mlir/include/mlir/Dialect/GPU/GPUDialect.h b/third_party/mlir/include/mlir/Dialect/GPU/GPUDialect.h
index 194dd9c..3d63a45 100644
--- a/third_party/mlir/include/mlir/Dialect/GPU/GPUDialect.h
+++ b/third_party/mlir/include/mlir/Dialect/GPU/GPUDialect.h
@@ -77,74 +77,6 @@
Value *z;
};
-/// GPU kernel launch operation. Takes a 3D grid of thread blocks as leading
-/// operands, followed by kernel data operands. Has one region representing
-/// the kernel to be executed. This region is not allowed to use values defined
-/// outside it.
-class LaunchOp : public Op<LaunchOp, OpTrait::AtLeastNOperands<6>::Impl,
- OpTrait::ZeroResult, OpTrait::IsIsolatedFromAbove> {
-public:
- using Op::Op;
-
- static void build(Builder *builder, OperationState &result, Value *gridSizeX,
- Value *gridSizeY, Value *gridSizeZ, Value *blockSizeX,
- Value *blockSizeY, Value *blockSizeZ,
- ArrayRef<Value *> operands);
-
- /// Get the kernel region.
- Region &getBody();
-
- /// Get the SSA values corresponding to kernel block identifiers.
- KernelDim3 getBlockIds();
- /// Get the SSA values corresponding to kernel thread identifiers.
- KernelDim3 getThreadIds();
- /// Get the SSA values corresponding to kernel grid size.
- KernelDim3 getGridSize();
- /// Get the SSA values corresponding to kernel block size.
- KernelDim3 getBlockSize();
- /// Get the operand values passed as kernel arguments.
- operand_range getKernelOperandValues();
- /// Get the operand types passed as kernel arguments.
- operand_type_range getKernelOperandTypes();
-
- /// Get the SSA values passed as operands to specify the grid size.
- KernelDim3 getGridSizeOperandValues();
- /// Get the SSA values passed as operands to specify the block size.
- KernelDim3 getBlockSizeOperandValues();
-
- /// Get the SSA values of the kernel arguments.
- llvm::iterator_range<Block::args_iterator> getKernelArguments();
-
- LogicalResult verify();
-
- /// Custom syntax support.
- void print(OpAsmPrinter &p);
- static ParseResult parse(OpAsmParser &parser, OperationState &result);
-
- static StringRef getOperationName() { return "gpu.launch"; }
-
- /// Erase the `index`-th kernel argument. Both the entry block argument and
- /// the operand will be dropped. The block argument must not have any uses.
- void eraseKernelArgument(unsigned index);
-
- /// Append canonicalization patterns to `results`.
- static void getCanonicalizationPatterns(OwningRewritePatternList &results,
- MLIRContext *context);
-
-private:
- static StringRef getBlocksKeyword() { return "blocks"; }
- static StringRef getThreadsKeyword() { return "threads"; }
- static StringRef getArgsKeyword() { return "args"; }
-
- /// The number of launch configuration operands, placed at the leading
- /// positions of the operand list.
- static constexpr unsigned kNumConfigOperands = 6;
-
- /// The number of region attributes containing the launch configuration,
- /// placed in the leading positions of the argument list.
- static constexpr unsigned kNumConfigRegionAttributes = 12;
-};
-
/// Operation to launch a kernel given as outlined function.
class LaunchFuncOp : public Op<LaunchFuncOp, OpTrait::AtLeastNOperands<6>::Impl,
OpTrait::ZeroResult> {
diff --git a/third_party/mlir/include/mlir/Dialect/GPU/GPUOps.td b/third_party/mlir/include/mlir/Dialect/GPU/GPUOps.td
index fcaa77c..9b4e218 100644
--- a/third_party/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/third_party/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -181,6 +181,146 @@
let parser = [{ return parseGPUFuncOp(parser, result); }];
}
+def GPU_LaunchOp : GPU_Op<"launch", [IsolatedFromAbove]>,
+ Arguments<(ins Index:$gridSizeX, Index:$gridSizeY, Index:$gridSizeZ,
+ Index:$blockSizeX, Index:$blockSizeY, Index:$blockSizeZ,
+ Variadic<AnyType>:$operands)>,
+ Results<(outs)> {
+ let summary = "GPU kernel launch operation";
+
+ let description = [{
+ Launch a kernel on the specified grid of thread blocks. The body of the
+ kernel is defined by the single region that this operation contains. The
+ operation takes at least six operands, with first three operands being grid
+ sizes along x,y,z dimensions, the following three arguments being block
+ sizes along x,y,z dimension, and the remaining operands are arguments of the
+ kernel. When a lower-dimensional kernel is required, unused sizes must be
+ explicitly set to `1`.
+
+ The body region has at least _twelve_ arguments, grouped as follows:
+
+ - three arguments that contain block identifiers along x,y,z dimensions;
+ - three arguments that contain thread identifiers along x,y,z dimensions;
+ - operands of the `gpu.launch` operation as is, including six leading
+ operands for grid and block sizes.
+
+ Operations inside the body region, and any operations in the nested regions,
+ are _not_ allowed to use values defined outside the _body_ region, as if
+ this region was a function. If necessary, values must be passed as kernel
+ arguments into the body region. Nested regions inside the kernel body are
+ allowed to use values defined in their ancestor regions as long as they
+ don't cross the kernel body region boundary.
+
+ Syntax:
+
+ ``` {.ebnf}
+ operation ::= `gpu.launch` `block` `(` ssa-id-list `)` `in` ssa-reassignment
+ `threads` `(` ssa-id-list `)` `in` ssa-reassignment
+ (`args` ssa-reassignment `:` type-list)?
+ region attr-dict?
+ ssa-reassignment ::= `(` ssa-id `=` ssa-use (`,` ssa-id `=` ssa-use)* `)`
+ ```
+
+ Example:
+
+ ```mlir {.mlir}
+ gpu.launch blocks(%bx, %by, %bz) in (%sz_bx = %0, %sz_by = %1, %sz_bz = %2)
+ threads(%tx, %ty, %tz) in (%sz_tx = %3, %sz_ty = %4, %sz_tz = %5)
+ args(%arg0 = %6, %arg1 = 7) : f32, memref<?xf32, 1> {
+ // Block and thread identifiers, as well as block/grid sizes are
+ // immediately usable inside body region.
+ "some_op"(%bx, %tx) : (index, index) -> ()
+ %42 = load %arg1[%bx] : memref<?xf32, 1>
+ }
+
+ // Generic syntax explains how the pretty syntax maps to the IR structure.
+ "gpu.launch"(%cst, %cst, %c1, // Grid sizes.
+ %cst, %c1, %c1, // Block sizes.
+ %arg0, %arg1) // Actual arguments.
+ {/*attributes*/}
+ // All sizes and identifiers have "index" size.
+ : (index, index, index, index, index, index, f32, memref<?xf32, 1>)
+ -> () {
+ // The operation passes block and thread identifiers, followed by grid and
+ // block sizes, followed by actual arguments to the entry block of the
+ // region.
+ ^bb0(%bx : index, %by : index, %bz : index,
+ %tx : index, %ty : index, %tz : index,
+ %num_bx : index, %num_by : index, %num_bz : index,
+ %num_tx : index, %num_ty : index, %num_tz : index,
+ %arg0 : f32, %arg1 : memref<?xf32, 1>):
+ "some_op"(%bx, %tx) : (index, index) -> ()
+ %3 = "std.load"(%arg1, %bx) : (memref<?xf32, 1>, index) -> f32
+ }
+ ```
+
+ Rationale: using operation/block arguments gives analyses a clear way of
+ understanding that a value has additional semantics (e.g., we will need to
+ know what value corresponds to threadIdx.x for coalescing). We can recover
+ these properties by analyzing the operations producing values, but it is
+ easier just to have that information by construction.
+ }];
+
+ let regions = (region AnyRegion:$body);
+
+ let skipDefaultBuilders = 1;
+
+ let builders = [
+ OpBuilder<"Builder *builder, OperationState &result, Value *gridSizeX,"
+ "Value *gridSizeY, Value *gridSizeZ, Value *blockSizeX,"
+ "Value *blockSizeY, Value *blockSizeZ,"
+ "ArrayRef<Value *> operands">
+ ];
+
+ let hasCanonicalizer = 1;
+
+ let extraClassDeclaration = [{
+ /// Get the kernel region.
+ Region &getBody();
+
+ /// Get the SSA values corresponding to kernel block identifiers.
+ KernelDim3 getBlockIds();
+ /// Get the SSA values corresponding to kernel thread identifiers.
+ KernelDim3 getThreadIds();
+ /// Get the SSA values corresponding to kernel grid size.
+ KernelDim3 getGridSize();
+ /// Get the SSA values corresponding to kernel block size.
+ KernelDim3 getBlockSize();
+ /// Get the operand values passed as kernel arguments.
+ operand_range getKernelOperandValues();
+ /// Get the operand types passed as kernel arguments.
+ operand_type_range getKernelOperandTypes();
+
+ /// Get the SSA values passed as operands to specify the grid size.
+ KernelDim3 getGridSizeOperandValues();
+ /// Get the SSA values passed as operands to specify the block size.
+ KernelDim3 getBlockSizeOperandValues();
+
+ /// Get the SSA values of the kernel arguments.
+ llvm::iterator_range<Block::args_iterator> getKernelArguments();
+
+ /// Erase the `index`-th kernel argument. Both the entry block argument and
+ /// the operand will be dropped. The block argument must not have any uses.
+ void eraseKernelArgument(unsigned index);
+
+ static StringRef getBlocksKeyword() { return "blocks"; }
+ static StringRef getThreadsKeyword() { return "threads"; }
+ static StringRef getArgsKeyword() { return "args"; }
+
+ /// The number of launch configuration operands, placed at the leading
+ /// positions of the operand list.
+ static constexpr unsigned kNumConfigOperands = 6;
+
+ /// The number of region attributes containing the launch configuration,
+ /// placed in the leading positions of the argument list.
+ static constexpr unsigned kNumConfigRegionAttributes = 12;
+ }];
+
+ let parser = [{ return parseLaunchOp(parser, result); }];
+ let printer = [{ printLaunchOp(p, *this); }];
+ let verifier = [{ return ::verify(*this); }];
+}
+
def GPU_ReturnOp : GPU_Op<"return", [Terminator]>, Arguments<(ins)>,
Results<(outs)> {
let summary = "Terminator for GPU launch regions.";
diff --git a/third_party/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/third_party/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index 38998b9..87b170b 100644
--- a/third_party/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/third_party/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -46,7 +46,7 @@
GPUDialect::GPUDialect(MLIRContext *context)
: Dialect(getDialectName(), context) {
- addOperations<LaunchOp, LaunchFuncOp,
+ addOperations<LaunchFuncOp,
#define GET_OP_LIST
#include "mlir/Dialect/GPU/GPUOps.cpp.inc"
>();
@@ -244,19 +244,20 @@
return llvm::drop_begin(args, LaunchOp::kNumConfigRegionAttributes);
}
-LogicalResult LaunchOp::verify() {
+LogicalResult verify(LaunchOp op) {
// Kernel launch takes kNumConfigOperands leading operands for grid/block
// sizes and transforms them into kNumConfigRegionAttributes region arguments
// for block/thread identifiers and grid/block sizes.
- if (!getBody().empty()) {
- Block &entryBlock = getBody().front();
- if (entryBlock.getNumArguments() != kNumConfigOperands + getNumOperands())
- return emitOpError("unexpected number of region arguments");
+ if (!op.getBody().empty()) {
+ Block &entryBlock = op.getBody().front();
+ if (entryBlock.getNumArguments() !=
+ LaunchOp::kNumConfigOperands + op.getNumOperands())
+ return op.emitOpError("unexpected number of region arguments");
}
// Block terminators without successors are expected to exit the kernel region
// and must be `gpu.launch`.
- for (Block &block : getBody()) {
+ for (Block &block : op.getBody()) {
if (block.empty())
continue;
if (block.back().getNumSuccessors() != 0)
@@ -265,8 +266,8 @@
return block.back()
.emitError("expected 'gpu.terminator' or a terminator with "
"successors")
- .attachNote(getLoc())
- << "in '" << getOperationName() << "' body region";
+ .attachNote(op.getLoc())
+ << "in '" << LaunchOp::getOperationName() << "' body region";
}
}
@@ -285,27 +286,31 @@
p << *size.z << " = " << *operands[2] << ')';
}
-void LaunchOp::print(OpAsmPrinter &p) {
- SmallVector<Value *, 12> operandContainer(operand_begin(), operand_end());
+void printLaunchOp(OpAsmPrinter &p, LaunchOp op) {
+ SmallVector<Value *, 12> operandContainer(op.operand_begin(),
+ op.operand_end());
ArrayRef<Value *> operands(operandContainer);
// Print the launch configuration.
- p << getOperationName() << ' ' << getBlocksKeyword();
- printSizeAssignment(p, getGridSize(), operands.take_front(3), getBlockIds());
- p << ' ' << getThreadsKeyword();
- printSizeAssignment(p, getBlockSize(), operands.slice(3, 3), getThreadIds());
+ p << LaunchOp::getOperationName() << ' ' << op.getBlocksKeyword();
+ printSizeAssignment(p, op.getGridSize(), operands.take_front(3),
+ op.getBlockIds());
+ p << ' ' << op.getThreadsKeyword();
+ printSizeAssignment(p, op.getBlockSize(), operands.slice(3, 3),
+ op.getThreadIds());
// From now on, the first kNumConfigOperands operands corresponding to grid
// and block sizes are irrelevant, so we can drop them.
- operands = operands.drop_front(kNumConfigOperands);
+ operands = operands.drop_front(LaunchOp::kNumConfigOperands);
// Print the data argument remapping.
- if (!getBody().empty() && !operands.empty()) {
- p << ' ' << getArgsKeyword() << '(';
+ if (!op.getBody().empty() && !operands.empty()) {
+ p << ' ' << op.getArgsKeyword() << '(';
for (unsigned i = 0, e = operands.size(); i < e; ++i) {
if (i != 0)
p << ", ";
- p << *getBody().front().getArgument(kNumConfigRegionAttributes + i)
+ p << *op.getBody().front().getArgument(
+ LaunchOp::kNumConfigRegionAttributes + i)
<< " = " << *operands[i];
}
p << ") ";
@@ -321,8 +326,8 @@
}
}
- p.printRegion(getBody(), /*printEntryBlockArgs=*/false);
- p.printOptionalAttrDict(getAttrs());
+ p.printRegion(op.getBody(), /*printEntryBlockArgs=*/false);
+ p.printOptionalAttrDict(op.getAttrs());
}
// Parse the size assignment blocks for blocks and threads. These have the form
@@ -361,10 +366,10 @@
// (`args` ssa-reassignment `:` type-list)?
// region attr-dict?
// ssa-reassignment ::= `(` ssa-id `=` ssa-use (`,` ssa-id `=` ssa-use)* `)`
-ParseResult LaunchOp::parse(OpAsmParser &parser, OperationState &result) {
+ParseResult parseLaunchOp(OpAsmParser &parser, OperationState &result) {
// Sizes of the grid and block.
- SmallVector<OpAsmParser::OperandType, kNumConfigOperands> sizes(
- kNumConfigOperands);
+ SmallVector<OpAsmParser::OperandType, LaunchOp::kNumConfigOperands> sizes(
+ LaunchOp::kNumConfigOperands);
MutableArrayRef<OpAsmParser::OperandType> sizesRef(sizes);
// Actual (data) operands passed to the kernel.
@@ -372,7 +377,7 @@
// Region arguments to be created.
SmallVector<OpAsmParser::OperandType, 16> regionArgs(
- kNumConfigRegionAttributes);
+ LaunchOp::kNumConfigRegionAttributes);
MutableArrayRef<OpAsmParser::OperandType> regionArgsRef(regionArgs);
// Parse the size assignment segments: the first segment assigns grid sizes
@@ -380,11 +385,11 @@
// sizes and defines values for thread identifiers. In the region argument
// list, identifiers precede sizes, and block-related values precede
// thread-related values.
- if (parser.parseKeyword(getBlocksKeyword().data()) ||
+ if (parser.parseKeyword(LaunchOp::getBlocksKeyword().data()) ||
parseSizeAssignment(parser, sizesRef.take_front(3),
regionArgsRef.slice(6, 3),
regionArgsRef.slice(0, 3)) ||
- parser.parseKeyword(getThreadsKeyword().data()) ||
+ parser.parseKeyword(LaunchOp::getThreadsKeyword().data()) ||
parseSizeAssignment(parser, sizesRef.drop_front(3),
regionArgsRef.slice(9, 3),
regionArgsRef.slice(3, 3)) ||
@@ -397,7 +402,7 @@
// so is the trailing type list. Parse it as well and use the parsed types
// to resolve the operands passed to the kernel arguments.
SmallVector<Type, 4> dataTypes;
- if (!parser.parseOptionalKeyword(getArgsKeyword())) {
+ if (!parser.parseOptionalKeyword(LaunchOp::getArgsKeyword())) {
llvm::SMLoc argsLoc = parser.getCurrentLocation();
regionArgs.push_back({});
@@ -425,7 +430,8 @@
// block/thread identifiers and grid/block sizes, all of the `index` type.
// Follow the actual kernel arguments.
Type index = parser.getBuilder().getIndexType();
- dataTypes.insert(dataTypes.begin(), kNumConfigRegionAttributes, index);
+ dataTypes.insert(dataTypes.begin(), LaunchOp::kNumConfigRegionAttributes,
+ index);
Region *body = result.addRegion();
return failure(parser.parseRegion(*body, regionArgs, dataTypes) ||
parser.parseOptionalAttrDict(result.attributes));