Loop unrolling update.
- handle non-operation statements (if/for statements) in loops being unrolled
(unrolling of non-innermost loops works).
- update uses in unrolled bodies to use results of new operations that may be
introduced in the unrolled bodies.
Unrolling now works for all kinds of loop nests - perfect nests, imperfect
nests, loops at any depth, and with any kind of operation in the body. (Cloning
of IfStmt is not implemented yet, so loop bodies containing if statements are
unsupported and untested.)
Added missing dump/print method for StmtBlock.
TODO: add test case for outer loop unrolling.
PiperOrigin-RevId: 207314286
diff --git a/include/mlir/IR/Builders.h b/include/mlir/IR/Builders.h
index 795dde4..b6cc934 100644
--- a/include/mlir/IR/Builders.h
+++ b/include/mlir/IR/Builders.h
@@ -253,6 +253,9 @@
insertPoint = block->begin();
}
+ /// Get the current insertion point of the builder.
+ StmtBlock::iterator getInsertionPoint() const { return insertPoint; }
+
OperationStmt *createOperation(Identifier name, ArrayRef<MLValue *> operands,
ArrayRef<Type *> resultTypes,
ArrayRef<NamedAttribute> attributes) {
@@ -262,7 +265,7 @@
return op;
}
- OperationStmt *cloneOperation(const OperationStmt &srcOpStmt) {
+ OperationStmt *clone(const OperationStmt &srcOpStmt) {
auto *op = srcOpStmt.clone();
block->getStatements().insert(insertPoint, op);
return op;
@@ -274,6 +277,29 @@
return OpTy::build(this, args...);
}
+ ForStmt *clone(const ForStmt &srcForStmt) {
+ auto *forStmt = srcForStmt.clone();
+ block->getStatements().insert(insertPoint, forStmt);
+ return forStmt;
+ }
+
+ IfStmt *clone(const IfStmt &srcIfStmt) {
+ auto *ifStmt = srcIfStmt.clone();
+ block->getStatements().insert(insertPoint, ifStmt);
+ return ifStmt;
+ }
+
+ Statement *clone(const Statement &stmt) {
+ switch (stmt.getKind()) {
+ case Statement::Kind::Operation:
+ return clone(cast<const OperationStmt>(stmt));
+ case Statement::Kind::If:
+ return clone(cast<const IfStmt>(stmt));
+ case Statement::Kind::For:
+ return clone(cast<const ForStmt>(stmt));
+ }
+ }
+
// Creates for statement. When step is not specified, it is set to 1.
ForStmt *createFor(AffineConstantExpr *lowerBound,
AffineConstantExpr *upperBound,
@@ -285,15 +311,6 @@
return stmt;
}
- // TODO: subsume with a generate create<ConstantInt>() method.
- OperationStmt *createConstInt32Op(int value) {
- std::pair<Identifier, Attribute *> namedAttr(
- Identifier::get("value", context), getIntegerAttr(value));
- auto *mlconst = createOperation(Identifier::get("constant", context), {},
- {getIntegerType(32)}, {namedAttr});
- return mlconst;
- }
-
private:
StmtBlock *block = nullptr;
StmtBlock::iterator insertPoint;
diff --git a/include/mlir/IR/StandardOps.h b/include/mlir/IR/StandardOps.h
index 93f9818..8cfe432 100644
--- a/include/mlir/IR/StandardOps.h
+++ b/include/mlir/IR/StandardOps.h
@@ -164,6 +164,17 @@
///
class ConstantIntOp : public ConstantOp {
public:
+ template <class Builder>
+ static OpPointer<ConstantIntOp> build(Builder *builder, int64_t value,
+ unsigned width) {
+ std::pair<Identifier, Attribute *> namedAttr(
+ builder->getIdentifier("value"), builder->getIntegerAttr(value));
+ auto *type = builder->getIntegerType(width);
+
+ return OpPointer<ConstantIntOp>(ConstantIntOp(builder->createOperation(
+ builder->getIdentifier("constant"), {}, type, {namedAttr})));
+ }
+
int64_t getValue() const {
return getAttrOfType<IntegerAttr>("value")->getValue();
}
diff --git a/include/mlir/IR/Statement.h b/include/mlir/IR/Statement.h
index 83e3ff8..2326d50 100644
--- a/include/mlir/IR/Statement.h
+++ b/include/mlir/IR/Statement.h
@@ -48,6 +48,9 @@
/// Remove this statement from its block and delete it.
void eraseFromBlock();
+ /// Clone this statement, the cloning is deep.
+ Statement *clone() const;
+
/// Returns the statement block that contains this statement.
StmtBlock *getBlock() const { return block; }
diff --git a/include/mlir/IR/Statements.h b/include/mlir/IR/Statements.h
index 1894437..3e9f06c 100644
--- a/include/mlir/IR/Statements.h
+++ b/include/mlir/IR/Statements.h
@@ -209,6 +209,9 @@
clear();
}
+ /// Deep clone this for stmt.
+ ForStmt *clone() const;
+
AffineConstantExpr *getLowerBound() const { return lowerBound; }
AffineConstantExpr *getUpperBound() const { return upperBound; }
AffineConstantExpr *getStep() const { return step; }
@@ -270,6 +273,9 @@
~IfStmt();
+ /// Deep clone this IfStmt.
+ IfStmt *clone() const;
+
IfClause *getThenClause() const { return thenClause; }
IfClause *getElseClause() const { return elseClause; }
bool hasElseClause() const { return elseClause != nullptr; }
diff --git a/include/mlir/IR/StmtBlock.h b/include/mlir/IR/StmtBlock.h
index 609aabb..a8a1f20 100644
--- a/include/mlir/IR/StmtBlock.h
+++ b/include/mlir/IR/StmtBlock.h
@@ -22,8 +22,9 @@
#ifndef MLIR_IR_STMTBLOCK_H
#define MLIR_IR_STMTBLOCK_H
-#include "mlir/Support/LLVM.h"
#include "mlir/IR/Statement.h"
+#include "mlir/Support/LLVM.h"
+#include "llvm/Support/raw_ostream.h"
namespace mlir {
class MLFunction;
@@ -101,6 +102,9 @@
return &StmtBlock::statements;
}
+ void print(raw_ostream &os) const;
+ void dump() const;
+
protected:
StmtBlock(StmtBlockKind kind) : kind(kind) {}
diff --git a/lib/IR/AsmPrinter.cpp b/lib/IR/AsmPrinter.cpp
index d71fb92..6875120 100644
--- a/lib/IR/AsmPrinter.cpp
+++ b/lib/IR/AsmPrinter.cpp
@@ -1223,6 +1223,15 @@
void Statement::dump() const { print(llvm::errs()); }
+void StmtBlock::print(raw_ostream &os) const {
+ MLFunction *function = findFunction();
+ ModuleState state(function->getContext());
+ ModulePrinter modulePrinter(os, state);
+ MLFunctionPrinter(function, modulePrinter).print(this);
+}
+
+void StmtBlock::dump() const { print(llvm::errs()); }
+
void Function::print(raw_ostream &os) const {
ModuleState state(getContext());
ModulePrinter(os, state).print(this);
diff --git a/lib/IR/Statement.cpp b/lib/IR/Statement.cpp
index 978137b..44e44c8 100644
--- a/lib/IR/Statement.cpp
+++ b/lib/IR/Statement.cpp
@@ -77,6 +77,19 @@
return nlc.numNestedLoops == 1;
}
+/// Deep-clones this statement by kind; cloning an IfStmt is not supported yet.
+Statement *Statement::clone() const {
+  switch (kind) {
+  case Kind::Operation:
+    return cast<OperationStmt>(this)->clone();
+  case Kind::If:
+    llvm_unreachable("cloning for if's not implemented yet");
+    return cast<IfStmt>(this)->clone();
+  case Kind::For: // ForStmt::clone() is implemented; nested loops rely on it.
+    return cast<ForStmt>(this)->clone();
+  }
+}
+
//===----------------------------------------------------------------------===//
// ilist_traits for Statement
//===----------------------------------------------------------------------===//
@@ -227,6 +240,15 @@
StmtBlock(StmtBlockKind::For), lowerBound(lowerBound),
upperBound(upperBound), step(step) {}
+ForStmt *ForStmt::clone() const {
+ auto *stmt = new ForStmt(getLowerBound(), getUpperBound(), getStep(),
+ Statement::findFunction()->getContext());
+ for (auto &s : getStatements()) {
+ stmt->getStatements().push_back(s.clone());
+ }
+ return stmt;
+}
+
//===----------------------------------------------------------------------===//
// IfStmt
//===----------------------------------------------------------------------===//
@@ -236,3 +258,8 @@
if (elseClause)
delete elseClause;
}
+
+IfStmt *IfStmt::clone() const {
+ llvm_unreachable("cloning for if's not implemented yet");
+ return nullptr;
+}
diff --git a/lib/Transforms/LoopUnroll.cpp b/lib/Transforms/LoopUnroll.cpp
index fe110d2..27bb43f 100644
--- a/lib/Transforms/LoopUnroll.cpp
+++ b/lib/Transforms/LoopUnroll.cpp
@@ -26,6 +26,7 @@
#include "mlir/IR/Module.h"
#include "mlir/IR/OperationSet.h"
#include "mlir/IR/Pass.h"
+#include "mlir/IR/StandardOps.h"
#include "mlir/IR/Statements.h"
#include "mlir/IR/StmtVisitor.h"
#include "mlir/Transforms/Passes.h"
@@ -96,61 +97,94 @@
runOnForStmt(forStmt);
}
-/// Replace an IV with a constant value.
-static void replaceIterator(Statement *stmt, const ForStmt &iv,
- MLValue *constVal) {
- struct ReplaceIterator : public StmtWalker<ReplaceIterator> {
- // IV to be replaced.
- const ForStmt *iv;
- // Constant to be replaced with.
- MLValue *constVal;
+/// Replace all uses of 'oldVal' with 'newVal' in 'stmt'
+static void replaceAllStmtUses(Statement *stmt, MLValue *oldVal,
+ MLValue *newVal) {
+ struct ReplaceUseWalker : public StmtWalker<ReplaceUseWalker> {
+ // Value to be replaced.
+ MLValue *oldVal;
+ // Value to be replaced with.
+ MLValue *newVal;
- ReplaceIterator(const ForStmt &iv, MLValue *constVal)
- : iv(&iv), constVal(constVal){};
+ ReplaceUseWalker(MLValue *oldVal, MLValue *newVal)
+ : oldVal(oldVal), newVal(newVal){};
void visitOperationStmt(OperationStmt *os) {
for (auto &operand : os->getStmtOperands()) {
- if (operand.get() == static_cast<const MLValue *>(iv)) {
- operand.set(constVal);
- }
+ if (operand.get() == oldVal)
+ operand.set(newVal);
}
}
};
- ReplaceIterator ri(iv, constVal);
+ ReplaceUseWalker ri(oldVal, newVal);
ri.walk(stmt);
}
-/// Unrolls this loop completely.
+/// Unroll this 'for stmt' / loop completely.
void LoopUnroll::runOnForStmt(ForStmt *forStmt) {
auto lb = forStmt->getLowerBound()->getValue();
auto ub = forStmt->getUpperBound()->getValue();
auto step = forStmt->getStep()->getValue();
- auto trip_count = (ub - lb + 1) / step;
+ // Builder to add constants needed for the unrolled iterator.
auto *mlFunc = forStmt->Statement::findFunction();
MLFuncBuilder funcTopBuilder(mlFunc);
funcTopBuilder.setInsertionPointAtStart(mlFunc);
+ // Builder to insert the unrolled bodies.
MLFuncBuilder builder(forStmt->getBlock());
- for (int i = 0; i < trip_count; i++) {
- auto *ivUnrolledVal = funcTopBuilder.createConstInt32Op(i)->getResult(0);
- for (auto &stmt : forStmt->getStatements()) {
- switch (stmt.getKind()) {
- case Statement::Kind::For:
- llvm_unreachable("unrolling loops that have only operations");
- break;
- case Statement::Kind::If:
- llvm_unreachable("unrolling loops that have only operations");
- break;
- case Statement::Kind::Operation:
- auto *cloneOp = builder.cloneOperation(*cast<OperationStmt>(&stmt));
- // TODO(bondhugula): only generate constants when the IV actually
- // appears in the body.
- replaceIterator(cloneOp, *forStmt, ivUnrolledVal);
- break;
+ // Set insertion point to right after where the for stmt ends.
+ builder.setInsertionPoint(forStmt->getBlock(),
+ ++StmtBlock::iterator(forStmt));
+
+ // Unroll the contents of 'forStmt'.
+ for (int i = lb; i <= ub; i += step) {
+ // TODO(bondhugula): generate constants only when IV actually appears.
+ auto constOp = funcTopBuilder.create<ConstantIntOp>(i, 32);
+ auto *ivConst = cast<OperationStmt>(constOp->getOperation())->getResult(0);
+
+ // Iterator pointing to just before 'this' (i^th) unrolled iteration.
+ StmtBlock::iterator beforeUnrolledBody = --builder.getInsertionPoint();
+
+ // Pairs of <old op stmt result whose uses need to be replaced,
+ // new result generated by the corresponding cloned op stmt>.
+ SmallVector<std::pair<MLValue *, MLValue *>, 8> oldNewResultPairs;
+
+ for (auto &loopBodyStmt : forStmt->getStatements()) {
+ auto *cloneStmt = builder.clone(loopBodyStmt);
+ // Replace all uses of the IV in the clone with constant iteration value.
+ replaceAllStmtUses(cloneStmt, forStmt, ivConst);
+
+ // Whenever we have an op stmt, we'll have a new ML Value defined: replace
+ // uses of the old result with this one.
+ if (auto *opStmt = dyn_cast<OperationStmt>(&loopBodyStmt)) {
+ if (opStmt->getNumResults()) {
+ auto *cloneOpStmt = cast<OperationStmt>(cloneStmt);
+ for (unsigned i = 0, e = opStmt->getNumResults(); i < e; i++) {
+ // Store old/new result pairs.
+ // TODO *only* if needed later: storing old/new results can be
+ // avoided by cloning the statement list in the reverse direction
+ // (and running the IR builder in reverse, i.e., with
+ // iplist.insertAfter()). That way, a newly created result can be
+ // immediately propagated to all its uses, which would already have
+ // been cloned/inserted.
+ oldNewResultPairs.push_back(std::make_pair(
+ &opStmt->getStmtResult(i), &cloneOpStmt->getStmtResult(i)));
+ }
+ }
+ }
+ }
+ // Replace uses of the old op results with the results in the just
+ // unrolled body.
+ StmtBlock::iterator endOfUnrolledBody = builder.getInsertionPoint();
+ for (auto it = ++beforeUnrolledBody; it != endOfUnrolledBody; it++) {
+ for (unsigned i = 0; i < oldNewResultPairs.size(); i++) {
+ replaceAllStmtUses(&(*it), oldNewResultPairs[i].first,
+ oldNewResultPairs[i].second);
}
}
}
+ // Erase the original for stmt from the block.
forStmt->eraseFromBlock();
}
diff --git a/test/Transforms/unroll.mlir b/test/Transforms/unroll.mlir
index 6a7d9cf..aef6a01 100644
--- a/test/Transforms/unroll.mlir
+++ b/test/Transforms/unroll.mlir
@@ -1,11 +1,11 @@
// RUN: %S/../../mlir-opt %s -o - -unroll-innermost-loops | FileCheck %s
-// CHECK-LABEL: mlfunc @loops1() {
-mlfunc @loops1() {
- // CHECK: %c0_i32 = constant 0 : i32
- // CHECK-NEXT: %c1_i32 = constant 1 : i32
+// CHECK-LABEL: mlfunc @loop_nest_simplest() {
+mlfunc @loop_nest_simplest() {
+ // CHECK: %c1_i32 = constant 1 : i32
// CHECK-NEXT: %c2_i32 = constant 2 : i32
// CHECK-NEXT: %c3_i32 = constant 3 : i32
+ // CHECK-NEXT: %c4_i32 = constant 4 : i32
// CHECK-NEXT: for %i0 = 1 to 100 step 2 {
for %i = 1 to 100 step 2 {
// CHECK: %c1_i32_0 = constant 1 : i32
@@ -19,40 +19,162 @@
return // CHECK: return
} // CHECK }
-// CHECK-LABEL: mlfunc @loops2() {
-mlfunc @loops2() {
+// CHECK-LABEL: mlfunc @loop_nest_simple_iv_use() {
+mlfunc @loop_nest_simple_iv_use() {
+ // CHECK: %c1_i32 = constant 1 : i32
+ // CHECK-NEXT: %c2_i32 = constant 2 : i32
+ // CHECK-NEXT: %c3_i32 = constant 3 : i32
+ // CHECK-NEXT: %c4_i32 = constant 4 : i32
+ // CHECK-NEXT: for %i0 = 1 to 100 step 2 {
+ for %i = 1 to 100 step 2 {
+ // CHECK: %0 = "addi32"(%c1_i32, %c1_i32) : (i32, i32) -> i32
+ // CHECK-NEXT: %1 = "addi32"(%c2_i32, %c2_i32) : (i32, i32) -> i32
+ // CHECK-NEXT: %2 = "addi32"(%c3_i32, %c3_i32) : (i32, i32) -> i32
+ // CHECK-NEXT: %3 = "addi32"(%c4_i32, %c4_i32) : (i32, i32) -> i32
+ for %j = 1 to 4 {
+ %x = "addi32"(%j, %j) : (affineint, affineint) -> i32
+ }
+ } // CHECK: }
+ return // CHECK: return
+} // CHECK: }
+
+// CHECK-LABEL: mlfunc @loop_nest_strided() {
+mlfunc @loop_nest_strided() {
+ // CHECK: %c3_i32 = constant 3 : i32
+ // CHECK-NEXT: %c5_i32 = constant 5 : i32
+ // CHECK-NEXT: %c7_i32 = constant 7 : i32
+ // CHECK-NEXT: %c3_i32_0 = constant 3 : i32
+ // CHECK-NEXT: %c5_i32_1 = constant 5 : i32
+ // CHECK-NEXT: for %i0 = 1 to 100 {
+ for %i = 1 to 100 {
+ // CHECK: %0 = affine_apply (d0) -> (d0 + 1)(%c3_i32_0)
+ // CHECK-NEXT: %1 = "addi32"(%0, %0) : (affineint, affineint) -> affineint
+ // CHECK-NEXT: %2 = affine_apply (d0) -> (d0 + 1)(%c5_i32_1)
+ // CHECK-NEXT: %3 = "addi32"(%2, %2) : (affineint, affineint) -> affineint
+ for %j = 3 to 6 step 2 {
+ %x = "affine_apply" (%j) { map: (d0) -> (d0 + 1) } :
+ (affineint) -> (affineint)
+ %y = "addi32"(%x, %x) : (affineint, affineint) -> affineint
+ }
+ // CHECK: %4 = affine_apply (d0) -> (d0 + 1)(%c3_i32)
+ // CHECK-NEXT: %5 = "addi32"(%4, %4) : (affineint, affineint) -> affineint
+ // CHECK-NEXT: %6 = affine_apply (d0) -> (d0 + 1)(%c5_i32)
+ // CHECK-NEXT: %7 = "addi32"(%6, %6) : (affineint, affineint) -> affineint
+ // CHECK-NEXT: %8 = affine_apply (d0) -> (d0 + 1)(%c7_i32)
+ // CHECK-NEXT: %9 = "addi32"(%8, %8) : (affineint, affineint) -> affineint
+ for %k = 3 to 7 step 2 {
+ %z = "affine_apply" (%k) { map: (d0) -> (d0 + 1) } :
+ (affineint) -> (affineint)
+ %w = "addi32"(%z, %z) : (affineint, affineint) -> affineint
+ }
+ } // CHECK: }
+ return // CHECK: return
+} // CHECK: }
+
+// Operations in the loop body have results that are used therein.
+// CHECK-LABEL: mlfunc @loop_nest_body_def_use() {
+mlfunc @loop_nest_body_def_use() {
// CHECK: %c0_i32 = constant 0 : i32
// CHECK-NEXT: %c1_i32 = constant 1 : i32
// CHECK-NEXT: %c2_i32 = constant 2 : i32
// CHECK-NEXT: %c3_i32 = constant 3 : i32
- // CHECK-NEXT: %c0_i32_0 = constant 0 : i32
- // CHECK-NEXT: %c1_i32_1 = constant 1 : i32
- // CHECK-NEXT: %c2_i32_2 = constant 2 : i32
- // CHECK-NEXT: %c3_i32_3 = constant 3 : i32
// CHECK-NEXT: for %i0 = 1 to 100 step 2 {
for %i = 1 to 100 step 2 {
- // CHECK: %0 = affine_apply (d0) -> (d0 + 1)(%c0_i32_0)
- // CHECK-NEXT: %1 = affine_apply (d0) -> (d0 + 1)(%c1_i32_1)
- // CHECK-NEXT: %2 = affine_apply (d0) -> (d0 + 1)(%c2_i32_2)
- // CHECK-NEXT: %3 = affine_apply (d0) -> (d0 + 1)(%c3_i32_3)
+ // CHECK: %c0 = constant 0 : affineint
+ %c0 = constant 0 : affineint
+ // CHECK: %0 = affine_apply (d0) -> (d0 + 1)(%c0_i32)
+ // CHECK-NEXT: %1 = "addi32"(%0, %c0) : (affineint, affineint) -> affineint
+ // CHECK-NEXT: %2 = affine_apply (d0) -> (d0 + 1)(%c1_i32)
+ // CHECK-NEXT: %3 = "addi32"(%2, %c0) : (affineint, affineint) -> affineint
+ // CHECK-NEXT: %4 = affine_apply (d0) -> (d0 + 1)(%c2_i32)
+ // CHECK-NEXT: %5 = "addi32"(%4, %c0) : (affineint, affineint) -> affineint
+ // CHECK-NEXT: %6 = affine_apply (d0) -> (d0 + 1)(%c3_i32)
+ // CHECK-NEXT: %7 = "addi32"(%6, %c0) : (affineint, affineint) -> affineint
+ for %j = 0 to 3 {
+ %x = "affine_apply" (%j) { map: (d0) -> (d0 + 1) } :
+ (affineint) -> (affineint)
+ %y = "addi32"(%x, %c0) : (affineint, affineint) -> affineint
+ }
+ } // CHECK: }
+ return // CHECK: return
+} // CHECK: }
+
+
+// Imperfect loop nest. Unrolling innermost here yields a perfect nest.
+// CHECK-LABEL: mlfunc @loop_nest_seq_imperfect(memref<128x128xf32>) {
+mlfunc @loop_nest_seq_imperfect(%a : memref<128x128xf32>) {
+ // CHECK: %c1_i32 = constant 1 : i32
+ // CHECK-NEXT: %c2_i32 = constant 2 : i32
+ // CHECK-NEXT: %c3_i32 = constant 3 : i32
+ // CHECK-NEXT: %c4_i32 = constant 4 : i32
+ // CHECK-NEXT: %c128 = constant 128 : affineint
+ %c128 = constant 128 : affineint
+ // CHECK: for %i0 = 1 to 100 {
+ for %i = 1 to 100 {
+ // CHECK: %0 = "vld"(%i0) : (affineint) -> i32
+ %ld = "vld"(%i) : (affineint) -> i32
+ // CHECK: %1 = affine_apply (d0) -> (d0 + 1)(%c1_i32)
+ // CHECK-NEXT: %2 = "vmulf"(%c1_i32, %1) : (i32, affineint) -> affineint
+ // CHECK-NEXT: %3 = "vaddf"(%2, %2) : (affineint, affineint) -> affineint
+ // CHECK-NEXT: %4 = affine_apply (d0) -> (d0 + 1)(%c2_i32)
+ // CHECK-NEXT: %5 = "vmulf"(%c2_i32, %4) : (i32, affineint) -> affineint
+ // CHECK-NEXT: %6 = "vaddf"(%5, %5) : (affineint, affineint) -> affineint
+ // CHECK-NEXT: %7 = affine_apply (d0) -> (d0 + 1)(%c3_i32)
+ // CHECK-NEXT: %8 = "vmulf"(%c3_i32, %7) : (i32, affineint) -> affineint
+ // CHECK-NEXT: %9 = "vaddf"(%8, %8) : (affineint, affineint) -> affineint
+ // CHECK-NEXT: %10 = affine_apply (d0) -> (d0 + 1)(%c4_i32)
+ // CHECK-NEXT: %11 = "vmulf"(%c4_i32, %10) : (i32, affineint) -> affineint
+ // CHECK-NEXT: %12 = "vaddf"(%11, %11) : (affineint, affineint) -> affineint
for %j = 1 to 4 {
%x = "affine_apply" (%j) { map: (d0) -> (d0 + 1) } :
(affineint) -> (affineint)
+ %y = "vmulf"(%j, %x) : (affineint, affineint) -> affineint
+ %z = "vaddf"(%y, %y) : (affineint, affineint) -> affineint
}
- } // CHECK: }
+ // CHECK: %13 = "scale"(%c128, %i0) : (affineint, affineint) -> affineint
+ %addr = "scale"(%c128, %i) : (affineint, affineint) -> affineint
+ // CHECK: "vst"(%13, %i0) : (affineint, affineint) -> ()
+ "vst"(%addr, %i) : (affineint, affineint) -> ()
+ } // CHECK: }
+ return // CHECK: return
+}
+
+// CHECK-LABEL: mlfunc @loop_nest_seq_multiple() {
+mlfunc @loop_nest_seq_multiple() {
+ // CHECK: %c1_i32 = constant 1 : i32
+ // CHECK-NEXT: %c2_i32 = constant 2 : i32
+ // CHECK-NEXT: %c3_i32 = constant 3 : i32
+ // CHECK-NEXT: %c4_i32 = constant 4 : i32
+ // CHECK-NEXT: %c0_i32 = constant 0 : i32
+ // CHECK-NEXT: %c1_i32_0 = constant 1 : i32
+ // CHECK-NEXT: %c2_i32_1 = constant 2 : i32
+ // CHECK-NEXT: %c3_i32_2 = constant 3 : i32
+ // CHECK-NEXT: %0 = affine_apply (d0) -> (d0 + 1)(%c0_i32)
+ // CHECK-NEXT: "mul"(%0, %0) : (affineint, affineint) -> ()
+ // CHECK-NEXT: %1 = affine_apply (d0) -> (d0 + 1)(%c1_i32_0)
+ // CHECK-NEXT: "mul"(%1, %1) : (affineint, affineint) -> ()
+ // CHECK-NEXT: %2 = affine_apply (d0) -> (d0 + 1)(%c2_i32_1)
+ // CHECK-NEXT: "mul"(%2, %2) : (affineint, affineint) -> ()
+ // CHECK-NEXT: %3 = affine_apply (d0) -> (d0 + 1)(%c3_i32_2)
+ // CHECK-NEXT: "mul"(%3, %3) : (affineint, affineint) -> ()
+ for %j = 0 to 3 {
+ %x = "affine_apply" (%j) { map: (d0) -> (d0 + 1) } :
+ (affineint) -> (affineint)
+ "mul"(%x, %x) : (affineint, affineint) -> ()
+ }
// CHECK: %c99 = constant 99 : affineint
%k = "constant"(){value: 99} : () -> affineint
- // CHECK: for %i1 = 1 to 100 step 2 {
+ // CHECK: for %i0 = 1 to 100 step 2 {
for %m = 1 to 100 step 2 {
- // CHECK: %4 = affine_apply (d0) -> (d0 + 1)(%c0_i32)
- // CHECK-NEXT: %5 = affine_apply (d0)[s0] -> (d0 + s0 + 1)(%c0_i32)[%c99]
- // CHECK-NEXT: %6 = affine_apply (d0) -> (d0 + 1)(%c1_i32)
- // CHECK-NEXT: %7 = affine_apply (d0)[s0] -> (d0 + s0 + 1)(%c1_i32)[%c99]
- // CHECK-NEXT: %8 = affine_apply (d0) -> (d0 + 1)(%c2_i32)
- // CHECK-NEXT: %9 = affine_apply (d0)[s0] -> (d0 + s0 + 1)(%c2_i32)[%c99]
- // CHECK-NEXT: %10 = affine_apply (d0) -> (d0 + 1)(%c3_i32)
- // CHECK-NEXT: %11 = affine_apply (d0)[s0] -> (d0 + s0 + 1)(%c3_i32)[%c99]
+ // CHECK: %4 = affine_apply (d0) -> (d0 + 1)(%c1_i32)
+ // CHECK-NEXT: %5 = affine_apply (d0)[s0] -> (d0 + s0 + 1)(%c1_i32)[%c99]
+ // CHECK-NEXT: %6 = affine_apply (d0) -> (d0 + 1)(%c2_i32)
+ // CHECK-NEXT: %7 = affine_apply (d0)[s0] -> (d0 + s0 + 1)(%c2_i32)[%c99]
+ // CHECK-NEXT: %8 = affine_apply (d0) -> (d0 + 1)(%c3_i32)
+ // CHECK-NEXT: %9 = affine_apply (d0)[s0] -> (d0 + s0 + 1)(%c3_i32)[%c99]
+ // CHECK-NEXT: %10 = affine_apply (d0) -> (d0 + 1)(%c4_i32)
+ // CHECK-NEXT: %11 = affine_apply (d0)[s0] -> (d0 + s0 + 1)(%c4_i32)[%c99]
for %n = 1 to 4 {
%y = "affine_apply" (%n) { map: (d0) -> (d0 + 1) } :
(affineint) -> (affineint)