// RUN: mlir-opt %s -loop-tile -tile-size=32 | FileCheck %s
// RUN: mlir-opt %s -split-input-file -loop-tile -tile-cache-size=512 | FileCheck %s --check-prefix=MODEL
// CHECK-DAG: [[MAP0:#map[0-9]+]] = (d0) -> (d0 + 32)
// CHECK-DAG: [[MAP1:#map[0-9]+]] = (d0) -> (d0 + 32, 50)
// CHECK-DAG: [[IDENTITY:#map[0-9]+]] = (d0) -> (d0)
// CHECK-DAG: [[LB:#map[0-9]+]] = ()[s0] -> (0, s0)
// CHECK-DAG: [[UB:#map[0-9]+]] = ()[s0, s1] -> (s0, 4096 floordiv s1)
// CHECK-DAG: [[UB_INTRA_TILE:#map[0-9]+]] = (d0, d1, d2) -> (d2 + 32, d0, 4096 floordiv d1)
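// MAP0 and MAP1 give the intra-tile upper bounds (tile offset + tile size of
// 32; MAP1 is additionally clamped by the trip count 50), while IDENTITY is
// used for the intra-tile lower bounds. LB, UB, and UB_INTRA_TILE are
// exercised by @loop_max_min_bound below.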
// CHECK-LABEL: func @loop_tiling()
// CHECK-NEXT:   affine.for %i0 = 0 to 256 step 32 {
// CHECK-NEXT:     affine.for %i1 = 0 to 512 step 32 {
// CHECK-NEXT:       affine.for %i2 = 0 to 1024 step 32 {
// CHECK-NEXT:         affine.for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
// CHECK-NEXT:           affine.for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
// CHECK-NEXT:             affine.for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
// CHECK-NEXT:               "foo"(%i3, %i4, %i5) : (index, index, index) -> ()
// CHECK-NEXT:             }
// CHECK-NEXT:           }
// CHECK-NEXT:         }
// CHECK-NEXT:       }
// CHECK-NEXT:     }
// CHECK-NEXT:   }
// CHECK-NEXT:   affine.for %i6 = 0 to 50 step 32 {
// CHECK-NEXT:     affine.for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
// CHECK-NEXT:       "bar"(%i7, %i7) : (index, index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:   }
// CHECK-NEXT:   affine.for %i8 = 0 to 21 step 32 {
// CHECK-NEXT:     affine.for %i9 = [[IDENTITY]](%i8) to 21 {
// CHECK-NEXT:       "foobar"(%i9) : (index) -> ()
// CHECK-NEXT:     }
// CHECK-NEXT:   }
// CHECK-NEXT:   return
func @loop_tiling() {
  affine.for %i = 0 to 256 {
    affine.for %j = 0 to 512 {
      affine.for %k = 0 to 1024 {
        "foo"(%i, %j, %k) : (index, index, index) -> ()
      }
    }
  }
  affine.for %x = 0 to 50 {
    "bar"(%x, %x) : (index, index) -> ()
  }
  // The trip count (21) is less than the tile size (32), so the intra-tile
  // loop won't need a min expression for its upper bound.
  affine.for %y = 0 to 21 {
    "foobar"(%y) : (index) -> ()
  }
  return
}
#lb = ()[s0] -> (0, s0)
#ub = ()[s0, s1] -> (s0, 4096 floordiv s1)
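// Tiling a loop whose bounds are max/min expressions of symbols: the
// inter-tile loop keeps the original max lower bound and min upper bound (now
// with step 32), while the intra-tile loop's upper bound combines the tile
// offset with the operands of the original upper bound (see [[UB_INTRA_TILE]]
// above).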
// CHECK-LABEL: func @loop_max_min_bound(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
func @loop_max_min_bound(%A : memref<? x i32>, %L : index, %U : index) {
  %M = dim %A, 0 : memref<? x i32>
  affine.for %iTT = max #lb()[%L] to min #ub()[%M, %U] {
    %out = affine.apply (d0) -> (d0) (%iTT)
  }
  return
// CHECK:        affine.for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
// CHECK-NEXT:     affine.for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
// CHECK-NEXT:       %1 = affine.apply [[IDENTITY]](%i1)
// CHECK-NEXT:     }
// CHECK-NEXT:   }
}
// -----
// The cache size is set to 512 KiB, while this loop nest accesses about 49 MB
// of data; the model would pick tile sizes of 6 x 6 x 6. However, since
// min/max bounds can be avoided here, the sizes are adjusted to 4 x 4 x 5,
// which divide the respective trip counts (256, 256, 250) evenly.
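// (Rough footprint arithmetic, assuming each vector<64xf32> element occupies
// 64 * 4 = 256 bytes: the accessed regions span about 256*250 + 250*256 +
// 256*256 = 193536 elements, i.e., roughly 49.5 MB, far exceeding 512 KiB.)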
// MODEL-LABEL: func @simple_matmul
func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> {
  affine.for %i = 0 to 256 {
    affine.for %j = 0 to 256 {
      affine.for %k = 0 to 250 {
        %l = load %arg0[%i, %k] : memref<8x8xvector<64xf32>>
        %r = load %arg1[%k, %j] : memref<8x8xvector<64xf32>>
        %o = load %arg2[%i, %j] : memref<8x8xvector<64xf32>>
        %m = mulf %l, %r : vector<64xf32>
        %a = addf %o, %m : vector<64xf32>
        store %a, %arg2[%i, %j] : memref<8x8xvector<64xf32>>
      }
    }
  }
  return %arg2 : memref<8x8xvector<64xf32>>
}
// MODEL:        affine.for %i0 = 0 to 256 step 4 {
// MODEL-NEXT:     affine.for %i1 = 0 to 256 step 4 {
// MODEL-NEXT:       affine.for %i2 = 0 to 250 step 5 {