// test/Transforms/loop-tiling.mlir - platform/external/tensorflow - Git at Google

 // RUN: mlir-opt %s -loop-tile -tile-size=32 | FileCheck %s
 // RUN: mlir-opt %s -split-input-file -loop-tile -tile-cache-size=512 | FileCheck %s --check-prefix=MODEL

 // CHECK-DAG: [[MAP0:#map[0-9]+]] = (d0) -> (d0 + 32)
 // CHECK-DAG: [[MAP1:#map[0-9]+]] = (d0) -> (d0 + 32, 50)
 // CHECK-DAG: [[IDENTITY:#map[0-9]+]] = (d0) -> (d0)
 // CHECK-DAG: [[LB:#map[0-9]+]] = ()[s0] -> (0, s0)
 // CHECK-DAG: [[UB:#map[0-9]+]] = ()[s0, s1] -> (s0, 4096 floordiv s1)
 // CHECK-DAG: [[UB_INTRA_TILE:#map[0-9]+]] = (d0, d1, d2) -> (d2 + 32, s0, 4096 floordiv s1)

 // CHECK-LABEL: func @loop_tiling()
 // CHECK-NEXT:   affine.for %i0 = 0 to 256 step 32 {
 // CHECK-NEXT:     affine.for %i1 = 0 to 512 step 32 {
 // CHECK-NEXT:       affine.for %i2 = 0 to 1024 step 32 {
 // CHECK-NEXT:         affine.for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
 // CHECK-NEXT:           affine.for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
 // CHECK-NEXT:             affine.for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
 // CHECK-NEXT:               "foo"(%i3, %i4, %i5) : (index, index, index) -> ()
 // CHECK-NEXT:             }
 // CHECK-NEXT:           }
 // CHECK-NEXT:         }
 // CHECK-NEXT:       }
 // CHECK-NEXT:     }
 // CHECK-NEXT:   }
 // CHECK-NEXT:   affine.for %i6 = 0 to 50 step 32 {
 // CHECK-NEXT:     affine.for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
 // CHECK-NEXT:       "bar"(%i7, %i7) : (index, index) -> ()
 // CHECK-NEXT:     }
 // CHECK-NEXT:   }
 // CHECK-NEXT: affine.for %i8 = 0 to 21 step 32 {
 // CHECK-NEXT:    affine.for %i9 = [[IDENTITY]](%i8) to 21 {
 // CHECK-NEXT:      "foobar"(%i9) : (index) -> ()
 // CHECK-NEXT:    }
 // CHECK-NEXT:  }
 // CHECK-NEXT:  return
 func @loop_tiling() {
   // Perfect 3-d nest with constant bounds 256 x 512 x 1024; each trip count
   // is a multiple of the 32 passed as -tile-size, so the expected intra-tile
   // bounds are plain maps with no min.
   affine.for %row = 0 to 256 {
     affine.for %col = 0 to 512 {
       affine.for %depth = 0 to 1024 {
         "foo"(%row, %col, %depth) : (index, index, index) -> ()
       }
     }
   }

   // Trip count 50 is not a multiple of 32, so the expected intra-tile upper
   // bound is a min against the original bound.
   affine.for %partial = 0 to 50 {
     "bar"(%partial, %partial) : (index, index) -> ()
   }

   // Trip count 21 is smaller than the tile size: the intra-tile loop won't
   // need a min expression and simply runs to 21.
   affine.for %short = 0 to 21 {
     "foobar"(%short) : (index) -> ()
   }

   return
 }

 #lb = ()[s0] -> (0, s0)
 #ub = ()[s0, s1] -> (s0, 4096 floordiv s1)
 // CHECK-LABEL: func @loop_max_min_bound(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
 func @loop_max_min_bound(%A : memref<? x i32>, %L : index, %U : index) {
   // Size of dimension 0 of %A; it is the first symbol operand of #ub below.
   %extent = dim %A, 0 : memref<? x i32>
   // The lower bound is a max over the results of #lb and the upper bound a
   // min over the results of #ub, so both bounds are multi-result maps.
   affine.for %iv = max #lb()[%L] to min #ub()[%extent, %U] {
       // Identity apply on the induction variable; its result is not used.
       %same = affine.apply (d0) -> (d0) (%iv)
   }
   return
 // CHECK:       affine.for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
 // CHECK-NEXT:    affine.for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
 // CHECK-NEXT:      %1 = affine.apply [[IDENTITY]](%i1)
 // CHECK-NEXT:    }
 // CHECK-NEXT:  }
 }

 // -----

 // Cache size is set to 512 KiB. This loop nest accesses about 49 MiB, and the
 // tile sizes chosen would be 6 x 6 x 6. However, to avoid min/max, which is
 // possible here, they are adjusted to 4 x 4 x 5.

 // MODEL-LABEL: func @simple_matmul
 func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> {
   // Matmul-style nest: arg2[row, col] += arg0[row, red] * arg1[red, col],
   // where every element is a vector<64xf32>.
   // NOTE(review): the loops run to 256 / 256 / 250 while the memrefs are
   // declared 8x8 -- confirm the shapes are intentional for the cost model.
   affine.for %row = 0 to 256 {
     affine.for %col = 0 to 256 {
       affine.for %red = 0 to 250 {
         %lhs = load %arg0[%row, %red] : memref<8x8xvector<64xf32>>
         %rhs = load %arg1[%red, %col] : memref<8x8xvector<64xf32>>
         %acc = load %arg2[%row, %col] : memref<8x8xvector<64xf32>>
         %prod = mulf %lhs, %rhs : vector<64xf32>
         %sum = addf %acc, %prod : vector<64xf32>
         // Write the accumulated value back to the slot that was read.
         store %sum, %arg2[%row, %col] : memref<8x8xvector<64xf32>>
       }
     }
   }
   return %arg2 : memref<8x8xvector<64xf32>>
 }
 // MODEL:       affine.for %i0 = 0 to 256 step 4 {
 // MODEL-NEXT:    affine.for %i1 = 0 to 256 step 4 {
 // MODEL-NEXT:      affine.for %i2 = 0 to 250 step 5 {
	// RUN: mlir-opt %s -loop-tile -tile-size=32 | FileCheck %s
	// RUN: mlir-opt %s -split-input-file -loop-tile -tile-cache-size=512 | FileCheck %s --check-prefix=MODEL

	// CHECK-DAG: [[MAP0:#map[0-9]+]] = (d0) -> (d0 + 32)
	// CHECK-DAG: [[MAP1:#map[0-9]+]] = (d0) -> (d0 + 32, 50)
	// CHECK-DAG: [[IDENTITY:#map[0-9]+]] = (d0) -> (d0)
	// CHECK-DAG: [[LB:#map[0-9]+]] = ()[s0] -> (0, s0)
	// CHECK-DAG: [[UB:#map[0-9]+]] = ()[s0, s1] -> (s0, 4096 floordiv s1)
	// CHECK-DAG: [[UB_INTRA_TILE:#map[0-9]+]] = (d0, d1, d2) -> (d2 + 32, s0, 4096 floordiv s1)

	// CHECK-LABEL: func @loop_tiling()
	// CHECK-NEXT: affine.for %i0 = 0 to 256 step 32 {
	// CHECK-NEXT: affine.for %i1 = 0 to 512 step 32 {
	// CHECK-NEXT: affine.for %i2 = 0 to 1024 step 32 {
	// CHECK-NEXT: affine.for %i3 = [[IDENTITY]](%i0) to [[MAP0]](%i0) {
	// CHECK-NEXT: affine.for %i4 = [[IDENTITY]](%i1) to [[MAP0]](%i1) {
	// CHECK-NEXT: affine.for %i5 = [[IDENTITY]](%i2) to [[MAP0]](%i2) {
	// CHECK-NEXT: "foo"(%i3, %i4, %i5) : (index, index, index) -> ()
	// CHECK-NEXT: }
	// CHECK-NEXT: }
	// CHECK-NEXT: }
	// CHECK-NEXT: }
	// CHECK-NEXT: }
	// CHECK-NEXT: }
	// CHECK-NEXT: affine.for %i6 = 0 to 50 step 32 {
	// CHECK-NEXT: affine.for %i7 = [[IDENTITY]](%i6) to min [[MAP1]](%i6) {
	// CHECK-NEXT: "bar"(%i7, %i7) : (index, index) -> ()
	// CHECK-NEXT: }
	// CHECK-NEXT: }
	// CHECK-NEXT: affine.for %i8 = 0 to 21 step 32 {
	// CHECK-NEXT: affine.for %i9 = [[IDENTITY]](%i8) to 21 {
	// CHECK-NEXT: "foobar"(%i9) : (index) -> ()
	// CHECK-NEXT: }
	// CHECK-NEXT: }
	// CHECK-NEXT: return
	func @loop_tiling() {
	  // Perfect 3-d nest with constant bounds 256 x 512 x 1024; each trip count
	  // is a multiple of the 32 passed as -tile-size, so the expected intra-tile
	  // bounds are plain maps with no min.
	  affine.for %row = 0 to 256 {
	    affine.for %col = 0 to 512 {
	      affine.for %depth = 0 to 1024 {
	        "foo"(%row, %col, %depth) : (index, index, index) -> ()
	      }
	    }
	  }

	  // Trip count 50 is not a multiple of 32, so the expected intra-tile upper
	  // bound is a min against the original bound.
	  affine.for %partial = 0 to 50 {
	    "bar"(%partial, %partial) : (index, index) -> ()
	  }

	  // Trip count 21 is smaller than the tile size: the intra-tile loop won't
	  // need a min expression and simply runs to 21.
	  affine.for %short = 0 to 21 {
	    "foobar"(%short) : (index) -> ()
	  }

	  return
	}

	#lb = ()[s0] -> (0, s0)
	#ub = ()[s0, s1] -> (s0, 4096 floordiv s1)
	// CHECK-LABEL: func @loop_max_min_bound(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
	func @loop_max_min_bound(%A : memref<? x i32>, %L : index, %U : index) {
	  // Size of dimension 0 of %A; it is the first symbol operand of #ub below.
	  %extent = dim %A, 0 : memref<? x i32>
	  // The lower bound is a max over the results of #lb and the upper bound a
	  // min over the results of #ub, so both bounds are multi-result maps.
	  affine.for %iv = max #lb()[%L] to min #ub()[%extent, %U] {
	    // Identity apply on the induction variable; its result is not used.
	    %same = affine.apply (d0) -> (d0) (%iv)
	  }
	  return
	// CHECK: affine.for %i0 = max [[LB]]()[%arg1] to min [[UB]]()[%0, %arg2] step 32 {
	// CHECK-NEXT: affine.for %i1 = [[IDENTITY]](%i0) to min [[UB_INTRA_TILE]](%0, %arg2, %i0) {
	// CHECK-NEXT: %1 = affine.apply [[IDENTITY]](%i1)
	// CHECK-NEXT: }
	// CHECK-NEXT: }
	}

	// -----

	// Cache size is set to 512 KiB. This loop nest accesses about 49 MiB, and the
	// tile sizes chosen would be 6 x 6 x 6. However, to avoid min/max, which is
	// possible here, they are adjusted to 4 x 4 x 5.

	// MODEL-LABEL: func @simple_matmul
	func @simple_matmul(%arg0: memref<8x8xvector<64xf32>>, %arg1: memref<8x8xvector<64xf32>>, %arg2: memref<8x8xvector<64xf32>>) -> memref<8x8xvector<64xf32>> {
	  // Matmul-style nest: arg2[row, col] += arg0[row, red] * arg1[red, col],
	  // where every element is a vector<64xf32>.
	  // NOTE(review): the loops run to 256 / 256 / 250 while the memrefs are
	  // declared 8x8 -- confirm the shapes are intentional for the cost model.
	  affine.for %row = 0 to 256 {
	    affine.for %col = 0 to 256 {
	      affine.for %red = 0 to 250 {
	        %lhs = load %arg0[%row, %red] : memref<8x8xvector<64xf32>>
	        %rhs = load %arg1[%red, %col] : memref<8x8xvector<64xf32>>
	        %acc = load %arg2[%row, %col] : memref<8x8xvector<64xf32>>
	        %prod = mulf %lhs, %rhs : vector<64xf32>
	        %sum = addf %acc, %prod : vector<64xf32>
	        // Write the accumulated value back to the slot that was read.
	        store %sum, %arg2[%row, %col] : memref<8x8xvector<64xf32>>
	      }
	    }
	  }
	  return %arg2 : memref<8x8xvector<64xf32>>
	}
	// MODEL: affine.for %i0 = 0 to 256 step 4 {
	// MODEL-NEXT: affine.for %i1 = 0 to 256 step 4 {
	// MODEL-NEXT: affine.for %i2 = 0 to 250 step 5 {