Delete TPUOutsideCompilationCluster pass.
This pass is no longer needed because tpu_extract_outside_compilation correctly moves ops to the host and minimizes communication without explicit clustering.
PiperOrigin-RevId: 352614148
Change-Id: I764d22a2dab4ca470c3e52a471aaff646987997b
diff --git a/tensorflow/compiler/mlir/tensorflow/BUILD b/tensorflow/compiler/mlir/tensorflow/BUILD
index 64c884e..90896d3 100644
--- a/tensorflow/compiler/mlir/tensorflow/BUILD
+++ b/tensorflow/compiler/mlir/tensorflow/BUILD
@@ -933,7 +933,6 @@
"transforms/tpu_host_computation_expansion.cc",
"transforms/tpu_identity_pruning.cc",
"transforms/tpu_merge_variables_with_execute.cc",
- "transforms/tpu_outside_compilation_cluster.cc",
"transforms/tpu_parallel_execute_sink_resource_write.cc",
"transforms/tpu_resource_partitioning.cc",
"transforms/tpu_resource_read_for_write.cc",
diff --git a/tensorflow/compiler/mlir/tensorflow/tests/tpu_outside_compilation_cluster.mlir b/tensorflow/compiler/mlir/tensorflow/tests/tpu_outside_compilation_cluster.mlir
deleted file mode 100644
index 130ed68..0000000
--- a/tensorflow/compiler/mlir/tensorflow/tests/tpu_outside_compilation_cluster.mlir
+++ /dev/null
@@ -1,558 +0,0 @@
-// RUN: tf-opt %s -tf-tpu-outside-compilation-cluster | FileCheck %s
-
-// CHECK-LABEL: func @one_cluster_no_dependencies
-func @one_cluster_no_dependencies() {
- // CHECK: "tf.opA"
- // CHECK: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "{{[a-zA-Z_0-9]+}}"
- // CHECK: "tf.opC"
- "tf_device.cluster"() ( {
- "tf.opA"() : () -> ()
- "tf.opB"() {_xla_outside_compilation = "0"} : () -> ()
- "tf.opC"() : () -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @one_cluster_with_one_op
-func @one_cluster_with_one_op() {
- // CHECK: "tf.opA"
- // CHECK-NEXT: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "{{[a-zA-Z_0-9]+}}"
- // CHECK-NEXT: "tf.opC"
- "tf_device.cluster"() ( {
- %a = "tf.opA"() : () -> tensor<i32>
- %b = "tf.opB"(%a) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- "tf.opC"(%b) : (tensor<i32>) -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @one_cluster_with_two_ops
-func @one_cluster_with_two_ops() {
- // CHECK: "tf.opA"
- // CHECK-NEXT: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER2:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.opC"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER2]]"
- // CHECK-NEXT: "tf.opD"
- "tf_device.cluster"() ( {
- %a = "tf.opA"() : () -> tensor<i32>
- %b = "tf.opB"(%a) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- %c = "tf.opC"(%b) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- "tf.opD"(%c) : (tensor<i32>) -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @one_cluster_with_three_ops
-func @one_cluster_with_three_ops() {
- // CHECK: "tf.opA"
- // CHECK: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER3:[a-zA-Z_0-9]+]]"
- // CHECK: "tf.opC"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER3]]"
- // CHECK: "tf.opD"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER3]]"
- // CHECK: "tf.opE"
- "tf_device.cluster"() ( {
- %a = "tf.opA"() : () -> tensor<i32>
- %b = "tf.opB"(%a) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- %c = "tf.opC"(%b) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- %d = "tf.opD"(%b, %c) {_xla_outside_compilation = "0"} : (tensor<i32>, tensor<i32>) -> tensor<i32>
- "tf.opE"(%d) : (tensor<i32>) -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @two_clusters_no_dependencies
-func @two_clusters_no_dependencies() {
- // CHECK: "tf.opA"
- // CHECK: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER4:[a-zA-Z_0-9]+]]"
- // CHECK: "tf.opC"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER5:[a-zA-Z_0-9]+]]"
- // CHECK: "tf.opD"
- "tf_device.cluster"() ( {
- "tf.opA"() : () -> ()
- "tf.opB"() {_xla_outside_compilation = "0"} : () -> ()
- "tf.opC"() {_xla_outside_compilation = "0"} : () -> ()
- "tf.opD"() : () -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @two_clusters_with_one_op_each
-func @two_clusters_with_one_op_each() {
- // CHECK: "tf.opA"
- // CHECK-NEXT: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER6:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.opC"
- // CHECK-NEXT: "tf.opD"
- // CHECK-NOT: _xla_outside_compilation = "[[CLUSTER6]]"
- // CHECK-NEXT: "tf.opE"
- "tf_device.cluster"() ( {
- %a = "tf.opA"() : () -> tensor<i32>
- %b = "tf.opB"(%a) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- %c = "tf.opC"(%b) : (tensor<i32>) -> tensor<i32>
- %d = "tf.opD"(%c) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- "tf.opE"(%d) : (tensor<i32>) -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @two_clusters_with_two_ops_each
-func @two_clusters_with_two_ops_each() {
- // CHECK: "tf.opA"
- // CHECK-NEXT: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER8:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.opC"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER8]]"
- // CHECK-NEXT: "tf.opD"
- // CHECK-NEXT: "tf.opE"
- // CHECK-NOT: _xla_outside_compilation = "[[CLUSTER8]]"
- // CHECK-NEXT: "tf.opF"
- // CHECK-NOT: _xla_outside_compilation = "[[CLUSTER8]]"
- // CHECK-NEXT: "tf.opG"
- "tf_device.cluster"() ( {
- %a = "tf.opA"() : () -> tensor<i32>
- %b = "tf.opB"(%a) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- %c = "tf.opC"(%b) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- %d = "tf.opD"(%c) : (tensor<i32>) -> tensor<i32>
- %e = "tf.opE"(%d) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- %f = "tf.opF"(%e) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- "tf.opG"(%f) : (tensor<i32>) -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @two_clusters_transitive_data_dependency
-func @two_clusters_transitive_data_dependency() {
- // CHECK: "tf.opA"
- // CHECK: "tf.Const"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1:[a-zA-Z_0-9]+]]"
- // CHECK: "tf.Identity"
- // CHECK: "tf.AddV2"
- // CHECK-NOT: _xla_outside_compilation = "[[CLUSTER1]]"
- "tf_device.cluster"() ( {
- "tf.opA"() : () -> ()
- %1 = "tf.Const"() {_xla_outside_compilation = "0", value = dense<1.0> : tensor<f32>} : () -> (tensor<f32>)
- %2 = "tf.Identity"(%1) : (tensor<f32>) -> (tensor<f32>)
- "tf.AddV2"(%1, %2) {_xla_outside_compilation = "0"} : (tensor<f32>, tensor<f32>) -> (tensor<f32>)
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @resource_side_effect_cycle
-func @resource_side_effect_cycle(%arg0: tensor<!tf.resource<tensor<f32>>>, %arg1: tensor<!tf.resource<tensor<f32>>>) {
- // CHECK: "tf.ReadVariableOp"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.Identity"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1]]"
- // CHECK-NEXT: "tf.AssignVariableOp"
- // CHECK-NOT: {_xla_outside_compilation = "[[CLUSTER1]]"
- "tf_device.cluster"() ( {
- %read0 = "tf.ReadVariableOp"(%arg0) {_xla_outside_compilation = "0"} : (tensor<!tf.resource<tensor<f32>>>) -> tensor<f32>
- %idet0 = "tf.Identity"(%read0) {_xla_outside_compilation = "0"} : (tensor<f32>) -> tensor<f32>
- "tf.AssignVariableOp"(%arg1, %idet0) : (tensor<!tf.resource<tensor<f32>>>, tensor<f32>) -> ()
- %read1 = "tf.ReadVariableOp"(%arg1) {_xla_outside_compilation = "0"} : (tensor<!tf.resource<tensor<f32>>>) -> tensor<f32>
- %idet1 = "tf.Identity"(%read1) {_xla_outside_compilation = "0"} : (tensor<f32>) -> tensor<f32>
- %add0 = "tf.AddV2"(%idet0, %idet1) {_xla_outside_compilation = "0"} : (tensor<f32>, tensor<f32>) -> tensor<f32>
- "tf.AssignVariableOp"(%arg0, %add0) {_xla_outside_compilation = "0"} : (tensor<!tf.resource<tensor<f32>>>, tensor<f32>) -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @two_clusters_with_same_parent
-func @two_clusters_with_same_parent() {
- // CHECK: "tf.opA"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER10:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.opB"
- // CHECK-NEXT: "tf.opC"
- // CHECK-NOT: _xla_outside_compilation = "[[CLUSTER10]]"
- // CHECK-NEXT: "tf.opD"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER12:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.opE"
- // CHECK-NEXT: "tf.opF"
- // CHECK-NOT: _xla_outside_compilation = "[[CLUSTER12]]"
- // CHECK-NEXT: "tf.opG"
- "tf_device.cluster"() ( {
- %a = "tf.opA"() {_xla_outside_compilation = "0"} : () -> tensor<i32>
- %b = "tf.opB"(%a) : (tensor<i32>) -> tensor<i32>
- %c = "tf.opC"(%b) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- %d = "tf.opD"() {_xla_outside_compilation = "0"} : () -> tensor<i32>
- %e = "tf.opE"(%d) : (tensor<i32>) -> tensor<i32>
- %f = "tf.opF"(%e) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- %g = "tf.opG"(%c, %f) : (tensor<i32>, tensor<i32>) -> tensor<i32>
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @two_clusters_with_same_outside_compiled_parent
-func @two_clusters_with_same_outside_compiled_parent() {
- // CHECK: "tf.opA"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER12:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.opB"
- // CHECK-NEXT: "tf.opC"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER13:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.opD"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER13]]"
- // CHECK-NEXT: "tf.Identity"
- // CHECK-NEXT: "tf.opF"
- // CHECK-NOT: _xla_outside_compilation = "[[CLUSTER13]]"
- // CHECK-NEXT: "tf.opG"
- // CHECK-NOT: _xla_outside_compilation = "[[CLUSTER13]]"
- "tf_device.cluster"() ( {
- %a = "tf.opA"() {_xla_outside_compilation = "0"} : () -> tensor<i32>
- %b = "tf.opB"(%a) : (tensor<i32>) -> tensor<i32>
- %c = "tf.opC"(%b) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- %d = "tf.opD"() {_xla_outside_compilation = "0"} : () -> tensor<i32>
- %e = "tf.Identity"(%d) : (tensor<i32>) -> tensor<i32>
- %f = "tf.opF"(%e) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- %g = "tf.opG"(%c, %f) {_xla_outside_compilation = "0"} : (tensor<i32>, tensor<i32>) -> tensor<i32>
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @parent_with_a_non_outside_compiled_child
-func @parent_with_a_non_outside_compiled_child() {
- // CHECK: "tf.opA"
- // CHECK-NEXT: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER14:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.opC"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER14]]"
- "tf_device.cluster"() ( {
- %a = "tf.opA"() : () -> tensor<i32>
- %b = "tf.opB"() {_xla_outside_compilation = "0"} : () -> tensor<i32>
- %c = "tf.opC"(%a, %b) {_xla_outside_compilation = "0"} : (tensor<i32>, tensor<i32>) -> tensor<i32>
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @outside_compile_with_block
-func @outside_compile_with_block() {
- // CHECK: "tf.opA"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER15:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER15]]"
- // CHECK: "tf.opC"
- // CHECK-NOT: _xla_outside_compilation = "[[CLUSTER15]]"
- "tf_device.cluster"() ( {
- %a = "tf.opA"() {_xla_outside_compilation = "0"} : () -> tensor<i32>
- %b = "tf.opB"(%a) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- "tf_device.cluster" () ( {
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- %c = "tf.opC"(%b) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @two_clusters_with_one_op_each_with_indirect_dependency
-func @two_clusters_with_one_op_each_with_indirect_dependency() {
- // CHECK: "tf.opA"
- // CHECK-NEXT: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER16:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.opC"
- // CHECK-NEXT: "tf.opD"
- // CHECK-NEXT: "tf.opE"
- // CHECK-NOT: _xla_outside_compilation = "[[CLUSTER16]]"
- // CHECK-NEXT: "tf.opF"
- "tf_device.cluster"() ( {
- %a = "tf.opA"() : () -> tensor<i32>
- %b = "tf.opB"(%a) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- %c = "tf.opC"(%b) : (tensor<i32>) -> tensor<i32>
- %d = "tf.opD"(%c) : (tensor<i32>) -> tensor<i32>
- %e = "tf.opE"(%d) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- "tf.opF"(%e) : (tensor<i32>) -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @check_ops_with_data_dependency_added_as_host_cluster
-func @check_ops_with_data_dependency_added_as_host_cluster() {
- // CHECK: "tf.opA"
- // CHECK-NEXT: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER16:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.Identity"
- // CHECK-NEXT: "tf.Identity"
- // CHECK-NEXT: "tf.opE"
- // CHECK-NOT: _xla_outside_compilation = "[[CLUSTER16]]"
- // CHECK-NEXT: "tf.opF"
- "tf_device.cluster"() ( {
- %a = "tf.opA"() : () -> tensor<i32>
- %b = "tf.opB"(%a) {_xla_outside_compilation = "0"} : (tensor<i32>) -> tensor<i32>
- %c = "tf.Identity"(%b) : (tensor<i32>) -> tensor<i32>
- %d = "tf.Identity"(%c) : (tensor<i32>) -> tensor<i32>
- %e = "tf.opE"(%d, %b, %c) {_xla_outside_compilation = "0"} : (tensor<i32>, tensor<i32>, tensor<i32>) -> tensor<i32>
- "tf.opF"(%e) : (tensor<i32>) -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @check_op_inside_nested_region_clustered
-func @check_op_inside_nested_region_clustered(%arg0 : tensor<*x!tf.resource>) {
- // CHECK: tf_device.cluster
- // CHECK: "tf.IfRegion"
- // CHECK-NEXT: "tf.Const"
- // CHECK-NEXT: "tf.B"
- // CHECK-NEXT: "tf.C"
- // CHECK-NEXT: "tf.Const"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER17:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.Const"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER17]]"
- // CHECK-NEXT: "tf.WriteSummary"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER17]]"
- "tf_device.cluster"() ( {
- %0 = "tf.Const"() {value = dense<true> : tensor<i1>} : () -> tensor<i1>
- "tf.IfRegion"(%0) ( {
- %1 = "tf.Const"() {value = dense<true> : tensor<i1>} : () -> tensor<i1>
- %2 = "tf.B"() : () -> (tensor<i64>)
- %3 = "tf.C"() : () -> (tensor<f32>)
- %4 = "tf.Const"() {_xla_outside_compilation = "auto0", value = dense<"logits"> : tensor<!tf.string>} : () -> tensor<!tf.string>
- %5 = "tf.Const"() {_xla_outside_compilation = "auto1", value = dense<"\0A\09\0A\07scalars"> : tensor<!tf.string>} : () -> tensor<!tf.string>
- "tf.WriteSummary"(%arg0, %2, %3, %4, %5) {_xla_outside_compilation = "auto2", device = "/device:CPU:0"} : (tensor<*x!tf.resource>, tensor<i64>, tensor<f32>, tensor<!tf.string>, tensor<!tf.string>) -> ()
- "tf.Yield"(%1) : (tensor<i1>) -> ()
- }, {
- %1 = "tf.Const"() {value = dense<false> : tensor<i1>} : () -> tensor<i1>
- "tf.Yield"(%1) : (tensor<i1>) -> ()
- }) { is_stateless = true } : (tensor<i1>) -> tensor<i1>
-
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @check_ops_inside_different_block_clustered
-func @check_ops_inside_different_block_clustered(%arg0 : tensor<*x!tf.resource>) {
- // CHECK: tf_device.cluster
- // CHECK-NEXT: "tf.Const"
- // CHECK-NEXT: "tf.B"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER17:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.C"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER18:[a-zA-Z_0-9]+]]"
- // CHECK: "tf.IfRegion"
- // CHECK-NEXT: "tf.Const"
- // CHECK-NEXT: "tf.Const"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER17]]"
- // CHECK-NEXT: "tf.Const"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER17]]"
- // CHECK-NEXT: "tf.WriteSummary"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER17]]"
- // CHECK: "tf.Const"
- // CHECK-NEXT: "tf.Const"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER18]]"
- // CHECK-NEXT: "tf.D"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER18]]"
- "tf_device.cluster"() ( {
- %0 = "tf.Const"() {value = dense<true> : tensor<i1>} : () -> tensor<i1>
- %2 = "tf.B"() {_xla_outside_compilation = "auto1"} : () -> (tensor<i64>)
- %3 = "tf.C"() {_xla_outside_compilation = "auto2"} : () -> (tensor<f32>)
- "tf.IfRegion"(%0) ( {
- %1 = "tf.Const"() {value = dense<true> : tensor<i1>} : () -> tensor<i1>
- %4 = "tf.Const"() {_xla_outside_compilation = "auto3", value = dense<"logits"> : tensor<!tf.string>} : () -> tensor<!tf.string>
- %5 = "tf.Const"() {_xla_outside_compilation = "auto4", value = dense<"\0A\09\0A\07scalars"> : tensor<!tf.string>} : () -> tensor<!tf.string>
- "tf.WriteSummary"(%arg0, %2, %3, %4, %5) {_xla_outside_compilation = "auto2", device = "/device:CPU:0"} : (tensor<*x!tf.resource>, tensor<i64>, tensor<f32>, tensor<!tf.string>, tensor<!tf.string>) -> ()
- "tf.Yield"(%1) : (tensor<i1>) -> ()
- }, {
- %1 = "tf.Const"() {value = dense<false> : tensor<i1>} : () -> tensor<i1>
- %4 = "tf.Const"() {_xla_outside_compilation = "auto5", value = dense<"a"> : tensor<!tf.string>} : () -> tensor<!tf.string>
- "tf.D"(%3, %4, %1) {_xla_outside_compilation = "auto6"} : (tensor<f32>, tensor<!tf.string>, tensor<i1>) -> ()
- "tf.Yield"(%1) : (tensor<i1>) -> ()
- }) { is_stateless = true } : (tensor<i1>) -> tensor<i1>
-
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @check_clustering_ops_inside_nested_control_flow
-func @check_clustering_ops_inside_nested_control_flow(%arg0 : tensor<*x!tf.resource>) {
- // CHECK: tf_device.cluster
- // CHECK-NEXT: "tf.Const"
- // CHECK-NEXT: "tf.B"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER17:[a-zA-Z_0-9]+]]"
- // CHECK-NEXT: "tf.C"
- // CHECK: _xla_outside_compilation = "[[CLUSTER17]]"
- // CHECK: "tf.IfRegion"
- // CHECK: "tf.IfRegion"
- // CHECK-NEXT: "tf.Const"
- // CHECK-NEXT: "tf.Const"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER17]]"
- // CHECK-NEXT: "tf.Const"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER17]]"
- // CHECK-NEXT: "tf.WriteSummary"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER17]]"
- "tf_device.cluster"() ( {
- %0 = "tf.Const"() {value = dense<true> : tensor<i1>} : () -> tensor<i1>
- %2 = "tf.B"() {_xla_outside_compilation = "auto1"} : () -> (tensor<i64>)
- %3 = "tf.C"() {_xla_outside_compilation = "auto2"} : () -> (tensor<f32>)
- "tf.IfRegion"(%0) ( {
- %6 = "tf.Const"() {value = dense<true> : tensor<i1>} : () -> tensor<i1>
- "tf.IfRegion"(%6) ( {
- %1 = "tf.Const"() {value = dense<true> : tensor<i1>} : () -> tensor<i1>
- %4 = "tf.Const"() {_xla_outside_compilation = "auto3", value = dense<"logits"> : tensor<!tf.string>} : () -> tensor<!tf.string>
- %5 = "tf.Const"() {_xla_outside_compilation = "auto4", value = dense<"\0A\09\0A\07scalars"> : tensor<!tf.string>} : () -> tensor<!tf.string>
- "tf.WriteSummary"(%arg0, %2, %3, %4, %5) {_xla_outside_compilation = "auto2", device = "/device:CPU:0"} : (tensor<*x!tf.resource>, tensor<i64>, tensor<f32>, tensor<!tf.string>, tensor<!tf.string>) -> ()
- "tf.Yield"(%1) : (tensor<i1>) -> ()
- }, {
- %1 = "tf.Const"() {value = dense<false> : tensor<i1>} : () -> tensor<i1>
- "tf.Yield"(%1) : (tensor<i1>) -> ()
- }) { is_stateless = true } : (tensor<i1>) -> tensor<i1>
- "tf.Yield"(%6) : (tensor<i1>) -> ()
- }, {
- %7 = "tf.Const"() {value = dense<false> : tensor<i1>} : () -> tensor<i1>
- "tf.Yield"(%7) : (tensor<i1>) -> ()
- }) { is_stateless = true } : (tensor<i1>) -> tensor<i1>
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @single_variant_input
-func @single_variant_input() {
- // CHECK: "tf.opA"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1:[a-zA-Z_0-9]+]]"
- // CHECK: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1]]"
- // CHECK: "tf.opC"
- "tf_device.cluster"() ( {
- %1= "tf.opA"() : () -> tensor<!tf.variant<tensor<f32>>>
- "tf.opB"(%1) {_xla_outside_compilation = "0"} : (tensor<!tf.variant<tensor<f32>>>) -> ()
- "tf.opC"() : () -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @chained_variant_input
-func @chained_variant_input() {
- // CHECK: "tf.opA"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1:[a-zA-Z_0-9]+]]"
- // CHECK: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1]]"
- // CHECK: "tf.opC"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1]]"
- "tf_device.cluster"() ( {
- %1 = "tf.opA"() : () -> tensor<!tf.variant<tensor<f32>>>
- %2 = "tf.opB"(%1) : (tensor<!tf.variant<tensor<f32>>>) -> (tensor<!tf.variant<tensor<f32>>>)
- "tf.opC"(%2) {_xla_outside_compilation = "0"} : (tensor<!tf.variant<tensor<f32>>>) -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @single_variant_output
-func @single_variant_output() {
- // CHECK: "tf.opA"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1:[a-zA-Z_0-9]+]]"
- // CHECK: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1]]"
- // CHECK: "tf.opC"
- "tf_device.cluster"() ( {
- %1= "tf.opA"() {_xla_outside_compilation = "0"} : () -> tensor<!tf.variant<tensor<f32>>>
- "tf.opB"(%1) : (tensor<!tf.variant<tensor<f32>>>) -> ()
- "tf.opC"() : () -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @chained_variant_output
-func @chained_variant_output() {
- // CHECK: "tf.opA"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1:[a-zA-Z_0-9]+]]"
- // CHECK: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1]]"
- // CHECK: "tf.opC"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1]]"
- "tf_device.cluster"() ( {
- %1 = "tf.opA"() {_xla_outside_compilation = "0"} : () -> tensor<!tf.variant<tensor<f32>>>
- %2 = "tf.opB"(%1) : (tensor<!tf.variant<tensor<f32>>>) -> (tensor<!tf.variant<tensor<f32>>>)
- "tf.opC"(%2) : (tensor<!tf.variant<tensor<f32>>>) -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @variant_input_output
-func @variant_input_output() {
- // CHECK: "tf.opA"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1:[a-zA-Z_0-9]+]]"
- // CHECK: "tf.opB"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1]]"
- // CHECK: "tf.opC"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1]]"
- "tf_device.cluster"() ( {
- %1 = "tf.opA"() : () -> tensor<!tf.variant<tensor<f32>>>
- %2 = "tf.opB"(%1) {_xla_outside_compilation = "0"} : (tensor<!tf.variant<tensor<f32>>>) -> (tensor<!tf.variant<tensor<f32>>>)
- "tf.opC"(%2) : (tensor<!tf.variant<tensor<f32>>>) -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @variant_input_nested
-func @variant_input_nested(%arg0 : tensor<*x!tf.resource>) {
- // CHECK: tf_device.cluster
- // CHECK-NEXT: "tf.Const"
- // CHECK-NEXT: "tf.C"
- // CHECK-SAME: _xla_outside_compilation = "[[CLUSTER1:[a-zA-Z_0-9]+]]"
- // CHECK: "tf.IfRegion"
- // CHECK: "tf.opD"
- // CHECK: _xla_outside_compilation = "[[CLUSTER1]]"
- "tf_device.cluster"() ( {
- %0 = "tf.Const"() {value = dense<true> : tensor<i1>} : () -> tensor<i1>
- %2 = "tf.C"() {_xla_outside_compilation = "auto0"} : () -> (tensor<!tf.variant<tensor<f32>>>)
- "tf.IfRegion"(%0) ( {
- %1 = "tf.Const"() {value = dense<true> : tensor<i1>} : () -> tensor<i1>
- "tf.opD"(%2) : (tensor<!tf.variant<tensor<f32>>>) -> ()
- "tf.Yield"(%1) : (tensor<i1>) -> ()
- }, {
- %1 = "tf.Const"() {value = dense<false> : tensor<i1>} : () -> tensor<i1>
- "tf.Yield"(%1) : (tensor<i1>) -> ()
- }) { is_stateless = true, _xla_outside_compilation = "auto1" } : (tensor<i1>) -> tensor<i1>
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
-
-// CHECK-LABEL: func @variant_output_nested
-func @variant_output_nested(%arg0 : tensor<*x!tf.resource>) {
- // CHECK: tf_device.cluster
- // CHECK: "tf.IfRegion"
- // CHECK: "tf.C"
- // CHECK-NOT: _xla_outside_compilation
- // CHECK: "tf.D"
- // CHECK-NOT: _xla_outside_compilation
- // CHECK: "tf.Yield"
- // CHECK: _xla_outside_compilation
- "tf_device.cluster"() ( {
- %0 = "tf.Const"() {value = dense<true> : tensor<i1>} : () -> tensor<i1>
- %1 = "tf.IfRegion"(%0) ( {
- %2 = "tf.C"() : () -> (tensor<!tf.variant<tensor<f32>>>)
- "tf.Yield"(%2) : (tensor<!tf.variant<tensor<f32>>>) -> ()
- }, {
- %2 = "tf.D"() : () -> (tensor<!tf.variant<tensor<f32>>>)
- "tf.Yield"(%2) : (tensor<!tf.variant<tensor<f32>>>) -> ()
- }) { is_stateless = true, _xla_outside_compilation = "auto1" } : (tensor<i1>) -> tensor<!tf.variant<tensor<f32>>>
- "tf.E"(%1) {_xla_outside_compilation = "auto0"} : (tensor<!tf.variant<tensor<f32>>>) -> ()
- tf_device.return
- }) {cluster_attr = "cluster_attr"} : () -> ()
- return
-}
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc
index f385249..de2d13e 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/bridge.cc
@@ -107,7 +107,6 @@
pm.addNestedPass<FuncOp>(createCSEPass());
pm.addPass(TFDevice::CreateMarkOpsForOutsideCompilationPass());
pm.addPass(CreateTPUExtractHeadTailOutsideCompilationPass());
- pm.addPass(CreateTPUOutsideCompilationClusterPass());
pm.addPass(CreateTPUExtractOutsideCompilationPass());
pm.addNestedPass<FuncOp>(TFDevice::CreateClusterConstantSinkingPass());
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h
index c9dc66b..f0e2b84 100644
--- a/tensorflow/compiler/mlir/tensorflow/transforms/passes.h
+++ b/tensorflow/compiler/mlir/tensorflow/transforms/passes.h
@@ -370,11 +370,6 @@
std::unique_ptr<OperationPass<ModuleOp>>
CreateOutsideCompiledToHostLaunchPass();
-// Creates a pass that groups outside compiled operations (CPU ops inside TPU
-// cluster) into clusters that can be extracted and run on the CPU.
-std::unique_ptr<OperationPass<ModuleOp>>
-CreateTPUOutsideCompilationClusterPass();
-
// Creates a pass that extracts outside compilation (CPU ops inside TPU cluster)
// at head/tail of TPU cluster to run before/after TPU computation.
std::unique_ptr<OperationPass<ModuleOp>>
diff --git a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_outside_compilation_cluster.cc b/tensorflow/compiler/mlir/tensorflow/transforms/tpu_outside_compilation_cluster.cc
deleted file mode 100644
index 558b877..0000000
--- a/tensorflow/compiler/mlir/tensorflow/transforms/tpu_outside_compilation_cluster.cc
+++ /dev/null
@@ -1,221 +0,0 @@
-/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/FormatVariadic.h"
-#include "mlir/IR/Attributes.h" // from @llvm-project
-#include "mlir/IR/Operation.h" // from @llvm-project
-#include "mlir/IR/Types.h" // from @llvm-project
-#include "mlir/Support/LLVM.h" // from @llvm-project
-#include "mlir/Support/LogicalResult.h" // from @llvm-project
-#include "tensorflow/compiler/mlir/tensorflow/analysis/side_effect_analysis.h"
-#include "tensorflow/compiler/mlir/tensorflow/ir/tf_device.h"
-#include "tensorflow/compiler/mlir/tensorflow/transforms/passes.h"
-
-namespace mlir {
-namespace TFTPU {
-
-namespace {
-
-constexpr char kXlaOutsideCompilationAttr[] = "_xla_outside_compilation";
-
-struct TPUOutsideCompilationCluster
- : public TF::PerFunctionAggregateAnalysisConsumerPass<
- TPUOutsideCompilationCluster, TF::SideEffectAnalysis> {
- void runOnFunction(FuncOp func,
- const TF::SideEffectAnalysis::Info& side_effect_analysis);
-};
-
-bool IsVariant(Value value) {
- return getElementTypeOrSelf(value.getType()).isa<TF::VariantType>();
-}
-
-bool HasOutsideCompiledAncestor(Operation* op) {
- Operation* parent = op->getParentOp();
- while (parent) {
- if (parent->getAttrOfType<StringAttr>(kXlaOutsideCompilationAttr))
- return true;
- parent = parent->getParentOp();
- }
- return false;
-}
-
-// Represents an outside compiled cluster. All ops that are added to the same
-// cluster will be extracted together in a later pass.
-class OutsideCompiledCluster {
- public:
- explicit OutsideCompiledCluster(int number)
- : cluster_name_(llvm::formatv("cluster{0}", number).str()) {}
-
- // Attempts to add an op to this cluster. Ops can be grouped to the same
- // cluster if they have data dependency and are inside the same block.
- bool AddOp(Operation* op,
- const TF::SideEffectAnalysis::Info& side_effect_analysis) {
- // Check if the op is safe to add before adding it.
- if (IsSafeToAdd(op, side_effect_analysis)) {
- op->setAttr(kXlaOutsideCompilationAttr,
- StringAttr::get(cluster_name_, op->getContext()));
- host_cluster_ops_.insert(op);
- return true;
- }
- return false;
- }
-
- // If any tf.variants are inputs/outputs to the cluster, add them to the
- // cluster unless they are already marks with outside compilation attribute.
- bool AddVariantInputsOutputs() {
- bool added_op = false;
- llvm::SmallPtrSet<Operation*, 8> expanded_cluster_ops(host_cluster_ops_);
- for (Operation* cluster_op : host_cluster_ops_) {
- // Walk the clustered operations to handle nested ops.
- cluster_op->walk([&](Operation* op) {
- // Add any operations that provide variant inputs to the cluster.
- for (auto value : op->getOperands()) {
- auto input_defining_op = value.getDefiningOp();
- if (IsVariant(value) && input_defining_op &&
- !HasOutsideCompiledAncestor(input_defining_op) &&
- !input_defining_op->getAttrOfType<StringAttr>(
- kXlaOutsideCompilationAttr)) {
- expanded_cluster_ops.insert(input_defining_op);
- input_defining_op->setAttr(
- kXlaOutsideCompilationAttr,
- StringAttr::get(cluster_name_,
- input_defining_op->getContext()));
- added_op = true;
- }
- }
- // Add any operations that consume variant outputs to the cluster.
- for (auto value : op->getResults()) {
- if (IsVariant(value)) {
- for (auto user : value.getUsers()) {
- if (!host_cluster_ops_.contains(user) &&
- !HasOutsideCompiledAncestor(user) &&
- !user->getAttrOfType<StringAttr>(
- kXlaOutsideCompilationAttr)) {
- expanded_cluster_ops.insert(user);
- user->setAttr(
- kXlaOutsideCompilationAttr,
- StringAttr::get(cluster_name_, user->getContext()));
- added_op = true;
- }
- }
- }
- }
- });
- }
- host_cluster_ops_.swap(expanded_cluster_ops);
-
- return added_op;
- }
-
- private:
- // TODO(hinsu): Consider using GraphCycles data structure available in xla
- // directory to avoid potentially full traversal for each new op and cluster
- // pair.
- // Checks if it is safe for `op` to be merged into this cluster.
- bool IsSafeToAdd(Operation* op,
- const TF::SideEffectAnalysis::Info& side_effect_analysis) {
- if (host_cluster_ops_.empty()) return true;
-
- // If there is an intermediate data or side effect dependency between the op
- // and ops in the cluster, it's not safe to add.
- std::vector<Operation*> dependencies;
-
- // Materialize data dependencies as the llvm::concat doesn't support
- // non-materialized iteration.
- auto data_deps = llvm::to_vector<4>(op->getUsers());
- llvm::SmallVector<Operation*, 4> control_deps =
- side_effect_analysis.DirectControlSuccessors(op);
- for (auto* dep : llvm::concat<Operation*>(data_deps, control_deps)) {
- if (!host_cluster_ops_.contains(dep)) dependencies.push_back(dep);
- }
-
- llvm::SmallPtrSet<Operation*, 4> visited;
- while (!dependencies.empty()) {
- Operation* next_op = dependencies.back();
- dependencies.pop_back();
- if (visited.count(next_op)) continue;
- visited.insert(next_op);
-
- auto data_deps = llvm::to_vector<4>(next_op->getUsers());
- llvm::SmallVector<Operation*, 4> control_deps =
- side_effect_analysis.DirectControlSuccessors(next_op);
- for (auto* dep : llvm::concat<Operation*>(data_deps, control_deps)) {
- if (host_cluster_ops_.contains(dep)) return false;
- dependencies.push_back(dep);
- }
- }
-
- return true;
- }
-
- // `host_cluster_op_` stores a set of ops that will be grouped and computed
- // on host as single XlaHostCompute op. An outside compiled op can be grouped
- // to a single cluster if it has data dependency to another op already in the
- // cluster.
- llvm::SmallPtrSet<Operation*, 8> host_cluster_ops_;
- std::string cluster_name_;
-};
-
-void TPUOutsideCompilationCluster::runOnFunction(
- FuncOp func, const TF::SideEffectAnalysis::Info& side_effect_analysis) {
- llvm::SmallVector<OutsideCompiledCluster, 8> clusters;
- int cluster_counter = 0;
-
- func.walk([&](tf_device::ClusterOp tpu_cluster) {
- llvm::SmallVector<Operation*, 4> outside_ops;
- tpu_cluster.walk([&](Operation* op) {
- if (op->getAttrOfType<StringAttr>(kXlaOutsideCompilationAttr))
- outside_ops.emplace_back(op);
- });
-
- // In order to cluster ops feeding results to the same operation, traverse
- // the ops in reverse order.
- for (Operation* op : llvm::reverse(outside_ops)) {
- // Try to add the op to existing clusters.
- bool added = false;
- for (auto& cluster : clusters)
- if ((added = cluster.AddOp(op, side_effect_analysis))) break;
-
- // If the op cannot be added to existing clusters, create a new cluster.
- if (!added) {
- OutsideCompiledCluster new_cluster(cluster_counter++);
- new_cluster.AddOp(op, side_effect_analysis);
- clusters.push_back(new_cluster);
- }
- }
- });
- for (auto& cluster : clusters) {
- bool variants_to_add = true;
- while (variants_to_add) variants_to_add = cluster.AddVariantInputsOutputs();
- }
-}
-
-} // anonymous namespace
-
-std::unique_ptr<OperationPass<ModuleOp>>
-CreateTPUOutsideCompilationClusterPass() {
- return std::make_unique<TPUOutsideCompilationCluster>();
-}
-
-static PassRegistration<TPUOutsideCompilationCluster> pass(
- "tf-tpu-outside-compilation-cluster",
- "Identifies clusters of operations assigned to outside compilation");
-
-} // namespace TFTPU
-} // namespace mlir