blob: 474bfb1eef163030a476d08a673c64d98c05ca1b [file] [log] [blame]
// RUN: tf-opt %s -split-input-file -verify-diagnostics -tf-tpu-extract-head-tail-outside-compilation | FileCheck %s --dump-input-on-failure
// Tests extraction of outside compiled ops at the head of a TPU computation.
module attributes {tf.versions = {producer = 888 : i32}, tf.devices = ["/job:worker/replica:0/task:0/device:CPU:0", "/job:worker/replica:0/task:0/device:TPU_SYSTEM:0", "/job:worker/replica:0/task:0/device:TPU:0"]} {
// A single op marked `_xla_outside_compilation` ("tf.A") is the first op in the
// cluster and only consumes the function argument. The expectations below
// require "tf.A" to appear inside a `tf_device.launch` assigned to the CPU:0
// device, with "tf.C" still inside the `tf_device.cluster`.
// CHECK-LABEL: func @single_head_outside_compilation
func @single_head_outside_compilation(%arg0: tensor<i32>) {
// CHECK: tf_device.launch
// CHECK: "tf.A"
// CHECK-NEXT: tf_device.return
// CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0"
//
// CHECK: "tf_device.cluster"
// CHECK: "tf.C"
// CHECK-NEXT: tf_device.return
"tf_device.cluster"() ( {
"tf.A"(%arg0) {_xla_outside_compilation = "cluster1"} : (tensor<i32>) -> ()
"tf.B"() : () -> ()
"tf.C"() : () -> ()
tf_device.return
}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = []} : () -> ()
return
}
// The outside compiled op "tf.A" has no operands, and its result feeds the
// unmarked op "tf.B". The expectations below require the launch to return the
// result of "tf.A" and "tf.B" inside the cluster to consume the launch result.
// CHECK-LABEL: func @ops_no_operands
func @ops_no_operands() {
// CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch"()
// CHECK: %[[A_OUT:.*]] = "tf.A"
// CHECK-NEXT: tf_device.return %[[A_OUT]]
// CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0"
//
// CHECK: "tf_device.cluster"
// CHECK-NEXT: "tf.B"(%[[LAUNCH_OUT]])
// CHECK-NEXT: "tf.C"
// CHECK-NEXT: tf_device.return
"tf_device.cluster"() ( {
%0 = "tf.A"() {_xla_outside_compilation = "cluster1"} : () -> tensor<i32>
%1 = "tf.B"(%0) {}: (tensor<i32>) -> tensor<i32>
"tf.C"(%1) : (tensor<i32>) -> ()
tf_device.return
}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = []} : () -> ()
return
}
// The outside compiled op "tf.B" takes its operand from "tf.A", which is
// defined outside the cluster. The expectations below require the launch to
// directly follow "tf.A" and return the result of "tf.B", and "tf.C" inside
// the cluster to consume the launch result.
// CHECK-LABEL: func @op_operand_outside_cluster
func @op_operand_outside_cluster() {
// CHECK: %[[A_OUT:.*]] = "tf.A"
%0 = "tf.A"() : () -> tensor<i32>
// CHECK-NEXT: %[[LAUNCH_OUT:.*]] = "tf_device.launch"()
// CHECK: %[[B_OUT:.*]] = "tf.B"
// CHECK-NEXT: tf_device.return %[[B_OUT]]
// CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0"
//
// CHECK: "tf_device.cluster"
// CHECK-NEXT: "tf.C"(%[[LAUNCH_OUT]])
// CHECK-NEXT: "tf.D"
// CHECK-NEXT: tf_device.return
"tf_device.cluster"() ( {
%1 = "tf.B"(%0) {_xla_outside_compilation = "cluster1"} : (tensor<i32>) -> tensor<i32>
%2 = "tf.C"(%1) {}: (tensor<i32>) -> tensor<i32>
"tf.D"(%2) : (tensor<i32>) -> ()
tf_device.return
}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = []} : () -> ()
return
}
// The cluster returns three values, the first of which is the result of the
// outside compiled op "tf.A". The expectations below require:
//   - the launch to return the result of "tf.A";
//   - the cluster to have only two results (from "tf.C" and "tf.B") and to
//     keep its attributes;
//   - the function to return the launch result in place of the cluster's
//     original first result, followed by the two remaining cluster results.
// CHECK-LABEL: func @aliased_output
func @aliased_output() -> (tensor<i32>, tensor<i32>, tensor<i32>) {
// CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch"()
// CHECK: %[[A_OUT:.*]] = "tf.A"
// CHECK-NEXT: tf_device.return %[[A_OUT]]
// CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0"
//
// CHECK: %[[CLUSTER_OUT:.*]]:2 = "tf_device.cluster"
// CHECK-NEXT: %[[B_OUT:.*]] = "tf.B"(%[[LAUNCH_OUT]])
// CHECK-NEXT: %[[C_OUT:.*]] = "tf.C"
// CHECK-NEXT: tf_device.return %[[C_OUT]], %[[B_OUT]]
// CHECK-NEXT: {
// CHECK-DAG: num_cores_per_replica = 1
// CHECK-DAG: step_marker_location = ""
// CHECK-DAG: padding_map = []
// CHECK-DAG: topology = ""
// CHECK-DAG: device_assignment = []
//
// CHECK: return %[[LAUNCH_OUT]], %[[CLUSTER_OUT]]#0, %[[CLUSTER_OUT]]#1
%0:3 = "tf_device.cluster"() ( {
%1 = "tf.A"() {_xla_outside_compilation = "cluster1"} : () -> tensor<i32>
%2 = "tf.B"(%1) {}: (tensor<i32>) -> tensor<i32>
%3 = "tf.C"(%2) : (tensor<i32>) -> tensor<i32>
tf_device.return %1, %3, %2 : tensor<i32>, tensor<i32>, tensor<i32>
}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = []} : () -> (tensor<i32>, tensor<i32>, tensor<i32>)
return %0#0, %0#1, %0#2 : tensor<i32>, tensor<i32>, tensor<i32>
}
// Every op in the cluster ("tf.A", "tf.B", "tf.C") is marked as outside
// compiled. The expectations below require all three ops to end up in the
// launch, the cluster body to contain only `tf_device.return`, and the
// function to return the launch result.
// CHECK-LABEL: func @all_head_computation_ops
func @all_head_computation_ops(%arg0: tensor<i32>) -> tensor<i32> {
// CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch"()
// CHECK: %[[A_OUT:.*]] = "tf.A"
// CHECK: %[[B_OUT:.*]] = "tf.B"(%[[A_OUT]])
// CHECK: %[[C_OUT:.*]] = "tf.C"(%[[B_OUT]], %arg0)
// CHECK-NEXT: tf_device.return %[[C_OUT]]
// CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0"
//
// CHECK: "tf_device.cluster"
// CHECK-NEXT: tf_device.return
//
// CHECK: return %[[LAUNCH_OUT]]
%0 = "tf_device.cluster"() ( {
%1 = "tf.A"(%arg0) {_xla_outside_compilation = "cluster1"} : (tensor<i32>) -> tensor<i32>
%2 = "tf.B"(%1) {_xla_outside_compilation = "cluster1"} : (tensor<i32>) -> tensor<i32>
%3 = "tf.C"(%2, %arg0) {_xla_outside_compilation = "cluster1"} : (tensor<i32>, tensor<i32>) -> tensor<i32>
tf_device.return %3 : tensor<i32>
}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = []} : () -> tensor<i32>
return %0 : tensor<i32>
}
// Three chained outside compiled ops ("tf.A" -> "tf.B" -> "tf.C") precede the
// unmarked op "tf.D", which uses the result of "tf.B". The expectations below
// require all three marked ops to be in the launch, the launch to return the
// result of "tf.B", and "tf.D" inside the cluster to consume the launch result.
// CHECK-LABEL: func @multiple_head_outside_compilation
func @multiple_head_outside_compilation(%arg0: tensor<i32>) {
// CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch"()
// CHECK: %[[A_OUT:.*]] = "tf.A"
// CHECK: %[[B_OUT:.*]] = "tf.B"(%[[A_OUT]])
// CHECK: "tf.C"
// CHECK-NEXT: tf_device.return %[[B_OUT]]
// CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0"
//
// CHECK: "tf_device.cluster"
// CHECK: "tf.D"(%[[LAUNCH_OUT]])
// CHECK-NEXT: tf_device.return
"tf_device.cluster"() ( {
%0 = "tf.A"(%arg0) {_xla_outside_compilation = "cluster1"} : (tensor<i32>) -> tensor<i32>
%1 = "tf.B"(%0) {_xla_outside_compilation = "cluster1"} : (tensor<i32>) -> tensor<i32>
"tf.C"(%1, %arg0) {_xla_outside_compilation = "cluster1"} : (tensor<i32>, tensor<i32>) -> ()
"tf.D"(%1) : (tensor<i32>) -> ()
tf_device.return
}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = []} : () -> ()
return
}
// Negative case: the only outside compiled op ("tf.B") consumes the result of
// the unmarked op "tf.A" in the same cluster. The expectations below require
// that no launch is created and that "tf.A", "tf.B" and "tf.C" all remain, in
// order, inside the cluster.
// CHECK-LABEL: func @no_extraction_middle_outside_compiled_ops
func @no_extraction_middle_outside_compiled_ops(%arg0: tensor<i32>) {
// CHECK-NOT: tf_device.launch
// CHECK: "tf_device.cluster"
// CHECK-NEXT: "tf.A"
// CHECK-NEXT: "tf.B"
// CHECK-NEXT: "tf.C"
// CHECK-NEXT: tf_device.return
"tf_device.cluster"() ( {
%0 = "tf.A"(%arg0) {} : (tensor<i32>) -> tensor<i32>
%1 = "tf.B"(%0) {_xla_outside_compilation = "cluster1"}: (tensor<i32>) -> tensor<i32>
"tf.C"(%1) : (tensor<i32>) -> ()
tf_device.return
}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = []} : () -> ()
return
}
// Mixed case: "tf.A", "tf.C" and "tf.D" are all marked as outside compiled,
// but "tf.C" takes an operand from the unmarked op "tf.B". The expectations
// below require only "tf.A" and "tf.D" (which depends solely on "tf.A") to be
// in the launch, while "tf.B", "tf.C" and "tf.E" stay inside the cluster.
// CHECK-LABEL: func @no_extraction_tpu_op_operands
func @no_extraction_tpu_op_operands(%arg0: tensor<i32>) {
// CHECK: %[[LAUNCH_OUT:.*]] = "tf_device.launch"()
// CHECK: %[[A_OUT:.*]] = "tf.A"
// CHECK: %[[D_OUT:.*]] = "tf.D"(%[[A_OUT]])
// CHECK-NEXT: tf_device.return %[[D_OUT]]
// CHECK: device = "/job:worker/replica:0/task:0/device:CPU:0"
//
// CHECK: "tf_device.cluster"
// CHECK: "tf.B"
// CHECK: "tf.C"
// CHECK: "tf.E"
// CHECK-NEXT: tf_device.return
"tf_device.cluster"() ( {
%0 = "tf.A"(%arg0) {_xla_outside_compilation = "cluster1"} : (tensor<i32>) -> tensor<i32>
%1 = "tf.B"() {} : () -> tensor<i32>
%2 = "tf.C"(%arg0, %1) {_xla_outside_compilation = "cluster1"} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%3 = "tf.D"(%0) {_xla_outside_compilation = "cluster1"}: (tensor<i32>) -> tensor<i32>
%4 = "tf.E"(%3) {} : (tensor<i32>) -> tensor<i32>
tf_device.return
}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = []} : () -> ()
return
}
// The cluster is nested in a `tf_device.replicate` with n = 2, and the marked
// ops consume the replicated input %ri. The expectations below require:
//   - the launch to be the first op inside the replicate, holding "tf.A" and
//     "tf.D", with device "TPU_REPLICATED_HOST";
//   - "tf.B", "tf.C" (which uses the result of the unmarked "tf.B") and "tf.E"
//     to stay inside the cluster, with "tf.E" consuming the launch result.
// CHECK-LABEL: func @replicated_head_outside_compilation
func @replicated_head_outside_compilation(%arg0: tensor<i32>, %arg1: tensor<i32>) {
// CHECK: tf_device.replicate([%arg0, %arg1] as %[[RI:.*]]: tensor<i32>)
//
// CHECK-NEXT: %[[LAUNCH_OUT:.*]] = "tf_device.launch"()
// CHECK: %[[A_OUT:.*]] = "tf.A"(%[[RI]])
// CHECK: %[[D_OUT:.*]] = "tf.D"(%[[A_OUT]])
// CHECK-NEXT: tf_device.return %[[D_OUT]]
// CHECK: device = "TPU_REPLICATED_HOST"
//
// CHECK: "tf_device.cluster"
// CHECK: %[[B_OUT:.*]] = "tf.B"
// CHECK: "tf.C"(%[[RI]], %[[B_OUT]])
// CHECK: "tf.E"(%[[LAUNCH_OUT]])
// CHECK-NEXT: tf_device.return
tf_device.replicate([%arg0, %arg1] as %ri : tensor<i32>) {n = 2 : i32} {
"tf_device.cluster"() ( {
%0 = "tf.A"(%ri) {_xla_outside_compilation = "cluster1"} : (tensor<i32>) -> tensor<i32>
%1 = "tf.B"() {} : () -> tensor<i32>
%2 = "tf.C"(%ri, %1) {_xla_outside_compilation = "cluster1"} : (tensor<i32>, tensor<i32>) -> tensor<i32>
%3 = "tf.D"(%0) {_xla_outside_compilation = "cluster1"}: (tensor<i32>) -> tensor<i32>
%4 = "tf.E"(%3) {} : (tensor<i32>) -> tensor<i32>
tf_device.return
}) {num_cores_per_replica = 1, step_marker_location = "", padding_map = [], topology = "", device_assignment = []} : () -> ()
tf_device.return
}
return
}
}