tensorflow/compiler/mlir/tensorflow/ir/tf_device_ops.td - platform/external/tensorflow - Git at Google

 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

 // This is the definition file for the TensorFlow Device Dialect.

 #ifndef TF_DEVICE_DIALECT
 #define TF_DEVICE_DIALECT

 include "mlir/IR/OpBase.td"
 include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"

 //===----------------------------------------------------------------------===//
 // TensorFlow Device Dialect definitions
 //===----------------------------------------------------------------------===//

 // Dialect record for `tf_device`; the ops defined in this file attach to it.
 def TfDevice_Dialect : Dialect {
   let name = "tf_device";

   let description = [{
     The TensorFlow Device dialect.

     This dialect contains operations to describe/launch computations on devices.
     These operations do not map 1-1 to TensorFlow ops and require a lowering
     pass later to transform them into Compile/Run op pairs, like XlaCompile and
     XlaRun.
   }];

   let cppNamespace = "::mlir::tf_device";

   // This setting used to sit inside the `description` literal above, where it
   // was plain documentation text and had no effect. The ops in this file call
   // un-prefixed accessors (`device()`, `func()`, `funcAttr()`) from their
   // extraClassDeclaration bodies, so raw accessor names are requested here.
   let emitAccessorPrefix = kEmitAccessorPrefix_Raw;
 }

 //===----------------------------------------------------------------------===//
 // TensorFlow Device Dialect Ops definitions
 //===----------------------------------------------------------------------===//

 // Shared parent of every op record in this file: binds the op to
 // TfDevice_Dialect and forwards the mnemonic and trait list to `Op`.
 class TfDevice_Op<string mnemonic, list<Trait> traits = []>
     : Op<TfDevice_Dialect, mnemonic, traits>;

 // `tf_device.launch`: a single-region op carrying a `device` string attribute.
 def TfDevice_LaunchOp
     : TfDevice_Op<"launch", [SingleBlockImplicitTerminator<"ReturnOp">]> {
   let summary = [{
 The `tf_device.launch` op launches containing operations on target device.
   }];

   let description = [{
 This op captures all needed live-in values.
   }];

   // Signature: one string attribute in, any number of results out, and a
   // region restricted to exactly one block.
   let arguments = (ins StrAttr:$device);
   let results = (outs Variadic<AnyType>:$results);
   let regions = (region SizedRegion<1>:$body);

   let hasCanonicalizer = 1;

   // Builder that records the device attribute and result types and attaches
   // one region to the operation state.
   let builders = [
     OpBuilder<(ins "StringAttr":$device, "TypeRange":$result_types),
     [{
       $_state.addAttribute("device", device);
       $_state.addTypes(result_types);
       $_state.addRegion();
     }]>
   ];

   // C++ helpers injected into the generated op class.
   let extraClassDeclaration = [{
     Block &GetBody() { return getOperation()->getRegion(0).front(); }
     StringRef getDevice() { return device(); }
     bool WrapsSingleOp();
   }];
 }

 // `tf_device.return`: terminator that returns values from a `tf_device` op.
 def TfDevice_ReturnOp
     : TfDevice_Op<"return", [NoSideEffect, ReturnLike, Terminator]> {
   let summary = [{
 The `tf_device.return` operation terminates and returns values from a
 `tf_device` dialect operation.
   }];

   let arguments = (ins Variadic<AnyType>:$results);

   // Textual form: the attribute dictionary, then an optional
   // `<operands> : <types>` group that is present only when operands exist.
   let assemblyFormat = "attr-dict ($results^ `:` type($results))?";

   // Convenience builder for a return that carries no operands.
   let builders = [
     OpBuilder<(ins),
     [{
       build($_builder, $_state, {});
     }]>
   ];
 }

 // `tf_device.launch_func`: names a function by flat symbol reference and a
 // device by string attribute, with variadic operands and results.
 def TfDevice_LaunchFuncOp : TfDevice_Op<"launch_func"> {
   let summary = [{
     The `tf_device.launch_func` launches a function on target device.
   }];

   let arguments = (ins
     StrAttr:$device,
     FlatSymbolRefAttr:$func,
     Variadic<AnyType>:$operands
   );

   let results = (outs Variadic<AnyType>:$results);

   // `get`-style aliases that forward to the raw generated accessors.
   let extraClassDeclaration = [{
     StringRef getFunc() { return func(); }
     StringRef getDevice() { return device(); }
   }];
 }

 // `tf_device.parallel_execute`: holds a variadic number of single-block
 // regions; the description below states the ordering guarantees.
 def TfDevice_ParallelExecuteOp
     : TfDevice_Op<"parallel_execute",
                   [SingleBlockImplicitTerminator<"ReturnOp">]> {
   let description = [{
     ParallelExecute op concurrently executes variadic number of regions. Regions
     must represent separate sets of instructions to execute concurrently. In
     order to represent concurrently executed regions with dependencies, multiple
     ParallelExecute ops can be used instead. As so, regions within
     ParallelExecute op must not have control/data dependencies.

     While explicit dependencies between regions are disallowed, ParallelExecute
     op does not prevent implicit communication between regions (e.g.
     communication via send/recvs). In this case, users of ParallelExecute op
     must provide correct control dependencies between regions to guarantee
     correctness. Regions in ParallelExecute may include Resource ops.

     In the case where different regions include ops access the same resource,
     the users of the ParallelExecute op must provide mechanism (via send/recvs
     or via control dependencies) to guarantee correct ordering. Sequential
     ordering of ops within a region is guaranteed. Also, sequential ordering of
     ops before/after ParallelExecute ops are guaranteed. That is, execution of
     regions inside ParallelExecute op is blocked until all inputs to all regions
     are materialized and ops following ParallelExecute op are blocked until all
     regions are executed.
   }];

   // No operands are declared; the op has only regions and results.
   let regions = (region VariadicRegion<SizedRegion<1>>:$regions);
   let results = (outs Variadic<AnyType>:$execute_outputs);

   let hasVerifier = 1;

   // Builder signature only; no inline body is supplied here.
   let builders = [
     OpBuilder<(ins "int":$num_regions, "TypeRange":$output_types)>
   ];

   // Per-region helper declarations added to the generated op class.
   let extraClassDeclaration = [{
     Block& GetRegionBlockWithIndex(unsigned index);
     Operation::result_range GetRegionOutputs(unsigned region_index);

     // Checks if a tf_device.parallel_execute index'th region block wraps a
     // single operation and the single operation results are perfectly forwarded
     // to the region block's return.
     bool RegionWrapsSingleOp(unsigned index);
   }];
 }

 // `tf_device.replicate`: single-region op with two operand segments
 // (replicated and packed inputs), a replica count `n` and an optional
 // `devices` dictionary. It uses a custom assembly format and a verifier.
 def TfDevice_ReplicateOp : TfDevice_Op<"replicate",
     [SingleBlockImplicitTerminator<"ReturnOp">, AttrSizedOperandSegments]> {
   let summary = "Wraps an N-way replicated computation.";

   // The example below was corrected in two places: values defined inside the
   // launch regions no longer reuse the name `%9` (already defined by "tf.opJ"
   // and used as an operand), and the third launch's result type now matches
   // the `tensor<!tf_type.resource>` value it returns.
   let description = [{
 The region held by this operation represents a computation that is replicated
 across multiple devices. The number of replications is based on the `n`
 attribute. Explicit devices can be populated in the `devices` attribute, and it
 must be a mapping of device alias to list of explicit or aliased device names
 from the outer scope. The device name map specifies devices on which replicated
 ops inside tf_device.replicate will be executed.

 A tf_device.parallel_execute inside the tf_device.replicate op region may be
 used to represent computations across a larger set of devices. In that case, the
 device alias can be used to specify device assignment and replication of each
 concurrent execution (i.e. region) defined by tf_device.parallel_execute op.
 The size of each value list in the device name map must match `n`. Within a
 replica, the execution semantics follow standard sequential behavior. Ops in the
 tf_device.replicate wrapped with a tf_device.launch will have its device set to
 the associated replicated device from `devices` if the tf_device.launch refers
 to an aliased device name. Otherwise the device already set in tf_device.launch
 is used instead.

 Operands are replicated inputs and packed inputs.

 replicated_inputs: each group of `n` inputs corresponds to an input for a single
 individual replica and is mapped to a single region argument. Inside one group
 the operands are matching in order the `devices` attribute. Each replicated
 input must have compatible shapes and types.
 packed_inputs: each input corresponds to an input broadcasted across all
 replicas and is mapped to a single region argument.

 Operands not replicated can be implicitly captured by ops in the region. Results
 are replicated each from the regions terminator.

 For example:
 ```
 %0 = "tf.opA"() : () -> tensor<i32>
 %1 = "tf.opB"() : () -> tensor<i32>
 %2 = "tf.opC"() : () -> tensor<f32>
 %3 = "tf.opD"() : () -> tensor<f32>
 %4 = "tf.opE"() : () -> tensor<!tf_type.resource>
 %5 = "tf.opF"() : () -> tensor<!tf_type.resource>
 %6 = "tf.opG"() : () -> tensor<!tf_type.string>
 %7 = "tf.opH"() : () -> tensor<!tf_type.string>
 %8 = "tf.opI"() : () -> tensor<!tf_type.variant>
 %9 = "tf.opJ"() : () -> tensor<i1>
 %output:8 = tf_device.replicate([%0, %1] as %input_0: tensor<i32>,
                                 [%2, %3] as %input_1: tensor<f32>,
                                 [%4, %5] as %input_2: tensor<!tf_type.resource>,
                                 [%6, %7] as %input_3: tensor<!tf_type.string>,
                                 %8 as %input_4: tensor<!tf_type.variant>)
                 {n = 2 : i32,
                  devices = {DEVICE_ALIAS_0 = ["/DEVICE:0", "/DEVICE:1"],
                             DEVICE_ALIAS_1 = ["/DEVICE:2", "/DEVICE:3"]}} {
   // Inside the region, %0, %2, %4, and %6 corresponds to
   // "/DEVICE:0"/"/DEVICE:2" and %1, %3, %5, and %7 corresponds to
   // "/DEVICE:1"/"/DEVICE:3", depending on which device alias is used.
   %k = "tf_device.launch"() ( {
     %10 = "tf.opK"(%input_0, %input_4, %9) :
       (tensor<i32>, tensor<!tf_type.variant>, tensor<i1>) -> tensor<i32>
     tf_device.return %10 : tensor<i32>
   }) {device = "DEVICE_ALIAS_0"} : () -> tensor<i32>
   %l = "tf_device.launch"() ( {
     %11 = "tf.opL"(%input_1, %input_4, %9) :
       (tensor<f32>, tensor<!tf_type.variant>, tensor<i1>) -> tensor<f32>
     tf_device.return %11 : tensor<f32>
   }) {device = "DEVICE_ALIAS_1"} : () -> tensor<f32>
   %m = "tf_device.launch"() ( {
     %12 = "tf.opM"(%input_2, %input_4, %9) :
       (tensor<!tf_type.resource>, tensor<!tf_type.variant>, tensor<i1>)
         -> tensor<!tf_type.resource>
     tf_device.return %12 : tensor<!tf_type.resource>
   }) {device = "/DEVICE:4"} : () -> tensor<!tf_type.resource>
   %n = "tf.opN"(%input_3, %input_4, %9) :
     (tensor<!tf_type.string>, tensor<!tf_type.variant>, tensor<i1>)
       -> tensor<!tf_type.string>
   tf_device.return %k, %l, %m, %n :
     tensor<i32>, tensor<f32>, tensor<!tf_type.resource>, tensor<!tf_type.string>
 }
 // %output#0 corresponds to %k returned from "/DEVICE:0"
 // %output#1 corresponds to %k returned from "/DEVICE:1"
 // %output#2 corresponds to %l returned from "/DEVICE:2"
 // %output#3 corresponds to %l returned from "/DEVICE:3"
 // %output#4, %output#5 corresponds to %m and will be returned from "/DEVICE:4"
 // %output#6, %output#7 corresponds to %n and will have no device set
 ```
   }];

   // `operand_segment_sizes` records how the flat operand list splits into the
   // two variadic groups (required by AttrSizedOperandSegments). `n` is
   // constrained to be at least 2.
   let arguments = (ins
     Variadic<AnyType>:$replicated_inputs,
     Variadic<AnyType>:$packed_inputs,

     I32ElementsAttr:$operand_segment_sizes,
     Confined<I32Attr, [IntMinValue<2>]>:$n,
     OptionalAttr<DictionaryAttr>:$devices
   );

   let results = (outs
     Variadic<AnyType>:$replicated_outputs
   );

   let regions = (region SizedRegion<1>:$body);

   // Helper declarations for mapping block arguments to operands; only
   // GetBody() is defined inline here.
   let extraClassDeclaration = [{
     Block &GetBody() { return getOperation()->getRegion(0).front(); }
     unsigned GetNumReplicatedBlockArguments();
     unsigned GetNumPackedBlockArguments();
     llvm::ArrayRef<BlockArgument> GetPackedBlockArguments();
     llvm::ArrayRef<BlockArgument> GetReplicatedBlockArguments();
     bool IsReplicatedBlockArgument(BlockArgument block_arg);
     bool IsPackedBlockArgument(BlockArgument block_arg);
     unsigned GetReplicaOperandIndexForBlockArgument(BlockArgument block_arg, unsigned replica);
     Value GetReplicaOperandForBlockArgument(BlockArgument block_arg, unsigned replica);
     MutableArrayRef<OpOperand> GetOperandsForBlockArgument(BlockArgument block_arg);
     bool WrapsSingleOp();
   }];

   // Two builder signatures that differ only in how `devices` is passed: as a
   // map of string lists, or as an optional DictionaryAttr.
   let builders = [
     OpBuilder<(ins "int":$n,
       "const llvm::SmallDenseMap<StringRef, llvm::SmallVector<StringRef, 4>>&":$devices,
       "llvm::ArrayRef<std::pair<ValueRange, Type>>":$replicated_inputs,
       "ValueRange":$packed_inputs, "TypeRange":$replica_output_types)>,
     OpBuilder<(ins "int":$n, "llvm::Optional<DictionaryAttr>":$devices,
       "llvm::ArrayRef<std::pair<ValueRange, Type>>":$replicated_inputs,
       "ValueRange":$packed_inputs, "TypeRange":$replica_output_types)>,
   ];

   let hasCustomAssemblyFormat = 1;
   let hasVerifier = 1;
 }

 // `tf_device.cluster`: single-region grouping op with an optional `policy`
 // string attribute.
 def TfDevice_ClusterOp
     : TfDevice_Op<"cluster", [SingleBlockImplicitTerminator<"ReturnOp">]> {
   let summary = [{
 The `tf_device.cluster` op wraps containing operations in a region.
   }];

   let description = [{
 This op can be used to group operations, and captures all needed live-in values.

 Optional policy attribute allows to tag clusters with a policy name that was
 used to form the cluster.
   }];

   let arguments = (ins OptionalAttr<StrAttr>:$policy);
   let results = (outs Variadic<AnyType>:$results);
   let regions = (region SizedRegion<1>:$body);

   let hasCanonicalizer = 1;

   // Accessor for the first block of the op's region.
   let extraClassDeclaration = [{
     Block &GetBody() { return getOperation()->getRegion(0).front(); }
   }];

   // Builder taking only result types; it forwards a null StringAttr as the
   // policy argument.
   let builders = [
     OpBuilder<(ins "TypeRange":$resultTypes),
     [{
       build($_builder, $_state, resultTypes, mlir::StringAttr {});
     }]>
   ];
 }

 // `tf_device.cluster_func`: refers to a function by flat symbol reference,
 // with variadic operands and results.
 def TfDevice_ClusterFuncOp : TfDevice_Op<"cluster_func"> {
   let summary = [{
 The `tf_device.cluster_func` launches a function containing the body of a
 cluster.
   }];

   let description = [{
 This op is used for outlining a cluster.
   }];

   let arguments = (ins FlatSymbolRefAttr:$func, Variadic<AnyType>:$operands);

   let results = (outs Variadic<AnyType>:$results);

   // Resolves the `func` symbol to a func::FuncOp via the nearest symbol table.
   let extraClassDeclaration = [{
     // returns the function that this operation will launch.
     func::FuncOp getFunc() {
       return SymbolTable::lookupNearestSymbolFrom<func::FuncOp>(*this, funcAttr());
     }
   }];
 }

 // `tf_device.remote_run`: takes a host string, a callee symbol and variadic
 // callee arguments, and produces variadic results.
 // NOTE(review): the op lists SingleBlockImplicitTerminator<"ReturnOp"> but
 // declares no region, and the summary/description talk about "containing
 // operations" and live-in values although the op takes a callee - confirm
 // whether the trait and wording are intentional.
 def TfDevice_RemoteRunOp
     : TfDevice_Op<"remote_run", [SingleBlockImplicitTerminator<"ReturnOp">]> {
   let summary = [{
 The `tf_device.remote_run` op launches the containing operations on a specific
 host.
   }];

   let description = [{
 This op captures all needed live-in values.
   }];

   let arguments = (ins
     StrAttr:$host,
     FlatSymbolRefAttr:$callee,
     Variadic<AnyType>:$callee_args
   );

   let results = (outs Variadic<AnyType>:$results);

   // Textual form: host, callee, parenthesised arguments, attribute dictionary,
   // then the functional type of arguments and results.
   let assemblyFormat = [{
       $host $callee `(` $callee_args `)` attr-dict `:` functional-type ( $callee_args , $results )
   }];
 }

 // `tf_device.send`: one value operand plus `key` and `dst_host` string
 // attributes; produces no results.
 def TfDevice_SendOp : TfDevice_Op<"send"> {
   let summary = "Send a value to a host.";

   let description = [{
     Send the value to the given host with the given rendezvous key.
   }];

   let arguments = (ins AnyType:$value, StrAttr:$key, StrAttr:$dst_host);

   let results = (outs);

   // Textual form: value, key, destination host, attributes, then value type.
   let assemblyFormat = [{$value $key $dst_host attr-dict `:` type($value)}];
 }

 // `tf_device.receive`: takes `key` and `src_host` string attributes and
 // produces a single result of any type.
 def TfDevice_ReceiveOp : TfDevice_Op<"receive", []> {
   // Typo fixed: the summary previously read "Rceive".
   let summary = "Receive a value from a host.";

   let description = [{
     Receive a value from the given host with the given rendezvous key.
   }];

   let arguments = (ins
     StrAttr:$key,
     StrAttr:$src_host
   );

   let results = (outs
     AnyType:$result
   );

   // Textual form: key, source host, attributes, then the result type.
   let assemblyFormat = [{$key $src_host attr-dict `:` type($result)}];
 }

 #endif // TF_DEVICE_DIALECT
	/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

	Licensed under the Apache License, Version 2.0 (the "License");
	you may not use this file except in compliance with the License.
	You may obtain a copy of the License at

	http://www.apache.org/licenses/LICENSE-2.0

	Unless required by applicable law or agreed to in writing, software
	distributed under the License is distributed on an "AS IS" BASIS,
	WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	See the License for the specific language governing permissions and
	limitations under the License.
	==============================================================================*/

	// This is the definition file for the TensorFlow Device Dialect.

	#ifndef TF_DEVICE_DIALECT
	#define TF_DEVICE_DIALECT

	include "mlir/IR/OpBase.td"
	include "mlir/Interfaces/ControlFlowInterfaces.td"
	include "mlir/Interfaces/SideEffectInterfaces.td"

	//===----------------------------------------------------------------------===//
	// TensorFlow Device Dialect definitions
	//===----------------------------------------------------------------------===//

	// Dialect record for `tf_device`; the ops defined in this file attach to it.
	def TfDevice_Dialect : Dialect {
	  let name = "tf_device";

	  let description = [{
	The TensorFlow Device dialect.

	This dialect contains operations to describe/launch computations on devices.
	These operations do not map 1-1 to TensorFlow ops and require a lowering
	pass later to transform them into Compile/Run op pairs, like XlaCompile and
	XlaRun.
	}];

	  let cppNamespace = "::mlir::tf_device";

	  // This setting used to sit inside the `description` literal above, where it
	  // was plain documentation text and had no effect. The ops in this file call
	  // un-prefixed accessors (`device()`, `func()`, `funcAttr()`) from their
	  // extraClassDeclaration bodies, so raw accessor names are requested here.
	  let emitAccessorPrefix = kEmitAccessorPrefix_Raw;
	}

	//===----------------------------------------------------------------------===//
	// TensorFlow Device Dialect Ops definitions
	//===----------------------------------------------------------------------===//

	// Shared parent of every op record in this file: binds the op to
	// TfDevice_Dialect and forwards the mnemonic and trait list to `Op`.
	class TfDevice_Op<string mnemonic, list<Trait> traits = []>
	    : Op<TfDevice_Dialect, mnemonic, traits>;

	// `tf_device.launch`: a single-region op carrying a `device` string attribute.
	def TfDevice_LaunchOp
	    : TfDevice_Op<"launch", [SingleBlockImplicitTerminator<"ReturnOp">]> {
	  let summary = [{
	The `tf_device.launch` op launches containing operations on target device.
	}];

	  let description = [{
	This op captures all needed live-in values.
	}];

	  // Signature: one string attribute in, any number of results out, and a
	  // region restricted to exactly one block.
	  let arguments = (ins StrAttr:$device);
	  let results = (outs Variadic<AnyType>:$results);
	  let regions = (region SizedRegion<1>:$body);

	  let hasCanonicalizer = 1;

	  // Builder that records the device attribute and result types and attaches
	  // one region to the operation state.
	  let builders = [
	    OpBuilder<(ins "StringAttr":$device, "TypeRange":$result_types),
	    [{
	$_state.addAttribute("device", device);
	$_state.addTypes(result_types);
	$_state.addRegion();
	}]>
	  ];

	  // C++ helpers injected into the generated op class.
	  let extraClassDeclaration = [{
	Block &GetBody() { return getOperation()->getRegion(0).front(); }
	StringRef getDevice() { return device(); }
	bool WrapsSingleOp();
	}];
	}

	// `tf_device.return`: terminator that returns values from a `tf_device` op.
	def TfDevice_ReturnOp
	    : TfDevice_Op<"return", [NoSideEffect, ReturnLike, Terminator]> {
	  let summary = [{
	The `tf_device.return` operation terminates and returns values from a
	`tf_device` dialect operation.
	}];

	  let arguments = (ins Variadic<AnyType>:$results);

	  // Textual form: the attribute dictionary, then an optional
	  // `<operands> : <types>` group that is present only when operands exist.
	  let assemblyFormat = "attr-dict ($results^ `:` type($results))?";

	  // Convenience builder for a return that carries no operands.
	  let builders = [
	    OpBuilder<(ins),
	    [{
	build($_builder, $_state, {});
	}]>
	  ];
	}

	// `tf_device.launch_func`: names a function by flat symbol reference and a
	// device by string attribute, with variadic operands and results.
	def TfDevice_LaunchFuncOp : TfDevice_Op<"launch_func"> {
	  let summary = [{
	The `tf_device.launch_func` launches a function on target device.
	}];

	  let arguments = (ins
	    StrAttr:$device,
	    FlatSymbolRefAttr:$func,
	    Variadic<AnyType>:$operands
	  );

	  let results = (outs Variadic<AnyType>:$results);

	  // `get`-style aliases that forward to the raw generated accessors.
	  let extraClassDeclaration = [{
	StringRef getFunc() { return func(); }
	StringRef getDevice() { return device(); }
	}];
	}

	// `tf_device.parallel_execute`: holds a variadic number of single-block
	// regions; the description below states the ordering guarantees.
	def TfDevice_ParallelExecuteOp
	    : TfDevice_Op<"parallel_execute",
	                  [SingleBlockImplicitTerminator<"ReturnOp">]> {
	  let description = [{
	ParallelExecute op concurrently executes variadic number of regions. Regions
	must represent separate sets of instructions to execute concurrently. In
	order to represent concurrently executed regions with dependencies, multiple
	ParallelExecute ops can be used instead. As so, regions within
	ParallelExecute op must not have control/data dependencies.

	While explicit dependencies between regions are disallowed, ParallelExecute
	op does not prevent implicit communication between regions (e.g.
	communication via send/recvs). In this case, users of ParallelExecute op
	must provide correct control dependencies between regions to guarantee
	correctness. Regions in ParallelExecute may include Resource ops.

	In the case where different regions include ops access the same resource,
	the users of the ParallelExecute op must provide mechanism (via send/recvs
	or via control dependencies) to guarantee correct ordering. Sequential
	ordering of ops within a region is guaranteed. Also, sequential ordering of
	ops before/after ParallelExecute ops are guaranteed. That is, execution of
	regions inside ParallelExecute op is blocked until all inputs to all regions
	are materialized and ops following ParallelExecute op are blocked until all
	regions are executed.
	}];

	  // No operands are declared; the op has only regions and results.
	  let regions = (region VariadicRegion<SizedRegion<1>>:$regions);
	  let results = (outs Variadic<AnyType>:$execute_outputs);

	  let hasVerifier = 1;

	  // Builder signature only; no inline body is supplied here.
	  let builders = [
	    OpBuilder<(ins "int":$num_regions, "TypeRange":$output_types)>
	  ];

	  // Per-region helper declarations added to the generated op class.
	  let extraClassDeclaration = [{
	Block& GetRegionBlockWithIndex(unsigned index);
	Operation::result_range GetRegionOutputs(unsigned region_index);

	// Checks if a tf_device.parallel_execute index'th region block wraps a
	// single operation and the single operation results are perfectly forwarded
	// to the region block's return.
	bool RegionWrapsSingleOp(unsigned index);
	}];
	}

	// `tf_device.replicate`: single-region op with two operand segments
	// (replicated and packed inputs), a replica count `n` and an optional
	// `devices` dictionary. It uses a custom assembly format and a verifier.
	def TfDevice_ReplicateOp : TfDevice_Op<"replicate",
	[SingleBlockImplicitTerminator<"ReturnOp">, AttrSizedOperandSegments]> {
	let summary = "Wraps an N-way replicated computation.";

	// The example below was corrected in two places: values defined inside the
	// launch regions no longer reuse the name `%9` (already defined by "tf.opJ"
	// and used as an operand), and the third launch's result type now matches
	// the `tensor<!tf_type.resource>` value it returns.
	let description = [{
	The region held by this operation represents a computation that is replicated
	across multiple devices. The number of replications is based on the `n`
	attribute. Explicit devices can be populated in the `devices` attribute, and it
	must be a mapping of device alias to list of explicit or aliased device names
	from the outer scope. The device name map specifies devices on which replicated
	ops inside tf_device.replicate will be executed.

	A tf_device.parallel_execute inside the tf_device.replicate op region may be
	used to represent computations across a larger set of devices. In that case, the
	device alias can be used to specify device assignment and replication of each
	concurrent execution (i.e. region) defined by tf_device.parallel_execute op.
	The size of each value list in the device name map must match `n`. Within a
	replica, the execution semantics follow standard sequential behavior. Ops in the
	tf_device.replicate wrapped with a tf_device.launch will have its device set to
	the associated replicated device from `devices` if the tf_device.launch refers
	to an aliased device name. Otherwise the device already set in tf_device.launch
	is used instead.

	Operands are replicated inputs and packed inputs.

	replicated_inputs: each group of `n` inputs corresponds to an input for a single
	individual replica and is mapped to a single region argument. Inside one group
	the operands are matching in order the `devices` attribute. Each replicated
	input must have compatible shapes and types.
	packed_inputs: each input corresponds to an input broadcasted across all
	replicas and is mapped to a single region argument.

	Operands not replicated can be implicitly captured by ops in the region. Results
	are replicated each from the regions terminator.

	For example:
	```
	%0 = "tf.opA"() : () -> tensor<i32>
	%1 = "tf.opB"() : () -> tensor<i32>
	%2 = "tf.opC"() : () -> tensor<f32>
	%3 = "tf.opD"() : () -> tensor<f32>
	%4 = "tf.opE"() : () -> tensor<!tf_type.resource>
	%5 = "tf.opF"() : () -> tensor<!tf_type.resource>
	%6 = "tf.opG"() : () -> tensor<!tf_type.string>
	%7 = "tf.opH"() : () -> tensor<!tf_type.string>
	%8 = "tf.opI"() : () -> tensor<!tf_type.variant>
	%9 = "tf.opJ"() : () -> tensor<i1>
	%output:8 = tf_device.replicate([%0, %1] as %input_0: tensor<i32>,
	[%2, %3] as %input_1: tensor<f32>,
	[%4, %5] as %input_2: tensor<!tf_type.resource>,
	[%6, %7] as %input_3: tensor<!tf_type.string>,
	%8 as %input_4: tensor<!tf_type.variant>)
	{n = 2 : i32,
	devices = {DEVICE_ALIAS_0 = ["/DEVICE:0", "/DEVICE:1"],
	DEVICE_ALIAS_1 = ["/DEVICE:2", "/DEVICE:3"]}} {
	// Inside the region, %0, %2, %4, and %6 corresponds to
	// "/DEVICE:0"/"/DEVICE:2" and %1, %3, %5, and %7 corresponds to
	// "/DEVICE:1"/"/DEVICE:3", depending on which device alias is used.
	%k = "tf_device.launch"() ( {
	%10 = "tf.opK"(%input_0, %input_4, %9) :
	(tensor<i32>, tensor<!tf_type.variant>, tensor<i1>) -> tensor<i32>
	tf_device.return %10 : tensor<i32>
	}) {device = "DEVICE_ALIAS_0"} : () -> tensor<i32>
	%l = "tf_device.launch"() ( {
	%11 = "tf.opL"(%input_1, %input_4, %9) :
	(tensor<f32>, tensor<!tf_type.variant>, tensor<i1>) -> tensor<f32>
	tf_device.return %11 : tensor<f32>
	}) {device = "DEVICE_ALIAS_1"} : () -> tensor<f32>
	%m = "tf_device.launch"() ( {
	%12 = "tf.opM"(%input_2, %input_4, %9) :
	(tensor<!tf_type.resource>, tensor<!tf_type.variant>, tensor<i1>)
	-> tensor<!tf_type.resource>
	tf_device.return %12 : tensor<!tf_type.resource>
	}) {device = "/DEVICE:4"} : () -> tensor<!tf_type.resource>
	%n = "tf.opN"(%input_3, %input_4, %9) :
	(tensor<!tf_type.string>, tensor<!tf_type.variant>, tensor<i1>)
	-> tensor<!tf_type.string>
	tf_device.return %k, %l, %m, %n :
	tensor<i32>, tensor<f32>, tensor<!tf_type.resource>, tensor<!tf_type.string>
	}
	// %output#0 corresponds to %k returned from "/DEVICE:0"
	// %output#1 corresponds to %k returned from "/DEVICE:1"
	// %output#2 corresponds to %l returned from "/DEVICE:2"
	// %output#3 corresponds to %l returned from "/DEVICE:3"
	// %output#4, %output#5 corresponds to %m and will be returned from "/DEVICE:4"
	// %output#6, %output#7 corresponds to %n and will have no device set
	```
	}];

	// `operand_segment_sizes` records how the flat operand list splits into the
	// two variadic groups (required by AttrSizedOperandSegments). `n` is
	// constrained to be at least 2.
	let arguments = (ins
	Variadic<AnyType>:$replicated_inputs,
	Variadic<AnyType>:$packed_inputs,

	I32ElementsAttr:$operand_segment_sizes,
	Confined<I32Attr, [IntMinValue<2>]>:$n,
	OptionalAttr<DictionaryAttr>:$devices
	);

	let results = (outs
	Variadic<AnyType>:$replicated_outputs
	);

	let regions = (region SizedRegion<1>:$body);

	// Helper declarations for mapping block arguments to operands; only
	// GetBody() is defined inline here.
	let extraClassDeclaration = [{
	Block &GetBody() { return getOperation()->getRegion(0).front(); }
	unsigned GetNumReplicatedBlockArguments();
	unsigned GetNumPackedBlockArguments();
	llvm::ArrayRef<BlockArgument> GetPackedBlockArguments();
	llvm::ArrayRef<BlockArgument> GetReplicatedBlockArguments();
	bool IsReplicatedBlockArgument(BlockArgument block_arg);
	bool IsPackedBlockArgument(BlockArgument block_arg);
	unsigned GetReplicaOperandIndexForBlockArgument(BlockArgument block_arg, unsigned replica);
	Value GetReplicaOperandForBlockArgument(BlockArgument block_arg, unsigned replica);
	MutableArrayRef<OpOperand> GetOperandsForBlockArgument(BlockArgument block_arg);
	bool WrapsSingleOp();
	}];

	// Two builder signatures that differ only in how `devices` is passed: as a
	// map of string lists, or as an optional DictionaryAttr.
	let builders = [
	OpBuilder<(ins "int":$n,
	"const llvm::SmallDenseMap<StringRef, llvm::SmallVector<StringRef, 4>>&":$devices,
	"llvm::ArrayRef<std::pair<ValueRange, Type>>":$replicated_inputs,
	"ValueRange":$packed_inputs, "TypeRange":$replica_output_types)>,
	OpBuilder<(ins "int":$n, "llvm::Optional<DictionaryAttr>":$devices,
	"llvm::ArrayRef<std::pair<ValueRange, Type>>":$replicated_inputs,
	"ValueRange":$packed_inputs, "TypeRange":$replica_output_types)>,
	];

	let hasCustomAssemblyFormat = 1;
	let hasVerifier = 1;
	}

	// `tf_device.cluster`: single-region grouping op with an optional `policy`
	// string attribute.
	def TfDevice_ClusterOp
	    : TfDevice_Op<"cluster", [SingleBlockImplicitTerminator<"ReturnOp">]> {
	  let summary = [{
	The `tf_device.cluster` op wraps containing operations in a region.
	}];

	  let description = [{
	This op can be used to group operations, and captures all needed live-in values.

	Optional policy attribute allows to tag clusters with a policy name that was
	used to form the cluster.
	}];

	  let arguments = (ins OptionalAttr<StrAttr>:$policy);
	  let results = (outs Variadic<AnyType>:$results);
	  let regions = (region SizedRegion<1>:$body);

	  let hasCanonicalizer = 1;

	  // Accessor for the first block of the op's region.
	  let extraClassDeclaration = [{
	Block &GetBody() { return getOperation()->getRegion(0).front(); }
	}];

	  // Builder taking only result types; it forwards a null StringAttr as the
	  // policy argument.
	  let builders = [
	    OpBuilder<(ins "TypeRange":$resultTypes),
	    [{
	build($_builder, $_state, resultTypes, mlir::StringAttr {});
	}]>
	  ];
	}

	// `tf_device.cluster_func`: refers to a function by flat symbol reference,
	// with variadic operands and results.
	def TfDevice_ClusterFuncOp : TfDevice_Op<"cluster_func"> {
	  let summary = [{
	The `tf_device.cluster_func` launches a function containing the body of a
	cluster.
	}];

	  let description = [{
	This op is used for outlining a cluster.
	}];

	  let arguments = (ins FlatSymbolRefAttr:$func, Variadic<AnyType>:$operands);

	  let results = (outs Variadic<AnyType>:$results);

	  // Resolves the `func` symbol to a func::FuncOp via the nearest symbol table.
	  let extraClassDeclaration = [{
	// returns the function that this operation will launch.
	func::FuncOp getFunc() {
	return SymbolTable::lookupNearestSymbolFrom<func::FuncOp>(*this, funcAttr());
	}
	}];
	}

	// `tf_device.remote_run`: takes a host string, a callee symbol and variadic
	// callee arguments, and produces variadic results.
	// NOTE(review): the op lists SingleBlockImplicitTerminator<"ReturnOp"> but
	// declares no region, and the summary/description talk about "containing
	// operations" and live-in values although the op takes a callee - confirm
	// whether the trait and wording are intentional.
	def TfDevice_RemoteRunOp
	    : TfDevice_Op<"remote_run", [SingleBlockImplicitTerminator<"ReturnOp">]> {
	  let summary = [{
	The `tf_device.remote_run` op launches the containing operations on a specific
	host.
	}];

	  let description = [{
	This op captures all needed live-in values.
	}];

	  let arguments = (ins
	    StrAttr:$host,
	    FlatSymbolRefAttr:$callee,
	    Variadic<AnyType>:$callee_args
	  );

	  let results = (outs Variadic<AnyType>:$results);

	  // Textual form: host, callee, parenthesised arguments, attribute dictionary,
	  // then the functional type of arguments and results.
	  let assemblyFormat = [{
	$host $callee `(` $callee_args `)` attr-dict `:` functional-type ( $callee_args , $results )
	}];
	}

	// `tf_device.send`: one value operand plus `key` and `dst_host` string
	// attributes; produces no results.
	def TfDevice_SendOp : TfDevice_Op<"send"> {
	  let summary = "Send a value to a host.";

	  let description = [{
	Send the value to the given host with the given rendezvous key.
	}];

	  let arguments = (ins AnyType:$value, StrAttr:$key, StrAttr:$dst_host);

	  let results = (outs);

	  // Textual form: value, key, destination host, attributes, then value type.
	  let assemblyFormat = [{$value $key $dst_host attr-dict `:` type($value)}];
	}

	// `tf_device.receive`: takes `key` and `src_host` string attributes and
	// produces a single result of any type.
	def TfDevice_ReceiveOp : TfDevice_Op<"receive", []> {
	// Typo fixed: the summary previously read "Rceive".
	let summary = "Receive a value from a host.";

	let description = [{
	Receive a value from the given host with the given rendezvous key.
	}];

	let arguments = (ins
	StrAttr:$key,
	StrAttr:$src_host
	);

	let results = (outs
	AnyType:$result
	);

	// Textual form: key, source host, attributes, then the result type.
	let assemblyFormat = [{$key $src_host attr-dict `:` type($result)}];
	}

	#endif // TF_DEVICE_DIALECT