/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
| |
| #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_PASSES_H_ |
| #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_PASSES_H_ |
| |
| #include <memory> |
| |
| #include "mlir/IR/MLIRContext.h" // from @llvm-project |
| #include "mlir/IR/PatternMatch.h" // from @llvm-project |
| #include "mlir/Pass/Pass.h" // from @llvm-project |
| |
| namespace mlir { |
| |
// Creates a pass that breaks up an island with multiple ops into multiple
// islands, each with a single op.
std::unique_ptr<OperationPass<ModuleOp>> CreateBreakUpIslandsPass();

// Creates a pass that converts mlir functions consisting of mlir ops into a
// tf_executor dialect as a single island.
std::unique_ptr<OperationPass<FuncOp>>
CreateFunctionalToExecutorDialectConversionPass();

// Creates a pass that lifts inner ops of tf_executor.island ops in
// tf_executor.graph into the same block as the tf_executor.graph.
std::unique_ptr<OperationPass<FuncOp>>
CreateExecutorDialectToFunctionalConversionPass();
| |
| namespace TF { |
| // Creates a pass that drops `shape_invariant` attribute from While/WhileRegion |
| // ops. |
| std::unique_ptr<OperationPass<FuncOp>> CreateDropWhileShapeInvariantPass(); |
| |
| // Transforms functional control flow operations in the TensorFlow dialect to |
| // MLIR Control Flow Graph (CFG) form. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTFFunctionalControlFlowToCFG(); |
| |
| // Transforms functional control flow operations in the TensorFlow dialect to |
| // their region based counterparts. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateTFFunctionalControlFlowToRegions(); |
| |
| // Transforms region bases control flow operations in the TensorFlow dialect to |
| // their functional counterparts. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateTFRegionControlFlowToFunctional(); |
| |
| // Materialize the MlirPassthroughOp by replacing it with the MLIR module |
| // attached as an attribute. |
| std::unique_ptr<OperationPass<FuncOp>> CreateMaterializePassthroughOpPass(); |
| |
| // Performs Shape Inference on the TensorFlow dialect using the global registry. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTFShapeInferencePass(); |
| |
| // Guarantee that all FuncOp's have a single use. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateGuaranteeAllFuncsOneUsePass(); |
| |
| // Optional pass which will unroll BatchMatMul and use only MatMul |
| std::unique_ptr<OperationPass<FuncOp>> CreateUnrollBatchMatMulPassPass(); |
| |
| // Optional pass which will map TF BatchMatMul to TF Einsum |
| std::unique_ptr<OperationPass<FuncOp>> CreateBatchMatMulToEinsumPass(); |
| |
| // Optimizes Tensorflow graph. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTFOptimizePass(); |
| |
| // Creates pass to rewrite RecvTPUEmbeddingActivationsOp and |
| // SendTPUEmbeddingGradients ops to internal variants. |
| std::unique_ptr<OperationPass<FuncOp>> CreateRewriteTPUEmbeddingOpsPass(); |
| |
| // Performs specific fusion for GPU targets. |
| std::unique_ptr<OperationPass<FuncOp>> CreateGpuOpFusionPass(); |
| |
| // Create a pass that convert ops that copy tensors between devices, e.g. |
| // tf.Identity. |
| std::unique_ptr<OperationPass<mlir::FuncOp>> |
| CreateTensorDeviceCopyConversionPass(); |
| |
| // Returns a pass that folds tf.BroadcastTo nodes with subsequent nodes if they |
| // have built in broadcasting support. |
| std::unique_ptr<OperationPass<FuncOp>> CreateBroadcastFoldPass(); |
| |
| struct LayoutOptimizationPipelineOptions |
| : public PassPipelineOptions<LayoutOptimizationPipelineOptions> { |
| Option<std::string> force_data_format{ |
| *this, "force-data-format", |
| llvm::cl::desc("Force data format for all layout sensitive ops")}; |
| Option<bool> skip_fold_transpose_in_ops{ |
| *this, "skip-fold-transpose-in-ops", |
| llvm::cl::desc("Skip folding transpose operands in Ops which can support " |
| "different layouts.")}; |
| }; |
| |
| // Layout optimization assigns optimal data layout for layout sensitive |
| // operations, and cancels all redundant transposes. |
| void CreateLayoutOptimizationPipeline( |
| OpPassManager& pm, // NOLINT - MLIR contract is pass by mutable reference. |
| const LayoutOptimizationPipelineOptions& options); |
| |
| struct StandardPipelineOptions |
| : public PassPipelineOptions<StandardPipelineOptions> { |
| Option<bool> enable_inliner{*this, "enable-inliner", |
| llvm::cl::desc("Enable inliner."), |
| llvm::cl::init(false)}; |
| Option<bool> form_clusters{*this, "form-clusters", |
| llvm::cl::desc("Enable Cluster Formation pass."), |
| llvm::cl::init(false)}; |
| }; |
| |
| // Propagates the pass manager with the passes involved in transforming or |
| // optimizing an MLIR graph without any target specialization. |
| // NOLINTNEXTLINE - MLIR contract is pass by mutable reference. |
| void CreateTFStandardPipeline(OpPassManager& pm, |
| const StandardPipelineOptions& options); |
| |
| // Propagates device attributes of resources from callers to callees. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateResourceDeviceInferencePass(); |
| |
| // Creates a pass that promotes resource reads/writes in the main function to |
| // inputs and outputs of the main function, assuming that resource operations |
| // have already been decomposed and function calls have already been inlined. |
| // The pass also annotates the input arguments for resources with the indices |
| // of their aliasing output arguments. |
| std::unique_ptr<OperationPass<ModuleOp>> CreatePromoteResourcesToArgsPass(); |
| |
| // Creates a pass that promotes tf.VarHandleOp to resource arguments for all |
| // functions. |
| std::unique_ptr<OperationPass<ModuleOp>> CreatePromoteVarHandlesToArgsPass(); |
| |
| // Creates a pass that converts readonly reference variables to the |
| // corresponding resource variables. |
| std::unique_ptr<OperationPass<FuncOp>> |
| CreateConvertReadonlyReferenceVariablesToResourceVariablesPass(); |
| |
| // Creates a simple device assignment pass on TF dialect for CoreRT use case. |
| std::unique_ptr<OperationPass<FuncOp>> CreateSimpleTFDeviceAssignmentPass( |
| llvm::StringRef default_device); |
| |
| // Performs resource lifting on the function body to hoist resource variable |
| // accesses outside all control flow statements. |
| LogicalResult ResourceLiftingForFunctionalControlFlow(FuncOp function); |
| |
| // Converts stack ops into operations on local variables, which can later be |
| // removed by resource lifting. Requires known maximum sizes of stacks and |
| // known element shapes of push ops. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateStackOpsDecompositionPass(); |
| |
| // Converts tensor list operations into operations on buffers and sizes. Needs |
| // static shapes and known max element count. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTensorListOpsDecompositionPass(); |
| |
| // Converts tensor array ops into operations on local variables, which can later |
| // be removed by resource lifting. Requires known sizes and known element shapes |
| // (either defined in TensorArrayV3 or implied in the first write). |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateTensorArrayOpsDecompositionPass(); |
| |
| // Create a pass that legalize HLO to TF dialect. |
| std::unique_ptr<OperationPass<FuncOp>> CreateLegalizeHloToTfPass(); |
| |
| // Addds the HLO to TF rewrite patterns to the specified pattern list. |
| void PopulateLegalizeHloToTfPatterns(OwningRewritePatternList* patterns, |
| MLIRContext* context); |
| |
| // Matches sequence of ops to TensorFlow fused kernels. This pass should not be |
| // generally used beyond exporting to runtimes that supports these ops. In the |
| // future these fusions may be codegen'd automatically. |
| std::unique_ptr<OperationPass<FuncOp>> CreateFusedKernelMatcherPass(); |
| |
| // Fuses operations defining `ContractionFusableInterface` interface into the |
| // contraction operations (MatMul, Conv2D, etc...). This is a more general |
| // version of `CreateFusedKernelMatcherPass` that relies on codegen to compose |
| // contraction fusions together. |
| std::unique_ptr<OperationPass<FuncOp>> CreateContractionFusionPass(); |
| |
| // Creates function pass to select device index/fold tf.DeviceIndex. |
| std::unique_ptr<OperationPass<FuncOp>> CreateDeviceIndexSelectorPass(); |
| |
| // Creates function pass to replace InitializeTableFromTextFileV2Ops with |
| // LookupTableImportV2Op ops. |
| std::unique_ptr<OperationPass<FuncOp>> CreateInitTextFileToImportPass(); |
| |
| // Creates function pass to cluster TensorFlow ops by host. The program |
| // generated by this pass will have one function per host where all operations |
| // in the same function are placed on the same host. Each result of the per-host |
| // function will have a "tf.device" attribute which specifies the device |
| // assignment of the result. |
| std::unique_ptr<FunctionPass> CreateClusterTFOpsByHostPass(); |
| |
| // Creates a pass to insert tf_device.send and tf_device.receive ops to make |
| // sure any argument of any op is on the same host of the op itself. |
| std::unique_ptr<OperationPass<mlir::ModuleOp>> CreateCrossHostTransferPass(); |
| |
| // Creates a pass that adds the device attribute to every tf.Const op based on |
| // the device attribute of the operations that read its result. If the result of |
| // a tf.Const op is read by operations placed on multiple devices, then the pass |
| // will replicate the tf.Const op once for each device. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateConstantOpDeviceAssignmentPass(); |
| |
| } // namespace TF |
| |
namespace tf_executor {
// Returns a pass that folds switch nodes with constant predicates.
std::unique_ptr<OperationPass<FuncOp>> CreateSwitchFoldPass();

// Creates a pass to merge IslandOps from TFExecutor dialect.
std::unique_ptr<OperationPass<FuncOp>> CreateTFExecutorIslandCoarseningPass();

// Creates a pass to merge IslandOps for operations marked for execution on TPU.
// This is a V1 backward compatibility.
std::unique_ptr<OperationPass<ModuleOp>>
CreateTFExecutorTPUV1IslandCoarseningPass();

// Creates a pass to outline TPU clusters from a single IslandOp into a nested
// module suitable for being processed as-if it was a V2 module.
// This is a V1 backward compatibility.
std::unique_ptr<OperationPass<ModuleOp>>
CreateTFExecutorTPUV1IslandOutliningPass();

// Creates a pass to inline calls to the nested TPU module, this reverses the
// effect of the `TFExecutorTPUV1IslandOutlining` pass above.
// This is a V1 backward compatibility.
std::unique_ptr<OperationPass<ModuleOp>>
CreateTFExecutorTPUV1IslandInliningPass();

// Creates a pass to prune tf_executor.graph from dead nodes.
std::unique_ptr<OperationPass<FuncOp>> CreateTFExecutorGraphPruningPass();
}  // namespace tf_executor
| |
| namespace TFDevice { |
| // Creates a pass that forms clusters from instructions that are assigned to |
| // same device. |
| std::unique_ptr<OperationPass<FuncOp>> CreateClusterFormationPass(); |
| |
| // Sinks `tf.Const` operations in the ClusterOp region using them. This is |
| // performed in order to limit the number of values implicitly captured in this |
| // region before outlining. |
| std::unique_ptr<OperationPass<FuncOp>> CreateClusterConstantSinkingPass(); |
| |
| // Creates a pass that outlines regions of tf_device.launch operations. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateClusterOutliningPass(); |
| |
| // Creates a pass that clusters ops into tf_device::ClusterOp regions |
| // according to a policy specified by the pass options. |
| std::unique_ptr<FunctionPass> CreateClusterOpsByPolicyPass(); |
| |
| // A pass that decomposes composite resource operations into primitive ones like |
| // ReadVariableOp, AssignVariableOp and other computations to facilitate |
| // transformations like resource op lifting. |
| std::unique_ptr<OperationPass<FuncOp>> CreateDecomposeResourceOpsPass(); |
| |
| // Creates a pass that lifts operations on external resource variables from |
| // device computation nested in `tf_device::LaunchOp` out so that resource |
| // variable load operations are all before device computation while resource |
| // variable store operations are all after device computation. After this pass, |
| // device computation no longer interacts with external resource variables. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateResourceOpLiftingPass(); |
| |
| // Lifts resource operations from tf_device.launch_func ops nested in `op` |
| // outside. Returns a failure if there are remaining resource-type values that |
| // can not be lifted. |
| LogicalResult LiftResourceOps(Operation* op); |
| |
| // Creates a pass that hoists invariant operations in a `tf_device.replicate`. |
| std::unique_ptr<OperationPass<FuncOp>> CreateReplicateInvariantOpHoistingPass(); |
| |
| // Creates a pass that forms replica `tf_executor.island` from a single |
| // `tf_device.replicate` island. |
| std::unique_ptr<OperationPass<FuncOp>> CreateReplicateToIslandPass(); |
| |
| // Creates a pass that creates `tf_executor.island` from a single |
| // `tf_device.parallel_execute` island. |
| std::unique_ptr<OperationPass<FuncOp>> CreateParallelExecuteToIslandsPass(); |
| |
| // Creates a pass that annotates whether a LaunchFuncOp's parameters have the |
| // same data across replicas. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateAnnotateParameterReplicationPass(); |
| |
| // Creates a pass that marks unsupported ops in device cluster for outside |
| // compilation. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateMarkOpsForOutsideCompilationPass(); |
| |
| // Creates a pass that merges control flow with similar predicates. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateMergeControlFlowPass(); |
| |
| // Creates a pass that hoists a `tf_device.launch` body and assigns a `device` |
| // attribute to each TensorFlow dialect op in the body based on the `device` |
| // attribute on the `tf_device.launch`. |
| std::unique_ptr<OperationPass<FuncOp>> CreateLaunchToDeviceAttributePass(); |
| |
| // Creates a pass that hoists a `tf_device.replicate` body and replicates each |
| // TensorFlow dialect op in the body based on its `device` attribute and the |
| // `devices` attribute on the `tf_device.replicate`. |
| std::unique_ptr<OperationPass<mlir::ModuleOp>> CreateTFDeviceReplicationPass(); |
| } // namespace TFDevice |
| |
| namespace TFTPU { |
| // Creates a pass that forms clusters from operations of the same |
| // `_tpu_replicate` attribute. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTPUClusterFormationPass(); |
| |
| // Creates a pass that cleans up `_tpu_replicate` attribute on operations |
| // that are inside a cluster. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateTPUClusterCleanupAttributesPass(); |
| |
| // Creates a pass that removes Identity/IdentityN ops from a cluster. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTPUIdentityPruningPass(); |
| |
| // Creates a pass that allows TPU program inputs to have layouts determined at |
| // run time. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTPUDynamicLayoutPass(); |
| |
| // Creates a pass that remaps and assigns padding map from a |
| // `tf_device.launch_func` `padding_map` attribute to its encapsulated function. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTPUDynamicPaddingMapperPass(); |
| |
| // Creates a pass that adds `tf.ReadVariableOp` to a TPU cluster for resources |
| // the cluster only writes to. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTPUResourceReadForWritePass(); |
| |
| // Creates a pass that partitions unpartitioned resource read/write to |
| // partitioned resource variables. |
| std::unique_ptr<OperationPass<FuncOp>> |
| CreateTPUResourceReadsWritesPartitioningPass(); |
| |
| // Creates a pass that rewrites `tf_device.launch_func` on TPUs into TPU runtime |
| // ops. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTPURewritePass(); |
| |
| // Creates a pass that identifies XLASharding ops in launch op for TPU |
| // computation. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTPUShardingIdentificationPass(); |
| |
| // Creates a pass that moves `tf.AssignVariableOp` into a |
| // `tf_device.parallel_execute` region if the `tf.AssignVariableOp` is the |
| // only consumer of a `tf_device.parallel_execute` result. |
| std::unique_ptr<OperationPass<FuncOp>> |
| CreateTPUParallelExecuteSinkResourceWritePass(); |
| |
| // Creates a pass that merges device variable reads/updates into the surrounded |
| // TPUExecute node. This allows the execute node to perform in-place variable |
| // updates. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTPUMergeVariablesWithExecutePass(); |
| |
| // Creates a pass that wraps ReadVariableOp/AssignVariable op that consumes a |
| // packed tensor to have same device placement as underlying TPU device. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTPUColocateCompositeResourceOps(); |
| |
| // Creates a pass that adds ops which perform formatting on variables at |
| // run-time according to compilation result. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTPUVariableReformattingPass(); |
| |
| // Creates a pass that wraps ops with the same `_xla_outside_compilation` |
| // attribute value in a tf_device.launch op with host device assignment. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateOutsideCompiledToHostLaunchPass(); |
| |
| // Creates a pass that extracts outside compilation (CPU ops inside TPU cluster) |
| // at head/tail of TPU cluster to run before/after TPU computation. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateTPUExtractHeadTailOutsideCompilationPass(); |
| |
| // Creates a pass that expands outside compilation cluster at the head/tail of |
| // TPU computation by adding outside compilation attribute to identity/cast ops |
| // that are only used for host computation. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTPUHostComputationExpansionPass(); |
| |
| // Creates a pass that updates inputs to TPU embedding layer enqueue ops so that |
| // correct ops are invoked during training and evaluation. |
| std::unique_ptr<OperationPass<FuncOp>> |
| CreateTPUUpdateEmbeddingEnqueueOpInputsPass(); |
| |
| // Creates a pass that extract outside compilation (CPU ops inside TPU cluster) |
| // ops to a separate parallel_execute region to run on CPU. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateTPUExtractOutsideCompilationPass(); |
| |
| // Creates a pass that propagates TPU devices to users. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTPUDevicePropagationPass(); |
| |
| // Populates the supplied passmanager with the passes required to run the |
| // bridge. |
| void CreateTPUBridgePipeline(OpPassManager& pm); |
| |
| // Populates the supplied passmanager with the passes required to run the |
| // bridge in V1 mode. |
| void CreateTPUBridgePipelineV1(OpPassManager& pm); |
| |
| // Creates a pass that replicates the tf._TPUCompileMlir op on each host that |
| // needs the compiled program. It helps avoid transferring the compiled binary |
| // between hosts. |
| std::unique_ptr<OperationPass<mlir::ModuleOp>> |
| CreateTPUCompileOpReplicationPass(); |
| |
| } // namespace TFTPU |
| |
// Pulls in the tablegen-generated pass registration code for the passes
// declared above; GEN_PASS_REGISTRATION selects the registration section of
// the generated .inc file.
#define GEN_PASS_REGISTRATION
#include "tensorflow/compiler/mlir/tensorflow/transforms/tf_passes.h.inc"
| |
| } // namespace mlir |
| |
| #endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_PASSES_H_ |