| /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| |
| #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_PASSES_H_ |
| #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_PASSES_H_ |
| |
| #include <memory> |
| |
| #include "mlir/Pass/Pass.h" // from @llvm-project |
| |
| namespace mlir { |
| |
| // Creates a pass that breaks up an island with multiple ops into multiple |
| // islands, each with a single op. |
| std::unique_ptr<OperationPass<FuncOp>> CreateBreakUpIslandsPass(); |
| |
| // Creates a pass that converts mlir functions consisting of mlir ops into a |
| // tf_executor dialect as a single island. |
| std::unique_ptr<OperationPass<FuncOp>> |
| CreateFunctionalToExecutorDialectConversionPass(); |
| |
| namespace TF { |
| // Transforms functional control flow operations in the TensorFlow dialect to |
| // MLIR Control Flow Graph (CFG) form. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTFFunctionalControlFlowToCFG(); |
| |
| // Transforms functional control flow operations in the TensorFlow dialect to |
| // their region based counterparts. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateTFFunctionalControlFlowToRegions(); |
| |
| // Transforms region bases control flow operations in the TensorFlow dialect to |
| // their functional counterparts. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateTFRegionControlFlowToFunctional(); |
| |
| // Materialize the MlirPassthroughOp by replacing it with the MLIR module |
| // attached as an attribute. |
| std::unique_ptr<OperationPass<FuncOp>> CreateMaterializePassthroughOpPass(); |
| |
| // Performs Shape Inference on the TensorFlow dialect using the global registry. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTFShapeInferencePass(); |
| |
| // Optional pass which will unroll BatchMatMul and use only MatMul |
| std::unique_ptr<OperationPass<FuncOp>> CreateUnrollBatchMatMulPassPass(); |
| |
| // Optional pass which will map TF BatchMatMul to TF Einsum |
| std::unique_ptr<OperationPass<FuncOp>> CreateBatchMatMulToEinsumPass(); |
| |
| // Optimizes Tensorflow graph. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTFOptimizePass(); |
| |
| // Creates pass to rewrite RecvTPUEmbeddingActivationsOp and |
| // SendTPUEmbeddingGradients ops to internal variants. |
| std::unique_ptr<OperationPass<FuncOp>> CreateRewriteTPUEmbeddingOpsPass(); |
| |
| // Performs specific fusion for GPU targets. |
| std::unique_ptr<OperationPass<FuncOp>> CreateGpuOpFusionPass(); |
| |
| struct LayoutOptimizationPipelineOptions |
| : public PassPipelineOptions<LayoutOptimizationPipelineOptions> { |
| Option<std::string> force_data_format{ |
| *this, "force-data-format", |
| llvm::cl::desc("Force data format for all layout sensitive ops")}; |
| }; |
| |
| // Layout optimization assigns optimal data layout for layout sensitive |
| // operations, and cancels all redundant transposes. |
| void CreateLayoutOptimizationPipeline( |
| OpPassManager& pm, // NOLINT - MLIR contract is pass by mutable reference. |
| const LayoutOptimizationPipelineOptions& options); |
| |
| struct StandardPipelineOptions |
| : public PassPipelineOptions<StandardPipelineOptions> { |
| Option<bool> enable_inliner{*this, "enable-inliner", |
| llvm::cl::desc("Enable inliner."), |
| llvm::cl::init(false)}; |
| Option<bool> form_clusters{*this, "form-clusters", |
| llvm::cl::desc("Enable Cluster Formation pass."), |
| llvm::cl::init(false)}; |
| }; |
| |
| // Propagates the pass manager with the passes involved in transforming or |
| // optimizing an MLIR graph without any target specialization. |
| // NOLINTNEXTLINE - MLIR contract is pass by mutable reference. |
| void CreateTFStandardPipeline(OpPassManager& pm, |
| const StandardPipelineOptions& options); |
| |
| // Propagates device attributes of resources from callers to callees. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateResourceDeviceInferencePass(); |
| |
| // Creates a pass that promotes resource reads/writes in the main function to |
| // inputs and outputs of the main function, assuming that resource operations |
| // have already been decomposed and function calls have already been inlined. |
| // The pass also annotates the input arguments for resources with the indices |
| // of their aliasing output arguments. |
| std::unique_ptr<OperationPass<ModuleOp>> CreatePromoteResourcesToArgsPass(); |
| |
| // Creates a pass that promotes tf.VarHandleOp to resource arguments for all |
| // functions. |
| std::unique_ptr<OperationPass<ModuleOp>> CreatePromoteVarHandlesToArgsPass(); |
| |
| // Creates a pass that converts readonly reference variables to the |
| // corresponding resource variables. |
| std::unique_ptr<OperationPass<FuncOp>> |
| CreateConvertReadonlyReferenceVariablesToResourceVariablesPass(); |
| |
| // Creates a simple device assignment pass on TF dialect for CoreRT use case. |
| std::unique_ptr<OperationPass<FuncOp>> CreateSimpleTFDeviceAssignmentPass( |
| llvm::StringRef default_device); |
| |
| // Performs resource lifting on the function body to hoist resource variable |
| // accesses outside all control flow statements. |
| LogicalResult ResourceLiftingForFunctionalControlFlow(FuncOp function); |
| |
| // Converts stack ops into operations on local variables, which can later be |
| // removed by resource lifting. Requires known maximum sizes of stacks and |
| // known element shapes of push ops. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateStackOpsDecompositionPass(); |
| |
| // Converts tensor list operations into operations on buffers and sizes. Needs |
| // static shapes and known max element count. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTensorListOpsDecompositionPass(); |
| |
| // Converts tensor array ops into operations on local variables, which can later |
| // be removed by resource lifting. Requires known sizes and known element shapes |
| // (either defined in TensorArrayV3 or implied in the first write). |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateTensorArrayOpsDecompositionPass(); |
| |
| // Create a pass that legalize HLO to TF dialect. |
| std::unique_ptr<OperationPass<FuncOp>> CreateLegalizeHloToTfPass(); |
| |
| // Matches sequence of ops to TensorFlow fused kernels. This pass should not be |
| // generally used beyond exporting to runtimes that supports these ops. In the |
| // future these fusions may be codegen'd automatically. |
| std::unique_ptr<OperationPass<FuncOp>> CreateFusedKernelMatcherPass(); |
| |
| // Creates function pass to select device index/fold tf.DeviceIndex. |
| std::unique_ptr<OperationPass<FuncOp>> CreateDeviceIndexSelectorPass(); |
| } // namespace TF |
| |
| namespace tf_executor { |
| class GraphOp; |
| |
| // Returns a pass that folds switch nodes with constant predicates. |
| std::unique_ptr<OperationPass<FuncOp>> CreateSwitchFoldPass(); |
| |
| // Creates a pass to merge IslandOps from TFExecutor dialect. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTFExecutorIslandCoarseningPass(); |
| |
| // Creates a pass to merge IslandOps for operation marked for execution on TPU. |
| // This is a V1 backward compatibility. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateTFExecutorTPUV1IslandCoarseningPass(); |
| |
| // Creates a pass to outlining TPU clusters from single IslandOp into a nested |
| // module suitable for being processed as-if it was a V2 module. |
| // This is a V1 backward compatibility. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateTFExecutorTPUV1IslandOutliningPass(); |
| |
| // Creates a pass to inline calls to the nested TPU module, this reverses the |
| // effect of the `TFExecutorTPUV1IslandOutlining` pass above. |
| // This is a V1 backward compatibility. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateTFExecutorTPUV1IslandInliningPass(); |
| |
| // Creates a pass to prune tf_executor.graph from dead nodes. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTFExecutorGraphPruningPass(); |
| |
| // Prunes unreachable operations of a tf_executor.graph operation. |
| void PruneGraph(GraphOp graph); |
| |
| // Sink `tf.Const` operations in the LaunchOp region using them. This is |
| // performed in order to limit the number of values implicitly captured in this |
| // region before outlining. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTFExecutorConstantSinkingPass(); |
| |
| } // namespace tf_executor |
| |
| namespace TFDevice { |
| // Creates a pass that forms clusters from instructions that are assigned to |
| // same device. |
| std::unique_ptr<OperationPass<FuncOp>> CreateClusterFormationPass(); |
| |
| // Creates a pass that outlines regions of tf_device.launch operations. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateClusterOutliningPass(); |
| |
| // A pass that decomposes composite resource operations into primitive ones like |
| // ReadVariableOp, AssignVariableOp and other computations to facilitate |
| // transformations like resource op lifting. |
| std::unique_ptr<OperationPass<FuncOp>> CreateDecomposeResourceOpsPass(); |
| |
| // Creates a pass that lifts operations on external resource variables from |
| // device computation nested in `tf_device::LaunchOp` out so that resource |
| // variable load operations are all before device computation while resource |
| // variable store operations are all after device computation. After this pass, |
| // device computation no longer interacts with external resource variables. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateResourceOpLiftingPass(); |
| |
| // Lifts resource operations from tf_device.launch_func ops nested in `op` |
| // outside. Returns a failure if there are remaining resource-type values that |
| // can not be lifted. |
| LogicalResult LiftResourceOps(Operation* op); |
| |
| // Creates a pass that hoists invariant operations in a `tf_device.replicate`. |
| std::unique_ptr<OperationPass<FuncOp>> CreateReplicateInvariantOpHoistingPass(); |
| |
| // Creates a pass that forms replica `tf_executor.island` from a single |
| // `tf_device.replicate` island. |
| std::unique_ptr<OperationPass<FuncOp>> CreateReplicateToIslandPass(); |
| |
| // Creates a pass that creates `tf_executor.island` from a single |
| // `tf_device.parallel_execute` island. |
| std::unique_ptr<OperationPass<FuncOp>> CreateParallelExecuteToIslandsPass(); |
| |
| // Creates a pass that annotates whether a LaunchFuncOp's parameters have the |
| // same data across replicas. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateAnnotateParameterReplicationPass(); |
| |
| // Creates a pass that hoists a `tf_device.launch` body and assigns a `device` |
| // attribute to each TensorFlow dialect op in the body based on the `device` |
| // attribute on the `tf_device.launch`. |
| std::unique_ptr<OperationPass<FuncOp>> CreateLaunchToDeviceAttributePass(); |
| } // namespace TFDevice |
| |
| namespace TFTPU { |
| // Creates a pass that forms clusters from operations of the same |
| // `_tpu_replicate` attribute. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTPUClusterFormationPass(); |
| |
| // Creates a pass that allows TPU program inputs to have layouts determined at |
| // run time. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTPUDynamicLayoutPass(); |
| |
| // Creates a pass that remaps and assigns padding map from a |
| // `tf_device.launch_func` `padding_map` attribute to its encapsulated function. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTPUDynamicPaddingMapperPass(); |
| |
| // Creates a pass that rewrites `tf_device.launch_func` on TPUs into TPU runtime |
| // ops. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTPURewritePass(); |
| |
| // Creates a pass that identifies XLASharding ops in launch op for TPU |
| // computation. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTPUShardingIdentificationPass(); |
| |
| // Creates a pass that merges device variable reads/updates into the surrounded |
| // TPUExecute node. This allows the execute node to perform in-place variable |
| // updates. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTPUMergeVariablesWithExecutePass(); |
| |
| // Creates a pass that adds ops which perform formatting on variables at |
| // run-time according to compilation result. |
| std::unique_ptr<OperationPass<ModuleOp>> CreateTPUVariableReformattingPass(); |
| |
| // Creates a pass that groups outside compiled operations (CPU ops inside TPU |
| // cluster) into clusters that can be extracted and run on the CPU. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTPUOutsideCompilationClusterPass(); |
| |
| // Creates a pass that extracts outside compilation (CPU ops inside TPU cluster) |
| // at head/tail of TPU cluster to run before/after TPU computation. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateTPUExtractHeadTailOutsideCompilationPass(); |
| |
| // Creates a pass that expands outside compilation cluster at the head/tail of |
| // TPU computation by adding outside compilation attribute to identity/cast ops |
| // that are only used for host computation. |
| std::unique_ptr<OperationPass<FuncOp>> CreateTPUHostComputationExpansionPass(); |
| |
| // Creates a pass that extract outside compilation (CPU ops inside TPU cluster) |
| // ops to a separate parallel_execute region to run on CPU. |
| std::unique_ptr<OperationPass<ModuleOp>> |
| CreateTPUExtractOutsideCompilationPass(); |
| |
| // Populates the supplied passmanager with the passes required to run the |
| void CreateTPUBridgePipeline(OpPassManager& pm); |
| |
| // Populates the supplied passmanager with the passes required to run the |
| // bridge in V1 mode. |
| void CreateTPUBridgePipelineV1(OpPassManager& pm); |
| |
| } // namespace TFTPU |
| |
| } // namespace mlir |
| |
| #endif // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_TRANSFORMS_PASSES_H_ |