/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_COMPILER_TF2TENSORRT_CONVERT_CONVERT_NODES_H_
#define TENSORFLOW_COMPILER_TF2TENSORRT_CONVERT_CONVERT_NODES_H_
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "absl/types/optional.h"
#include "tensorflow/compiler/tf2tensorrt/convert/op_converter.h"
#include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
#include "tensorflow/compiler/tf2tensorrt/convert/weights.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_allocator.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_int8_calibrator.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_shape_optimization_profiles.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_tensor_proxy.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/graph/graph.h"
#include "tensorflow/core/grappler/costs/graph_properties.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/stream_executor/lib/statusor.h"
#if GOOGLE_CUDA && GOOGLE_TENSORRT
#include "third_party/tensorrt/NvInfer.h"
namespace tensorflow {
namespace tensorrt {
namespace convert {
using ::stream_executor::port::StatusOr;
#define TFTRT_INTERNAL_ERROR_AT_NODE(node) \
do { \
return errors::Internal("TFTRT::", __FUNCTION__, ":", __LINE__, \
" failed to add TRT layer, at: ", node); \
} while (0)
#define TFTRT_RETURN_ERROR_IF_NULLPTR(ptr, node) \
do { \
if (ptr == nullptr) { \
TFTRT_INTERNAL_ERROR_AT_NODE(node); \
} \
} while (0)
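// Illustrative use of the macros above, as a sketch: inside an op converter,
// 'converter', 'input_tensor' and 'node_def' are assumed to be in scope.
//
//   nvinfer1::IShuffleLayer* layer =
//       converter->network()->addShuffle(*input_tensor->trt_tensor());
//   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());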
struct EngineConnection {
// Constructs a non-control edge.
EngineConnection(const string& outside, int out_id, int out_port,
const string& inside, int in_id, int in_port,
bool input_edge, int port)
: outside_node_name(outside),
outside_id(out_id),
outside_port(out_port),
inside_node_name(inside),
inside_id(in_id),
inside_port(in_port),
is_input_edge(input_edge),
port_number(port) {}
// Constructs a control edge.
EngineConnection(const string& outside, int out_id, const string& inside,
int in_id, bool input_edge)
: outside_node_name(outside),
outside_id(out_id),
outside_port(Graph::kControlSlot),
inside_node_name(inside),
inside_id(in_id),
inside_port(Graph::kControlSlot),
is_input_edge(input_edge),
port_number(Graph::kControlSlot) {}
bool is_control_edge() const { return port_number == Graph::kControlSlot; }
const string outside_node_name;
const int outside_id;
const int outside_port;
PartialTensorShape outside_shape; // Only set for input edge.
const string inside_node_name;
const int inside_id;
const int inside_port;
PartialTensorShape inside_shape; // Only set for output edge.
DataType connection_type;
const bool is_input_edge;
// The port number of the TRT node connected with this edge.
const int port_number;
};
struct EngineInfo {
EngineInfo()
: engine_type(EngineType::TRTStatic),
max_workspace_size_bytes(0),
max_batch_size(absl::nullopt),
maximum_cached_engines(0),
precision_mode(TrtPrecisionMode::FP32),
use_calibration(true),
allow_build_at_runtime(true),
use_explicit_precision(false) {}
string engine_name;
string device;
GraphDef segment_graph_def;
// Non-control input connections inside this vector are sorted such that the
// segment nodes connecting to them are in topological order. In addition,
// non-control connections must contain no duplicates.
std::vector<EngineConnection> connections;
enum class EngineType { TRTStatic = 0, TRTDynamic = 1 };
EngineType engine_type;
int64 max_workspace_size_bytes;
absl::optional<int> max_batch_size;
int maximum_cached_engines;
TrtPrecisionMode precision_mode;
bool use_calibration;
bool allow_build_at_runtime;
bool use_explicit_precision;
};
// Constructs a graphdef from the segment in the given graph and stores it to
// the engine_info. Adds _Arg nodes for input edges (InputPH_*) and _Retval
// nodes for output edges (OutputPH_*). Maintains the topological order of the
// non-input/output nodes in the graphdef. This function needs to be called
// before TensorRT layers are created because it prepares the original graph
// for TensorRT conversion.
//
// - graph: the graph containing the segment.
// - graph_properties: the shape and type properties of 'graph'.
// - subgraph_nodes: the nodes of the segment, which must be sorted in
// topological order.
// - engine_info: a data structure that records the information about the
// engine containing the subgraph.
//
// TODO(aaroey): add tests to validate these properties.
Status ConvertSegmentToGraphDef(
const Graph* graph, const grappler::GraphProperties& graph_properties,
const std::vector<const Node*>& subgraph_nodes, EngineInfo* engine_info);
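// A minimal usage sketch (obtaining 'graph', 'graph_properties' and the
// topologically sorted 'subgraph_nodes' is elided):
//
//   EngineInfo engine_info;
//   TF_RETURN_IF_ERROR(ConvertSegmentToGraphDef(
//       &graph, graph_properties, subgraph_nodes, &engine_info));
//   // engine_info.segment_graph_def now holds the extracted segment.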
// Converts the given subgraph to a TRT engine saved in 'engine'. Returns OK
// iff 'builder' successfully builds the engine. If the result is not OK,
// 'engine' will be set to nullptr.
// Once this returns, 'builder' is no longer needed and can be safely
// destroyed.
//
// - convert_successfully: indicates whether the conversion to the TensorRT
// network is successful. This is different from successfully building the
// engine: building can still fail afterwards.
// Note: when 'cluster' is not null, it contains the graph to be converted.
// We may perform additional optimizations on the graph before converting it.
Status ConvertGraphDefToEngine(
const GraphDef& gdef, TrtPrecisionMode precision_mode, int max_batch_size,
size_t max_workspace_size_bytes,
const std::vector<PartialTensorShape>& input_shapes,
nvinfer1::ILogger* logger, nvinfer1::IGpuAllocator* allocator,
TRTInt8Calibrator* calibrator,
TrtUniquePtrType<nvinfer1::ICudaEngine>* engine, bool use_calibration,
const bool use_implicit_batch, bool* convert_successfully,
TrtShapeOptimizationProfile* profiles, absl::string_view engine_name,
bool use_explicit_precision,
tensorflow::grappler::Cluster* cluster = nullptr);
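// A hedged usage sketch (logger, allocator and profile setup elided; the
// values below are illustrative only):
//
//   TrtUniquePtrType<nvinfer1::ICudaEngine> engine;
//   bool convert_ok = false;
//   TF_RETURN_IF_ERROR(ConvertGraphDefToEngine(
//       engine_info.segment_graph_def, TrtPrecisionMode::FP32,
//       /*max_batch_size=*/1, /*max_workspace_size_bytes=*/1 << 30,
//       input_shapes, logger, allocator, /*calibrator=*/nullptr, &engine,
//       /*use_calibration=*/false, /*use_implicit_batch=*/true, &convert_ok,
//       profiles, "my_engine", /*use_explicit_precision=*/false));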
// Helper class for the segmenter to determine whether an output edge from the
// TRT segment is valid.
class OutputEdgeValidator {
public:
// Returns true if the specified edge is eligible to be an output edge of the
// TRT segment.
bool operator()(const Edge* out_edge) const;
};
// Class to verify whether a specific TF node is supported by TRT.
class TrtNodeValidator {
public:
// 'graph_properties' is the GraphProperties of the graph whose nodes will be
// checked by IsTensorRTCandidate() later. It is used to get the shape and
// data type information of a tensor for validation purposes.
TrtNodeValidator(const grappler::GraphProperties& graph_properties,
TrtPrecisionMode precision_mode, bool use_calibration,
bool use_implicit_batch, bool use_explicit_precision);
// Returns OK iff 'node' is a TF-TRT conversion candidate, which can be added
// to a TRT subgraph and later converted into a TRT engine.
Status IsTensorRTCandidate(const Node* node);
static const std::set<string>* quantize_ops;
// Returns the validator for the given op type. If no validator is registered
// for a specific op, no validation is needed and IsTensorRTCandidate() will
// return OK for it.
StatusOr<OpConverter> GetValidator(const std::string& op);
private:
// Convert a Const node to a TRT_TensorOrWeights.
Status ConvertConstToWeights(const NodeDef& const_node_def,
const std::vector<TRT_TensorOrWeights>& inputs,
TRT_TensorOrWeights* output);
// Converts the output tensor at 'output_port' of 'node_def' to a
// TRT_TensorOrWeights which will later be used as an input to other nodes
// and passed to the op validator.
Status ConvertToTensorOrWeights(const NodeDef& node_def, int output_port,
TRT_TensorOrWeights* tensor_or_weights);
// Stores the weights added during validation. Some validations (e.g. the
// validation of a Const node) may produce weights.
TrtWeightStore weight_store_;
// GraphProperties of the graph whose nodes are to be validated by
// IsTensorRTCandidate().
const grappler::GraphProperties& graph_properties_;
// Quantization ops are only converted when using quantized precisions.
const TrtPrecisionMode precision_mode_;
const bool use_calibration_;
const bool use_implicit_batch_;
const bool use_explicit_precision_;
friend class ValidatorTest;
friend class OpConverterTest;
};
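// A usage sketch for the validator ('graph_properties' and 'node' are assumed
// to be in scope):
//
//   TrtNodeValidator validator(graph_properties, TrtPrecisionMode::FP32,
//                              /*use_calibration=*/false,
//                              /*use_implicit_batch=*/true,
//                              /*use_explicit_precision=*/false);
//   const Status status = validator.IsTensorRTCandidate(node);
//   // status.ok() indicates the node can join a TRT segment.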
// Class to convert TF nodes to a TRT network.
class Converter {
public:
// Used for Converter::RenameAndMarkOutputTensors()
struct EngineOutputInfo {
// The TRT tensor name which produces the output.
string source_tensor_name;
// The TensorFlow node name which is receiving the output from the TRT
// engine. This should always be the Identity node created in
// ConvertSegmentToGraphDef.
string dest_node_name;
// Output type. TensorRT requires this to be explicitly set for engine
// outputs.
nvinfer1::DataType trt_dtype;
};
static StatusOr<std::unique_ptr<Converter>> Create(
TrtPrecisionMode precision_mode, bool use_calibration,
nvinfer1::ILogger* trt_logger, const bool use_implicit_batch,
absl::string_view engine_name, bool use_explicit_precision = false);
//////////////////////////////////////////////////////////////////////////////
// Methods used by the TRT engine builder to build a TRT network from a TF
// function/subgraph.
// Converts the node and adds the corresponding layers to the TRT network.
Status ConvertNode(const NodeDef& node_def);
// Adds an input tensor to the TRT network with the given 'name', 'dtype',
// 'dims' and 'batch_size'.
Status AddInputTensor(const string& name, nvinfer1::DataType dtype,
const nvinfer1::Dims& dims, int batch_size);
// Marks the tensors with the names specified by 'source_tensor_name' as
// outputs of the TRT network, and sets their names in the TRT network to
// 'dest_node_name'.
Status RenameAndMarkOutputTensors(
const std::vector<EngineOutputInfo>& output_tensors);
// Builds a TRT engine using the created network.
Status BuildCudaEngine(TrtUniquePtrType<nvinfer1::ICudaEngine>* engine,
int max_batch_size, size_t max_workspace_size_bytes,
nvinfer1::IGpuAllocator* allocator,
TRTInt8Calibrator* calibrator,
TrtShapeOptimizationProfile* profiles);
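// A typical build flow, as a sketch (error handling abbreviated; 'logger',
// 'dims', 'segment_nodes', 'outputs', 'allocator', 'calibrator' and
// 'profiles' are assumed to be set up by the caller):
//
//   auto converter = Converter::Create(TrtPrecisionMode::FP32,
//                                      /*use_calibration=*/false, logger,
//                                      /*use_implicit_batch=*/true,
//                                      "my_engine");
//   TF_RETURN_IF_ERROR(converter.status());
//   (*converter)->AddInputTensor("input", nvinfer1::DataType::kFLOAT, dims,
//                                /*batch_size=*/1);
//   for (const NodeDef& node_def : segment_nodes) {
//     TF_RETURN_IF_ERROR((*converter)->ConvertNode(node_def));
//   }
//   TF_RETURN_IF_ERROR((*converter)->RenameAndMarkOutputTensors(outputs));
//   TrtUniquePtrType<nvinfer1::ICudaEngine> engine;
//   TF_RETURN_IF_ERROR((*converter)->BuildCudaEngine(
//       &engine, /*max_batch_size=*/1, /*max_workspace_size_bytes=*/1 << 30,
//       allocator, calibrator, profiles));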
//////////////////////////////////////////////////////////////////////////////
// Methods used by op converters to convert individual TF node and add layers
// to the TRT network.
// Op converters (e.g. ConvertReshape) need to access the TRT network in order
// to add TRT layers.
nvinfer1::INetworkDefinition* network() { return trt_network_.get(); }
// What precision are we targeting?
TrtPrecisionMode precision_mode() const { return precision_mode_; }
// Returns true if calibration will be or was previously performed on this
// network.
bool use_calibration() const { return use_calibration_; }
// Returns true if implicit batch mode is enabled.
bool use_implicit_batch() const { return use_implicit_batch_; }
// This function should be called when we know the quantization range of a
// tensor from a quantize/dequantize node.
void ProvideQuantizationRange(ITensorProxyPtr* tensor, float min_range,
float max_range);
// Should be called when the full TRT network has been constructed and before
// building the engine.
void MaybeApplyQuantizationRanges();
// Below are helper methods for op converters to add different layers to the
// TRT network.
// Transposes 'input_tensor' with the given permutation 'order_with_batch_dim'
// into 'output_tensor'. The permutation 'order_with_batch_dim' contains the
// batch dimension, which should always be 0. If this is for adding a
// transpose layer to support the conversion of 'node_def', callers need to
// provide a non-empty 'sub_op_name' appended to the name of 'node_def' to
// avoid layer name conflicts.
Status TransposeTensor(ITensorProxyPtr input_tensor,
const std::vector<int>& order_with_batch_dim,
ITensorProxyPtr* output_tensor,
const NodeDef& node_def,
absl::string_view sub_op_name = "");
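// Example, as a sketch ('converter', 'input' and 'node_def' are assumed to be
// in scope): transpose NHWC to NCHW, keeping the batch dim at position 0.
//
//   ITensorProxyPtr transposed = nullptr;
//   TF_RETURN_IF_ERROR(converter->TransposeTensor(
//       input, {0, 3, 1, 2}, &transposed, node_def, "to_NCHW"));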
// Reshapes a dynamic shape tensor by removing or adding dimensions of size 1,
// and/or permuting the dimensions. The new shape is derived from the shape of
// the input tensor according to the slices and size_for_added_dims arguments.
//
// If there were at most one unknown dimension, we could set the new shape
// using IShuffleLayer::setReshapeDimensions, which treats -1 as a special
// value (the same way as TF). In general, however, there can be more than one
// unknown dimension, and we have to manipulate the shape tensors at runtime
// to define the new shape. This helper function defines the necessary shape
// inference layers and calls reshape using the calculated new shape.
//
// Example:
//
// Assume that we want to reshape a tensor from shape {A,B,C,D} to {C,D,A,B}
// (no transpose, just change the shape). In dynamic shape mode, the A,B,C,D
// values are not necessarily known at conversion time; they can all be -1. We
// can only define the new shape at runtime, when the actual shape is already
// known. To define the new shape:
// - We use an IShapeLayer to retrieve a shape tensor with the {A,B,C,D}
// values.
// - Create two slices {C,D} and {A,B} of the shape tensor.
// - Concatenate these slices into {C,D,A,B}.
// - Set the {C,D,A,B} shape tensor as an input shape tensor for
// IShuffleLayer.
//
// This can be achieved by calling DynamicReshape(input, {{2,4},{0,2}},
// params).
//
// Before each slice we can insert new dims if the corresponding
// size_for_added_dims element is not negative. The size_for_added_dims array
// can have more than slices.size() elements, in order to insert a dimension
// after the last slice. For example, to add two leading 1 dimensions, and
// three trailing 1 dimensions, call DynamicReshape(input, {{0,nbDims}},
// {2, 3}).
//
// Parameters:
// input - input tensor
// slices - [start, end) pairs of slices
// params - conversion parameters
// output - reshaped tensor
// size_for_added_dims - size of dimension inserted right before slice[i]. We
// only insert a new dim if size_for_added_dims[i] >= 0.
Status DynamicReshape(ITensorProxyPtr input,
std::vector<std::pair<int, int>> slices,
OpConverterParams* params, ITensorProxyPtr* output,
std::vector<int> size_for_added_dims = {},
absl::optional<int> op_instance = absl::nullopt);
// Inserts a singleton dimension at axis for a dynamic shape tensor.
Status DynamicExpandDims(ITensorProxyPtr input, const nvinfer1::Dims& dims,
int axis, OpConverterParams* params,
ITensorProxyPtr* output,
absl::optional<int> op_instance = absl::nullopt);
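// Sketch: insert a singleton axis at position 1, turning an {N, C} tensor
// into {N, 1, C} ('input' and 'params' are assumed to be in scope; the dims
// argument is taken from the input tensor):
//
//   ITensorProxyPtr expanded = nullptr;
//   TF_RETURN_IF_ERROR(DynamicExpandDims(
//       input, input->trt_tensor()->getDimensions(), /*axis=*/1, params,
//       &expanded));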
// Helper function to add a squeeze op to the network.
//
// The input_dims argument stores the TRT dimensions of the input tensor,
// where the dimensions to be squeezed are replaced by 0.
Status SqueezeTensor(ITensorProxyPtr input, std::vector<int>* input_dims,
OpConverterParams* params, ITensorProxyPtr* output);
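// Sketch: squeeze the middle dimension of a tensor with TRT dims {8, 1, 16};
// a zero marks the dimension to remove ('input' and 'params' are assumed to
// be in scope):
//
//   std::vector<int> input_dims = {8, 0, 16};
//   ITensorProxyPtr squeezed = nullptr;
//   TF_RETURN_IF_ERROR(SqueezeTensor(input, &input_dims, params, &squeezed));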
// Creates an IConstantLayer using 'weights' whose dimensions are specified by
// 'dims', and returns the output ITensor.
ITensorProxyPtr CreateConstantLayer(const TRT_ShapedWeights& weights,
const nvinfer1::Dims& dims);
// Gets the min and max values in a TRT_ShapedWeights.
Status GetWeightRange(const TRT_ShapedWeights& weights, float* out_min,
float* out_max) const;
// Constructs a name and passes it to the TensorRT layer to support xprof.
void SetLayerName(
nvinfer1::ILayer* layer, const NodeDef& node_def,
absl::string_view sub_op_name = "",
absl::optional<int> sub_op_instance = absl::nullopt,
absl::optional<std::string> origin_node_name = absl::nullopt);
void SetLayerName(nvinfer1::ILayer* layer, absl::string_view main_op_name,
absl::string_view sub_op_name,
absl::optional<int> sub_op_instance = absl::nullopt);
std::unordered_map<string, TRT_TensorOrWeights>& TensorsMap() {
return trt_tensors_;
}
bool UseExplicitPrecision() const { return use_explicit_precision_; }
private:
Converter(TrtPrecisionMode precision_mode, bool use_calibration,
nvinfer1::ILogger* trt_logger, const bool use_implicit_batch,
absl::string_view engine_name, bool use_explicit_precision);
Status Init(nvinfer1::ILogger* trt_logger);
// Verifies that the provided batch_size is consistent with batch_size_ and
// updates it if necessary.
Status MaybeUpdateBatchSize(int batch_size);
// Adds the provided tensor/weights to the map trt_tensors_.
Status AddTensorOrWeights(const string& name, TRT_TensorOrWeights input);
// Gets the tensor/weights from trt_tensors_ by 'name'.
Status GetTensorOrWeights(const string& name, TRT_TensorOrWeights* output);
// Gets the inputs of 'node_def' from trt_tensors_.
Status GetInputs(const NodeDef& node_def,
std::vector<TRT_TensorOrWeights>* inputs) const;
// Tensors/weights added during construction of trt_network_.
std::unordered_map<string, TRT_TensorOrWeights> trt_tensors_;
// The TRT builder used to create the network and build the engine. Owned via
// the unique pointer below.
TrtUniquePtrType<nvinfer1::IBuilder> trt_builder_;
// The TRT network being built.
TrtUniquePtrType<nvinfer1::INetworkDefinition> trt_network_;
// Stores the weights added during construction of trt_network_.
TrtWeightStore weight_store_;
// During conversion, this table is populated with quantization ranges per
// tensor. MaybeApplyQuantizationRanges() will use this table to set the TRT
// quantization ranges. Since TRT only supports symmetric ranges, we will
// store the range as a single float = max(abs(min_range), abs(max_range)).
// Range refers to the floating point values, e.g. min_range = 0.0f, max_range
// = 6.0f for Relu6.
std::unordered_map<ITensorProxyPtr*, float> quantization_ranges_proxy_;
std::unordered_map<nvinfer1::ITensor*, float> quantization_ranges_;
const TrtPrecisionMode precision_mode_;
const bool use_calibration_;
// If this is false, all dimensions including the batch dimension are
// set explicitly.
const bool use_implicit_batch_;
// Batch size of the inputs to trt_network_ added by AddInputTensor(). During
// network construction the converter updates this value, uses it to verify
// that the batch sizes of all inputs are compatible, and makes sure each
// individual TF node is acceptable to TRT.
int batch_size_ = -1;
// Assigns an ID to each constant layer we create, so that we can assign a
// unique name to the layer.
int next_constant_layer_id_ = 0;
// The name of the TRTEngineOp node.
absl::string_view engine_name_;
// Indicates whether to use explicit precision in TensorRT (Q/DQ support).
bool use_explicit_precision_;
friend class ConverterTest;
friend class OpConverterTest;
};
// Converts 'input' of 'node_def' into 'tensor' with the shape specified by
// 'dims' (which doesn't contain the batch dimension).
//
// If validation_only is true, it doesn't do the conversion but only does some
// minimal validation of the eligibility of the conversion, and *tensor will
// be set to nullptr.
// If validation_only is false, 'converter' must not be nullptr.
Status PrepareTensorForShape(
Converter* converter, const TRT_TensorOrWeights& input,
const DimsAdapter& dims, const bool validation_only,
ITensorProxyPtr* tensor, const NodeDef& node_def,
absl::optional<int> op_instance = absl::nullopt,
absl::optional<std::string> origin_node_name = absl::nullopt);
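// Illustrative call, as a sketch ('converter', 'input', 'dims' and 'node_def'
// are assumed to be in scope; the DimsAdapter construction is elided):
//
//   ITensorProxyPtr reshaped = nullptr;
//   TF_RETURN_IF_ERROR(PrepareTensorForShape(
//       converter, input, dims, /*validation_only=*/false, &reshaped,
//       node_def));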
// Returns OK if the broadcast scheme is supported and computes the shapes
// after broadcasting. check_feasibility can be set to false in cases where
// dimensions do not need to match exactly (as in the case of BatchMatMulV2).
Status GetTrtBroadcastShape(const TRT_TensorOrWeights& operand_l,
const TRT_TensorOrWeights& operand_r,
const bool check_feasibility,
const bool use_implicit_batch,
nvinfer1::Dims* operand_l_new_dims,
nvinfer1::Dims* operand_r_new_dims);
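// Sketch: compute broadcast-compatible dims for two operands ('lhs' and 'rhs'
// are TRT_TensorOrWeights assumed to be in scope):
//
//   nvinfer1::Dims lhs_dims, rhs_dims;
//   TF_RETURN_IF_ERROR(GetTrtBroadcastShape(
//       lhs, rhs, /*check_feasibility=*/true, /*use_implicit_batch=*/true,
//       &lhs_dims, &rhs_dims));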
template <typename T>
using operationMap = std::unordered_map<std::string, T>;
// Map of all supported UnaryOperations.
typedef operationMap<nvinfer1::UnaryOperation> unaryOperationMap;
const unaryOperationMap* UnaryOperationMap();
// Map of all supported ActivationTypes.
const operationMap<nvinfer1::ActivationType>* ActivationTypeMap();
// Map of all supported BinaryOperations.
typedef operationMap<nvinfer1::ElementWiseOperation> binaryOperationMap;
const binaryOperationMap* BinaryOperationMap();
template <typename T>
absl::InlinedVector<std::string, 10> GetOperationNames(const T& set) {
absl::InlinedVector<std::string, 10> result;
absl::c_transform(set, std::back_inserter(result),
                  [](const auto& x) { return x.first; });
return result;
}
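// For example, the names of all supported unary ops can be listed as:
//
//   absl::InlinedVector<std::string, 10> names =
//       GetOperationNames(*UnaryOperationMap());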
// Adds a matrix multiplication operation to the TensorRT graph. The "params"
// pointer is only used to access the TRT network builder. The inputs and
// parameters for the op are fully specified by input_[a|b] and transpose_[a|b].
StatusOr<ITensorProxyPtr> ConvertMatMulImpl(OpConverterParams* params,
TRT_TensorOrWeights input_a,
TRT_TensorOrWeights input_b,
bool transpose_a, bool transpose_b);
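// Sketch: emit A * B^T ('params', 'input_a' and 'input_b' are assumed to be
// in scope):
//
//   StatusOr<ITensorProxyPtr> result = ConvertMatMulImpl(
//       params, input_a, input_b, /*transpose_a=*/false,
//       /*transpose_b=*/true);
//   TF_RETURN_IF_ERROR(result.status());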
} // namespace convert
} // namespace tensorrt
} // namespace tensorflow
#endif // GOOGLE_CUDA && GOOGLE_TENSORRT
#endif // TENSORFLOW_COMPILER_TF2TENSORRT_CONVERT_CONVERT_NODES_H_