| /* Copyright 2018 The TensorFlow Authors. All Rights Reserved. |
| |
| Licensed under the Apache License, Version 2.0 (the "License"); |
| you may not use this file except in compliance with the License. |
| You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==============================================================================*/ |
| |
| #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h" |
| |
| #include <algorithm> |
| #include <cstring> |
| #include <list> |
| #include <map> |
| #include <memory> |
| #include <set> |
| #include <unordered_map> |
| #include <unordered_set> |
| #include <utility> |
| #include <vector> |
| |
| #include "tensorflow/contrib/tensorrt/convert/utils.h" |
| #include "tensorflow/contrib/tensorrt/log/trt_logger.h" |
| #include "tensorflow/contrib/tensorrt/plugin/trt_plugin_factory.h" |
| #include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" |
| #include "tensorflow/contrib/tensorrt/resources/trt_resources.h" |
| #include "tensorflow/core/framework/node_def.pb.h" // NOLINT |
| #include "tensorflow/core/framework/node_def_builder.h" |
| #include "tensorflow/core/framework/tensor.pb.h" // NOLINT |
| #include "tensorflow/core/framework/tensor_shape.pb.h" // NOLINT |
| #include "tensorflow/core/framework/types.h" |
| #include "tensorflow/core/graph/algorithm.h" |
| #include "tensorflow/core/graph/graph.h" |
| #include "tensorflow/core/graph/graph_constructor.h" |
| #include "tensorflow/core/lib/core/errors.h" |
| #include "tensorflow/core/lib/core/status.h" |
| #include "tensorflow/core/lib/strings/numbers.h" |
| #include "tensorflow/core/lib/strings/str_util.h" |
| #include "tensorflow/core/lib/strings/strcat.h" |
| #include "tensorflow/core/platform/logging.h" |
| #include "tensorflow/core/platform/tensor_coding.h" |
| #include "tensorflow/core/platform/types.h" |
| |
| #if GOOGLE_CUDA |
| #if GOOGLE_TENSORRT |
| #include "tensorrt/include/NvInfer.h" |
| |
// Checks if the types are equal. Casts to int first so that the values show
// up in the failure log message.
| #define TFTRT_CHECK_EQ_TYPE(val1, val2) CHECK_EQ((int)val1, (int)val2) |
| |
| #define TFTRT_INTERNAL_ERROR_AT_NODE(node) \ |
| do { \ |
| return tensorflow::errors::Internal( \ |
| "TFTRT::", __FUNCTION__, "failed to add TRT layer, at: ", node); \ |
| } while (0) |
| |
| #define TFTRT_RETURN_ERROR_IF_FALSE(status, node) \ |
| do { \ |
| if (status == false) { \ |
| TFTRT_INTERNAL_ERROR_AT_NODE(node); \ |
| } \ |
| } while (0) |
| |
| #define TFTRT_RETURN_ERROR_IF_NULLPTR(ptr, node) \ |
| do { \ |
| if (ptr == nullptr) { \ |
| TFTRT_INTERNAL_ERROR_AT_NODE(node); \ |
| } \ |
| } while (0) |
| |
| namespace tensorflow { |
| namespace tensorrt { |
| // TODO(aaroey): put these constants into some class. |
| const char* const kInputPHName = "TensorRTInputPH_"; |
| const char* const kOutputPHName = "TensorRTOutputPH_"; |
| |
| namespace convert { |
| using ::tensorflow::str_util::Split; |
| using ::tensorflow::strings::StrAppend; |
| using ::tensorflow::strings::StrCat; |
| |
| namespace { |
| |
| inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype, |
| nvinfer1::DataType* trt_dtype) { |
| switch (tf_dtype) { |
| case tensorflow::DataType::DT_FLOAT: |
| *trt_dtype = nvinfer1::DataType::kFLOAT; |
| break; |
| case tensorflow::DataType::DT_INT8: |
| *trt_dtype = nvinfer1::DataType::kINT8; |
| break; |
| case tensorflow::DataType::DT_HALF: |
| *trt_dtype = nvinfer1::DataType::kHALF; |
| break; |
| #if NV_TENSORRT_MAJOR > 3 |
| case tensorflow::DataType::DT_INT32: |
| *trt_dtype = nvinfer1::DataType::kINT32; |
| break; |
| #endif |
| default: |
| return tensorflow::errors::InvalidArgument( |
| "Unsupported data type ", tensorflow::DataTypeString(tf_dtype)); |
| } |
| return tensorflow::Status::OK(); |
| } |
| |
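// Gets the dtype and (partial) shape of an engine input, read from the
// output properties at `out_port` of `outside_node` (the node outside the
// TRT segment that feeds it). If no output properties are recorded, only the
// dtype is set.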
| void GetInputProperties(const grappler::GraphProperties& graph_properties, |
| const Node* outside_node, const int out_port, |
| PartialTensorShape* shape, |
| tensorflow::DataType* dtype) { |
| if (graph_properties.HasOutputProperties(outside_node->name())) { |
| auto output_params = |
| graph_properties.GetOutputProperties(outside_node->name()); |
| auto out_shape = output_params.at(out_port); |
| *dtype = out_shape.dtype(); |
| *shape = out_shape.shape(); |
| } else { |
| VLOG(0) << "Unknown output shape" << outside_node->name(); |
| *dtype = outside_node->output_type(out_port); |
| } |
| } |
| |
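// Gets the dtype and (partial) shape of an engine output, read from the
// input properties at `in_port` of `outside_node` (the node outside the
// TRT segment that consumes it).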
| void GetOutputProperties(const grappler::GraphProperties& graph_properties, |
| const Node* outside_node, const int in_port, |
| PartialTensorShape* shape, |
| tensorflow::DataType* dtype) { |
| if (graph_properties.HasInputProperties(outside_node->name())) { |
| auto input_params = |
| graph_properties.GetInputProperties(outside_node->name()); |
| auto in_shape = input_params.at(in_port); |
| *dtype = in_shape.dtype(); |
| *shape = in_shape.shape(); |
| } else { |
| *dtype = outside_node->input_type(in_port); |
| } |
| } |
| |
| tensorflow::Status ValidateInputProperties(const PartialTensorShape& shape, |
| const tensorflow::DataType dtype, |
| nvinfer1::DataType* trt_dtype) { |
| // TODO(aaroey): some of these checks also apply to IsTensorRTCandidate(), so |
| // put them there instead. |
| TF_RETURN_IF_ERROR(ConvertDType(dtype, trt_dtype)); |
| if (shape.dims() < 0) { |
| return tensorflow::errors::InvalidArgument("Input tensor rank is unknown."); |
| } |
  // shape.dims() includes the batch dimension, and TensorRT supports at most
  // nvinfer1::Dims::MAX_DIMS (i.e. 8) non-batch dimensions, so a rank greater
  // than 9 is rejected.
  if (shape.dims() > 9) {
    return tensorflow::errors::OutOfRange(
        "Input tensor rank is greater than 8.");
| } |
| for (int d = 1; d < shape.dims(); ++d) { |
| if (shape.dim_size(d) < 0) { |
| return tensorflow::errors::InvalidArgument( |
| "Input tensor with shape ", shape.DebugString(), |
| " has an unknown non-batch dimemension at dim ", d); |
| } |
| } |
| return Status::OK(); |
| } |
| |
| string DebugString(const nvinfer1::Dims& dims) { |
| string out = StrCat("nvinfer1::Dims(nbDims=", dims.nbDims, ", d="); |
  for (int i = 0; i < dims.nbDims; ++i) {
| StrAppend(&out, dims.d[i], ","); |
| } |
| StrAppend(&out, ")"); |
| return out; |
| } |
| |
// Returns whether the broadcast is feasible.
| bool TensorRTGetBroadcastShape(const nvinfer1::Dims& operand_l, |
| const bool operand_l_is_tensor, |
| const nvinfer1::Dims& operand_r, |
| const bool operand_r_is_tensor, |
| nvinfer1::Dims* operand_l_new_shape, |
| nvinfer1::Dims* operand_r_new_shape) { |
| // *************************************************************************** |
| // TensorRT Elementwise op supports broadcast but requires both tensor to be |
| // of Identical rank |
| // |
| // We consider case of: |
| // 1. operand_l to be a Tensor & operand_r to be a Const; |
| // 2. operand_l to be a Tensor & operand_r to be a Tensor; |
| // note: const op const (constant folding) should fallback to TensorFlow |
| // |
| // broadcast scheme: |
| // T: 1 3 5 (tensor would not have batch dimension) |
| // W: 1 1 3 1 (weight would have all explicit dimensions) |
| // i. fill in explicit dimensions |
| // -> T: -1 1 3 5 (we put a -1 for batch dimension) |
| // -> W: 1 1 3 1 |
| // ii. compare broadcast feasibility |
| // |
| // We cannot support the following since TensorRT does not allow manipulation |
| // on batch dimension, we cannot generate output with proper shape |
| // T: 3 5 1 |
| // W: 1 1 1 1 3 5 1 |
| // -> T: 1 1 1 -1 3 5 1 |
| // -> W: 1 1 1 1 3 5 1 |
| // *************************************************************************** |
| const int max_nb_dims = nvinfer1::Dims::MAX_DIMS + 1; |
| const size_t element_size = sizeof(operand_l.d[0]); |
| |
| // fill in dimensions |
| int l_s[max_nb_dims]; |
| std::fill(l_s, l_s + max_nb_dims, 1); |
| int l_d = operand_l_is_tensor ? operand_l.nbDims + 1 : operand_l.nbDims; |
| int r_s[max_nb_dims]; |
| std::fill(r_s, r_s + max_nb_dims, 1); |
| int r_d = operand_r_is_tensor ? operand_r.nbDims + 1 : operand_r.nbDims; |
| |
| int max_d = std::max(l_d, r_d); |
| std::memcpy(l_s + max_d - operand_l.nbDims, operand_l.d, |
| operand_l.nbDims * element_size); |
| std::memcpy(r_s + max_d - operand_r.nbDims, operand_r.d, |
| operand_r.nbDims * element_size); |
| |
  // Set -1 for the batch dimension, since the batch size is not supposed to
  // be broadcast.
| if (operand_l_is_tensor) { |
| if (max_d != l_d) { // if broadcast beyond batch dimension, fail |
| return false; |
| } |
| l_s[0] = -1; |
| } |
| if (operand_r_is_tensor) { |
| if (max_d != r_d) { // if broadcast beyond batch dimension, fail |
| return false; |
| } |
| r_s[0] = -1; |
| } |
| |
| // compare broadcast feasibility |
| for (int i = max_d - 1; i >= 0; i--) { |
| if ((l_s[i] != r_s[i]) && (l_s[i] != 1) && (r_s[i] != 1)) { |
| return false; |
| } |
| } |
| |
| // output new TensorRT Dimension (stripping the batch dimension) |
| operand_l_new_shape->nbDims = max_d - 1; |
| std::memcpy(operand_l_new_shape->d, l_s + 1, (max_d - 1) * element_size); |
| operand_r_new_shape->nbDims = max_d - 1; |
| std::memcpy(operand_r_new_shape->d, r_s + 1, (max_d - 1) * element_size); |
| |
| return true; |
| } |
| |
| inline bool DimsEqual(const nvinfer1::Dims& dim_l, |
| const nvinfer1::Dims& dim_r) { |
| if (dim_l.nbDims != dim_r.nbDims) { |
| return false; |
| } |
| for (int i = 0; i < dim_l.nbDims; i++) { |
| if (dim_l.d[i] != dim_r.d[i]) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| inline nvinfer1::Dims GetTensorShape(const tensorflow::Tensor& tensor) { |
| nvinfer1::Dims dims; |
| dims.nbDims = tensor.dims(); |
| for (int i = 0; i < dims.nbDims; i++) { |
| dims.d[i] = tensor.dim_size(i); |
| } |
| return dims; |
| } |
| |
| inline int64_t GetShapeSize(const nvinfer1::Dims& shape) { |
| // Returns total number of elements in shape |
| int64_t count = 1; |
| for (int d = 0; d < shape.nbDims; ++d) { |
| count *= shape.d[d]; |
| } |
| return count; |
| } |
| |
| static std::vector<std::pair<int, int>> CreateSamePadding( |
| const nvinfer1::DimsHW& stride, const nvinfer1::DimsHW& kernel, |
| const std::vector<int64_t>& input_dims) { |
| std::vector<std::pair<int, int>> padding(input_dims.size()); |
| CHECK_EQ(stride.nbDims, input_dims.size()); // TODO(jie): N+C? NC+? |
| |
| for (size_t i = 0; i < input_dims.size(); ++i) { |
| // Formula to calculate the padding |
| int p = ((input_dims[i] - 1) / stride.d[i]) * stride.d[i] + kernel.d[i] - |
| input_dims[i]; |
| p = (p > 0) ? p : 0; |
| |
| // Right precedence padding, like in TensorFlow |
| int left = p / 2; |
| int right = p - left; |
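    // e.g. input 7, stride 2, kernel 3: p = ((7-1)/2)*2 + 3 - 7 = 2,
    // giving left = 1 and right = 1.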
| |
| VLOG(2) << "PADDING_" << i << " pre: " << left << ", post: " << right |
| << "paras: " << input_dims[i] << ", " << stride.d[i] << ", " |
| << "kernel: " << kernel.d[i]; |
| padding[i] = {left, right}; |
| } |
| return padding; |
| } |
| |
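// Returns the longest common name-scope prefix of the two op names, e.g.
// GetCommonNameScope("scope/a/op1", "scope/b/op2") returns "scope/".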
| string GetCommonNameScope(const string& op_name_a, const string& op_name_b) { |
| size_t last_scope_separator = 0; |
| const size_t min_size = std::min(op_name_a.size(), op_name_b.size()); |
| for (size_t i = 0; i < min_size; ++i) { |
| if (op_name_a[i] != op_name_b[i]) break; |
| if (op_name_a[i] == '/') last_scope_separator = i + 1; |
| } |
| return op_name_a.substr(0, last_scope_separator); |
| } |
| |
// Class representing TF weights to be converted to TRT weights.
| class TRT_ShapedWeights { |
| public: |
| TRT_ShapedWeights(tensorflow::DataType type, const void* values, |
| nvinfer1::Dims shape) |
| : shape_(shape), type_(type), values_(values), empty_weight_flag_(false) { |
| // Note: this->shape.type[] is not used |
| } |
| |
| explicit TRT_ShapedWeights(tensorflow::DataType type) |
| : shape_(), type_(type), values_(nullptr), empty_weight_flag_(true) {} |
| |
| // TODO(aaroey): use rvalue reference. |
| TRT_ShapedWeights(const TRT_ShapedWeights& rhs) |
| : shape_(rhs.shape_), |
| type_(rhs.type_), |
| values_(rhs.values_), |
| empty_weight_flag_(rhs.empty_weight_flag_) {} |
| |
| // TODO(aaroey): use GetShapeSize() instead. |
| int64_t count() const { |
| int64_t c = 1; |
| for (int i = 0; i < shape_.nbDims; i++) c *= shape_.d[i]; |
| return c; |
| } |
| |
| nvinfer1::Weights GetWeightsForTRT() const { |
| nvinfer1::DataType trt_type(nvinfer1::DataType::kFLOAT); |
| TF_CHECK_OK(ConvertDType(type_, &trt_type)); |
| if (empty_weight_flag_) return nvinfer1::Weights{trt_type, nullptr, 0}; |
| |
| // Note: this->shape.type[] is not used |
| return nvinfer1::Weights{trt_type, GetValues(), GetShapeSize(shape_)}; |
| } |
| |
| const void* GetValues() const { return values_; } |
| |
| // TODO(aaroey): get rid of this method. |
| void SetValues(const void* values) { values_ = values; } |
| |
| size_t size_bytes() const { |
| int type_size = tensorflow::DataTypeSize(this->type_); |
| return this->count() * type_size; |
| } |
| |
| // Default converter |
| operator nvinfer1::Weights() const { return GetWeightsForTRT(); } |
| |
| string DebugString() const { |
| return StrCat( |
| "TRT_ShapedWeights(shape=", convert::DebugString(shape_), ", type=", |
| type_, ", values=", reinterpret_cast<uintptr_t>(values_), |
| ", empty_weight_flag=", empty_weight_flag_, ")"); |
| } |
| |
| // TODO(aaroey): make these private. |
| nvinfer1::Dims shape_; |
| tensorflow::DataType type_; |
| |
| private: |
| // TODO(aaroey): this should not be const as it's always from TRTWeightStore. |
| const void* values_; |
| bool empty_weight_flag_; |
| }; |
| |
| class TRT_TensorOrWeights { |
| public: |
| explicit TRT_TensorOrWeights(nvinfer1::ITensor* tensor) |
| : tensor_(tensor), weights_(DT_FLOAT), variant_(TRT_NODE_TENSOR) {} |
| |
| explicit TRT_TensorOrWeights(const TRT_ShapedWeights& weights) |
| : tensor_(nullptr), weights_(weights), variant_(TRT_NODE_WEIGHTS) {} |
| |
| // TODO(aaroey): use rvalue reference. |
| TRT_TensorOrWeights(const TRT_TensorOrWeights& rhs) |
| : tensor_(rhs.tensor_), weights_(rhs.weights_), variant_(rhs.variant_) {} |
| |
| ~TRT_TensorOrWeights() {} |
| |
| bool is_tensor() const { return variant_ == TRT_NODE_TENSOR; } |
| bool is_weights() const { return variant_ == TRT_NODE_WEIGHTS; } |
| |
| nvinfer1::ITensor* tensor() { |
| CHECK(is_tensor()); |
| return tensor_; |
| } |
| |
| const nvinfer1::ITensor* tensor() const { |
| CHECK(is_tensor()); |
| return tensor_; |
| } |
| |
| TRT_ShapedWeights& weights() { |
| CHECK(is_weights()); |
| return weights_; |
| } |
| |
| const TRT_ShapedWeights& weights() const { |
| CHECK(is_weights()); |
| return weights_; |
| } |
| |
| nvinfer1::Dims shape() const { |
| if (is_tensor()) { |
| return tensor()->getDimensions(); |
| } else { |
| return weights().shape_; |
| } |
| } |
| |
| string DebugString() const { |
| string output = "TRT_TensorOrWeights(type="; |
| if (is_tensor()) { |
| StrAppend(&output, "tensor @", reinterpret_cast<uintptr_t>(tensor_), |
| ", shape=", convert::DebugString(tensor_->getDimensions())); |
| } else { |
| StrAppend(&output, "weights=", weights_.DebugString()); |
| } |
| StrAppend(&output, ")"); |
| return output; |
| } |
| |
| private: |
| nvinfer1::ITensor* tensor_; |
| TRT_ShapedWeights weights_; |
| enum { TRT_NODE_TENSOR, TRT_NODE_WEIGHTS } variant_; |
| }; |
| |
| class TFAttrs { |
| public: |
| explicit TFAttrs(const tensorflow::NodeDef& tf_node) { |
| for (const auto& attr : tf_node.attr()) { |
| attrs_.insert({attr.first, &attr.second}); |
| } |
| } |
| |
| bool count(const string& key) const { return attrs_.count(key); } |
| |
| tensorflow::AttrValue const* at(const string& key) const { |
| if (!attrs_.count(key)) { |
| LOG(FATAL) << "Attribute not found: " << key; |
| } |
| return attrs_.at(key); |
| } |
| |
| template <typename T> |
| T get(const string& key) const; |
| |
| template <typename T> |
| T get(const string& key, const T& default_value) const { |
| return attrs_.count(key) ? this->get<T>(key) : default_value; |
| } |
| |
| std::vector<string> GetAllAttrKeys() const { |
| std::vector<string> attr_list; |
| for (const auto& attr_item : attrs_) { |
| attr_list.emplace_back(attr_item.first); |
| } |
| return attr_list; |
| } |
| |
| private: |
| typedef std::map<string, tensorflow::AttrValue const*> AttrMap; |
| AttrMap attrs_; |
| }; |
| |
| template <> |
| string TFAttrs::get<string>(const string& key) const { |
| return this->at(key)->s(); |
| } |
| |
| template <> |
| std::vector<int> TFAttrs::get<std::vector<int>>(const string& key) const { |
| auto attr = this->at(key)->list().i(); |
| return std::vector<int>(attr.begin(), attr.end()); |
| } |
| |
| template <> |
| std::vector<float> TFAttrs::get<std::vector<float>>(const string& key) const { |
| auto attr = this->at(key)->list().f(); |
| return std::vector<float>(attr.begin(), attr.end()); |
| } |
| |
| template <> |
| std::vector<string> TFAttrs::get<std::vector<string>>(const string& key) const { |
| auto attr = this->at(key)->list().s(); |
| return std::vector<string>(attr.begin(), attr.end()); |
| } |
| |
| template <> |
| nvinfer1::DataType TFAttrs::get<nvinfer1::DataType>(const string& key) const { |
| nvinfer1::DataType trt_dtype(nvinfer1::DataType::kFLOAT); |
| TF_CHECK_OK(ConvertDType(this->at(key)->type(), &trt_dtype)); |
| return trt_dtype; |
| } |
| |
| template <> |
| tensorflow::DataType TFAttrs::get<tensorflow::DataType>( |
| const string& key) const { |
| return this->at(key)->type(); |
| } |
| |
| template <> |
| float TFAttrs::get<float>(const string& key) const { |
| return this->at(key)->f(); |
| } |
| |
| template <> |
| bool TFAttrs::get<bool>(const string& key) const { |
| return this->at(key)->b(); |
| } |
| |
| // TODO(jie): reorder4 & reorder2 should be merged? |
| // TODO(aaroey): fix the order of parameters. |
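// Copies a 4-D array element-by-element, reading element (n, c, h, w) at
// offset n*istrides.n() + c*istrides.c() + h*istrides.h() + w*istrides.w()
// and writing it at the corresponding offset computed from ostrides. Used
// below to convert between weight layouts.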
| template <typename T> |
| void Reorder4(const nvinfer1::DimsNCHW& shape, const T* idata, |
| const nvinfer1::DimsNCHW& istrides, T* odata, |
| const nvinfer1::DimsNCHW& ostrides) { |
| for (int n = 0; n < shape.n(); ++n) { |
| for (int c = 0; c < shape.c(); ++c) { |
| for (int h = 0; h < shape.h(); ++h) { |
| for (int w = 0; w < shape.w(); ++w) { |
| odata[n * ostrides.n() + c * ostrides.c() + h * ostrides.h() + |
| w * ostrides.w()] = idata[n * istrides.n() + c * istrides.c() + |
| h * istrides.h() + w * istrides.w()]; |
| } |
| } |
| } |
| } |
| } |
| |
| template <typename T> |
| void Reorder2(const nvinfer1::DimsHW& shape, const T* idata, |
| const nvinfer1::DimsHW& istrides, T* odata, |
| const nvinfer1::DimsHW& ostrides) { |
| for (int h = 0; h < shape.h(); ++h) { |
| for (int w = 0; w < shape.w(); ++w) { |
| odata[h * ostrides.h() + w * ostrides.w()] = |
| idata[h * istrides.h() + w * istrides.w()]; |
| } |
| } |
| } |
| |
| // TODO(jie): fallback to tensorflow!! |
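// Transposes a rank-2 weight from TF's CK layout (C rows by K columns) to
// TRT's KC layout.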
| void ReorderCKtoKC(const TRT_ShapedWeights& iweights, |
| TRT_ShapedWeights* oweights) { |
| const int c = iweights.shape_.d[0]; |
| const int k = iweights.shape_.d[1]; |
| oweights->shape_.d[0] = k; |
| oweights->shape_.d[1] = c; |
| const nvinfer1::DimsHW istrides = {1, k}; |
| const nvinfer1::DimsHW ostrides = {c, 1}; |
| switch (iweights.type_) { |
| case tensorflow::DataType::DT_FLOAT: { |
| Reorder2({k, c}, static_cast<float const*>(iweights.GetValues()), |
| istrides, |
| // TODO(aaroey): get rid of all the const_cast like this. |
| static_cast<float*>(const_cast<void*>(oweights->GetValues())), |
| ostrides); |
| break; |
| } |
| case tensorflow::DataType::DT_HALF: { |
| Reorder2( |
| {k, c}, static_cast<Eigen::half const*>(iweights.GetValues()), |
| istrides, |
| static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues())), |
| ostrides); |
| break; |
| } |
| default: |
| LOG(FATAL) << "Unsupported type in reorder expected fp32 or fp16 but got " |
| << DataTypeString(iweights.type_); |
| } |
| } |
| |
| void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights, |
| TRT_ShapedWeights* oweights, const int num_groups) { |
| CHECK_EQ(iweights.type_, oweights->type_); |
| CHECK_EQ(iweights.size_bytes(), oweights->size_bytes()); |
| // K indexes over output channels, C over input channels, and R and S over the |
| // height and width of the convolution |
| const int r = iweights.shape_.d[0]; |
| const int s = iweights.shape_.d[1]; |
| // TRT requires GKcRS, while TF depthwise has RSCK where c=1, C=G |
| const int c = iweights.shape_.d[2] / num_groups; |
| const int k = iweights.shape_.d[3] * num_groups; |
| VLOG(2) << "num_groups: " << num_groups |
| << "c" << iweights.shape_.d[2] << " then " << c |
| << "k" << iweights.shape_.d[3] << " then " << k |
| << "r" << iweights.shape_.d[0] << " then " << r |
| << "s" << iweights.shape_.d[1] << " then " << s; |
| oweights->shape_.d[0] = k / num_groups; |
| oweights->shape_.d[1] = c * num_groups; |
| oweights->shape_.d[2] = r; |
| oweights->shape_.d[3] = s; |
| const nvinfer1::DimsNCHW istrides = {1, k, s * k * c, c * k}; |
| const nvinfer1::DimsNCHW ostrides = {c * r * s, r * s, s, 1}; |
| switch (iweights.type_) { |
| case tensorflow::DataType::DT_FLOAT: { |
| Reorder4({k, c, r, s}, static_cast<float const*>(iweights.GetValues()), |
| istrides, |
| static_cast<float*>(const_cast<void*>(oweights->GetValues())), |
| ostrides); |
| break; |
| } |
| case tensorflow::DataType::DT_HALF: { |
| Reorder4( |
| {k, c, r, s}, static_cast<Eigen::half const*>(iweights.GetValues()), |
| istrides, |
| static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues())), |
| ostrides); |
| break; |
| } |
| |
| default: |
| LOG(FATAL) << "Unsupported type, expected fp32 or fp16 but got " |
| << DataTypeString(iweights.type_); |
| } |
| } |
| |
| class Converter; |
| |
| using OpConverter = |
| std::function<tensorflow::Status(Converter&, const tensorflow::NodeDef&, |
| const std::vector<TRT_TensorOrWeights>&, |
| std::vector<TRT_TensorOrWeights>*)>; |
| |
| class Converter { |
| public: |
| explicit Converter(nvinfer1::INetworkDefinition* trt_network, |
| TRTWeightStore* ws, bool fp16) |
| : trt_network_(trt_network), weight_store_(ws), fp16_(fp16) { |
| this->register_op_converters(); |
| } |
| |
| TRTWeightStore* weight_store() { return weight_store_; } |
| |
| TRT_ShapedWeights get_temp_weights(tensorflow::DataType type, |
| nvinfer1::Dims shape) { |
| TRT_ShapedWeights weights(type, nullptr, shape); |
| // TODO(jie): check weights size_bytes. 0 means type error |
| weight_store_->store_.push_back(std::vector<uint8_t>(weights.size_bytes())); |
| weights.SetValues(weight_store_->store_.back().data()); |
| return weights; |
| } |
| |
| // TODO(aaroey): fix all the namings. |
| bool isFP16() { return fp16_; } |
| |
| TRT_ShapedWeights get_temp_weights_like(const TRT_ShapedWeights& weights) { |
| return this->get_temp_weights(weights.type_, weights.shape_); |
| } |
| |
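  // Converts `node_def` by dispatching to the registered OpConverter (or the
  // plugin converter for plugin ops) and registers the resulting outputs
  // under the node's output tensor names.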
| tensorflow::Status convert_node(const tensorflow::NodeDef& node_def) { |
| std::vector<TRT_TensorOrWeights> inputs; |
| TF_RETURN_IF_ERROR(this->get_inputs(node_def, &inputs)); |
| const string& op = node_def.op(); |
| std::vector<TRT_TensorOrWeights> outputs; |
| if (PluginFactoryTensorRT::GetInstance()->IsPlugin(op)) { |
| TF_RETURN_IF_ERROR(plugin_converter_(*this, node_def, inputs, &outputs)); |
| } else { |
| if (!op_registry_.count(op)) { |
| return tensorflow::errors::Unimplemented( |
| "No converter registered for op: " + op); |
| } |
| OpConverter op_converter = op_registry_.at(op); |
| TF_RETURN_IF_ERROR(op_converter(*this, node_def, inputs, &outputs)); |
| } |
| for (size_t i = 0; i < outputs.size(); ++i) { |
| TRT_TensorOrWeights& output = outputs[i]; |
| // TODO(jie): tf protobuf seems to be omitting the :0 suffix |
| string output_name = node_def.name(); |
| if (i != 0) output_name = StrCat(output_name, ":", i); |
| if (output.is_tensor()) { |
| output.tensor()->setName(output_name.c_str()); |
| } |
| VLOG(2) << "Adding out tensor " << output_name << ": " |
| << output.DebugString(); |
| if (!trt_tensors_.insert({output_name, output}).second) { |
| return tensorflow::errors::AlreadyExists( |
| "Output tensor already exists for op: " + op); |
| } |
| } |
| return tensorflow::Status::OK(); |
| } |
| |
| nvinfer1::INetworkDefinition* network() { return trt_network_; } |
| |
| TRT_TensorOrWeights get_tensor(const string& name) { |
| if (!trt_tensors_.count(name)) { |
| return TRT_TensorOrWeights(nullptr); |
| } |
| return trt_tensors_.at(name); |
| } |
| |
| bool insert_input_tensor(const string& name, nvinfer1::ITensor* tensor) { |
| return trt_tensors_.insert({name, TRT_TensorOrWeights(tensor)}).second; |
| } |
| |
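  // Transposes `input_tensor` according to `order`, which is given with the
  // implicit batch dimension included (so order.size() == nbDims + 1).
  // order[0] refers to the batch dimension and is ignored, since TRT cannot
  // transpose the batch dimension.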
| nvinfer1::ITensor* TransposeTensor(nvinfer1::ITensor* input_tensor, |
| const std::vector<int>& order) { |
| const auto dims = input_tensor->getDimensions(); |
| |
| // TODO(jie): change the return to status and properly exit |
    if (order.size() - 1 != size_t(dims.nbDims)) {
      LOG(ERROR) << "Dimension count does not match; failing gracefully";
    }
| |
| nvinfer1::IShuffleLayer* layer = this->network()->addShuffle(*input_tensor); |
| if (layer == nullptr) { |
| return nullptr; |
| } |
| nvinfer1::Permutation permutation; |
| for (int32_t i = 0; i < dims.nbDims; ++i) { |
| permutation.order[i] = order[i + 1] - 1; |
| } |
| layer->setFirstTranspose(permutation); |
| |
| nvinfer1::Dims reshape_dims; |
| reshape_dims.nbDims = dims.nbDims; |
| for (int32_t i = 0; i < reshape_dims.nbDims; ++i) { |
| reshape_dims.d[i] = 0; |
| reshape_dims.type[i] = dims.type[i]; |
| } |
| layer->setReshapeDimensions(reshape_dims); |
| return layer->getOutput(0); |
| } |
| |
| private: |
| std::unordered_map<string, TRT_TensorOrWeights> trt_tensors_; |
| std::unordered_map<string, OpConverter> op_registry_; |
| OpConverter plugin_converter_; |
| nvinfer1::INetworkDefinition* trt_network_; |
| std::list<std::vector<uint8_t>> temp_bufs_; |
| |
| // TODO(aaroey): inline the definition of TRTWeightStore here, and add APIs to |
| // operate the stored weights instead of operating it directly. |
| TRTWeightStore* weight_store_; |
| |
| bool fp16_; |
| |
| void register_op_converters(); |
| |
| tensorflow::Status get_inputs(const tensorflow::NodeDef& node_def, |
| std::vector<TRT_TensorOrWeights>* inputs) { |
| for (auto const& input_name : node_def.input()) { |
| /************************************************************************* |
| * TODO(jie): handle case 1) here. |
| * Normalizes the inputs and extracts associated metadata: |
| * 1) Inputs can contain a colon followed by a suffix of characters. |
| * That suffix may be a single number (e.g. inputName:1) or several |
| * word characters separated from a number by a colon |
| * (e.g. inputName:foo:1). The |
| * latter case is used to denote inputs and outputs of functions. |
| * 2) Control dependency inputs contain caret at the beginning and we |
| * remove this and annotate the edge as a control dependency. |
| ************************************************************************/ |
| // skip control nodes |
| if (input_name[0] == '^') continue; |
| string name = input_name; |
| auto first = name.find_first_of(':'); |
| // TODO(aaroey): why removing the colon but not the zero? A bug? |
| // TODO(aaroey): use TensorId |
| if (first != string::npos && first + 2 == name.size() && |
| name[first + 1] == '0') { |
| name.erase(first); |
| } |
| |
| if (trt_tensors_.count(name)) { |
| TRT_TensorOrWeights& input = trt_tensors_.at(name); |
| inputs->push_back(input); |
| VLOG(2) << "Retrieved input " << name << ": " << input.DebugString(); |
| } else { |
| // TODO(aaroey): this should not happen, make it a CHECK. |
| // TODO(aaroey): use StrCat for pattern like this. |
| string msg("Node "); |
| StrAppend(&msg, node_def.name(), " should have an input named '", name, |
| "' but it is not available"); |
| LOG(ERROR) << msg; |
| return tensorflow::errors::InvalidArgument(msg); |
| } |
| } |
| return tensorflow::Status::OK(); |
| } |
| }; |
| |
| TRT_ShapedWeights ConvertFP32ToFP16(Converter& ctx, |
| const TRT_ShapedWeights& weights_src) { |
| auto dtype_new = tensorflow::DataType::DT_HALF; |
| TRT_ShapedWeights weights = |
| ctx.get_temp_weights(dtype_new, weights_src.shape_); |
| const float* src = static_cast<const float*>(weights_src.GetValues()); |
| Eigen::half* dst = const_cast<Eigen::half*>( |
| static_cast<Eigen::half const*>(weights.GetValues())); |
| for (int64_t i = 0; i < weights_src.count(); i++) { |
| dst[i] = Eigen::half_impl::float_to_half_rtne(src[i]); |
| } |
| return weights; |
| } |
| |
| // **************************************************************************** |
| // Constant folding functions |
| // TODO(jie): once optimizer kicks in, we should have done constant folding |
| // there. |
| // ***************************************************************************** |
| struct LambdaFactory { |
| enum class OP_CATEGORY : int { RSQRT = 0, NEG, ADD, MUL, SUB, RECIP }; |
| OP_CATEGORY op; |
| |
| template <typename T> |
| std::function<T(T)> unary() { |
| switch (op) { |
| case OP_CATEGORY::RSQRT: { |
| VLOG(2) << "RSQRT GETS DONE"; |
| return [](T t) -> T { return 1.0 / sqrt(t); }; |
| } |
| case OP_CATEGORY::NEG: |
| return [](T t) -> T { return -t; }; |
| case OP_CATEGORY::RECIP: |
| return [](T t) -> T { return 1.0 / t; }; |
| default: |
| VLOG(2) << "Not supported op for unary: " << static_cast<int>(op); |
| return nullptr; |
| } |
| } |
| |
| template <typename T> |
| std::function<T(T, T)> binary() { |
| switch (op) { |
| case OP_CATEGORY::ADD: |
| return [](T l, T r) -> T { return l + r; }; |
| case OP_CATEGORY::SUB: |
| return [](T l, T r) -> T { return l - r; }; |
| case OP_CATEGORY::MUL: |
| return [](T l, T r) -> T { return l * r; }; |
| default: |
| LOG(WARNING) << "Not supported op for binary: " << static_cast<int>(op); |
| } |
| return [](T l, T r) -> T { |
| LOG(FATAL) << "Unsupported op type "; |
| return l; |
| }; |
| } |
| |
| template <typename T> |
| std::function<T(T)> broadcast_r(T val) { |
| VLOG(2) << "LAMBDA VAL : " << val; |
| switch (op) { |
| case OP_CATEGORY::ADD: |
| return [val](T l) -> T { |
| VLOG(2) << "LAMBDA VAL : " << val; |
| return l + val; |
| }; |
| case OP_CATEGORY::SUB: |
| return [val](T l) -> T { |
| VLOG(2) << "LAMBDA VAL : " << val; |
| return l - val; |
| }; |
| case OP_CATEGORY::MUL: |
| return [val](T l) -> T { |
| VLOG(2) << "LAMBDA VAL : " << val; |
| return l * val; |
| }; |
| default: |
| LOG(WARNING) << "Not supported op for binary: " << static_cast<int>(op); |
| } |
| return [val](T l) -> T { |
| LOG(FATAL) << "Unsupported op type "; |
| return l; |
| }; |
| } |
| |
| template <typename T> |
| std::function<T(T)> broadcast_l(T val) { |
| VLOG(2) << "LAMBDA VAL : " << val; |
| switch (op) { |
| case OP_CATEGORY::ADD: |
| return [val](T l) -> T { |
| VLOG(2) << "LAMBDA VAL : " << val; |
| return val + l; |
| }; |
| case OP_CATEGORY::SUB: |
| return [val](T l) -> T { |
| VLOG(2) << "LAMBDA VAL : " << val; |
| return val - l; |
| }; |
| case OP_CATEGORY::MUL: |
| return [val](T l) -> T { |
| VLOG(2) << "LAMBDA VAL : " << val; |
| return val * l; |
| }; |
| default: |
| LOG(ERROR) << "Not supported op for binary: " << static_cast<int>(op); |
| } |
| return [val](T l) -> T { |
| LOG(FATAL) << "Unsupported op type "; |
| return l; |
| }; |
| } |
| }; |
| |
| template <> |
| std::function<Eigen::half(Eigen::half)> LambdaFactory::unary<Eigen::half>() { |
| switch (op) { |
| case OP_CATEGORY::RSQRT: { |
| VLOG(2) << "RSQRT GETS DONE"; |
| return [](Eigen::half t) -> Eigen::half { |
| return Eigen::half(1.0 / sqrt(static_cast<float>(t))); |
| }; |
| } |
| case OP_CATEGORY::NEG: |
| return [](Eigen::half t) -> Eigen::half { return -t; }; |
| // TODO(aaroey): can we support RECIP? |
| default: |
| VLOG(2) << "Not supported op for unary: " << static_cast<int>(op); |
| return nullptr; |
| } |
| } |
| |
| tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights, |
| TRT_ShapedWeights* oweights, |
| LambdaFactory unary_op) { |
| CHECK_EQ(iweights.type_, oweights->type_); |
| switch (iweights.type_) { |
| case tensorflow::DataType::DT_FLOAT: { |
| auto inp = static_cast<float const*>(iweights.GetValues()); |
| auto oup = static_cast<float*>(const_cast<void*>(oweights->GetValues())); |
| std::transform(inp, inp + iweights.count(), oup, unary_op.unary<float>()); |
| break; |
| } |
| case tensorflow::DataType::DT_HALF: { |
| auto inp = static_cast<Eigen::half const*>(iweights.GetValues()); |
| auto oup = |
| static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues())); |
| std::transform(inp, inp + iweights.count(), oup, |
| unary_op.unary<Eigen::half>()); |
| break; |
| } |
| default: |
| return tensorflow::errors::Unimplemented( |
| "Data type not supported: " + |
| tensorflow::DataTypeString(iweights.type_)); |
| } |
| return tensorflow::Status::OK(); |
| } |
| |
| tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l, |
| const TRT_ShapedWeights& iweights_r, |
| TRT_ShapedWeights* oweights, |
| LambdaFactory binary_op) { |
  // Assume iweights_l.type == iweights_r.type
| CHECK_EQ(iweights_l.type_, oweights->type_); |
| CHECK_EQ(iweights_r.type_, oweights->type_); |
| VLOG(2) << "SANITY CHECK!"; |
| |
| switch (iweights_l.type_) { |
| case tensorflow::DataType::DT_FLOAT: { |
| auto inp_l = static_cast<const float*>(iweights_l.GetValues()); |
| auto inp_r = static_cast<const float*>(iweights_r.GetValues()); |
| auto oup = static_cast<float*>(const_cast<void*>(oweights->GetValues())); |
| |
| if (iweights_l.count() != iweights_r.count()) { |
        // We only support broadcasting with a rank-zero (scalar) operand
| if (iweights_l.count() == 1) { |
| // TODO(aaroey): Remove loggings like this. |
| VLOG(2) << "I bet it is not working!" << (*inp_l); |
| std::transform(inp_r, inp_r + iweights_r.count(), oup, |
| binary_op.broadcast_l<float>(*inp_l)); |
| } else if (iweights_r.count() == 1) { |
| VLOG(2) << "I bet it is not working!" << (*inp_r); |
| std::transform(inp_l, inp_l + iweights_l.count(), oup, |
| binary_op.broadcast_r<float>(*inp_r)); |
| } else { |
| return tensorflow::errors::Unimplemented( |
| "Binary op with non-rankZero broadcast not supported"); |
| } |
| } else { |
| std::transform(inp_l, inp_l + iweights_l.count(), inp_r, oup, |
| binary_op.binary<float>()); |
| } |
| break; |
| } |
| case tensorflow::DataType::DT_HALF: { |
| auto inp_l = static_cast<const Eigen::half*>(iweights_l.GetValues()); |
| auto inp_r = static_cast<const Eigen::half*>(iweights_r.GetValues()); |
| auto oup = |
| static_cast<Eigen::half*>(const_cast<void*>(oweights->GetValues())); |
| |
| if (iweights_l.count() != iweights_r.count()) { |
        // We only support broadcasting with a rank-zero (scalar) operand
| if (iweights_l.count() == 1) { |
| VLOG(2) << "I bet it is not working!" << (*inp_l); |
| std::transform(inp_r, inp_r + iweights_r.count(), oup, |
| binary_op.broadcast_l<Eigen::half>(*inp_l)); |
| } else if (iweights_r.count() == 1) { |
| VLOG(2) << "I bet it is not working!" << (*inp_r); |
| std::transform(inp_l, inp_l + iweights_l.count(), oup, |
| binary_op.broadcast_r<Eigen::half>(*inp_r)); |
| } else { |
| return tensorflow::errors::Unimplemented( |
| "Binary op with non-rankZero broadcast not supported"); |
| } |
| } else { |
| std::transform(inp_l, inp_l + iweights_l.count(), inp_r, oup, |
| binary_op.binary<Eigen::half>()); |
| } |
| break; |
| } |
| default: |
| return tensorflow::errors::Unimplemented( |
| "Data type not supported: " + |
| tensorflow::DataTypeString(iweights_l.type_)); |
| } |
| |
| return tensorflow::Status::OK(); |
| } |
| |
// TODO(jie): broadcasting is needed yet not fully implemented.
// Only channel-wise broadcasting is implemented for the time being.
| tensorflow::Status BinaryTensorOpWeight( |
| Converter& ctx, const tensorflow::NodeDef& node_def, |
| const nvinfer1::ITensor* tensor, TRT_ShapedWeights weights, |
| bool swapped_inputs, std::vector<TRT_TensorOrWeights>* outputs) { |
  // `tensor` is the left operand and `weights` is the right operand; when
  // `swapped_inputs` is true, the two operands are swapped.
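  // The binary op is implemented as a TRT IScaleLayer: Add/Sub become a
  // shift, Mul/Div become a scale, with Sub/Div first negating or taking the
  // reciprocal of one operand.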
| // TODO(aaroey): use a set. |
| if (node_def.op() != "Sub" && node_def.op() != "Add" && |
| node_def.op() != "Mul" && node_def.op() != "Div" && |
| node_def.op() != "RealDiv") { |
| return tensorflow::errors::Unimplemented( |
| "op not supported: " + node_def.op() + ", at: " + node_def.name()); |
| } |
| |
| // Check type consistency |
| nvinfer1::DataType ttype; |
| TF_RETURN_IF_ERROR(ConvertDType(weights.type_, &ttype)); |
| |
| // Check scale mode |
| auto dims_w = weights.shape_; |
| auto dims_t = tensor->getDimensions(); |
| |
| // TODO(jie): addScale checks for input tensor dimension |
| if (dims_t.nbDims != 3) { |
| return tensorflow::errors::InvalidArgument( |
| "addScale requires tensor with rank 3, " + node_def.name()); |
| } |
| |
| // default to element-wise |
| auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; |
| |
| // TODO(jie): maybe use a permutation instead to support more cases; |
| bool permutation_flag = false; |
| |
| if (weights.count() == 1) { |
| VLOG(2) << "UNIFORM"; |
| scale_mode = nvinfer1::ScaleMode::kUNIFORM; |
| } else { |
| // no broadcasting on Batch dimension; |
| VLOG(2) << "WEIGHTS DIM: " << dims_w.nbDims |
| << " tensor DIM: " << dims_t.nbDims; |
| if (dims_w.nbDims == dims_t.nbDims + 1) { |
| if (dims_w.d[0] == 1) { |
| for (int i = 1; i < dims_w.nbDims; i++) { |
| dims_w.d[i - 1] = dims_w.d[i]; |
| } |
| dims_w.nbDims--; |
| } else { |
| return tensorflow::errors::InvalidArgument( |
| "Binary op cannot operate on batch, " + node_def.name()); |
| } |
| } |
| |
| if (dims_w.nbDims == dims_t.nbDims && dims_w.d[0] == dims_t.d[0]) { |
| scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; |
| // default is element; |
| for (int i = 1; i < dims_w.nbDims; i++) { |
| if (dims_w.d[i] != dims_t.d[i]) { |
| // if dimension does not match, switch back to channel; |
| VLOG(2) << "channel"; |
| scale_mode = nvinfer1::ScaleMode::kCHANNEL; |
| break; |
| } |
| } |
| // if channel as candidate, validate it |
| if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) { |
| for (int i = 1; i < dims_w.nbDims; i++) { |
| if (dims_w.d[i] != 1) |
| return tensorflow::errors::InvalidArgument( |
| "Weight shape not compatible at, " + node_def.name()); |
| } |
| } else { |
| VLOG(2) << "elementwise"; |
| } |
| } else if (dims_w.nbDims == 1 && |
| dims_w.d[0] == dims_t.d[dims_t.nbDims - 1]) { |
| // channel wise and broadcast required; |
| permutation_flag = true; |
| scale_mode = nvinfer1::ScaleMode::kCHANNEL; |
| } else { |
| return tensorflow::errors::InvalidArgument( |
| "Weight shape not compatible at, " + node_def.name()); |
| } |
| } |
| |
| // transpose last dimension |
| std::vector<int> permutation(dims_t.nbDims + 1); |
| if (permutation_flag) { |
| if (scale_mode == nvinfer1::ScaleMode::kCHANNEL && dims_t.nbDims > 1) { |
| // we swap the last dimension into channel for trt. |
| // because of tensorflow default broadcasting rules. |
| for (int i = 0; i < static_cast<int>(permutation.size()); i++) { |
| permutation[i] = i; |
| } |
| permutation[1] = dims_t.nbDims; |
| permutation[dims_t.nbDims] = 1; |
| tensor = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor), |
| permutation); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(tensor, node_def.name()); |
| } else { |
| return tensorflow::errors::InvalidArgument( |
| "Transpose cannot be applied, " + node_def.name()); |
| } |
| } |
| |
| if (ctx.isFP16()) { |
| weights = ConvertFP32ToFP16(ctx, weights); |
| } |
| |
| // prepare weights |
| TRT_ShapedWeights shift_weights(weights.type_); |
| TRT_ShapedWeights scale_weights(weights.type_); |
| TRT_ShapedWeights power_weights(weights.type_); |
| |
| // Maybe I should do a switch |
| if (node_def.op() == "Sub") { |
| if (swapped_inputs) { |
| shift_weights = weights; |
| nvinfer1::IUnaryLayer* layer = |
| ctx.network()->addUnary(*const_cast<nvinfer1::ITensor*>(tensor), |
| nvinfer1::UnaryOperation::kNEG); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| tensor = layer->getOutput(0); |
| } else { |
| TRT_ShapedWeights neg_weights = ctx.get_temp_weights_like(weights); |
| LambdaFactory unary_op; |
| unary_op.op = LambdaFactory::OP_CATEGORY::NEG; |
| TF_RETURN_IF_ERROR(UnaryCompute(weights, &neg_weights, unary_op)); |
| shift_weights = neg_weights; |
| } |
| } else if (node_def.op() == "Div" || node_def.op() == "RealDiv") { |
| if (swapped_inputs) { |
| scale_weights = weights; |
| nvinfer1::IUnaryLayer* layer = |
| ctx.network()->addUnary(*const_cast<nvinfer1::ITensor*>(tensor), |
| nvinfer1::UnaryOperation::kRECIP); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| tensor = layer->getOutput(0); |
| } else { |
| TRT_ShapedWeights recip_weights = ctx.get_temp_weights_like(weights); |
| LambdaFactory unary_op; |
| unary_op.op = LambdaFactory::OP_CATEGORY::RECIP; |
| TF_RETURN_IF_ERROR(UnaryCompute(weights, &recip_weights, unary_op)); |
| scale_weights = recip_weights; |
| } |
| } else if (node_def.op() == "Mul") { |
| scale_weights = weights; |
| } else if (node_def.op() == "Add") { |
| shift_weights = weights; |
| } else { |
| return tensorflow::errors::Unimplemented("Binary op not supported: " + |
| node_def.op()); |
| } |
| |
| nvinfer1::IScaleLayer* layer = ctx.network()->addScale( |
| *const_cast<nvinfer1::ITensor*>(tensor), scale_mode, shift_weights, |
| scale_weights, power_weights); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| |
| nvinfer1::ITensor* output_tensor = layer->getOutput(0); |
| // transpose back dimension |
| if (permutation_flag) { |
| output_tensor = ctx.TransposeTensor(output_tensor, permutation); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(output_tensor, node_def.name()); |
| } |
| |
| // Pass the output |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| |
| enum class ConvolutionType { DEFAULT, DEPTHWISE_CONV }; |
| |
| tensorflow::Status ConvertConv2DHelper( |
| Converter& ctx, const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs, int group) { |
| const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); |
| |
| TFAttrs attrs(node_def); |
| |
| int h_index = 2; |
| int w_index = 3; |
| auto data_format = attrs.get<string>("data_format"); |
| if (data_format == "NHWC") { |
| tensor = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor), |
| {0, 3, 1, 2}); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(tensor, node_def.name()); |
| h_index = 1; |
| w_index = 2; |
| // TODO(jie): transpose it |
| } |
| |
| // tensor after transpose (NCHW) |
| const auto tensor_dim = tensor->getDimensions(); |
| |
| int num_groups = group; |
| if (num_groups == 0) num_groups = tensor_dim.d[0]; // depthwise convolution |
| VLOG(2) << "groups count: " << num_groups; |
| |
| TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); |
| VLOG(2) << "weight shape: " << weights_rsck.DebugString(); |
| if (weights_rsck.shape_.nbDims != 4) { |
| return tensorflow::errors::Internal( |
| "Conv2D expects kernel of dimension 4, at: " + node_def.name()); |
| } |
| if (ctx.isFP16()) { |
| weights_rsck = ConvertFP32ToFP16(ctx, inputs.at(1).weights()); |
| } |
| |
| TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); |
| ReorderRSCKToKCRS(weights_rsck, &weights, num_groups); |
| TRT_ShapedWeights biases(weights.type_); |
| const int noutput = weights.shape_.d[0] * num_groups; |
| nvinfer1::DimsHW kernel_size; |
| kernel_size.h() = weights.shape_.d[2]; |
| kernel_size.w() = weights.shape_.d[3]; |
| VLOG(2) << "RSCK: " << weights.DebugString(); |
| VLOG(2) << "kernel size: " << kernel_size.h() << ", " << kernel_size.w(); |
| |
| // TODO(jie): stride. (NHWC/NCHW) |
| const auto tf_stride = attrs.get<std::vector<int>>("strides"); |
| VLOG(2) << "h_INDEX" << h_index << ", w_index " << w_index; |
| VLOG(2) << "stride: " << tf_stride[0] << tf_stride[1] << tf_stride[2] |
| << tf_stride[3]; |
| const nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]); |
| |
| std::vector<std::pair<int, int>> padding; |
| // TODO(jie): padding. |
| if (attrs.get<string>("padding") == "SAME") { |
| // This is NCHW tensor with no batch dimension. |
| // 1 -> h |
| // 2 -> w |
| padding = CreateSamePadding( |
| stride, kernel_size, |
| {static_cast<int>(tensor_dim.d[1]), static_cast<int>(tensor_dim.d[2])}); |
| } else { |
| padding = {{0, 0}, {0, 0}}; |
| } |
| |
| if (padding[0].first != padding[0].second || |
| padding[1].first != padding[1].second) { |
| // TODO(jie): handle asymmetric padding |
| VLOG(2) << "Padding!!!: " << padding[0].first << padding[0].second |
| << padding[1].first << padding[1].second; |
| VLOG(2) << "TENSOR before: " << DebugString(tensor->getDimensions()); |
| auto pad_layer = ctx.network()->addPadding( |
| *const_cast<nvinfer1::ITensor*>(tensor), |
| nvinfer1::DimsHW(padding[0].first, padding[1].first), |
| nvinfer1::DimsHW(padding[0].second, padding[1].second)); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(pad_layer, node_def.name()); |
| padding = {{0, 0}, {0, 0}}; |
| tensor = pad_layer->getOutput(0); |
| VLOG(2) << "TENSOR after: " << DebugString(tensor->getDimensions()); |
| } |
| |
| nvinfer1::IConvolutionLayer* layer = |
| ctx.network()->addConvolution(*const_cast<nvinfer1::ITensor*>(tensor), |
| noutput, kernel_size, weights, biases); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| |
| layer->setStride(stride); |
| layer->setPadding({padding[0].first, padding[1].first}); |
| layer->setName(node_def.name().c_str()); |
| layer->setNbGroups(num_groups); |
| nvinfer1::ITensor* output_tensor = layer->getOutput(0); |
| VLOG(2) << "TENSOR out: " << DebugString(output_tensor->getDimensions()); |
| VLOG(2) << "data_format: " << data_format; |
| if (data_format == "NHWC") { |
| // TODO(jie): transpose it back! |
| output_tensor = ctx.TransposeTensor(output_tensor, {0, 2, 3, 1}); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(output_tensor, node_def.name()); |
| } |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| |
| tensorflow::Status ConvertConv2DHelper( |
| Converter& ctx, const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs, ConvolutionType type) { |
| switch (type) { |
| case ConvolutionType::DEFAULT: |
| return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 1); |
| case ConvolutionType::DEPTHWISE_CONV: |
| return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 0); |
| } |
| return tensorflow::errors::Unimplemented("unsupported convolution type at, " + |
| node_def.name()); |
| } |
| |
// Helper function that converts the input into a tensor with the shape
// specified by `dims`.
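// Weights inputs are materialized as a TRT constant layer, which is only
// available in TensorRT 4 and later (NV_TENSORRT_MAJOR > 3).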
| bool PrepareTensorForShape(Converter& ctx, const TRT_TensorOrWeights& input, |
| const nvinfer1::Dims& dims, |
| const nvinfer1::ITensor** tensor) { |
| if (input.is_tensor()) { |
| if (DimsEqual(input.shape(), dims)) { |
| *tensor = input.tensor(); |
| } else { |
| nvinfer1::IShuffleLayer* layer = ctx.network()->addShuffle( |
| *const_cast<nvinfer1::ITensor*>(input.tensor())); |
| if (layer != nullptr) { |
| layer->setReshapeDimensions(dims); |
| *tensor = layer->getOutput(0); |
| } else { |
| return false; |
| } |
| } |
| } else { |
| #if NV_TENSORRT_MAJOR > 3 |
| nvinfer1::IConstantLayer* layer = |
| ctx.network()->addConstant(dims, input.weights()); |
| if (layer != nullptr) { |
| *tensor = layer->getOutput(0); |
| } else { |
| return false; |
| } |
| #else |
| return false; |
| #endif |
| } |
| return true; |
| } |
| |
| tensorflow::Status BinaryTensorOpTensor( |
| Converter& ctx, const tensorflow::NodeDef& node_def, |
| const TRT_TensorOrWeights& operand_l, const TRT_TensorOrWeights& operand_r, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| static const std::unordered_map<string, nvinfer1::ElementWiseOperation> ops{ |
| {"Add", nvinfer1::ElementWiseOperation::kSUM}, |
| {"Mul", nvinfer1::ElementWiseOperation::kPROD}, |
| {"Sub", nvinfer1::ElementWiseOperation::kSUB}, |
| {"Div", nvinfer1::ElementWiseOperation::kDIV}, |
| {"RealDiv", nvinfer1::ElementWiseOperation::kDIV}, |
| {"Minimum", nvinfer1::ElementWiseOperation::kMIN}, |
| {"Maximum", nvinfer1::ElementWiseOperation::kMAX}, |
| }; |
| |
| const nvinfer1::ITensor* tensor_l; |
| const nvinfer1::ITensor* tensor_r; |
| |
| nvinfer1::Dims dim_l; |
| nvinfer1::Dims dim_r; |
| |
| if (!TensorRTGetBroadcastShape(operand_l.shape(), operand_l.is_tensor(), |
| operand_r.shape(), operand_r.is_tensor(), |
| &dim_l, &dim_r)) { |
| return tensorflow::errors::InvalidArgument( |
| "Binary op broadcast scheme not supported by TensorRT op: " + |
| node_def.op() + ", at: " + node_def.name()); |
| } |
| |
| TFTRT_RETURN_ERROR_IF_FALSE( |
| PrepareTensorForShape(ctx, operand_l, dim_l, &tensor_l), node_def.name()); |
| TFTRT_RETURN_ERROR_IF_FALSE( |
| PrepareTensorForShape(ctx, operand_r, dim_r, &tensor_r), node_def.name()); |
| |
| // get trt type & shape |
| TFAttrs attrs(node_def); |
| // maybe this part has to be moved into the block of rsqrt later |
| nvinfer1::DataType dtype = attrs.get<nvinfer1::DataType>("T"); |
| |
| // check type consistency |
| TFTRT_CHECK_EQ_TYPE(tensor_l->getType(), dtype); |
| TFTRT_CHECK_EQ_TYPE(tensor_r->getType(), dtype); |
| auto op_pair = ops.find(node_def.op()); |
| if (op_pair == ops.end()) { |
| return tensorflow::errors::Unimplemented( |
| "binary op: ", node_def.op(), " not supported at: ", node_def.name()); |
| } |
| |
| nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( |
| // TODO(aaroey): will tensor_l/tensor_r get modified? |
| *const_cast<nvinfer1::ITensor*>(tensor_l), |
| *const_cast<nvinfer1::ITensor*>(tensor_r), op_pair->second); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| |
| nvinfer1::ITensor* output_tensor = layer->getOutput(0); |
| |
| // pass the output |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| |
| tensorflow::Status ConvertPlugin(Converter& ctx, |
| const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| // prepare input |
| std::vector<nvinfer1::ITensor*> all_inputs; |
| for (auto input : inputs) { |
| all_inputs.emplace_back(const_cast<nvinfer1::ITensor*>(input.tensor())); |
| } |
| |
| // plugin is owned by PluginFactory |
| // TODO(jie): destroy plugins later (resource management) |
| PluginTensorRT* plugin = |
| PluginFactoryTensorRT::GetInstance()->CreatePlugin(node_def.op()); |
| |
| // passing attributes |
| // TODO(jie): support more general attribute |
| TFAttrs attrs(node_def); |
| auto attr_key_vector = attrs.GetAllAttrKeys(); |
| for (auto attr_key : attr_key_vector) { |
| // TODO(jie): support only list of float for toy example here. |
| auto data = attrs.get<std::vector<float>>(attr_key); |
| size_t size_data = data.size() * sizeof(float); |
| if (!plugin->SetAttribute(attr_key, static_cast<void*>(data.data()), |
| size_data)) { |
| return tensorflow::errors::InvalidArgument("plugin SetAttribute failed"); |
| } |
| } |
| |
| nvinfer1::IPluginLayer* layer = ctx.network()->addPlugin( |
| &all_inputs[0], static_cast<int>(inputs.size()), *plugin); |
| |
| for (int i = 0; i < layer->getNbOutputs(); i++) { |
| nvinfer1::ITensor* output_tensor = layer->getOutput(i); |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| } |
| return tensorflow::Status::OK(); |
| } |
| |
| tensorflow::Status ConvertConv2D(Converter& ctx, |
| const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| return ConvertConv2DHelper(ctx, node_def, inputs, outputs, |
| ConvolutionType::DEFAULT); |
| } |
| |
| tensorflow::Status ConvertConv2DDepthwise( |
| Converter& ctx, const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| return ConvertConv2DHelper(ctx, node_def, inputs, outputs, |
| ConvolutionType::DEPTHWISE_CONV); |
| } |
| |
| tensorflow::Status ConvertPool(Converter& ctx, |
| const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); |
| TFAttrs attrs(node_def); |
| |
| int h_index = 2; |
| int w_index = 3; |
| const auto data_format = attrs.get<string>("data_format"); |
| if (data_format == "NHWC") { |
| h_index = 1; |
| w_index = 2; |
| tensor = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor), |
| {0, 3, 1, 2}); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(tensor, node_def.name()); |
| } |
| |
| nvinfer1::PoolingType type; |
| if (node_def.op() == "MaxPool") { |
| type = nvinfer1::PoolingType::kMAX; |
| } else if (node_def.op() == "AvgPool") { |
| type = nvinfer1::PoolingType::kAVERAGE; |
| } else { |
| return tensorflow::errors::Unimplemented("Unsupported pool type: ", |
| node_def.op()); |
| } |
| |
| const auto tf_stride = attrs.get<std::vector<int>>("strides"); |
| const nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]); |
| |
| const auto tf_kernel = attrs.get<std::vector<int>>("ksize"); |
| const nvinfer1::DimsHW ksize(tf_kernel[h_index], tf_kernel[w_index]); |
| |
| auto tensor_dim = tensor->getDimensions(); |
| std::vector<std::pair<int, int>> padding; |
| const string padding_type = attrs.get<string>("padding"); |
| if (padding_type == "SAME") { |
| // This is NCHW tensor with no batch dimension. |
| // 1 -> h |
| // 2 -> w |
| padding = CreateSamePadding( |
| stride, ksize, |
| {static_cast<int>(tensor_dim.d[1]), static_cast<int>(tensor_dim.d[2])}); |
| } else if (padding_type == "VALID") { |
| padding = {{0, 0}, {0, 0}}; |
| } else { |
| return tensorflow::errors::Unimplemented("Unsupported padding type: ", |
| padding_type); |
| } |
| |
| if (padding[0].first != padding[0].second || |
| padding[1].first != padding[1].second) { |
| VLOG(2) << "Padding!!!: " << padding[0].first << padding[0].second |
| << padding[1].first << padding[1].second; |
| auto pad_layer = ctx.network()->addPadding( |
| *const_cast<nvinfer1::ITensor*>(tensor), |
| nvinfer1::DimsHW(padding[0].first, padding[1].first), |
| nvinfer1::DimsHW(padding[0].second, padding[1].second)); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(pad_layer, node_def.name()); |
| padding = {{0, 0}, {0, 0}}; |
| tensor = pad_layer->getOutput(0); |
| } |
| |
| nvinfer1::IPoolingLayer* layer = ctx.network()->addPooling( |
| *const_cast<nvinfer1::ITensor*>(tensor), type, ksize); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| |
| layer->setStride(stride); |
| layer->setPadding({padding[0].first, padding[1].first}); |
| layer->setName(node_def.name().c_str()); |
| nvinfer1::ITensor* output_tensor = layer->getOutput(0); |
| |
| if (data_format == "NHWC") { |
| output_tensor = ctx.TransposeTensor(output_tensor, {0, 2, 3, 1}); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(output_tensor, node_def.name()); |
| } |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| |
| tensorflow::Status ConvertActivation( |
| Converter& ctx, const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); |
| nvinfer1::IActivationLayer* layer = ctx.network()->addActivation( |
| *const_cast<nvinfer1::ITensor*>(tensor), nvinfer1::ActivationType::kRELU); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| nvinfer1::ITensor* output_tensor = layer->getOutput(0); |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| |
| tensorflow::Status ConvertScale(Converter& ctx, |
| const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| if (inputs.size() != 2 || !inputs.at(0).is_tensor() || |
| !inputs.at(1).is_weights()) { |
| return tensorflow::errors::Unimplemented( |
| "ConvertScale only supports tensor<op>weight: ", node_def.name()); |
| } |
| |
| const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); |
| TRT_ShapedWeights weights = inputs.at(1).weights(); |
| if (ctx.isFP16()) { |
| weights = ConvertFP32ToFP16(ctx, inputs.at(1).weights()); |
| } |
| |
| TRT_ShapedWeights empty_weights(weights.type_); |
| TFAttrs attrs(node_def); |
| |
| const auto data_format = attrs.get<string>("data_format"); |
| int channel_index; |
| const auto dims = tensor->getDimensions(); |
| if (data_format == "NHWC") { |
    // 1). NHWC: channel is the last dimension (batch is implicit).
| channel_index = dims.nbDims - 1; // batch dimension is implicit here! |
| } else { |
    // 2). NCHW: channel is the third-from-last dimension (batch is implicit).
| channel_index = dims.nbDims - 3; // batch dimension is implicit here! |
| } |
| |
| nvinfer1::Permutation permutation; |
| for (int32_t i = 0; i < dims.nbDims; ++i) { |
| permutation.order[i] = i; |
| } |
| |
| if (channel_index >= 0) { |
| permutation.order[0] = channel_index; |
| permutation.order[channel_index] = 0; |
| } else { |
| return tensorflow::errors::Unimplemented( |
| "TFTRT::BiasAdd cannot apply on batch dimension, at ", node_def.name()); |
| } |
| |
| // TensorRT addScale requires the input to be of rank 3, so we may need to |
| // apply a transpose as well as a reshape. |
| if (channel_index != 0 || dims.nbDims != 3) { |
| nvinfer1::IShuffleLayer* shuffle_layer = |
| ctx.network()->addShuffle(*const_cast<nvinfer1::ITensor*>(tensor)); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(shuffle_layer, node_def.name()); |
| nvinfer1::Dims reshape_dims; |
| reshape_dims.nbDims = 3; |
| reshape_dims.d[0] = 0; // 0 means copy from the input |
| reshape_dims.d[1] = dims.nbDims >= 2 ? 0 : 1; // 0 means copy from the input |
| reshape_dims.d[2] = dims.nbDims >= 3 ? -1 : 1; // -1 means infer the rest |
| if (channel_index != 0) { |
| // TODO(jie): this check may be unnecessary; kept out of concern for TRT |
| // optimization. |
| shuffle_layer->setFirstTranspose(permutation); |
| } |
| shuffle_layer->setReshapeDimensions(reshape_dims); |
| tensor = shuffle_layer->getOutput(0); |
| } |
| |
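| // A single-element weight is broadcast uniformly across the tensor; |
| // otherwise one value is applied per channel. |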
| nvinfer1::ScaleMode mode = nvinfer1::ScaleMode::kCHANNEL; |
| if (weights.shape_.d[0] == 1) { |
| mode = nvinfer1::ScaleMode::kUNIFORM; |
| } |
| |
| nvinfer1::IScaleLayer* layer = |
| ctx.network()->addScale(*const_cast<nvinfer1::ITensor*>(tensor), mode, |
| weights, empty_weights, empty_weights); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| |
| nvinfer1::ITensor* output_tensor = layer->getOutput(0); |
| |
| // restore transpose & reshape |
| if (channel_index != 0 || dims.nbDims != 3) { |
| nvinfer1::IShuffleLayer* shuffle_layer = ctx.network()->addShuffle( |
| *const_cast<nvinfer1::ITensor*>(output_tensor)); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(shuffle_layer, node_def.name()); |
| nvinfer1::Dims reshape_dims = dims; |
| int tmp = reshape_dims.d[channel_index]; |
| reshape_dims.d[channel_index] = reshape_dims.d[0]; |
| reshape_dims.d[0] = tmp; |
| shuffle_layer->setReshapeDimensions(reshape_dims); |
| if (channel_index != 0) { |
| shuffle_layer->setSecondTranspose(permutation); |
| } |
| output_tensor = shuffle_layer->getOutput(0); |
| } |
| |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| |
| tensorflow::Status ConvertConst(Converter& ctx, |
| const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| const auto& weights_tensor = node_def.attr().at("value").tensor(); |
| |
| // Get trt type & shape |
| TFAttrs attrs(node_def); |
| const tensorflow::DataType dtype = attrs.get<tensorflow::DataType>("dtype"); |
| |
| // Create shaped weights as output |
| tensorflow::Tensor tensor; |
| if (!tensor.FromProto(weights_tensor)) { |
| return tensorflow::errors::Internal("Cannot parse weight tensor proto: ", |
| node_def.name()); |
| } |
| |
| TRT_ShapedWeights weights(dtype); |
| // TODO(aaroey): we should choose the array using dtype and shape. |
| if (!weights_tensor.float_val().empty()) { |
| VLOG(2) << "SCALAR!!!" << node_def.name(); |
| nvinfer1::Dims scalar_shape; |
| if (tensor.dims() > 0) { |
| VLOG(2) << "dimensions: " << tensor.dims(); |
| VLOG(2) << "size: " << weights_tensor.float_val_size(); |
| scalar_shape = GetTensorShape(tensor); |
| VLOG(2) << "details: "; |
| for (int i = 0; i < scalar_shape.nbDims; i++) |
| VLOG(2) << scalar_shape.d[i]; |
| if (GetShapeSize(scalar_shape) != weights_tensor.float_val_size() && |
| weights_tensor.float_val_size() != 1) { |
| LOG(ERROR) << "Broadcast on weights only supports kCHANNEL and" |
| << " kUNIFORM, at: " << node_def.name(); |
| string err_str("Broadcast method is not supported for '"); |
| StrAppend(&err_str, node_def.name(), "' of type ", node_def.op()); |
| return tensorflow::errors::InvalidArgument(err_str); |
| } |
| } else { |
| VLOG(2) << "Dimensions: " << tensor.dims(); |
| scalar_shape.nbDims = 1; |
| // no dimension provided. flatten it |
| scalar_shape.d[0] = weights_tensor.float_val_size(); |
| scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; |
| for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { |
| scalar_shape.d[i] = 0; |
| scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; |
| } |
| } |
| // TODO(aaroey): use GetShapeSize(). |
| size_t len_data = tensorflow::DataTypeSize(dtype); |
| for (int i = 0; i < scalar_shape.nbDims; i++) len_data *= scalar_shape.d[i]; |
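| // The weight store owns this buffer so that it outlives the network |
| // build; TRT references the weight memory rather than copying it. |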
| ctx.weight_store()->store_.push_back(std::vector<uint8_t>(len_data)); |
| void* dst = static_cast<void*>(&(ctx.weight_store()->store_.back()[0])); |
| if (weights_tensor.float_val_size() == 1) { |
| std::fill_n(static_cast<float*>(dst), GetShapeSize(scalar_shape), |
| *weights_tensor.float_val().begin()); |
| } else { |
| // TODO(aaroey): get rid of this copy: RepeatedField is always |
| // contiguous, so flattening into a local copy first should not be |
| // necessary. |
| std::vector<float> tensor_data(weights_tensor.float_val().begin(), |
| weights_tensor.float_val().end()); |
| memcpy(dst, tensor_data.data(), len_data); // store into weight store |
| } |
| VLOG(2) << "create shape details: "; |
| for (int i = 0; i < scalar_shape.nbDims; i++) VLOG(2) << scalar_shape.d[i]; |
| weights = TRT_ShapedWeights(dtype, dst, scalar_shape); |
| } else if (!weights_tensor.int_val().empty()) { |
| // TODO(aaroey): this is very similar to the above code for float, merge |
| // them. |
| VLOG(2) << "int!!!" << node_def.name(); |
| nvinfer1::Dims scalar_shape; |
| if (tensor.dims() > 0) { |
| VLOG(2) << "dimensions: " << tensor.dims(); |
| scalar_shape = GetTensorShape(tensor); |
| if (GetShapeSize(scalar_shape) != weights_tensor.int_val_size() && |
| weights_tensor.int_val_size() != 1) { |
| LOG(ERROR) << "Broadcast on weights only supports kCHANNEL and" |
| << " kUNIFORM, at: " << node_def.name(); |
| string err_str("Broadcast method is not supported for '"); |
| StrAppend(&err_str, node_def.name(), "' of type ", node_def.op()); |
| return tensorflow::errors::InvalidArgument(err_str); |
| } |
| } else { |
| VLOG(2) << "dimensions: " << tensor.dims(); |
| scalar_shape.nbDims = 1; |
| // no dimension provided. flatten it |
| scalar_shape.d[0] = weights_tensor.int_val_size(); |
| scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; |
| for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { |
| scalar_shape.d[i] = 0; |
| scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; |
| } |
| } |
| // Make sure the buffer is large enough for the values both in the TF |
| // dtype representation and as int32 (int_val always stores int32). |
| size_t len_data = tensorflow::DataTypeSize(dtype); |
| for (int i = 0; i < scalar_shape.nbDims; i++) len_data *= scalar_shape.d[i]; |
| size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32); |
| len_data = std::max(len_data, len_tensor); |
| ctx.weight_store()->store_.push_back(std::vector<uint8_t>(len_data)); |
| void* dst = static_cast<void*>(&(ctx.weight_store()->store_.back()[0])); |
| if (weights_tensor.int_val_size() == 1) { |
| std::fill_n(static_cast<int*>(dst), GetShapeSize(scalar_shape), |
| *weights_tensor.int_val().begin()); |
| } else { |
| // TODO(aaroey): get rid of this copy: RepeatedField is always |
| // contiguous, so flattening into a local copy first should not be |
| // necessary. |
| std::vector<int32> tensor_data(weights_tensor.int_val().begin(), |
| weights_tensor.int_val().end()); |
| memcpy(dst, tensor_data.data(), len_tensor); // store into weight store |
| } |
| weights = TRT_ShapedWeights(dtype, dst, scalar_shape); |
| } else if (!weights_tensor.tensor_content().empty()) { |
| // Obsolete method: after the optimization pass we do not see weights in |
| // this format. |
| // TODO(aaroey): why? |
| // FP16 conversion should technically be applied here as well. |
| VLOG(2) << "Converting tensor_content weights for " << node_def.name(); |
| const auto& content = weights_tensor.tensor_content(); |
| |
| weights = ctx.get_temp_weights(dtype, GetTensorShape(tensor)); |
| if (content.size() > 0) { |
| const int dtype_size = tensorflow::DataTypeSize(dtype); |
| CHECK_EQ(0, content.size() % dtype_size) |
| << "Tensor content size (" << content.size() |
| << ") is not a multiple of " << dtype_size; |
| port::CopyToArray( |
| content, static_cast<char*>(const_cast<void*>(weights.GetValues()))); |
| } |
| } else { |
| return tensorflow::errors::Unimplemented("Not supported constant type, at ", |
| node_def.name()); |
| } |
| // Pass the output |
| outputs->push_back(TRT_TensorOrWeights(weights)); |
| return tensorflow::Status::OK(); |
| } |
| |
| tensorflow::Status ConvertIdentity( |
| Converter& ctx, const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| outputs->push_back(inputs.at(0)); |
| return tensorflow::Status::OK(); |
| } |
| |
| tensorflow::Status ConvertBinary(Converter& ctx, |
| const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| if (inputs.size() != 2) { |
| return tensorflow::errors::FailedPrecondition( |
| "Binary ops require two inputs, at ", node_def.name()); |
| } |
| |
| // Constant folding should have been done by TensorFlow |
| |
| if (inputs.at(0).is_weights() && inputs.at(1).is_weights()) { |
| return tensorflow::errors::Unimplemented( |
| "Constant folding is handled by TensorFlow; binary op received " |
| "both inputs as constants, at: ", |
| node_def.name()); |
| } |
| |
| // Try to convert into a Scale layer first (for better performance). |
| // Since the Scale layer supports a restricted broadcast policy and set of |
| // op types, we allow it to fail and fall back to the elementwise op |
| // (BinaryTensorOpTensor). |
| Status status = tensorflow::Status::OK(); |
| if (inputs.at(0).is_tensor() && inputs.at(1).is_weights()) { |
| status = BinaryTensorOpWeight(ctx, node_def, inputs.at(0).tensor(), |
| inputs.at(1).weights(), false, outputs); |
| } else if (inputs.at(0).is_weights() && inputs.at(1).is_tensor()) { |
| status = BinaryTensorOpWeight(ctx, node_def, inputs.at(1).tensor(), |
| inputs.at(0).weights(), true, outputs); |
| #if NV_TENSORRT_MAJOR == 3 |
| } else { |
| #else |
| } |
| if ((inputs.at(0).is_tensor() && inputs.at(1).is_tensor()) || !status.ok()) { |
| #endif |
| status = BinaryTensorOpTensor(ctx, node_def, inputs.at(0), inputs.at(1), |
| outputs); |
| } |
| return status; |
| } |
| |
| tensorflow::Status ConvertUnary(Converter& ctx, |
| const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| static const std::unordered_map<string, nvinfer1::UnaryOperation> ops{ |
| {"Neg", nvinfer1::UnaryOperation::kNEG}, |
| {"Exp", nvinfer1::UnaryOperation::kEXP}, |
| {"Log", nvinfer1::UnaryOperation::kLOG}, |
| {"Sqrt", nvinfer1::UnaryOperation::kSQRT}, |
| {"Abs", nvinfer1::UnaryOperation::kABS}, |
| {"Reciprocal", nvinfer1::UnaryOperation::kRECIP}, |
| }; |
| |
| if (inputs.size() != 1) { |
| return tensorflow::errors::FailedPrecondition( |
| "Unary ops require a single tensor input, at ", node_def.name()); |
| } |
| |
| #if NV_TENSORRT_MAJOR == 3 |
| if (inputs.at(0).is_weights()) { |
| return tensorflow::errors::Unimplemented( |
| "Constant folding for unary op is not supported, at ", node_def.name()); |
| } |
| #endif |
| |
| // TODO(jie): check type |
| const nvinfer1::ITensor* tensor; |
| TFTRT_RETURN_ERROR_IF_FALSE( |
| PrepareTensorForShape(ctx, inputs.at(0), inputs.at(0).shape(), &tensor), |
| node_def.name()); |
| |
| nvinfer1::IUnaryLayer* layer; |
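| // TensorRT has no native Rsqrt; compose it as 1 / sqrt(x) from two unary |
| // layers. |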
| if (node_def.op() == "Rsqrt") { |
| layer = ctx.network()->addUnary(*const_cast<nvinfer1::ITensor*>(tensor), |
| nvinfer1::UnaryOperation::kSQRT); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| tensor = layer->getOutput(0); |
| layer = ctx.network()->addUnary(*const_cast<nvinfer1::ITensor*>(tensor), |
| nvinfer1::UnaryOperation::kRECIP); |
| } else if (ops.count(node_def.op()) != 0) { |
| layer = ctx.network()->addUnary(*const_cast<nvinfer1::ITensor*>(tensor), |
| ops.at(node_def.op())); |
| } else { |
| return tensorflow::errors::InvalidArgument( |
| "Unary op: ", node_def.op(), " not supported, at ", node_def.name()); |
| } |
| |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| nvinfer1::ITensor* output_tensor = layer->getOutput(0); |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| |
| #if NV_TENSORRT_MAJOR == 3 |
| tensorflow::Status ConvertReducePool( |
| Converter& ctx, const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| if (inputs.size() != 2 || !inputs.at(0).is_tensor() || |
| !inputs.at(1).is_weights()) { |
| return tensorflow::errors::InvalidArgument( |
| "Input expects tensor and weights, at ", node_def.name()); |
| } |
| |
| // TRT 3.x has no reduce layer; emulate Mean over spatial dimensions with |
| // an average-pooling layer whose kernel spans the reduced dimensions. |
| const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); |
| const auto dims = tensor->getDimensions(); |
| // Restore implicit batch dimension |
| const int nb_dims = dims.nbDims + 1; |
| |
| TRT_ShapedWeights index_list = inputs.at(1).weights(); |
| TFAttrs attrs(node_def); |
| auto index_type = attrs.get<tensorflow::DataType>("Tidx"); |
| |
| // Only expect to handle INT32 as attributes for now |
| if (index_type != tensorflow::DataType::DT_INT32) { |
| return tensorflow::errors::Unimplemented("Tidx supports only DT_INT32"); |
| } |
| const auto index_list_data = |
| static_cast<int*>(const_cast<void*>(index_list.GetValues())); |
| |
| if (nb_dims != 4) { |
| return tensorflow::errors::InvalidArgument( |
| "TRT only supports reduce on 4-dimensional tensors, at ", |
| node_def.name()); |
| } |
| if (index_list.count() > 2) { |
| return tensorflow::errors::InvalidArgument( |
| "TRT does not support reduce on more than 2 dimensions, at ", |
| node_def.name()); |
| } |
| |
| std::set<int> idx_set; |
| // We cannot reduce over the channel dimension directly; if requested, |
| // transpose it out of the way first (permuted_index records the swap). |
| int permuted_index = -1; |
| for (int i = 0; i < index_list.count(); i++) { |
| if (index_list_data[i] == 0) { |
| return tensorflow::errors::InvalidArgument("TRT cannot reduce at 0, at", |
| node_def.name()); |
| } |
| if (index_list_data[i] == 1) permuted_index = 1; |
| idx_set.emplace(index_list_data[i]); |
| } |
| |
| std::vector<int> permutation_order(nb_dims); |
| nvinfer1::DimsHW pool_kernel; |
| if (permuted_index == 1) { |
| for (int i = 2; i < nb_dims; i++) { |
| if (idx_set.count(i) == 0) { |
| permuted_index = i; |
| break; |
| } |
| } |
| for (int i = 0; i < nb_dims; i++) permutation_order[i] = i; |
| |
| permutation_order[permuted_index] = 1; |
| permutation_order[1] = permuted_index; |
| |
| // Apply permutation before extracting dimension for pool_kernel |
| tensor = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor), |
| permutation_order); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(tensor, node_def.name()); |
| } |
| |
| // Determine the pooling kernel: reduced spatial dimensions get their full |
| // extent, the others get 1. |
| pool_kernel.d[0] = (idx_set.count(2) || permuted_index == 2) ? dims.d[1] : 1; |
| pool_kernel.d[1] = (idx_set.count(3) || permuted_index == 3) ? dims.d[2] : 1; |
| |
| nvinfer1::ITensor* output_tensor; |
| |
| if (node_def.op() == "Mean") { |
| nvinfer1::IPoolingLayer* layer = |
| ctx.network()->addPooling(*const_cast<nvinfer1::ITensor*>(tensor), |
| nvinfer1::PoolingType::kAVERAGE, pool_kernel); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| output_tensor = layer->getOutput(0); |
| } else { |
| return tensorflow::errors::Unimplemented("Op not supported ", node_def.op(), |
| " , at ", node_def.name()); |
| } |
| if (permuted_index != -1) { |
| // Restore the original dimension order after pooling. |
| output_tensor = ctx.TransposeTensor( |
| const_cast<nvinfer1::ITensor*>(output_tensor), permutation_order); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(output_tensor, node_def.name()); |
| } |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| #elif NV_TENSORRT_MAJOR > 3 |
| tensorflow::Status ConvertReduce(Converter& ctx, |
| const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| if (inputs.size() != 2 || !inputs.at(0).is_tensor() || |
| !inputs.at(1).is_weights()) { |
| return tensorflow::errors::InvalidArgument( |
| "Input expects tensor and weights, at ", node_def.name()); |
| } |
| |
| const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); |
| TRT_ShapedWeights index_list = inputs.at(1).weights(); |
| |
| TFAttrs attrs(node_def); |
| auto index_type = attrs.get<tensorflow::DataType>("Tidx"); |
| |
| // Only expect to handle INT32 as attributes for now |
| if (index_type != tensorflow::DataType::DT_INT32) { |
| return tensorflow::errors::Unimplemented("Tidx supports only DT_INT32"); |
| } |
| |
| int axes = 0; |
| if (index_list.count() == 0) { |
| return tensorflow::errors::InvalidArgument( |
| "TRT does not support reduce on all (batch) dimensions, at ", |
| node_def.name()); |
| } else { |
| auto index_list_data = |
| static_cast<int*>(const_cast<void*>(index_list.GetValues())); |
| for (int i = 0; i < index_list.count(); i++) { |
| int axis = index_list_data[i]; |
| if (axis < 0) axis += tensor->getDimensions().nbDims + 1; |
| if (axis == 0) { |
| return tensorflow::errors::InvalidArgument( |
| "TRT cannot reduce on the batch dimension, at ", node_def.name()); |
| } |
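| // The TRT reduce axes bitmask excludes the implicit batch dimension, |
| // hence the shift by (axis - 1). |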
| axes |= (1 << (axis - 1)); |
| } |
| } |
| |
| nvinfer1::ReduceOperation reduce_operation; |
| if (node_def.op() == "Sum") { |
| reduce_operation = nvinfer1::ReduceOperation::kSUM; |
| } else if (node_def.op() == "Prod") { |
| reduce_operation = nvinfer1::ReduceOperation::kPROD; |
| } else if (node_def.op() == "Max") { |
| reduce_operation = nvinfer1::ReduceOperation::kMAX; |
| } else if (node_def.op() == "Min") { |
| reduce_operation = nvinfer1::ReduceOperation::kMIN; |
| } else if (node_def.op() == "Mean") { |
| reduce_operation = nvinfer1::ReduceOperation::kAVG; |
| } else { |
| return tensorflow::errors::Unimplemented("Op not supported ", node_def.op(), |
| " , at ", node_def.name()); |
| } |
| |
| const auto keep_dims = attrs.get<bool>("keep_dims"); |
| nvinfer1::ILayer* layer = |
| ctx.network()->addReduce(*const_cast<nvinfer1::ITensor*>(tensor), |
| reduce_operation, axes, keep_dims); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| |
| outputs->push_back(TRT_TensorOrWeights(layer->getOutput(0))); |
| return tensorflow::Status::OK(); |
| } |
| #endif |
| |
| tensorflow::Status ConvertPad(Converter& ctx, |
| const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| // TODO(aaroey): make a routine for this check and reuse it. |
| if (inputs.size() != 2 || !inputs.at(0).is_tensor() || |
| !inputs.at(1).is_weights()) { |
| return tensorflow::errors::InvalidArgument( |
| "Input expects tensor and weights, at ", node_def.name()); |
| } |
| |
| const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); |
| const auto dims = tensor->getDimensions(); |
| // Restore implicit batch dimension |
| const int nb_dims = dims.nbDims + 1; |
| |
| TRT_ShapedWeights pads = inputs.at(1).weights(); |
| |
| TFAttrs attrs(node_def); |
| // The padding index type is read as a TF DataType so that we can leverage |
| // EnumToDataType for the cast. |
| auto padding_type = attrs.get<tensorflow::DataType>("Tpaddings"); |
| // TODO(jie): handle data type conversion for TRT? |
| |
| if (pads.shape_.d[0] != nb_dims || pads.shape_.d[1] != 2) { |
| return tensorflow::errors::InvalidArgument( |
| "Pad only supports explicit padding on 4-dimensional tensors, at ", |
| node_def.name()); |
| } |
| |
| // Only expect to handle INT32 as attributes for now |
| if (padding_type != tensorflow::DataType::DT_INT32) { |
| return tensorflow::errors::Unimplemented( |
| "Tpaddings supports only DT_INT32"); |
| } |
| auto pad_data = static_cast<int*>(const_cast<void*>(pads.GetValues())); |
| |
| std::vector<int32_t> pad_index; |
| for (int i = 0; i < nb_dims; i++) { |
| if (pad_data[2 * i] != 0 || pad_data[2 * i + 1] != 0) { |
| pad_index.push_back(i); |
| } |
| } |
| |
| // No padding at all, we should exit |
| if (pad_index.size() == 0) { |
| outputs->push_back(inputs.at(0)); |
| return tensorflow::Status::OK(); |
| } |
| |
| // Padding is only supported on at most 2 axes (GIE-2579). |
| if (pad_index.size() > 2) { |
| return tensorflow::errors::InvalidArgument( |
| "Padding layer does not support padding on more than 2 axes"); |
| } |
| |
| // Padding on batch dimension is not supported |
| if (pad_index[0] == 0) { |
| return tensorflow::errors::InvalidArgument( |
| "Padding layer does not support padding on batch dimension"); |
| } |
| |
| // Not fully general: padding on both the channel (1) and width (3) |
| // dimensions at the same time is not handled. |
| // TODO(jie): implement pad as uff parser |
| if (pad_index.size() == 2 && pad_index[0] == 1 && pad_index[1] == 3) { |
| return tensorflow::errors::Unimplemented( |
| "Padding layer does not support padding on dimension 1 and 3 yet"); |
| } |
| |
| bool legit_pad = true; |
| nvinfer1::DimsHW pre_padding(0, 0); |
| nvinfer1::DimsHW post_padding(0, 0); |
| |
| std::vector<int32_t> permuted_pad_index(pad_index); |
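| // Padding on the channel dimension: swap C and W ({0, 3, 2, 1}) so that |
| // the padding layer (which only pads H/W) can be used, then transpose |
| // back afterwards. |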
| if (pad_index[0] == 1) { |
| legit_pad = false; |
| tensor = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor), |
| {0, 3, 2, 1}); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(tensor, node_def.name()); |
| permuted_pad_index[0] = 3; |
| } |
| |
| for (size_t i = 0; i < pad_index.size(); i++) { |
| int index = pad_index[i]; |
| if (permuted_pad_index[i] == 2) { |
| pre_padding.h() = pad_data[index * 2]; |
| post_padding.h() = pad_data[index * 2 + 1]; |
| } else if (permuted_pad_index[i] == 3) { |
| pre_padding.w() = pad_data[index * 2]; |
| post_padding.w() = pad_data[index * 2 + 1]; |
| } |
| } |
| |
| nvinfer1::IPaddingLayer* layer = ctx.network()->addPadding( |
| *const_cast<nvinfer1::ITensor*>(tensor), pre_padding, post_padding); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| nvinfer1::ITensor* output_tensor = layer->getOutput(0); |
| |
| if (!legit_pad) { |
| output_tensor = ctx.TransposeTensor( |
| const_cast<nvinfer1::ITensor*>(output_tensor), {0, 3, 2, 1}); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(output_tensor, node_def.name()); |
| } |
| |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| |
| tensorflow::Status ConvertConcat(Converter& ctx, |
| const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| // not including the last input (axis) here |
| int input_size = static_cast<int>(inputs.size()) - 1; |
| |
| if (!inputs.at(0).is_tensor()) { |
| return tensorflow::errors::InvalidArgument( |
| "Concat in TRT supports only tensor inputs, at ", node_def.name()); |
| } |
| |
| // We are retrieving the axis |
| TRT_ShapedWeights axis = inputs.at(input_size).weights(); |
| |
| TFAttrs attrs(node_def); |
| auto index_type = attrs.get<tensorflow::DataType>("Tidx"); |
| |
| // TODO(jie): handle data type |
| // Only expect to handle INT32 as index attributes for now |
| if (index_type != tensorflow::DataType::DT_INT32) |
| return tensorflow::errors::Unimplemented("Tidx supports only DT_INT32, at ", |
| node_def.name()); |
| |
| int index = *(static_cast<int*>(const_cast<void*>(axis.GetValues()))); |
| |
| // TODO(jie): early termination with no-op (attr_size==1) |
| |
| auto dim = inputs.at(0).tensor()->getDimensions(); |
| // dimension check |
| if (index > dim.nbDims + 1) { |
| return tensorflow::errors::InvalidArgument( |
| "Concatenate on axis out of dimension range, at ", node_def.name()); |
| } |
| if (index == 0) { |
| return tensorflow::errors::InvalidArgument( |
| "Concatenate on batch dimension not supported, at ", node_def.name()); |
| } |
| if (index < 0) { |
| index = dim.nbDims + index + 1; |
| } |
| |
| #if NV_TENSORRT_MAJOR == 3 |
| // In case we need a permutation. |
| std::vector<int> permutation_order(dim.nbDims + 1); |
| |
| for (int i = 0; i < dim.nbDims + 1; i++) permutation_order[i] = i; |
| |
| if (index != 1) { |
| permutation_order[1] = index; |
| permutation_order[index] = 1; |
| } |
| #endif |
| |
| std::vector<nvinfer1::ITensor const*> inputs_vec; |
| // Shape check: all input tensors must have the same shape. |
| // Start from 0 since we may also be doing a transpose here. |
| for (int i = 0; i < input_size; i++) { |
| auto tensor_i = inputs.at(i).tensor(); |
| auto dim_i = tensor_i->getDimensions(); |
| if (dim_i.nbDims != dim.nbDims) { |
| return tensorflow::errors::InvalidArgument( |
| "Concatenate receives inputs with inconsistent dimensions, at ", |
| node_def.name()); |
| } |
| for (int j = 0; j < dim.nbDims; j++) { |
| // check dimension consistency on non-concatenate axis |
| if (j != index - 1 && dim_i.d[j] != dim.d[j]) { |
| return tensorflow::errors::InvalidArgument( |
| "Concatenate receives inputs with inconsistent shape, at ", |
| node_def.name()); |
| } |
| } |
| |
| #if NV_TENSORRT_MAJOR == 3 |
| // TRT3 does concatenation only on channel! |
| if (index != 1) { |
| tensor_i = ctx.TransposeTensor(const_cast<nvinfer1::ITensor*>(tensor_i), |
| permutation_order); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(tensor_i, node_def.name()); |
| } |
| #endif |
| inputs_vec.push_back(tensor_i); |
| } |
| |
| nvinfer1::IConcatenationLayer* layer = ctx.network()->addConcatenation( |
| const_cast<nvinfer1::ITensor* const*>(inputs_vec.data()), |
| inputs_vec.size()); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| #if NV_TENSORRT_MAJOR > 3 |
| layer->setAxis(index - 1); |
| #endif |
| nvinfer1::ITensor* output_tensor = layer->getOutput(0); |
| |
| #if NV_TENSORRT_MAJOR == 3 |
| if (index != 1) { |
| output_tensor = ctx.TransposeTensor(output_tensor, permutation_order); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(output_tensor, node_def.name()); |
| } |
| #endif |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| |
| tensorflow::Status ConvertFusedBatchNorm( |
| Converter& ctx, const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| TFAttrs attrs(node_def); |
| float epsilon = attrs.get<float>("epsilon"); |
| auto data_format = attrs.get<string>("data_format"); |
| if (data_format != "NCHW") { |
| return tensorflow::errors::Unimplemented( |
| "only data_format=NCHW is supported, at " + node_def.name()); |
| } |
| bool is_training = attrs.get<bool>("is_training"); |
| if (is_training) { |
| return tensorflow::errors::Unimplemented( |
| "only is_training=false is supported, at " + node_def.name()); |
| } |
| nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); |
| |
| // Check parameter types |
| auto parameter_type = inputs.at(1).weights().type_; |
| if ((parameter_type != tensorflow::DataType::DT_FLOAT) && |
| (parameter_type != tensorflow::DataType::DT_HALF)) { |
| return tensorflow::errors::Unimplemented( |
| "only float32 or float16 weight data type is supported, for node " + |
| node_def.name() + " got " + tensorflow::DataTypeString(parameter_type)); |
| } |
| for (int i = 1; i < 5; i++) { |
| if (inputs.at(i).weights().type_ != parameter_type) { |
| return tensorflow::errors::Unimplemented( |
| "Inconsistent parameter type for batchnorm is not supported, at: " + |
| node_def.name()); |
| } |
| } |
| |
| TRT_ShapedWeights dummy_power_weights(parameter_type); |
| size_t nweight = 0; |
| for (int i = 1; i < 5; i++) { |
| nweight = |
| std::max(nweight, static_cast<size_t>(inputs.at(i).weights().count())); |
| } |
| TRT_ShapedWeights* ptr_shape_weights = nullptr; |
| for (int i = 1; i < 5; i++) { |
| if (inputs.at(i).weights().count() == nweight) { |
| ptr_shape_weights = |
| const_cast<TRT_ShapedWeights*>(&(inputs.at(i).weights())); |
| } else if (inputs.at(i).weights().count() != 1) { |
| return tensorflow::errors::InvalidArgument( |
| "Inconsistent batchnorm parameter count, at: " + node_def.name()); |
| } |
| } |
| // We could technically have two weights with different shapes; that would |
| // require two addScale ops and is arguably less performant. |
| TRT_ShapedWeights combined_scale_weights = |
| ctx.get_temp_weights_like(*ptr_shape_weights); |
| TRT_ShapedWeights combined_offset_weights = |
| ctx.get_temp_weights_like(*ptr_shape_weights); |
| |
| const Eigen::half* cast_vals_array[4]; |
| const float* vals_array[4]; |
| for (int j = 0; j < 4; j++) { |
| cast_vals_array[j] = |
| static_cast<Eigen::half const*>(inputs.at(j + 1).weights().GetValues()); |
| vals_array[j] = |
| static_cast<float const*>(inputs.at(j + 1).weights().GetValues()); |
| } |
| Eigen::half* cast_combined_scale_vals = const_cast<Eigen::half*>( |
| static_cast<Eigen::half const*>(combined_scale_weights.GetValues())); |
| Eigen::half* cast_combined_offset_vals = const_cast<Eigen::half*>( |
| static_cast<Eigen::half const*>(combined_offset_weights.GetValues())); |
| float* combined_scale_vals = const_cast<float*>( |
| static_cast<float const*>(combined_scale_weights.GetValues())); |
| float* combined_offset_vals = const_cast<float*>( |
| static_cast<float const*>(combined_offset_weights.GetValues())); |
| |
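| // Fold the four batchnorm parameters into a single scale and offset: |
| // y = combined_scale * x + combined_offset, where |
| // combined_scale = scale / sqrt(variance + epsilon) |
| // combined_offset = offset - mean * combined_scale |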
| for (size_t i = 0; i < nweight; ++i) { |
| float batchnorm_data[4]; |
| for (int j = 0; j < 4; j++) { |
| if (inputs.at(j + 1).weights().count() != 1) { |
| if (parameter_type == tensorflow::DT_FLOAT) { |
| batchnorm_data[j] = vals_array[j][i]; |
| } else if (parameter_type == tensorflow::DT_HALF) { |
| batchnorm_data[j] = |
| Eigen::half_impl::half_to_float(cast_vals_array[j][i]); |
| } |
| } else { |
| if (parameter_type == tensorflow::DT_FLOAT) { |
| batchnorm_data[j] = vals_array[j][0]; |
| } else if (parameter_type == tensorflow::DT_HALF) { |
| batchnorm_data[j] = |
| Eigen::half_impl::half_to_float(cast_vals_array[j][0]); |
| } |
| } |
| } |
| float scale = batchnorm_data[0]; |
| float offset = batchnorm_data[1]; |
| float mean = batchnorm_data[2]; |
| float variance = batchnorm_data[3]; |
| float combined_scale_val = scale / sqrtf(variance + epsilon); |
| float combined_offset_val = offset - mean * combined_scale_val; |
| if (parameter_type == tensorflow::DT_FLOAT) { |
| combined_scale_vals[i] = combined_scale_val; |
| combined_offset_vals[i] = combined_offset_val; |
| } else if (parameter_type == tensorflow::DT_HALF) { |
| cast_combined_scale_vals[i] = Eigen::half(combined_scale_val); |
| cast_combined_offset_vals[i] = Eigen::half(combined_offset_val); |
| } |
| } |
| |
| nvinfer1::ScaleMode mode = nweight == 1 ? nvinfer1::ScaleMode::kUNIFORM |
| : nvinfer1::ScaleMode::kCHANNEL; |
| nvinfer1::IScaleLayer* layer = |
| ctx.network()->addScale(*const_cast<nvinfer1::ITensor*>(tensor), mode, |
| combined_offset_weights.GetWeightsForTRT(), |
| combined_scale_weights.GetWeightsForTRT(), |
| dummy_power_weights.GetWeightsForTRT()); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| nvinfer1::ITensor* output_tensor = layer->getOutput(0); |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| |
| #if NV_TENSORRT_MAJOR > 3 |
| tensorflow::Status ConvertMatMulHelper( |
| Converter& ctx, TRT_TensorOrWeights tensor_input, |
| TRT_ShapedWeights weights_raw, bool transpose_weight, string node_name, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| nvinfer1::ITensor* output_tensor; |
| if (!tensor_input.is_tensor()) { |
| return tensorflow::errors::InvalidArgument("Input 0 expects tensor"); |
| } |
| const nvinfer1::ITensor* tensor = tensor_input.tensor(); |
| |
| TRT_ShapedWeights weights(weights_raw.type_); |
| if (transpose_weight) { |
| weights = weights_raw; |
| } else { |
| TRT_ShapedWeights weights_ck = weights_raw; |
| weights = ctx.get_temp_weights_like(weights_ck); |
| ReorderCKtoKC(weights_raw, &weights); |
| } |
| TRT_ShapedWeights biases(weights.type_); |
| |
| int noutput = weights.shape_.d[0]; |
| |
| auto input_dim = tensor->getDimensions(); |
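| // Pad the input to rank 3 (CHW), as required by IFullyConnectedLayer. |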
| while (input_dim.nbDims != 3) { |
| input_dim.d[input_dim.nbDims++] = 1; |
| } |
| TFTRT_RETURN_ERROR_IF_FALSE( |
| PrepareTensorForShape(ctx, tensor_input, input_dim, &tensor), node_name); |
| |
| nvinfer1::IFullyConnectedLayer* layer = ctx.network()->addFullyConnected( |
| *const_cast<nvinfer1::ITensor*>(tensor), noutput, weights, biases); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_name); |
| output_tensor = layer->getOutput(0); |
| |
| const nvinfer1::ITensor* temp_tensor; |
| auto output_dim = output_tensor->getDimensions(); |
| output_dim.nbDims = 1; |
| TFTRT_RETURN_ERROR_IF_FALSE( |
| PrepareTensorForShape(ctx, TRT_TensorOrWeights(output_tensor), output_dim, |
| &temp_tensor), |
| node_name); |
| output_tensor = const_cast<nvinfer1::ITensor*>(temp_tensor); |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| |
| // inputs are both two dimensional (tensorflow::ops::MatMul) |
| tensorflow::Status ConvertMatMul(Converter& ctx, |
| const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| if (!inputs.at(0).is_tensor()) { |
| return tensorflow::errors::InvalidArgument("Input 0 expects tensor, at " + |
| node_def.name()); |
| } |
| |
| TFAttrs attrs(node_def); |
| // TODO(jie): INT32 should be converted? |
| tensorflow::DataType tf_dtype = attrs.get<tensorflow::DataType>("T"); |
| if (tf_dtype != tensorflow::DataType::DT_FLOAT && |
| tf_dtype != tensorflow::DataType::DT_HALF) { |
| return tensorflow::errors::Unimplemented( |
| "data type is not supported, for node " + node_def.name() + " got " + |
| tensorflow::DataTypeString(tf_dtype)); |
| } |
| bool transpose_a = attrs.get<bool>("transpose_a"); |
| bool transpose_b = attrs.get<bool>("transpose_b"); |
| |
| // FullyConnected: |
| if (transpose_a) { |
| return tensorflow::errors::Internal( |
| "Transpose_a is not supported for TensorRT FullyConnected (op: " + |
| node_def.op() + "), at: " + node_def.name()); |
| } |
| if (inputs.at(1).is_tensor()) { |
| return tensorflow::errors::Internal( |
| "Operand 1 must be constant for TensorRT FullyConnected (op: " + |
| node_def.op() + "), at: " + node_def.name()); |
| } |
| return ConvertMatMulHelper(ctx, inputs.at(0), inputs.at(1).weights(), |
| transpose_b, node_def.name(), outputs); |
| } |
| |
| tensorflow::Status ConvertBatchMatMul( |
| Converter& ctx, const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| TFAttrs attrs(node_def); |
| |
| // TODO(jie): INT32 should be converted? |
| tensorflow::DataType tf_dtype = attrs.get<tensorflow::DataType>("T"); |
| if (tf_dtype != tensorflow::DataType::DT_FLOAT && |
| tf_dtype != tensorflow::DataType::DT_HALF) { |
| return tensorflow::errors::Unimplemented( |
| "data type is not supported, for node " + node_def.name() + " got " + |
| tensorflow::DataTypeString(tf_dtype)); |
| } |
| |
| bool transpose_a = attrs.get<bool>("adj_x"); |
| bool transpose_b = attrs.get<bool>("adj_y"); |
| |
| auto dims = inputs.at(0).shape(); |
| if (dims.nbDims == 1) { // NC * CK is only supported through fully connected |
| if (transpose_a == false && inputs.at(0).is_tensor() && |
| inputs.at(1).is_weights()) { |
| return ConvertMatMulHelper(ctx, inputs.at(0), inputs.at(1).weights(), |
| transpose_b, node_def.name(), outputs); |
| } else { |
| return tensorflow::errors::InvalidArgument( |
| "Invalid configuration for MatMul, at: " + node_def.name()); |
| } |
| } |
| |
| const nvinfer1::ITensor* tensor_l; |
| const nvinfer1::ITensor* tensor_r; |
| auto dims_l = inputs.at(0).shape(); |
| auto dims_r = inputs.at(1).shape(); |
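| // A constant operand carries an explicit batch dimension; it must be 1 |
| // (broadcast across the batch) and is squeezed out below since the batch |
| // dimension is implicit in TRT. |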
| if (inputs.at(0).is_weights()) { |
| if (inputs.at(0).shape().d[0] != 1) { |
| return tensorflow::errors::InvalidArgument( |
| "Input 0 as weight assumes broadcast across batch for MatMul, at: " + |
| node_def.name()); |
| } else { |
| for (int i = 0; i < dims_l.nbDims - 1; i++) { |
| dims_l.d[i] = dims_l.d[i + 1]; |
| } |
| dims_l.nbDims--; |
| } |
| } |
| if (inputs.at(1).is_weights()) { |
| if (inputs.at(1).shape().d[0] != 1) { |
| return tensorflow::errors::InvalidArgument( |
| "Input 1 as weight assumes broadcast across batch for MatMul, at: " + |
| node_def.name()); |
| } else { |
| for (int i = 0; i < dims_r.nbDims - 1; i++) { |
| dims_r.d[i] = dims_r.d[i + 1]; |
| } |
| dims_r.nbDims--; |
| } |
| } |
| |
| TFTRT_RETURN_ERROR_IF_FALSE( |
| PrepareTensorForShape(ctx, inputs.at(0), dims_l, &tensor_l), |
| node_def.name()); |
| TFTRT_RETURN_ERROR_IF_FALSE( |
| PrepareTensorForShape(ctx, inputs.at(1), dims_r, &tensor_r), |
| node_def.name()); |
| |
| nvinfer1::IMatrixMultiplyLayer* layer = ctx.network()->addMatrixMultiply( |
| *const_cast<nvinfer1::ITensor*>(tensor_l), transpose_a, |
| *const_cast<nvinfer1::ITensor*>(tensor_r), transpose_b); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| nvinfer1::ITensor* output_tensor = layer->getOutput(0); |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| #endif |
| |
| #if NV_TENSORRT_MAJOR > 3 |
| tensorflow::Status ConvertSoftmax( |
| Converter& ctx, const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); |
| |
| int nbDims = tensor->getDimensions().nbDims; |
| if (nbDims == 0) { |
| return tensorflow::errors::InvalidArgument( |
| "TensorRT Softmax cannot apply on batch dimension, at " + |
| node_def.name()); |
| } |
| nvinfer1::ISoftMaxLayer* layer = |
| ctx.network()->addSoftMax(*const_cast<nvinfer1::ITensor*>(tensor)); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| // TensorFlow's SoftMax applies softmax along the last dimension. |
| layer->setAxes(1 << (nbDims - 1)); |
| |
| nvinfer1::ITensor* output_tensor = layer->getOutput(0); |
| outputs->push_back(TRT_TensorOrWeights(output_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| #endif |
| |
| #if NV_TENSORRT_MAJOR > 3 |
| tensorflow::Status ConvertTopK(Converter& ctx, |
| const tensorflow::NodeDef& node_def, |
| const std::vector<TRT_TensorOrWeights>& inputs, |
| std::vector<TRT_TensorOrWeights>* outputs) { |
| const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); |
| |
| int nbDims = tensor->getDimensions().nbDims; |
| if (nbDims == 0) { |
| return tensorflow::errors::InvalidArgument( |
| "TensorRT TopK cannot apply on batch dimension, at " + node_def.name()); |
| } |
| |
| TRT_ShapedWeights k_w = inputs.at(1).weights(); |
| int k = *(static_cast<int*>(const_cast<void*>(k_w.GetValues()))); |
| |
| nvinfer1::TopKOperation op; |
| uint32_t reducedAxes = 0; |
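| // TopKV2 operates along the last dimension; the axes bitmask excludes the |
| // implicit batch dimension. |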
| if (node_def.op() == "TopKV2") { |
| op = nvinfer1::TopKOperation::kMAX; |
| reducedAxes |= 1 << (nbDims - 1); |
| } else { |
| return tensorflow::errors::Unimplemented( |
| "Operation: " + node_def.op() + |
| " not implemented, at: " + node_def.name()); |
| } |
| |
| nvinfer1::ITopKLayer* layer = ctx.network()->addTopK( |
| *const_cast<nvinfer1::ITensor*>(tensor), op, k, reducedAxes); |
| TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name()); |
| |
| nvinfer1::ITensor* output_value_tensor = layer->getOutput(0); |
| nvinfer1::ITensor* output_indices_tensor = layer->getOutput(1); |
| outputs->push_back(TRT_TensorOrWeights(output_value_tensor)); |
| outputs->push_back(TRT_TensorOrWeights(output_indices_tensor)); |
| return tensorflow::Status::OK(); |
| } |
| #endif |
| |
| void Converter::register_op_converters() { |
| // vgg_16 slim implementation |
| op_registry_["Conv2D"] = ConvertConv2D; |
| op_registry_["DepthwiseConv2dNative"] = ConvertConv2DDepthwise; |
| op_registry_["Relu"] = ConvertActivation; |
| op_registry_["MaxPool"] = ConvertPool; |
| op_registry_["AvgPool"] = ConvertPool; |
| op_registry_["BiasAdd"] = ConvertScale; |
| op_registry_["Const"] = ConvertConst; |
| // TODO(ben,jie): this is a temp hack. |
| op_registry_["Identity"] = ConvertIdentity; // Identity should be removed |
| op_registry_["Snapshot"] = ConvertIdentity; // Snapshot should be removed |
| |
| // resnet_50_v1 slim implementation |
| op_registry_["Add"] = ConvertBinary; |
| op_registry_["Mul"] = ConvertBinary; |
| op_registry_["Sub"] = ConvertBinary; |
| op_registry_["Pad"] = ConvertPad; |
| |
| op_registry_["ConcatV2"] = ConvertConcat; |
| op_registry_["FusedBatchNorm"] = ConvertFusedBatchNorm; |
| op_registry_["FusedBatchNormV2"] = ConvertFusedBatchNorm; |
| |
| op_registry_["Div"] = ConvertBinary; |
| op_registry_["RealDiv"] = ConvertBinary; |
| |
| op_registry_["Rsqrt"] = ConvertUnary; |
| op_registry_["Reciprocal"] = ConvertUnary; |
| op_registry_["Exp"] = ConvertUnary; |
| op_registry_["Log"] = ConvertUnary; |
| op_registry_["Sqrt"] = ConvertUnary; |
| op_registry_["Abs"] = ConvertUnary; |
| op_registry_["Neg"] = ConvertUnary; |
| #if NV_TENSORRT_MAJOR == 3 |
| op_registry_["Mean"] = ConvertReducePool; |
| #endif |
| #if NV_TENSORRT_MAJOR > 3 |
| op_registry_["Sum"] = ConvertReduce; |
| op_registry_["Prod"] = ConvertReduce; |
| op_registry_["Max"] = ConvertReduce; |
| op_registry_["Min"] = ConvertReduce; |
| op_registry_["Mean"] = ConvertReduce; |
| op_registry_["Maximum"] = ConvertBinary; |
| op_registry_["Minimum"] = ConvertBinary; |
| op_registry_["Softmax"] = ConvertSoftmax; |
| op_registry_["MatMul"] = ConvertMatMul; |
| op_registry_["BatchMatMul"] = ConvertBatchMatMul; |
| op_registry_["TopKV2"] = ConvertTopK; |
| #endif |
| |
| plugin_converter_ = ConvertPlugin; |
| } |
| |
| } // namespace |
| |
| tensorflow::Status ConvertGraphDefToEngine( |
| const tensorflow::GraphDef& gdef, int precision_mode, int max_batch_size, |
| size_t max_workspace_size_bytes, |
| const std::vector<tensorflow::PartialTensorShape>& input_shapes, |
| Logger* logger, nvinfer1::IGpuAllocator* allocator, |
| TRTInt8Calibrator* calibrator, |
| TrtUniquePtrType<nvinfer1::ICudaEngine>* engine, |
| bool* convert_successfully) { |
| engine->reset(); |
| if (convert_successfully) *convert_successfully = false; |
| |
| // Create the builder. |
| TrtUniquePtrType<nvinfer1::IBuilder> builder( |
| nvinfer1::createInferBuilder(*logger)); |
| builder->setMaxBatchSize(max_batch_size); |
| // TODO(aaroey): use the allocator to allocate the TRT workspace. |
| builder->setMaxWorkspaceSize(max_workspace_size_bytes); |
| #if NV_TENSORRT_MAJOR > 3 |
| builder->setGpuAllocator(allocator); |
| #endif |
| if (precision_mode == FP16MODE) { |
| builder->setHalf2Mode(true); |
| } else if (precision_mode == INT8MODE) { |
| builder->setInt8Mode(true); |
| builder->setInt8Calibrator(calibrator); |
| } |
| |
| // Create the network. |
| auto trt_network = |
| TrtUniquePtrType<nvinfer1::INetworkDefinition>(builder->createNetwork()); |
| if (!trt_network) { |
| return tensorflow::errors::Internal( |
| "Failed to create TensorRT network object"); |
| } |
| auto ws = std::unique_ptr<TRTWeightStore>(new TRTWeightStore()); |
| |
| // Build the network |
| VLOG(1) << "Starting engine conversion "; |
| Converter converter(trt_network.get(), ws.get(), precision_mode == FP16MODE); |
| std::vector<std::pair<string, string>> output_tensors; |
| // Graph nodes are already topologically sorted during construction |
| for (const auto& node_def : gdef.node()) { |
| string node_name = node_def.name(); |
| VLOG(2) << "Converting op name=" << node_name << ", op=" << node_def.op(); |
| if (tensorflow::str_util::StartsWith(node_name, kInputPHName) && |
| (node_def.op() == "Placeholder")) { |
| int32 slot_number = -1; |
| if (!tensorflow::strings::safe_strto32( |
| node_name.c_str() + strlen(kInputPHName), &slot_number)) { |
| return tensorflow::errors::InvalidArgument( |
| "Failed to parse slot number from ", node_name); |
| } |
| nvinfer1::DataType dtype; |
| auto shape = input_shapes.at(slot_number); |
| auto status = ValidateInputProperties( |
| shape, node_def.attr().at("dtype").type(), &dtype); |
| if (!status.ok()) { |
| const string error_message = |
| StrCat("Validation failed for ", node_name, " and input slot ", |
| slot_number, ": ", status.error_message()); |
| LOG(WARNING) << error_message; |
| return Status(status.code(), error_message); |
| } |
| |
| #if NV_TENSORRT_MAJOR == 3 |
| nvinfer1::DimsCHW input_dim; |
| #elif NV_TENSORRT_MAJOR > 3 |
| nvinfer1::Dims input_dim; |
| #endif |
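| // Strip the batch dimension: in implicit batch mode the TRT network input |
| // is defined without it. |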
| for (int i = 1; i < shape.dims(); i++) { |
| input_dim.d[i - 1] = shape.dim_size(i); |
| } |
| input_dim.nbDims = shape.dims() - 1; |
| nvinfer1::ITensor* input_tensor = |
| converter.network()->addInput(node_name.c_str(), dtype, input_dim); |
| if (!input_tensor) { |
| return tensorflow::errors::InvalidArgument( |
| "Failed to create Input layer tensor ", node_name, |
| " rank=", shape.dims() - 1); |
| } |
| VLOG(2) << "Adding engine input tensor " << node_name << " with shape " |
| << DebugString(input_dim); |
| if (!converter.insert_input_tensor(node_name, input_tensor)) { |
| return tensorflow::errors::AlreadyExists( |
| "Output tensor already exists for op: " + node_name); |
| } |
| } else if (tensorflow::str_util::StartsWith(node_name, kOutputPHName) && |
| (node_def.op() == "Identity")) { |
| int32 slot_number = -1; |
| if (!tensorflow::strings::safe_strto32( |
| node_name.c_str() + strlen(kOutputPHName), &slot_number)) { |
| return tensorflow::errors::InvalidArgument( |
| "Failed to parse slot number from ", node_name); |
| } |
| if (output_tensors.size() <= static_cast<size_t>(slot_number)) { |
| output_tensors.resize(slot_number + 1); |
| } |
| output_tensors.at(slot_number) = {node_def.input(0), node_name}; |
| } else { |
| VLOG(2) << "Converting node: " << node_def.name() << " , " |
| << node_def.op(); |
| TF_RETURN_IF_ERROR(converter.convert_node(node_def)); |
| } |
| } |
| for (const auto& output : output_tensors) { |
| auto tensor_or_weights = converter.get_tensor(output.first); |
| if (!tensor_or_weights.is_tensor()) { |
| return tensorflow::errors::InvalidArgument( |
| "Output node '" + output.first + "' is weights, not a tensor"); |
| } |
| nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); |
| if (!tensor) { |
| return tensorflow::errors::NotFound("Output tensor not found: " + |
| output.first); |
| } |
| tensor->setName(output.second.c_str()); |
| VLOG(1) << "Marking output tensor " << output.first << ", as output tensor " |
| << output.second; |
| |
| converter.network()->markOutput(*tensor); |
| } |
| if (convert_successfully) *convert_successfully = true; |
| |
| // Build the engine. |
| VLOG(1) << "Starting engine creation"; |
| engine->reset(builder->buildCudaEngine(*converter.network())); |
| if (engine->get() == nullptr) { |
| return tensorflow::errors::Internal("Failed to build TensorRT engine"); |
| } |
| VLOG(1) << "Finished conversion"; |
| return tensorflow::Status::OK(); |
| } |
| |
| tensorflow::Status ConvertSegmentToGraphDef( |
| const tensorflow::Graph* graph, |
| const tensorflow::grappler::GraphProperties& graph_properties, |
| const std::set<string>& subgraph_node_names, |
| const std::vector<int>& subgraph_node_ids, // In topological order |
| std::vector<EngineConnection>* connections, |
| tensorflow::GraphDef* segment_def, string* common_scope) { |
| std::set<string> marker_nodes; |
| // Update connection shapes/data types and add corresponding input/output |
| // nodes in the segment graphdef. |
| for (size_t i = 0; i < connections->size(); ++i) { |
| auto& connection = connections->at(i); |
| if (connection.is_control_edge()) continue; |
| auto outside_node = graph->FindNodeId(connection.outside_id); |
| if (!outside_node) { |
| // This should never happen, unless the original graph is problematic. |
| return tensorflow::errors::NotFound( |
| "Cannot find node with id ", connection.outside_id, " in the graph."); |
| } |
| // Updates the shape and data types of input/output connections. |
| tensorflow::DataType dtype; |
| tensorflow::PartialTensorShape partial_shape; |
| if (connection.is_input_edge) { |
| GetInputProperties(graph_properties, |
| graph->FindNodeId(connection.outside_id), |
| connection.outside_port, &partial_shape, &dtype); |
| connection.outside_shape = partial_shape; |
| } else { |
| GetOutputProperties(graph_properties, |
| graph->FindNodeId(connection.outside_id), |
| connection.outside_port, &partial_shape, &dtype); |
| connection.inside_shape = partial_shape; |
| } |
| connection.connection_type = dtype; |
| |
| // Add dummy input/output nodes to the segment graphdef. |
| if (connection.is_input_edge) { |
| const string node_name = StrCat(kInputPHName, connection.port_number); |
| if (marker_nodes.count(node_name)) { |
| VLOG(1) << "Reusing input " << node_name << " for the edge " |
| << connection.outside_node_name << ":" |
| << connection.outside_port << " -> " |
| << connection.inside_node_name << ":" << connection.inside_port; |
| continue; |
| } |
| marker_nodes.insert(node_name); |
| auto seg_node = segment_def->add_node(); |
| tensorflow::NodeDefBuilder builder(node_name, "Placeholder"); |
| auto status = builder.Attr("shape", partial_shape) |
| .Attr("dtype", dtype) |
| .Finalize(seg_node); |
| VLOG(1) << "Constructing input " << node_name << " for the edge " |
| << connection.outside_node_name << ":" << connection.outside_port |
| << " -> " << connection.inside_node_name << ":" |
| << connection.inside_port; |
| } else { |
| const string node_name = StrCat(kOutputPHName, connection.port_number); |
| if (marker_nodes.count(node_name)) { |
| VLOG(1) << "Reusing output " << node_name << " for the edge " |
| << connection.inside_node_name << ":" << connection.inside_port |
| << " -> " << connection.outside_node_name << ":" |
| << connection.outside_port; |
| continue; |
| } |
| marker_nodes.insert(node_name); |
| auto seg_node = segment_def->add_node(); |
| tensorflow::NodeDefBuilder builder(node_name, "Identity"); |
| auto status = builder.Input(connection.inside_node_name, 0, dtype) |
| .Finalize(seg_node); |
| VLOG(1) << "Constructing output " << node_name << " for the edge " |
| << connection.inside_node_name << ":" << connection.inside_port |
| << " -> " << connection.outside_node_name << ":" |
| << connection.outside_port; |
| } |
| } // for each connection. |
| |
| std::unordered_map<int, int> old_to_new_id_map; |
| // Copy internal nodes to new graphdef |
| string local_scope = graph->FindNodeId(*subgraph_node_ids.begin())->name(); |
| for (const auto node_id : subgraph_node_ids) { |
| const auto node = graph->FindNodeId(node_id); |
| local_scope = GetCommonNameScope(local_scope, node->name()); |
| old_to_new_id_map[node_id] = segment_def->node_size(); |
| auto snode = segment_def->add_node(); |
| snode->CopyFrom(node->def()); |
| VLOG(2) << "Copying " << snode->name() << " to subgraph"; |
| } |
| // Update the inputs of the new input nodes to point to placeholder nodes. |
| for (int i = 0; i < connections->size(); ++i) { |
| auto& connection = connections->at(i); |
| if (connection.is_control_edge() || !connection.is_input_edge) continue; |
| auto snode = |
| segment_def->mutable_node(old_to_new_id_map[connection.inside_id]); |
| const string placeholder_name = |
| StrCat(kInputPHName, connection.port_number); |
| VLOG(1) << "Updating " << snode->name() << ":" << connection.inside_port |
| << " from " << snode->input(connection.inside_port) << " to " |
| << placeholder_name; |
| snode->set_input(connection.inside_port, placeholder_name); |
| } |
| // Remove control inputs that are not inside the segment. |
| for (int i = 0; i < segment_def->node_size(); ++i) { |
| auto snode = segment_def->mutable_node(i); |
| const int input_size = snode->input_size(); |
| int input_idx = 0; |
| int actual_input_idx = 0; |
| while (input_idx < input_size) { |
| TensorId input = ParseTensorName(snode->input(input_idx)); |
| if (!subgraph_node_names.count( |
| string(input.first.data(), input.first.size())) && |
| !str_util::StartsWith(input.first, kInputPHName)) { |
| if (input.second == Graph::kControlSlot) { |
| VLOG(1) << "... removing control inputs " << input.first |
| << " from subgraph."; |
| ++input_idx; |
| continue; |
| } else { |
| return tensorflow::errors::InvalidArgument( |
| "Found non control input outside the segment that is not an " |
| "engine connection to ", |
| snode->name(), ": ", input.first); |
| } |
| } |
| if (actual_input_idx != input_idx) { |
| snode->set_input(actual_input_idx, snode->input(input_idx)); |
| } |
| ++input_idx; |
| ++actual_input_idx; |
| } |
| for (int remove = input_size - actual_input_idx; remove > 0; --remove) { |
| snode->mutable_input()->RemoveLast(); |
| } |
| } |
| *common_scope = local_scope; |
| VLOG(0) << "Segment @scope '" << local_scope << "', converted to graph"; |
| return tensorflow::Status::OK(); |
| } |
| |
| bool InputEdgeValidator::operator()(const tensorflow::Edge* in_edge) const { |
| if (in_edge->IsControlEdge()) return true; |
| PartialTensorShape shape; |
| tensorflow::DataType dtype; |
| GetInputProperties(graph_properties_, in_edge->src(), in_edge->src_output(), |
| &shape, &dtype); |
| nvinfer1::DataType trt_dtype; |
| Status status = ValidateInputProperties(shape, dtype, &trt_dtype); |
| if (!status.ok()) { |
| VLOG(1) << "--> Need to remove input node " << in_edge->dst()->name() |
| << ": " << status; |
| return false; |
| } |
| |
| if (in_edge->src()->type_string() != "Const" && |
| #if NV_TENSORRT_MAJOR == 3 |
| // TRT 3.x only support 4 dimensional input tensor. |
| shape.dims() != 4) { |
| #else |
| // Single dimensional input tensor is not supported since the first |
| // dimension is treated as batch dimension. |
| shape.dims() < 2) { |
| #endif |
| VLOG(1) << "--> Need to remove input node " << in_edge->dst()->name() |
| << " which has an input at port " << in_edge->dst_input() << " with" |
| #if NV_TENSORRT_MAJOR == 3 |
| << " #dim!=4" |
| #else |
| << " #dim<2" |
| #endif |
| << " and is not a const: " << shape; |
| return false; |
| } |
| return true; |
| } |
| |
| bool OutputEdgeValidator::operator()(const tensorflow::Edge* out_edge) const { |
| if (out_edge->IsControlEdge()) return true; |
| if (out_edge->src()->type_string() == "Const") { |
| VLOG(1) << "--> Need to remove output node " << out_edge->src()->name() |
| << " which is a Const."; |
| return false; |
| } |
| return true; |
| } |
| |
| } // namespace convert |
| } // namespace tensorrt |
| } // namespace tensorflow |
| |
| #endif // GOOGLE_TENSORRT |
| #endif // GOOGLE_CUDA |