/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <algorithm>
#include <array>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "absl/container/inlined_vector.h"
#include "absl/memory/memory.h"
#include "absl/strings/str_join.h"
#include "tensorflow/compiler/tf2tensorrt/common/datavec.h"
#include "tensorflow/compiler/tf2tensorrt/common/utils.h"
#include "tensorflow/compiler/tf2tensorrt/convert/utils.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_engine_instance.pb.h"  // NOLINT
#include "tensorflow/compiler/tf2tensorrt/utils/trt_logger.h"
#include "tensorflow/compiler/tf2tensorrt/utils/trt_lru_cache.h"
#include "tensorflow/core/common_runtime/device.h"
#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/framework/fake_input.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/resource_mgr.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/types.pb.h"
#include "tensorflow/core/kernels/ops_testutil.h"
#include "tensorflow/core/lib/core/status_test_util.h"
#include "tensorflow/core/lib/io/record_reader.h"
#include "tensorflow/core/platform/env.h"
#include "tensorflow/core/platform/errors.h"
#include "tensorflow/core/platform/file_system.h"
#include "tensorflow/core/platform/path.h"
#include "tensorflow/core/platform/status.h"
#include "tensorflow/core/platform/test.h"
#include "tensorflow/core/platform/tstring.h"
#include "tensorflow/core/platform/types.h"

#if GOOGLE_CUDA && GOOGLE_TENSORRT

namespace tensorflow {
namespace tensorrt {

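// Test parameters: the input dimensions of the network, whether the engine is
// built with dynamic shapes, and the number of network inputs.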
struct TestParam {
  nvinfer1::Dims dims;
  bool dynamic_shape;
  int n_inputs;
};

class TRTEngineResourceOpsTest
    : public OpsTestBase,
      public ::testing::WithParamInterface<TestParam> {
 public:
  TRTEngineResourceOpsTest() : param_(GetParam()) {}

 protected:
  void Reset() {
    for (auto& temp : tensors_) {
      delete temp;
    }
    for (auto& temp : managed_outputs_) {
      delete temp;
    }
    tensors_.clear();
    managed_outputs_.clear();
    inputs_.clear();
  }

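  // Constructs a network with a single input that computes exp() of that
  // input.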
  ITensorProxyPtr NetworkWith1Input(nvinfer1::INetworkDefinition* network,
                                    ITensorProxyPtr input) {
    // Add a unary layer.
    nvinfer1::IUnaryLayer* layer = network->addUnary(
        *input->trt_tensor(), nvinfer1::UnaryOperation::kEXP);
    EXPECT_NE(nullptr, layer);
    return layer->getOutput(0);
  }

  // Constructs a network with two inputs, where the second input is a shape
  // tensor. The network takes a slice of the first (2D) input, with the size
  // of the slice specified by the second input, and then adds the slice to
  // itself to produce the output of the network.
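  // For example, with a [3, 3] input and shape-tensor values {1, 2}, the
  // network slices out input[0:1, 0:2] and returns slice + slice.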
  ITensorProxyPtr NetworkWith2Inputs(nvinfer1::INetworkDefinition* network,
                                     ITensorProxyPtr input) {
    nvinfer1::Dims dims2{1, {2}};
    ITensorProxyPtr input2 = network->addInput(
        absl::StrCat(IONamePrefixes::kInputPHName, 1).c_str(),
        nvinfer1::DataType::kINT32, dims2);
    EXPECT_NE(nullptr, input2->trt_tensor());

    nvinfer1::Dims start{2, {0, 0}};
    nvinfer1::Dims stride{2, {1, 1}};
    // The size argument of addSlice below is only a placeholder: the actual
    // slice size comes from the shape tensor, which setInput(2, ...) attaches
    // as the layer's size input.
    auto slice_layer =
        network->addSlice(*input->trt_tensor(), start, stride, stride);
    EXPECT_NE(nullptr, slice_layer);

    slice_layer->setInput(2, *input2->trt_tensor());
    ITensorProxyPtr sliced_input = slice_layer->getOutput(0);
    EXPECT_NE(nullptr, sliced_input->trt_tensor());

    auto layer = network->addElementWise(*sliced_input->trt_tensor(),
                                         *sliced_input->trt_tensor(),
                                         nvinfer1::ElementWiseOperation::kSUM);
    EXPECT_NE(nullptr, layer);
    return layer->getOutput(0);
  }

  TrtUniquePtrType<nvinfer1::ICudaEngine> CreateTRTEngine() {
    TrtUniquePtrType<nvinfer1::IBuilder> builder(
        nvinfer1::createInferBuilder(logger_));
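    // The network is created in explicit-batch mode; for the dynamic-shape
    // test cases, the unknown input dimensions are set to -1 below.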
    TrtUniquePtrType<nvinfer1::INetworkDefinition> network(
        builder->createNetworkV2(
            1U << static_cast<int>(
                nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)));

    // Add the input.
    nvinfer1::Dims dims = this->param_.dims;
    if (this->param_.dynamic_shape) {
      std::fill(dims.d, dims.d + dims.nbDims, -1);
    }
    const std::string in_name = StrCat(IONamePrefixes::kInputPHName, 0);
    ITensorProxyPtr input =
        network->addInput(in_name.c_str(), nvinfer1::DataType::kFLOAT, dims);
    EXPECT_NE(nullptr, input->trt_tensor());
    // Mark the output.
    ITensorProxyPtr output =
        this->param_.n_inputs == 1
            ? this->NetworkWith1Input(network.get(), input)
            : this->NetworkWith2Inputs(network.get(), input);
    output->setName("output");
    network->markOutput(*output->trt_tensor());

    // Build the engine.
    TrtUniquePtrType<nvinfer1::IBuilderConfig> builder_config(
        builder->createBuilderConfig());
    builder_config->setMaxWorkspaceSize(1 << 10);
    builder->setMaxBatchSize(1);

    if (this->param_.dynamic_shape) {
      TrtShapeOptimizationProfile profile;
      profile.SetShapeTensorMask(network.get());
      // The for loop defines three optimization profiles for the network.
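      // With every dimension set to 3 * i, the profiles are derived from
      // input sizes of 3, 6, and 9 per dimension (e.g. [3, 3], [6, 6], and
      // [9, 9] for the 2D case).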
      for (int i = 1; i <= 3; i++) {
        const int n_input = param_.n_inputs;
        std::vector<TensorShape> shape_vec(n_input);
        // Define a shape with all dimensions set to 3 * i.
        std::vector<int> dimvec(this->param_.dims.nbDims, 3 * i);
        TensorShape shape;
        TF_CHECK_OK(
            TensorShapeUtils::MakeShape(dimvec.data(), dimvec.size(), &shape));

        const ITensorProxyPtr input = network->getInput(0);
        const char* name = input->getName();
        VLOG(2) << "Defining profile for input " << name;
        shape_vec[0] = shape;
        if (this->param_.n_inputs == 2) {
          // The shape of the shape tensor.
          TF_CHECK_OK(TensorShapeUtils::MakeShape(
              std::vector<int32>{param_.dims.nbDims}, &shape));
          shape_vec[1] = shape;
          // The values of the shape tensor.
          Tensor shape_tensor(DT_INT32, shape);
          // Define the shape values {1, i}, where 1 is the value of the first
          // dimension and i is the value of the second dimension.
          std::vector<int32> vals{1, i};
          std::copy_n(vals.data(), vals.size(),
                      shape_tensor.flat<int32_t>().data());
          DataVec shape_values{{"one", {}}, {"two", shape_tensor}};
          TF_CHECK_OK(profile.CollectShapeValues(shape_values));
        } else {
          TF_CHECK_OK(profile.CollectShapeValues({{"one", {}}}));
        }
        profile.AddShape(shape_vec);
      }
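      // Turn the shapes and shape values collected above into min/opt/max
      // ranges for the TensorRT optimization profiles.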
      std::vector<PartialTensorShape> input_partial_shapes;
      TF_CHECK_OK(GetNetworkInputShapes(network.get(), &input_partial_shapes));
      profile.InitProfiles(input_partial_shapes,
                           ProfileStrategy::kImplicitBatchModeCompatible);
      // Add the profiles to the builder configuration.
      TF_CHECK_OK(profile.ConfigureBuilder(builder.get(), builder_config.get(),
                                           network.get()));
    }
| VLOG(2) << "ConfigureBuilder Finished"; |
| TrtUniquePtrType<nvinfer1::ICudaEngine> engine( |
| builder->buildEngineWithConfig(*network, *builder_config)); |
| VLOG(2) << "Engine constructed"; |
| EXPECT_NE(nullptr, engine); |
| return engine; |
| } |
| Logger& logger_ = *Logger::GetLogger(); |
| TestParam param_; |
| }; |
| |
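// The test covers a static 1D network, a dynamic-shape 1D network, and, for
// TensorRT 7.1.3 and later, a dynamic-shape 2D network with a second (shape
// tensor) input.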
#if IS_TRT_VERSION_GE(7, 1, 3, 0)
constexpr std::array<TestParam, 3> TestParameters = {
    TestParam{nvinfer1::Dims{1, {1}}, false, 1},
    TestParam{nvinfer1::Dims{1, {1}}, true, 1},
    TestParam{nvinfer1::Dims{2, {3, 3}}, true, 2}};
#else
constexpr std::array<TestParam, 2> TestParameters = {
    TestParam{nvinfer1::Dims{1, {1}}, false, 1},
    TestParam{nvinfer1::Dims{1, {1}}, true, 1}};
#endif

INSTANTIATE_TEST_CASE_P(EngineResourceOpsTestInstantiation,
                        TRTEngineResourceOpsTest,
                        ::testing::ValuesIn(TestParameters));

TEST_P(TRTEngineResourceOpsTest, Basic) {
  // Create the GPU device.
  std::unique_ptr<Device> device(
      DeviceFactory::NewDevice("GPU", {}, "/job:worker/replica:0/task:0"));
  ResourceMgr* rm = device->resource_manager();
  SetDevice(DEVICE_GPU, std::move(device));

  // Create a resource handle.
  const string container(kTfTrtContainerName);
  const string resource_name = "myresource";
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "CreateTRTResourceHandle")
                   .Attr("resource_name", resource_name)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  TF_ASSERT_OK(RunOpKernel());
  ResourceHandle handle =
      context_->mutable_output(0)->scalar<ResourceHandle>()();

  // Check that the resource hasn't been created yet.
  TRTEngineCacheResource* resource = nullptr;
  EXPECT_TRUE(
      errors::IsNotFound(rm->Lookup(container, resource_name, &resource)));

  // Create a resource and use an empty file to initialize the resource.
  Reset();
  Env* env = Env::Default();
  const string filename = io::JoinPath(testing::TmpDir(), "trt_engine_file");
  {
    std::unique_ptr<WritableFile> file;
    TF_ASSERT_OK(env->NewWritableFile(filename, &file));
  }
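  // Since the file is empty, initializing the resource from it below creates
  // an empty engine cache.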
| TF_ASSERT_OK(NodeDefBuilder("op", "InitializeTRTResource") |
| .Input(FakeInput(DT_RESOURCE)) |
| .Input(FakeInput(DT_STRING)) |
| .Attr("max_cached_engines_count", 1) |
| .Finalize(node_def())); |
| TF_ASSERT_OK(InitOp()); |
| AddInputFromArray<ResourceHandle>(TensorShape({}), {handle}); |
| AddInputFromArray<tstring>(TensorShape({}), {filename}); |
| TF_ASSERT_OK(RunOpKernel()); |
| |
| // Check that the resource is registered with the resource manager and the |
| // cache of the resource is empty. |
| EXPECT_TRUE(rm->Lookup(container, resource_name, &resource).ok()); |
| EXPECT_EQ(0, resource->cache_.size()); |
| |
| // Create an engine and add it to the cache of the resource. |
| TrtUniquePtrType<nvinfer1::ICudaEngine> engine = CreateTRTEngine(); |
| ExecutionContext context = ExecutionContext::Create(engine.get()); |
| |
| std::vector<TensorShape> engine_input_shape(1); |
| TF_ASSERT_OK(DimsAdapter(param_.dims).TensorShape(&(engine_input_shape[0]))); |
| if (param_.n_inputs > 1) { |
| engine_input_shape.push_back(TensorShape({1, 1})); |
| } |
| resource->cache_.emplace( |
| engine_input_shape, |
| std::make_unique<EngineContext>(std::move(engine), std::move(context))); |
| // Check that the resource has multiple references before it is unregistered |
| // from the resource manager. |
| EXPECT_FALSE(resource->RefCountIsOne()); |

  // Serialize the engine to a file and unregister the resource from the
  // resource manager.
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "SerializeTRTResource")
                   .Attr("delete_resource", true)
                   .Input(FakeInput(DT_STRING))
                   .Input(FakeInput(DT_STRING))
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInputFromArray<tstring>(TensorShape({}), {resource_name});
  AddInputFromArray<tstring>(TensorShape({}), {filename});
  TF_ASSERT_OK(RunOpKernel());
  // Check that the resource now has only one reference. Detach the reference
  // to the resource to destroy the resource.
  EXPECT_TRUE(resource->RefCountIsOne());
  resource->Unref();

  // Check that unregistering the resource from the resource manager returns
  // an error, as the resource has already been unregistered.
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "DestroyResourceOp")
                   .Attr("ignore_lookup_error", false)
                   .Input(FakeInput(DT_RESOURCE))
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInputFromArray<ResourceHandle>(TensorShape({}), {handle});
  EXPECT_TRUE(errors::IsNotFound(RunOpKernel()));

  // Verify the file for the serialized engine.
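  // Each record in the file holds a serialized TRTEngineInstance proto, i.e.
  // the engine plan together with the input shapes it was created for.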
  std::unique_ptr<RandomAccessFile> file;
  TF_ASSERT_OK(env->NewRandomAccessFile(filename, &file));
  auto reader = std::make_unique<io::RecordReader>(file.get());
  uint64 offset = 0;
  tstring record;
  TF_ASSERT_OK(reader->ReadRecord(&offset, &record));
  TRTEngineInstance engine_instance;
  EXPECT_TRUE(engine_instance.ParseFromString(record));
  EXPECT_EQ(param_.n_inputs, engine_instance.input_shapes_size());
  EXPECT_EQ(param_.dims.nbDims, engine_instance.input_shapes(0).dim_size());
  for (int i = 0; i < param_.dims.nbDims; i++) {
    EXPECT_EQ(param_.dims.d[i], engine_instance.input_shapes(0).dim(i).size());
  }
  EXPECT_TRUE(errors::IsOutOfRange(reader->ReadRecord(&offset, &record)));

  // Recreate the resource and use the file with the serialized engine to
  // initialize the resource.
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "InitializeTRTResource")
                   .Input(FakeInput(DT_RESOURCE))
                   .Input(FakeInput(DT_STRING))
                   .Attr("max_cached_engines_count", 1)
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInputFromArray<ResourceHandle>(TensorShape({}), {handle});
  AddInputFromArray<tstring>(TensorShape({}), {filename});
  TF_ASSERT_OK(RunOpKernel());

  // Check that the resource is registered with the resource manager again and
  // that the cache of the resource is not empty.
  EXPECT_TRUE(rm->Lookup(container, resource_name, &resource).ok());
  EXPECT_EQ(1, resource->cache_.size());
  if (this->param_.dynamic_shape) {
    EXPECT_EQ(3, resource->profiles_.GetNumProfiles());
    EXPECT_EQ(3, resource->cache_.begin()->second->GetNumContexts());
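    // One execution context is created per optimization profile, so the three
    // profiles defined in CreateTRTEngine yield three contexts.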

    if (this->param_.n_inputs == 1) {
      // Check that the profiles are restored correctly.
      std::vector<TensorShape> shapes(1);
      // We create a shape vector that matches only profile 1.
      TF_CHECK_OK(
          TensorShapeUtils::MakeShape(std::vector<int32>{6}, &shapes[0]));
      EXPECT_EQ(1, resource->profiles_.GetProfileNumber(shapes));
    } else {
      // Check that the shape values are restored correctly.
      std::vector<TensorShape> shapes(2);
      // We create a shape vector that matches only profile 2.
      TF_CHECK_OK(
          TensorShapeUtils::MakeShape(std::vector<int32>{9, 9}, &shapes[0]));
      TF_CHECK_OK(
          TensorShapeUtils::MakeShape(std::vector<int32>{2}, &shapes[1]));
      Tensor shape_tensor(DT_INT32, shapes[1]);
      std::vector<int32> vals{1, 3};
      std::copy_n(vals.data(), vals.size(),
                  shape_tensor.flat<int32_t>().data());
      // The DataVec names are not used by CollectShapeValues; only the order
      // matters.
      DataVec shape_values{{"one", {}}, {"two", shape_tensor}};
      TF_CHECK_OK(resource->profiles_.CollectShapeValues(shape_values));
      EXPECT_EQ(2, resource->profiles_.GetProfileNumber(shapes));
    }
  }
  // Check that the resource has multiple references before it is unregistered
  // from the resource manager.
  EXPECT_FALSE(resource->RefCountIsOne());

  // Unregister the resource from the resource manager twice and expect the
  // second attempt to produce an error.
  Reset();
  TF_ASSERT_OK(NodeDefBuilder("op", "DestroyResourceOp")
                   .Attr("ignore_lookup_error", false)
                   .Input(FakeInput(DT_RESOURCE))
                   .Finalize(node_def()));
  TF_ASSERT_OK(InitOp());
  AddInputFromArray<ResourceHandle>(TensorShape({}), {handle});
  TF_ASSERT_OK(RunOpKernel());
  EXPECT_TRUE(errors::IsNotFound(RunOpKernel()));

  // Check that the resource now has only one reference. Detach the reference
  // to the resource to destroy the resource.
  EXPECT_TRUE(resource->RefCountIsOne());
  resource->Unref();
}

}  // namespace tensorrt
}  // namespace tensorflow

#endif  // GOOGLE_CUDA && GOOGLE_TENSORRT