Move TensorRT builder configuration into Converter::BuildCudaEngine
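
The builder settings that were previously applied in ConvertGraphDefToEngine
(max batch size, max workspace size, GPU allocator, FP16/INT8 precision flags,
and the INT8 calibrator) are now applied inside Converter::BuildCudaEngine,
immediately before the engine is built. BuildCudaEngine gains the
corresponding parameters, and ConvertGraphDefToEngine now only creates the
builder, runs the conversion, and forwards these arguments when building the
engine.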
diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc
index c800e50..0dca101 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc
@@ -1369,13 +1369,33 @@
}
Status Converter::BuildCudaEngine(
- TrtUniquePtrType<nvinfer1::ICudaEngine>* engine) {
- VLOG(1) << "Starting engine creation";
+ TrtUniquePtrType<nvinfer1::ICudaEngine>* engine,
+ int max_batch_size, size_t max_workspace_size_bytes,
+ nvinfer1::IGpuAllocator* allocator, TRTInt8Calibrator* calibrator) {
+ VLOG(1) << "Configuring TensorRT builder";
+ trt_builder_->setMaxBatchSize(max_batch_size);
+ trt_builder_->setMaxWorkspaceSize(max_workspace_size_bytes);
+ trt_builder_->setGpuAllocator(allocator);
+ if (precision_mode_ == TrtPrecisionMode::FP16) {
+ trt_builder_->setFp16Mode(true);
+ } else if (precision_mode_ == TrtPrecisionMode::INT8) {
+ // Setting FP16 mode as well allows TRT to also consider FP16 kernels and
+ // use them in situations where they are faster than INT8 or where INT8 is
+ // not supported for a given layer.
+ trt_builder_->setFp16Mode(true);
+ trt_builder_->setInt8Mode(true);
+ if (use_calibration_) {
+ trt_builder_->setInt8Calibrator(calibrator);
+ } else {
+ trt_builder_->setInt8Calibrator(nullptr);
+ }
+ }
+
+ VLOG(1) << "Building TensorRT engine";
engine->reset(trt_builder_->buildCudaEngine(*network()));
if (engine->get() == nullptr) {
return errors::Internal("Failed to build TensorRT engine");
}
- VLOG(1) << "Finished conversion";
return Status::OK();
}
@@ -5620,37 +5640,17 @@
engine->reset();
if (convert_successfully) *convert_successfully = false;
- // Create the builder.
+ VLOG(1) << "Creating TensorRT builder";
TrtUniquePtrType<nvinfer1::IBuilder> builder(
nvinfer1::createInferBuilder(*trt_logger));
- builder->setMaxBatchSize(max_batch_size);
- builder->setMaxWorkspaceSize(max_workspace_size_bytes);
- builder->setGpuAllocator(allocator);
- if (precision_mode == TrtPrecisionMode::FP16) {
- builder->setFp16Mode(true);
- } else if (precision_mode == TrtPrecisionMode::INT8) {
- // Setting FP16 mode as well allows TRT to also consider FP16 kernels and
- // use them in situations where they are faster than INT8 or where INT8 is
- // not supported for a given layer.
- builder->setFp16Mode(true);
- builder->setInt8Mode(true);
- if (use_calibration) {
- builder->setInt8Calibrator(calibrator);
- } else {
- builder->setInt8Calibrator(nullptr);
- }
- }
- // Build the network
- if (VLOG_IS_ON(1)) {
- string mode_str;
- TF_RETURN_IF_ERROR(TrtPrecisionModeToName(precision_mode, &mode_str));
- VLOG(1) << "Starting engine conversion, precision mode: " << mode_str;
- }
+ VLOG(1) << "Creating converter and TensorRT network";
auto statusor = Converter::Create(builder.get(), precision_mode,
use_calibration, trt_logger);
TF_RETURN_IF_ERROR(statusor.status());
auto converter = std::move(statusor.ValueOrDie());
+
+ VLOG(1) << "Starting to convert TensorFlow ops to TensorRT layers";
std::vector<Converter::EngineOutputInfo> output_tensors;
// Graph nodes are already topologically sorted during construction
for (const auto& node_def : gdef.node()) {
@@ -5737,7 +5737,10 @@
converter->MaybeApplyQuantizationRanges();
// Build the engine.
- TF_RETURN_IF_ERROR(converter->BuildCudaEngine(engine));
+ TF_RETURN_IF_ERROR(converter->BuildCudaEngine(
+ engine, max_batch_size, max_workspace_size_bytes, allocator, calibrator));
+
+ VLOG(1) << "Finished conversion";
return Status::OK();
}
diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h
index 0009939..b3dc373 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h
@@ -467,7 +467,10 @@
const std::vector<EngineOutputInfo>& output_tensors);
// Build a TRT engine using the created network.
- Status BuildCudaEngine(TrtUniquePtrType<nvinfer1::ICudaEngine>* engine);
+ Status BuildCudaEngine(TrtUniquePtrType<nvinfer1::ICudaEngine>* engine,
+ int max_batch_size, size_t max_workspace_size_bytes,
+ nvinfer1::IGpuAllocator* allocator,
+ TRTInt8Calibrator* calibrator);
//////////////////////////////////////////////////////////////////////////////
// Methods used by op converters to convert individual TF node and add layers
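
For reference, a minimal sketch of the resulting call sequence, pieced
together from ConvertGraphDefToEngine in this diff; the setup of trt_logger,
precision_mode, use_calibration, allocator, calibrator, and engine is assumed
to come from the caller, and error handling between the steps is elided:

  // Sketch only: names and parameters follow the diff above; the
  // surrounding setup (logger, allocator, calibrator, graph) is assumed.
  TrtUniquePtrType<nvinfer1::IBuilder> builder(
      nvinfer1::createInferBuilder(*trt_logger));

  auto statusor = Converter::Create(builder.get(), precision_mode,
                                    use_calibration, trt_logger);
  TF_RETURN_IF_ERROR(statusor.status());
  auto converter = std::move(statusor.ValueOrDie());

  // ... convert TensorFlow ops to TensorRT layers, rename the engine
  // outputs, and apply quantization ranges as before ...

  // Builder limits, the GPU allocator, and the INT8 calibrator are now
  // passed here instead of being set on the builder by the caller.
  TF_RETURN_IF_ERROR(converter->BuildCudaEngine(
      engine, max_batch_size, max_workspace_size_bytes, allocator,
      calibrator));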