Move TensorRT builder configuration into Converter::BuildCudaEngine
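
The builder settings that were previously applied in ConvertGraphDefToEngine
(max batch size, max workspace size, GPU allocator, FP16/INT8 precision flags,
and the INT8 calibrator) are now applied inside Converter::BuildCudaEngine,
immediately before the engine is built. BuildCudaEngine gains the
corresponding parameters, and ConvertGraphDefToEngine now only creates the
builder, runs the conversion, and forwards these arguments when building the
engine.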
diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc
index c800e50..0dca101 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc
@@ -1369,13 +1369,33 @@
}
Status Converter::BuildCudaEngine(
- TrtUniquePtrType<nvinfer1::ICudaEngine>* engine) {
- VLOG(1) << "Starting engine creation";
+ TrtUniquePtrType<nvinfer1::ICudaEngine>* engine,
+ int max_batch_size, size_t max_workspace_size_bytes,
+ nvinfer1::IGpuAllocator* allocator, TRTInt8Calibrator* calibrator) {
+ VLOG(1) << "Configuring TensorRT builder";
+ trt_builder_->setMaxBatchSize(max_batch_size);
+ trt_builder_->setMaxWorkspaceSize(max_workspace_size_bytes);
+ trt_builder_->setGpuAllocator(allocator);
+ if (precision_mode_ == TrtPrecisionMode::FP16) {
+ trt_builder_->setFp16Mode(true);
+ } else if (precision_mode_ == TrtPrecisionMode::INT8) {
+ // Setting FP16 mode as well allows TRT to also consider FP16 kernels and
+ // use them in situations where they are faster than INT8 or where INT8 is
+ // not supported for a given layer.
+ trt_builder_->setFp16Mode(true);
+ trt_builder_->setInt8Mode(true);
+ if (use_calibration_) {
+ trt_builder_->setInt8Calibrator(calibrator);
+ } else {
+ trt_builder_->setInt8Calibrator(nullptr);
+ }
+ }
+
+ VLOG(1) << "Building TensorRT engine";
engine->reset(trt_builder_->buildCudaEngine(*network()));
if (engine->get() == nullptr) {
return errors::Internal("Failed to build TensorRT engine");
}
- VLOG(1) << "Finished conversion";
return Status::OK();
}
@@ -5620,37 +5640,17 @@
engine->reset();
if (convert_successfully) *convert_successfully = false;
- // Create the builder.
+ VLOG(1) << "Creating TensorRT builder";
TrtUniquePtrType<nvinfer1::IBuilder> builder(
nvinfer1::createInferBuilder(*trt_logger));
- builder->setMaxBatchSize(max_batch_size);
- builder->setMaxWorkspaceSize(max_workspace_size_bytes);
- builder->setGpuAllocator(allocator);
- if (precision_mode == TrtPrecisionMode::FP16) {
- builder->setFp16Mode(true);
- } else if (precision_mode == TrtPrecisionMode::INT8) {
- // Setting FP16 mode as well allows TRT to also consider FP16 kernels and
- // use them in situations where they are faster than INT8 or where INT8 is
- // not supported for a given layer.
- builder->setFp16Mode(true);
- builder->setInt8Mode(true);
- if (use_calibration) {
- builder->setInt8Calibrator(calibrator);
- } else {
- builder->setInt8Calibrator(nullptr);
- }
- }
- // Build the network
- if (VLOG_IS_ON(1)) {
- string mode_str;
- TF_RETURN_IF_ERROR(TrtPrecisionModeToName(precision_mode, &mode_str));
- VLOG(1) << "Starting engine conversion, precision mode: " << mode_str;
- }
+ VLOG(1) << "Creating converter and TensorRT network";
auto statusor = Converter::Create(builder.get(), precision_mode,
use_calibration, trt_logger);
TF_RETURN_IF_ERROR(statusor.status());
auto converter = std::move(statusor.ValueOrDie());
+
+ VLOG(1) << "Starting to convert TensorFlow ops to TensorRT layers";
std::vector<Converter::EngineOutputInfo> output_tensors;
// Graph nodes are already topologically sorted during construction
for (const auto& node_def : gdef.node()) {
@@ -5737,7 +5737,10 @@
converter->MaybeApplyQuantizationRanges();
// Build the engine.
- TF_RETURN_IF_ERROR(converter->BuildCudaEngine(engine));
+ TF_RETURN_IF_ERROR(converter->BuildCudaEngine(
+ engine, max_batch_size, max_workspace_size_bytes, allocator, calibrator));
+
+ VLOG(1) << "Finished conversion";
return Status::OK();
}
diff --git a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h
index 0009939..b3dc373 100644
--- a/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h
+++ b/tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h
@@ -467,7 +467,10 @@
const std::vector<EngineOutputInfo>& output_tensors);
// Build a TRT engine using the created network.
- Status BuildCudaEngine(TrtUniquePtrType<nvinfer1::ICudaEngine>* engine);
+ Status BuildCudaEngine(TrtUniquePtrType<nvinfer1::ICudaEngine>* engine,
+ int max_batch_size, size_t max_workspace_size_bytes,
+ nvinfer1::IGpuAllocator* allocator,
+ TRTInt8Calibrator* calibrator);
//////////////////////////////////////////////////////////////////////////////
// Methods used by op converters to convert individual TF node and add layers
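
For reference, a minimal sketch of the resulting call sequence, pieced
together from ConvertGraphDefToEngine in this diff; the setup of trt_logger,
precision_mode, use_calibration, allocator, calibrator, and engine is assumed
to come from the caller, and error handling between the steps is elided:

  // Sketch only: names and parameters follow the diff above; the
  // surrounding setup (logger, allocator, calibrator, graph) is assumed.
  TrtUniquePtrType<nvinfer1::IBuilder> builder(
      nvinfer1::createInferBuilder(*trt_logger));

  auto statusor = Converter::Create(builder.get(), precision_mode,
                                    use_calibration, trt_logger);
  TF_RETURN_IF_ERROR(statusor.status());
  auto converter = std::move(statusor.ValueOrDie());

  // ... convert TensorFlow ops to TensorRT layers, rename the engine
  // outputs, and apply quantization ranges as before ...

  // Builder limits, the GPU allocator, and the INT8 calibrator are now
  // passed here instead of being set on the builder by the caller.
  TF_RETURN_IF_ERROR(converter->BuildCudaEngine(
      engine, max_batch_size, max_workspace_size_bytes, allocator,
      calibrator));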