Disable RedzoneAllocator extra memory when xla_gpu_autotune_level < 4.
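Currently, XLA GPU autotuning allocates extra red zone memory even when the red zones are never checked (i.e. when xla_gpu_autotune_level < 4). This PR disables the extra memory allocation in that case by passing a red zone size of 0 to RedzoneAllocator in the GEMM and convolution algorithm pickers.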
PiperOrigin-RevId: 392949173
Change-Id: I9b76b1c23aa01608ffe6a4437ac691a2949d85f1
diff --git a/tensorflow/compiler/xla/service/gpu/gemm_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/gemm_algorithm_picker.cc
index b423b16..11d9e13 100644
--- a/tensorflow/compiler/xla/service/gpu/gemm_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/gemm_algorithm_picker.cc
@@ -72,10 +72,13 @@
const bool reinit_cublas_data = cublas_autotune_level >= 3;
const bool check_cublas = cublas_autotune_level >= 4;
+ const int64_t redzone_size =
+ check_cublas ? se::RedzoneAllocator::kDefaultRedzoneSize : 0;
se::RedzoneAllocator input_output_allocator(
stream, allocator,
PtxOptsFromDebugOptions(hlo_module_config.debug_options()),
- /*memory_limit=*/std::numeric_limits<int64_t>::max());
+ /*memory_limit=*/std::numeric_limits<int64_t>::max(),
+ /*redzone_size=*/redzone_size);
BufferComparator comparator(gemm->shape(), hlo_module_config);
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
index 351a62f..ce13a4c 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
@@ -365,9 +365,13 @@
};
// Allocate space for the input, filter, and output of the convolution.
+ const int64_t redzone_size =
+ check_conv ? se::RedzoneAllocator::kDefaultRedzoneSize : 0;
se::RedzoneAllocator input_output_allocator(
stream, allocator,
- PtxOptsFromDebugOptions(hlo_module_config.debug_options()));
+ PtxOptsFromDebugOptions(hlo_module_config.debug_options()),
+ /*memory_limit=*/se::RedzoneAllocator::kDefaultMemoryLimit,
+ /*redzone_size=*/redzone_size);
std::vector<se::DeviceMemoryBase> operand_buffers;
for (const auto* operand : instr->operands()) {
TF_ASSIGN_OR_RETURN(auto buffer,