Disable RedzoneAllocator extra memory when xla_gpu_autotune_level < 4.

Currently XLA GPU Autotune allocates extra red zone memory even when red zone is not checked. This PR disables the extra memory allocation.

PiperOrigin-RevId: 392949173
Change-Id: I9b76b1c23aa01608ffe6a4437ac691a2949d85f1
diff --git a/tensorflow/compiler/xla/service/gpu/gemm_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/gemm_algorithm_picker.cc
index b423b16..11d9e13 100644
--- a/tensorflow/compiler/xla/service/gpu/gemm_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/gemm_algorithm_picker.cc
@@ -72,10 +72,13 @@
   const bool reinit_cublas_data = cublas_autotune_level >= 3;
   const bool check_cublas = cublas_autotune_level >= 4;
 
+  const int64_t redzone_size =
+      check_cublas ? se::RedzoneAllocator::kDefaultRedzoneSize : 0;
   se::RedzoneAllocator input_output_allocator(
       stream, allocator,
       PtxOptsFromDebugOptions(hlo_module_config.debug_options()),
-      /*memory_limit=*/std::numeric_limits<int64_t>::max());
+      /*memory_limit=*/std::numeric_limits<int64_t>::max(),
+      /*redzone_size=*/redzone_size);
 
   BufferComparator comparator(gemm->shape(), hlo_module_config);
 
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
index 351a62f..ce13a4c 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
@@ -365,9 +365,13 @@
   };
 
   // Allocate space for the input, filter, and output of the convolution.
+  const int64_t redzone_size =
+      check_conv ? se::RedzoneAllocator::kDefaultRedzoneSize : 0;
   se::RedzoneAllocator input_output_allocator(
       stream, allocator,
-      PtxOptsFromDebugOptions(hlo_module_config.debug_options()));
+      PtxOptsFromDebugOptions(hlo_module_config.debug_options()),
+      /*memory_limit=*/se::RedzoneAllocator::kDefaultMemoryLimit,
+      /*redzone_size=*/redzone_size);
   std::vector<se::DeviceMemoryBase> operand_buffers;
   for (const auto* operand : instr->operands()) {
     TF_ASSIGN_OR_RETURN(auto buffer,