Report errors from individual GPU conv autotuning failures.
PiperOrigin-RevId: 403201665
Change-Id: Id0325181cafc42542e0c4b76bf1e4f187ebdc37f
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
index ff14470..effbaba 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_conv_algorithm_picker.cc
@@ -412,16 +412,9 @@
alg.ToString()),
2);
- profile_results.emplace_back();
- AutotuneResult& result = profile_results.back();
- result.mutable_conv()->set_algorithm(alg.algo_id());
- result.mutable_conv()->set_tensor_ops_enabled(alg.tensor_ops_enabled());
-
if (absl::c_linear_search(disabled_algos, alg)) {
LOG(INFO) << "Omitted potentially buggy algorithm " << alg.ToString()
<< " for conv " << instr->ToString();
- result.mutable_failure()->set_kind(AutotuneResult::UNKNOWN);
- result.mutable_failure()->set_msg("Disqualified for being known-buggy.");
continue;
}
@@ -432,8 +425,6 @@
if (kind == CudnnConvKind::kForwardActivation &&
backend_config.activation_mode() == se::dnn::ActivationMode::kNone &&
alg.algo_id() != CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM) {
- result.mutable_failure()->set_kind(AutotuneResult::UNKNOWN);
- result.mutable_failure()->set_msg("Disqualified for implicit RELU.");
continue;
}
@@ -454,22 +445,19 @@
if (!launch_status.ok()) {
VLOG(4) << "Launch failed: " << launch_status;
- result.mutable_failure()->set_kind(AutotuneResult::UNKNOWN);
- result.mutable_failure()->set_msg(
- absl::StrCat("Profiling failure on cuDNN engine ", alg.ToString(),
- ": ", launch_status.ToString()));
continue;
}
if (!profile_result.is_valid()) {
VLOG(4) << "Launch succeeded but profile result is invalid.";
- result.mutable_failure()->set_kind(AutotuneResult::UNKNOWN);
- result.mutable_failure()->set_msg(absl::StrCat(
- "Launch succeeded but profile result is invalid, with cuDNN engine ",
- alg.ToString(), ": ", launch_status.ToString()));
continue;
}
+ profile_results.emplace_back();
+ AutotuneResult& result = profile_results.back();
+ result.mutable_conv()->set_algorithm(alg.algo_id());
+ result.mutable_conv()->set_tensor_ops_enabled(alg.tensor_ops_enabled());
+
int64_t scratch_bytes_used =
scratch_allocator.TotalAllocatedBytesExcludingRedzones();
result.set_scratch_bytes(scratch_bytes_used);
diff --git a/tensorflow/compiler/xla/service/gpu/stream_executor_util.cc b/tensorflow/compiler/xla/service/gpu/stream_executor_util.cc
index 2525f79..ad7ab92 100644
--- a/tensorflow/compiler/xla/service/gpu/stream_executor_util.cc
+++ b/tensorflow/compiler/xla/service/gpu/stream_executor_util.cc
@@ -526,13 +526,10 @@
});
if (filtered_results.empty()) {
- std::ostringstream msg;
- msg << "All algorithms tried for " << instr.ToString()
- << " failed. Falling back to default algorithm. Per-algorithm errors:";
- for (const auto& result : profile_results) {
- msg << "\n " << result.failure().msg();
- }
- return InternalError("%s", msg.str());
+ return InternalError(
+ "All algorithms tried for %s failed. Falling back to "
+          "default algorithm.",
+ instr.ToString());
}
auto selected_result = filtered_results.begin();