Removed TENSOR_OP disable env vars.
* TF_DISABLE_CUBLAS_TENSOR_OP_MATH
* TF_DISABLE_CUDNN_TENSOR_OP_MATH
* TF_DISABLE_CUDNN_RNN_TENSOR_OP_MATH
diff --git a/tensorflow/stream_executor/cuda/cuda_blas.cc b/tensorflow/stream_executor/cuda/cuda_blas.cc
index c9f0fc4..65c07e7 100644
--- a/tensorflow/stream_executor/cuda/cuda_blas.cc
+++ b/tensorflow/stream_executor/cuda/cuda_blas.cc
@@ -101,18 +101,6 @@
}
}
-// Decide whether to enable TENSOR_OP_MATH
-static bool TensorOpMathEnabled() {
- static bool is_enabled = [] {
- bool is_disabled;
- TF_CHECK_OK(
- tensorflow::ReadBoolFromEnvVar("TF_DISABLE_CUBLAS_TENSOR_OP_MATH",
- /*default_val=*/false, &is_disabled));
- return !is_disabled;
- }();
- return is_enabled;
-}
-
// cuBLAS has interfaces that permit pointers to be passed from either the host
// memory space or the device memory space; however, you must instruct it as to
// which address space those pointers are in with cublasSetPointerMode.
@@ -1640,7 +1628,7 @@
&cc_minor);
// GPUs < sm_70 don't support tensor ops.
- if (cc_major >= 7 && TensorOpMathEnabled()) {
+ if (cc_major >= 7) {
use_tensor_ops = true;
}
#endif
@@ -1921,8 +1909,7 @@
// strictly correct. We can't simply enable it, though, as that would change
// clients' behavior significantly: Using tensor ops on fp32 inputs cause them
// to be rounded to fp16.
- if (cc_major >= 7 && TensorOpMathEnabled() &&
- std::is_same<InType, Eigen::half>::value) {
+ if (cc_major >= 7 && std::is_same<InType, Eigen::half>::value) {
return true;
}
#endif
@@ -2270,7 +2257,7 @@
if (stream->parent()->GetDeviceDescription().cuda_compute_capability(
&cc_major, &cc_minor) &&
cc_major >= 5) {
- bool use_tensor_ops = TensorOpMathEnabled() && data_type == CUDA_R_16F;
+ bool use_tensor_ops = data_type == CUDA_R_16F;
cublasGemmAlgo_t algo =
(use_tensor_ops ? CUBLAS_GEMM_DFALT_TENSOR_OP : CUBLAS_GEMM_DFALT);
cudaDataType_t compute_type =
@@ -2425,7 +2412,7 @@
if (stream->parent()->GetDeviceDescription().cuda_compute_capability(
&cc_major, &cc_minor)) {
// GPUs < sm_70 don't support tensor ops.
- if (cc_major >= 7 && TensorOpMathEnabled()) {
+ if (cc_major >= 7) {
use_tensor_ops = true;
}
#if CUDA_VERSION >= 9010
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index be18c98..e46c271 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -601,31 +601,6 @@
SE_DISALLOW_COPY_AND_ASSIGN(CudnnFilterDescriptor);
};
-// A helper function to decide whether to enable the TENSOR_OP_MATH math type
-bool TensorOpMathEnabled() {
- static bool is_enabled = [] {
- bool is_disabled = false;
- TF_CHECK_OK(
- tensorflow::ReadBoolFromEnvVar("TF_DISABLE_CUDNN_TENSOR_OP_MATH",
- /*default_val=*/false, &is_disabled));
- return !is_disabled;
- }();
- return is_enabled;
-}
-
-// A helper function to decide whether to enable the TENSOR_OP_MATH math type
-// for RNNs.
-bool RnnTensorOpMathEnabled() {
- static bool is_enabled = [] {
- bool is_disabled = false;
- TF_CHECK_OK(
- tensorflow::ReadBoolFromEnvVar("TF_DISABLE_CUDNN_RNN_TENSOR_OP_MATH",
- /*default_val=*/false, &is_disabled));
- return !is_disabled;
- }();
- return is_enabled;
-}
-
// A helper function to decide whether to use
// CUDNN_BATCHNORM_SPATIAL_PERSISTENT in batchnorm. This mode can be faster in
// some tasks because an optimized path may be selected for CUDNN_DATA_FLOAT
@@ -749,9 +724,7 @@
#if CUDNN_VERSION >= 7000
cudnnMathType_t math_type =
(use_tensor_op_math ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH);
- if (TensorOpMathEnabled()) {
- CHECK_CUDNN_OK(cudnnSetConvolutionMathType(handle_.get(), math_type));
- }
+ CHECK_CUDNN_OK(cudnnSetConvolutionMathType(handle_.get(), math_type));
#endif
}
@@ -1155,21 +1128,19 @@
// in profile mode, which is run with algorithms returned from
// GetRnnAlgorithms() (which are non-default and explicitly set whether to
// use tensor ops). CuDNN 7.2.1 fixed this issue
- if (RnnTensorOpMathEnabled()) {
- cudnnMathType_t math_type;
- if (algorithm_config.algorithm().has_value()) {
- math_type = algorithm_config.algorithm()->tensor_ops_enabled()
- ? CUDNN_TENSOR_OP_MATH
- : CUDNN_DEFAULT_MATH;
- } else {
+ cudnnMathType_t math_type;
+ if (algorithm_config.algorithm().has_value()) {
+ math_type = algorithm_config.algorithm()->tensor_ops_enabled()
+ ? CUDNN_TENSOR_OP_MATH
+ : CUDNN_DEFAULT_MATH;
+ } else {
#if CUDNN_VERSION >= 7201
- math_type = CUDNN_TENSOR_OP_MATH;
+ math_type = CUDNN_TENSOR_OP_MATH;
#else
- math_type = CUDNN_DEFAULT_MATH;
+ math_type = CUDNN_DEFAULT_MATH;
#endif // CUDNN_VERSION >= 7201
- }
- CHECK_CUDNN_OK(cudnnSetRNNMatrixMathType(rnn_desc.get(), math_type));
}
+ CHECK_CUDNN_OK(cudnnSetRNNMatrixMathType(rnn_desc.get(), math_type));
#endif // CUDNN_VERSION >= 7000
return CudnnRnnDescriptor(cudnn, std::move(rnn_desc), std::move(rnn_plan),
@@ -2686,7 +2657,7 @@
}
static bool TensorOpMathAvailable(int cc_major) {
- return cc_major >= 7 && CUDNN_VERSION >= 7000 && TensorOpMathEnabled();
+ return cc_major >= 7 && CUDNN_VERSION >= 7000;
}
port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionForwardAlgorithm(
@@ -3480,9 +3451,7 @@
for (auto i : algo_types) {
out_algorithms->push_back({i, /*use_tensor_ops=*/false});
#if CUDNN_VERSION >= 7100
- if (RnnTensorOpMathEnabled()) {
- out_algorithms->push_back({i, /*use_tensor_ops=*/true});
- }
+ out_algorithms->push_back({i, /*use_tensor_ops=*/true});
#endif
}
return true;