Enable FloorMod JIT-compiled GPU kernels by default.

Also fix a race condition in tf_jit_cache.cc.

PiperOrigin-RevId: 421821420
Change-Id: Id442562f8640d8c4e82fb176de04a7dffc5cbc18
diff --git a/tensorflow/compiler/mlir/tools/kernel_gen/tf_jit_cache.cc b/tensorflow/compiler/mlir/tools/kernel_gen/tf_jit_cache.cc
index 9dc324d..02b95c0 100644
--- a/tensorflow/compiler/mlir/tools/kernel_gen/tf_jit_cache.cc
+++ b/tensorflow/compiler/mlir/tools/kernel_gen/tf_jit_cache.cc
@@ -48,10 +48,10 @@
   // Insert the compiled module into our cache and return a raw pointer.
   {
     tensorflow::mutex_lock lock(mu_);
-    // Check again whether we already have a compiled module in the cache. It
-    // may have been added during the time we ran compile_callback().
-    return execution_engine_by_key_.try_emplace(code, std::move(engine.get()))
-        .first->second.get();
+    assert(!execution_engine_by_key_.contains(code) &&
+           "Cache must not contain key if JIT compilation is triggered.");
+    execution_engine_by_key_[code] = std::move(engine.get());
+    return execution_engine_by_key_[code].get();
   }
 }
 
diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD
index 856086d..6053475 100644
--- a/tensorflow/core/kernels/mlir_generated/BUILD
+++ b/tensorflow/core/kernels/mlir_generated/BUILD
@@ -244,7 +244,6 @@
         "gpu_op_div_no_nan.cc",
         "gpu_op_equal.cc",
         "gpu_op_floor_div.cc",
-        "gpu_op_floor_mod.cc",
         "gpu_op_greater.cc",
         "gpu_op_greater_equal.cc",
         "gpu_op_left_shift.cc",
@@ -267,6 +266,8 @@
         "gpu_op_xlog1py.cc",
         "gpu_op_xlogy.cc",
         "gpu_op_zeta.cc",
+    ]) + if_mlir_generated_experimental_kernels_enabled([
+        "gpu_op_floor_mod.cc",
     ]),
     copts = if_mlir_generated_experimental_kernels_enabled([
         "-DMLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED",
@@ -285,7 +286,6 @@
         ":gpu_div_no_nan_kernels",
         ":gpu_equal_kernels",
         ":gpu_floor_div_kernels",
-        ":gpu_floor_mod_kernels",
         ":gpu_greater_equal_kernels",
         ":gpu_greater_kernels",
         ":gpu_left_shift_kernels",
@@ -309,7 +309,7 @@
         ":gpu_xlogy_kernels",
         ":gpu_zeta_kernels",
         "//third_party/eigen3",
-    ]),
+    ]) + if_mlir_generated_experimental_kernels_enabled([":gpu_floor_mod_kernels"]),
 )
 
 tf_kernel_library(
diff --git a/tensorflow/core/kernels/mlir_generated/gpu_binary_ops_test.cc b/tensorflow/core/kernels/mlir_generated/gpu_binary_ops_test.cc
index 445aed2..33556b4 100644
--- a/tensorflow/core/kernels/mlir_generated/gpu_binary_ops_test.cc
+++ b/tensorflow/core/kernels/mlir_generated/gpu_binary_ops_test.cc
@@ -628,7 +628,8 @@
 }
 
 /// Test the JIT-compiled kernels.
-#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
+#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \
+    defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
 GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES(
     FloorMod,
     /*test_name=*/Int8, int8_t, int8_t, test::DefaultInput<int8_t>(),