[MLIR][KernelGen] Add experimental JIT-compiled min and max kernels for i8, ui16, ui32, and ui64 on GPU
PiperOrigin-RevId: 404632894
Change-Id: I877348b44680f7518b174ed5cb367fd1611ff7e7
diff --git a/tensorflow/core/kernels/mlir_generated/BUILD b/tensorflow/core/kernels/mlir_generated/BUILD
index c7e17d6..8b4e9f4 100644
--- a/tensorflow/core/kernels/mlir_generated/BUILD
+++ b/tensorflow/core/kernels/mlir_generated/BUILD
@@ -1207,6 +1207,12 @@
[
gpu_kernel_library(
name = "gpu_" + op + "_kernels",
+ jit_types = [
+ "i8",
+ "ui16",
+ "ui32",
+ "ui64",
+ ],
op = op,
tile_size = "1024",
types = [
diff --git a/tensorflow/core/kernels/mlir_generated/gpu_binary_ops_test.cc b/tensorflow/core/kernels/mlir_generated/gpu_binary_ops_test.cc
index 013d1dc..8289091 100644
--- a/tensorflow/core/kernels/mlir_generated/gpu_binary_ops_test.cc
+++ b/tensorflow/core/kernels/mlir_generated/gpu_binary_ops_test.cc
@@ -826,6 +826,24 @@
baseline_maximum,
test::OpsTestConfig().ExpectStrictlyEqual())
+/// Test the experimental JIT-compiled kernels for i8, ui16, ui32, and ui64.
+#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \
+ defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
+GENERATE_DEFAULT_TESTS(Maximum, /*test_name=*/Int8, int8_t, int8_t,
+ baseline_maximum,
+ test::OpsTestConfig().ExpectStrictlyEqual())
+
+GENERATE_DEFAULT_TESTS(Maximum, /*test_name=*/UInt16, uint16_t, uint16_t,
+ baseline_maximum,
+ test::OpsTestConfig().ExpectStrictlyEqual())
+GENERATE_DEFAULT_TESTS(Maximum, /*test_name=*/UInt32, uint32_t, uint32_t,
+ baseline_maximum,
+ test::OpsTestConfig().ExpectStrictlyEqual())
+GENERATE_DEFAULT_TESTS(Maximum, /*test_name=*/UInt64, uint64_t, uint64_t,
+ baseline_maximum,
+ test::OpsTestConfig().ExpectStrictlyEqual())
+#endif
+
/// Test `tf.Minmum`.
template <typename T>
@@ -852,6 +870,24 @@
baseline_minimum,
test::OpsTestConfig().ExpectStrictlyEqual())
+/// Test the experimental JIT-compiled kernels for i8, ui16, ui32, and ui64.
+#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \
+ defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
+GENERATE_DEFAULT_TESTS(Minimum, /*test_name=*/Int8, int8_t, int8_t,
+ baseline_minimum,
+ test::OpsTestConfig().ExpectStrictlyEqual())
+
+GENERATE_DEFAULT_TESTS(Minimum, /*test_name=*/UInt16, uint16_t, uint16_t,
+ baseline_minimum,
+ test::OpsTestConfig().ExpectStrictlyEqual())
+GENERATE_DEFAULT_TESTS(Minimum, /*test_name=*/UInt32, uint32_t, uint32_t,
+ baseline_minimum,
+ test::OpsTestConfig().ExpectStrictlyEqual())
+GENERATE_DEFAULT_TESTS(Minimum, /*test_name=*/UInt64, uint64_t, uint64_t,
+ baseline_minimum,
+ test::OpsTestConfig().ExpectStrictlyEqual())
+#endif
+
/// Test `tf.Mul`.
template <typename T>
diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_maximum.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_maximum.cc
index 87b7081..93ca6c9 100644
--- a/tensorflow/core/kernels/mlir_generated/gpu_op_maximum.cc
+++ b/tensorflow/core/kernels/mlir_generated/gpu_op_maximum.cc
@@ -26,4 +26,12 @@
GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Maximum, DT_INT64);
GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Maximum, DT_UINT8);
+// JIT-compiled kernels; only present when the experimental macro is defined.
+#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
+GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Maximum, DT_INT8);
+GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Maximum, DT_UINT16);
+GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Maximum, DT_UINT32);
+GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Maximum, DT_UINT64);
+#endif
+
} // namespace tensorflow
diff --git a/tensorflow/core/kernels/mlir_generated/gpu_op_minimum.cc b/tensorflow/core/kernels/mlir_generated/gpu_op_minimum.cc
index d6336ce..ec9eff6 100644
--- a/tensorflow/core/kernels/mlir_generated/gpu_op_minimum.cc
+++ b/tensorflow/core/kernels/mlir_generated/gpu_op_minimum.cc
@@ -26,4 +26,12 @@
GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Minimum, DT_INT64);
GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Minimum, DT_UINT8);
+// JIT-compiled kernels; only present when the experimental macro is defined.
+#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
+GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Minimum, DT_INT8);
+GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Minimum, DT_UINT16);
+GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Minimum, DT_UINT32);
+GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Minimum, DT_UINT64);
+#endif
+
} // namespace tensorflow