[XLA:GPU] Adjust launch_dimensions.hlo test.
- Change op from `tanh` to `round-nearest-even` in preparation for enabling the fusion rewrite for `tanh` (but not for `round-nearest-even`, which is unsupported; see cl/464576491).
- Make lit check statements less brittle.
PiperOrigin-RevId: 464967659
diff --git a/tensorflow/compiler/xla/service/gpu/tests/launch_dimensions.hlo b/tensorflow/compiler/xla/service/gpu/tests/launch_dimensions.hlo
index 85d358a..b417994 100644
--- a/tensorflow/compiler/xla/service/gpu/tests/launch_dimensions.hlo
+++ b/tensorflow/compiler/xla/service/gpu/tests/launch_dimensions.hlo
@@ -1,41 +1,47 @@
// RUN: hlo_to_llvm_ir %s | FileCheck %s
-// This test that we do not increase the grid launch size when
+// This tests that we do not increase the grid launch size when
// few_waves is enabled.
// CHECK-LABEL: entry:
-// CHECK: !2 = !{i32 0, i32 2}
-// CHECK: !3 = !{i32 0, i32 256}
+// CHECK-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[ctaid_range:[0-9]+]]
+// CHECK-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[tid_range:[0-9]+]]
+// CHECK-DAG: ![[ctaid_range]] = !{i32 0, i32 2}
+// CHECK-DAG: ![[tid_range]] = !{i32 0, i32 256}
HloModule Test
ENTRY main {
a = f32[100, 20]{1,0} parameter(0)
- ROOT b = f32[100, 20]{1,0} tanh(a)
+ ROOT b = f32[100, 20]{1,0} round-nearest-even(a)
}
// -----
-// This test that we cap grid launch code when few_waves is enabled.
+// This tests that we cap the grid launch size when few_waves is enabled.
// CHECK-LABEL: entry:
-// CHECK: !2 = !{i32 0, i32 1280}
-// CHECK: !3 = !{i32 0, i32 128}
+// CHECK-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[ctaid_range:[0-9]+]]
+// CHECK-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[tid_range:[0-9]+]]
+// CHECK-DAG: ![[ctaid_range]] = !{i32 0, i32 1280}
+// CHECK-DAG: ![[tid_range]] = !{i32 0, i32 128}
HloModule Test
ENTRY main {
a = f32[10000, 10000]{1,0} parameter(0)
- ROOT b = f32[10000, 10000]{1,0} tanh(a)
+ ROOT b = f32[10000, 10000]{1,0} round-nearest-even(a)
}
// -----
-// This test that we cap grid launch code when few_waves is enabled
+// This tests that we cap the grid launch size when few_waves is enabled
// and scalar broadcast are present.
// CHECK-LABEL: entry:
-// CHECK: !2 = !{i32 0, i32 1280}
-// CHECK: !3 = !{i32 0, i32 128}
+// CHECK-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[ctaid_range:[0-9]+]]
+// CHECK-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[tid_range:[0-9]+]]
+// CHECK-DAG: ![[ctaid_range]] = !{i32 0, i32 1280}
+// CHECK-DAG: ![[tid_range]] = !{i32 0, i32 128}
HloModule ScalarBroadcast
@@ -55,12 +61,14 @@
// -----
-// This test the GELU kernel. The original kernel that
+// This tests the GELU kernel. The original kernel that
// motivated few_waves implementation.
// CHECK-LABEL: entry:
-// CHECK: !2 = !{i32 0, i32 1280}
-// CHECK: !3 = !{i32 0, i32 128}
+// CHECK-DAG: call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x(), !range ![[ctaid_range:[0-9]+]]
+// CHECK-DAG: call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range ![[tid_range:[0-9]+]]
+// CHECK-DAG: ![[ctaid_range]] = !{i32 0, i32 1280}
+// CHECK-DAG: ![[tid_range]] = !{i32 0, i32 128}
HloModule Test