Use vceqq_s32 for comparing int32x4_t vectors.

PiperOrigin-RevId: 274589060
diff --git a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
index 3b2e1d8..3da37cb 100644
--- a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
+++ b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
@@ -1620,11 +1620,11 @@
   int v = 0;
   for (; v < postamble_start; v += kInt8ValuesPerNeonVector) {
     const int32x4_t i_x4_int32 = vreinterpretq_s32_s8(vld1q_s8(vector + v));
-    const uint32x4_t cmp_result = vceqq_s8(i_x4_int32, zero_x4_int32);
-    if (vgetq_lane_u8(cmp_result, 0) == 0) return false;
-    if (vgetq_lane_u8(cmp_result, 1) == 0) return false;
-    if (vgetq_lane_u8(cmp_result, 2) == 0) return false;
-    if (vgetq_lane_u8(cmp_result, 3) == 0) return false;
+    const uint32x4_t cmp_result = vceqq_s32(i_x4_int32, zero_x4_int32);
+    if (vgetq_lane_u32(cmp_result, 0) == 0) return false;
+    if (vgetq_lane_u32(cmp_result, 1) == 0) return false;
+    if (vgetq_lane_u32(cmp_result, 2) == 0) return false;
+    if (vgetq_lane_u32(cmp_result, 3) == 0) return false;
   }
   // Postamble loop
   for (; v < v_size; ++v) {