Use vceqq_s32 for comparing int32x4_t vectors.
PiperOrigin-RevId: 274589060
diff --git a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
index 3b2e1d8..3da37cb 100644
--- a/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
+++ b/tensorflow/lite/kernels/internal/optimized/neon_tensor_utils.cc
@@ -1620,11 +1620,11 @@
int v = 0;
for (; v < postamble_start; v += kInt8ValuesPerNeonVector) {
const int32x4_t i_x4_int32 = vreinterpretq_s32_s8(vld1q_s8(vector + v));
- const uint32x4_t cmp_result = vceqq_s8(i_x4_int32, zero_x4_int32);
- if (vgetq_lane_u8(cmp_result, 0) == 0) return false;
- if (vgetq_lane_u8(cmp_result, 1) == 0) return false;
- if (vgetq_lane_u8(cmp_result, 2) == 0) return false;
- if (vgetq_lane_u8(cmp_result, 3) == 0) return false;
+ const uint32x4_t cmp_result = vceqq_s32(i_x4_int32, zero_x4_int32);
+ if (vgetq_lane_u32(cmp_result, 0) == 0) return false;
+ if (vgetq_lane_u32(cmp_result, 1) == 0) return false;
+ if (vgetq_lane_u32(cmp_result, 2) == 0) return false;
+ if (vgetq_lane_u32(cmp_result, 3) == 0) return false;
}
// Postamble loop
for (; v < v_size; ++v) {