[NEON] Remove implicit type promotion in `Vectorized<c10::Half>::operator!=` (#123864) This makes the code compilable with `gcc`, which, unlike `clang`, does not allow transparent type promotion between vectorized NEON types of the same size; see https://godbolt.org/z/xoasoGM81 for an example. Pull Request resolved: https://github.com/pytorch/pytorch/pull/123864 Approved by: https://github.com/malfet

commit: f82d20c207a5fdbc48263734ccc82db6b4ad24c0 [log] [tgz]
author: Rohan <rohanjms@amazon.com> Thu Apr 11 19:37:11 2024 +0000
committer: PyTorch MergeBot <pytorchmergebot@users.noreply.github.com> Thu Apr 11 19:37:11 2024 +0000
tree: 7fb6d55e4cb07f95ca7c84d91de3289fb8c16c41
parent: 5a7fd20aa11c02a10b1223caeefb645de482bcbc [diff]
diff --git a/aten/src/ATen/cpu/vec/vec256/vec256_half_neon.h b/aten/src/ATen/cpu/vec/vec256/vec256_half_neon.h
index bb8716e..aaf1d599 100644
--- a/aten/src/ATen/cpu/vec/vec256/vec256_half_neon.h
+++ b/aten/src/ATen/cpu/vec/vec256/vec256_half_neon.h

@@ -565,9 +565,9 @@
   }
 
   Vectorized<c10::Half> operator!=(const Vectorized<c10::Half>& other) const {
-    float32x4_t r0 = vreinterpretq_f16_u16(
+    float16x8_t r0 = vreinterpretq_f16_u16(
         vmvnq_u16(vceqq_f16(values.val[0], other.values.val[0])));
-    float32x4_t r1 = vreinterpretq_f16_u16(
+    float16x8_t r1 = vreinterpretq_f16_u16(
         vmvnq_u16(vceqq_f16(values.val[1], other.values.val[1])));
     return Vectorized<c10::Half>(r0, r1);
   }
commit	f82d20c207a5fdbc48263734ccc82db6b4ad24c0	[log] [tgz]
author	Rohan <rohanjms@amazon.com>	Thu Apr 11 19:37:11 2024 +0000
committer	PyTorch MergeBot <pytorchmergebot@users.noreply.github.com>	Thu Apr 11 19:37:11 2024 +0000
tree	7fb6d55e4cb07f95ca7c84d91de3289fb8c16c41
parent	5a7fd20aa11c02a10b1223caeefb645de482bcbc [diff]