Add Neon path for aom_mse_16xh_16bit
Add aom_mse_16xh_16bit_neon and the corresponding unit tests.
Change-Id: Icb1ca438a93a04dab9f0073da0e8592efc3532ec
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl
index 5791f56..4027289 100755
--- a/aom_dsp/aom_dsp_rtcd_defs.pl
+++ b/aom_dsp/aom_dsp_rtcd_defs.pl
@@ -1379,7 +1379,7 @@
specialize qw/aom_mse_wxh_16bit sse2 avx2 neon/;
add_proto qw/uint64_t/, "aom_mse_16xh_16bit", "uint8_t *dst, int dstride,uint16_t *src, int w, int h";
- specialize qw/aom_mse_16xh_16bit sse2 avx2/;
+ specialize qw/aom_mse_16xh_16bit sse2 avx2 neon/;
foreach (@encoder_block_sizes) {
($w, $h) = @$_;
diff --git a/aom_dsp/arm/variance_neon.c b/aom_dsp/arm/variance_neon.c
index ebcb5e3..357a083 100644
--- a/aom_dsp/arm/variance_neon.c
+++ b/aom_dsp/arm/variance_neon.c
@@ -454,3 +454,17 @@
return horizontal_add_s32x4(vaddq_s32(sse[0], sse[1]));
}
+
+// Sums the results of mse_wxh_16bit() over the 16 / w sub-blocks, each w
+// columns wide and h rows high, that sit side by side in dst.
+// dst advances by w columns per sub-block while src advances by w * h
+// elements, i.e. src is consumed one whole sub-block at a time.
+// NOTE(review): w is presumably 4 or 8; it must be in [1, 16], otherwise
+// num_blks is 0 and the do/while below would not stop after 16 / w passes.
+uint64_t aom_mse_16xh_16bit_neon(uint8_t *dst, int dstride, uint16_t *src,
+                                 int w, int h) {
+  uint64x2_t sum = vdupq_n_u64(0);
+
+  int num_blks = 16 / w;
+  do {
+    // Accumulate with the vaddq_u64 intrinsic: arithmetic operators on Neon
+    // vector types are a GCC/Clang extension and are rejected by MSVC.
+    // Assumes mse_wxh_16bit() returns uint64x2_t, as the accumulator implies.
+    sum = vaddq_u64(sum, mse_wxh_16bit(dst, dstride, src, w, w, h));
+    dst += w;
+    src += w * h;
+  } while (--num_blks != 0);
+
+  return horizontal_add_u64x2(sum);
+}
diff --git a/test/variance_test.cc b/test/variance_test.cc
index 722bec7..c1963e0 100644
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -3283,6 +3283,13 @@
MseWxHParams(2, 3, &aom_mse_wxh_16bit_neon, 8),
MseWxHParams(2, 2, &aom_mse_wxh_16bit_neon, 8)));
+INSTANTIATE_TEST_SUITE_P(  // Runs Mse16xHTest on aom_mse_16xh_16bit_neon.
+ NEON, Mse16xHTest,  // NOTE(review): params look like (log2 w, log2 h, fn, bit depth) - confirm.
+ ::testing::Values(Mse16xHParams(3, 3, &aom_mse_16xh_16bit_neon, 8),
+ Mse16xHParams(3, 2, &aom_mse_16xh_16bit_neon, 8),
+ Mse16xHParams(2, 3, &aom_mse_16xh_16bit_neon, 8),
+ Mse16xHParams(2, 2, &aom_mse_16xh_16bit_neon, 8)));
+
INSTANTIATE_TEST_SUITE_P(NEON, SumOfSquaresTest,
::testing::Values(aom_get_mb_ss_neon));