Refactor Right/LeftShift NEON function names

This is for clarity and to allow the use of LeftShift from common.h in NEON code without explicit namespacing.

PiperOrigin-RevId: 360999725
Change-Id: I05724e2e833ba1989cc0c594233358f7efd0ee49

commit: 8a019f00a2c43c99672bbcb9e9dad2a14b23ba0a [log] [tgz]
author: Alex Peterson <petersonab@google.com> Thu Mar 04 14:05:26 2021 -0800
committer: James Zern <jzern@google.com> Thu Mar 11 10:45:56 2021 -0800
tree: c5c916e55191818986b5fd617a294bb34d34e20c
parent: 8134ec499987c5d00dbdbafc9f95134ca630aed6 [diff]
diff --git a/src/dsp/arm/common_neon.h b/src/dsp/arm/common_neon.h
index cf444e6..61da1f8 100644
--- a/src/dsp/arm/common_neon.h
+++ b/src/dsp/arm/common_neon.h

@@ -282,17 +282,17 @@
 
 // vshXX_n_XX() requires an immediate.
 template <int shift>
-inline uint8x8_t LeftShift(const uint8x8_t vector) {
+inline uint8x8_t LeftShiftVector(const uint8x8_t vector) {
   return vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(vector), shift));
 }
 
 template <int shift>
-inline uint8x8_t RightShift(const uint8x8_t vector) {
+inline uint8x8_t RightShiftVector(const uint8x8_t vector) {
   return vreinterpret_u8_u64(vshr_n_u64(vreinterpret_u64_u8(vector), shift));
 }
 
 template <int shift>
-inline int8x8_t RightShift(const int8x8_t vector) {
+inline int8x8_t RightShiftVector(const int8x8_t vector) {
   return vreinterpret_s8_u64(vshr_n_u64(vreinterpret_u64_s8(vector), shift));
 }
 

diff --git a/src/dsp/arm/convolve_neon.cc b/src/dsp/arm/convolve_neon.cc
index 6625c9b..331bfe2 100644
--- a/src/dsp/arm/convolve_neon.cc
+++ b/src/dsp/arm/convolve_neon.cc

@@ -231,12 +231,12 @@
     int16x8_t sum;
     v_src[0] = vld1_u8(src);
     if (filter_index == 3) {
-      v_src[1] = RightShift<1 * 8>(v_src[0]);
+      v_src[1] = RightShiftVector<1 * 8>(v_src[0]);
       sum = SumOnePassTaps<filter_index, false>(v_src, v_tap + 3);
     } else {
-      v_src[1] = RightShift<1 * 8>(v_src[0]);
-      v_src[2] = RightShift<2 * 8>(v_src[0]);
-      v_src[3] = RightShift<3 * 8>(v_src[0]);
+      v_src[1] = RightShiftVector<1 * 8>(v_src[0]);
+      v_src[2] = RightShiftVector<2 * 8>(v_src[0]);
+      v_src[3] = RightShiftVector<3 * 8>(v_src[0]);
       sum = SumOnePassTaps<filter_index, false>(v_src, v_tap + 2);
     }
     if (is_2d || is_compound) {
@@ -274,15 +274,15 @@
       sum = vmlal_u8(sum, vext_u8(input.val[0], input.val[1], 2), v_tap[4]);
     } else if (filter_index == 4) {
       // tap signs : - + + -
-      sum = vmull_u8(RightShift<2 * 8>(input.val[0]), v_tap[3]);
+      sum = vmull_u8(RightShiftVector<2 * 8>(input.val[0]), v_tap[3]);
       sum = vmlsl_u8(sum, input.val[0], v_tap[2]);
-      sum = vmlal_u8(sum, RightShift<4 * 8>(input.val[0]), v_tap[4]);
+      sum = vmlal_u8(sum, RightShiftVector<4 * 8>(input.val[0]), v_tap[4]);
       sum = vmlsl_u8(sum, vext_u8(input.val[0], input.val[1], 6), v_tap[5]);
     } else {
       // tap signs : + + + +
       sum = vmull_u8(input.val[0], v_tap[2]);
-      sum = vmlal_u8(sum, RightShift<2 * 8>(input.val[0]), v_tap[3]);
-      sum = vmlal_u8(sum, RightShift<4 * 8>(input.val[0]), v_tap[4]);
+      sum = vmlal_u8(sum, RightShiftVector<2 * 8>(input.val[0]), v_tap[3]);
+      sum = vmlal_u8(sum, RightShiftVector<4 * 8>(input.val[0]), v_tap[4]);
       sum = vmlal_u8(sum, vext_u8(input.val[0], input.val[1], 6), v_tap[5]);
     }
     int16x8_t s = vreinterpretq_s16_u16(sum);
@@ -323,18 +323,18 @@
     uint16x8_t sum;
     if (filter_index == 3) {
       sum = vmull_u8(input, v_tap[3]);
-      sum = vmlal_u8(sum, RightShift<1 * 8>(input), v_tap[4]);
+      sum = vmlal_u8(sum, RightShiftVector<1 * 8>(input), v_tap[4]);
     } else if (filter_index == 4) {
-      sum = vmull_u8(RightShift<1 * 8>(input), v_tap[3]);
+      sum = vmull_u8(RightShiftVector<1 * 8>(input), v_tap[3]);
       sum = vmlsl_u8(sum, input, v_tap[2]);
-      sum = vmlal_u8(sum, RightShift<2 * 8>(input), v_tap[4]);
-      sum = vmlsl_u8(sum, RightShift<3 * 8>(input), v_tap[5]);
+      sum = vmlal_u8(sum, RightShiftVector<2 * 8>(input), v_tap[4]);
+      sum = vmlsl_u8(sum, RightShiftVector<3 * 8>(input), v_tap[5]);
     } else {
       assert(filter_index == 5);
       sum = vmull_u8(input, v_tap[2]);
-      sum = vmlal_u8(sum, RightShift<1 * 8>(input), v_tap[3]);
-      sum = vmlal_u8(sum, RightShift<2 * 8>(input), v_tap[4]);
-      sum = vmlal_u8(sum, RightShift<3 * 8>(input), v_tap[5]);
+      sum = vmlal_u8(sum, RightShiftVector<1 * 8>(input), v_tap[3]);
+      sum = vmlal_u8(sum, RightShiftVector<2 * 8>(input), v_tap[4]);
+      sum = vmlal_u8(sum, RightShiftVector<3 * 8>(input), v_tap[5]);
     }
     // |sum| contains an int16_t value.
     sum = vreinterpretq_u16_s16(vrshrq_n_s16(vreinterpretq_s16_u16(sum),

diff --git a/src/dsp/arm/intrapred_directional_neon.cc b/src/dsp/arm/intrapred_directional_neon.cc
index b0e764b..0011d72 100644
--- a/src/dsp/arm/intrapred_directional_neon.cc
+++ b/src/dsp/arm/intrapred_directional_neon.cc

@@ -112,7 +112,7 @@
     // 4 wide subsamples the output. 8 wide subsamples the input.
     if (width == 4) {
       const uint8x8_t left_values = vld1_u8(top + top_base_x);
-      const uint8x8_t right_values = RightShift<8>(left_values);
+      const uint8x8_t right_values = RightShiftVector<8>(left_values);
       const uint8x8_t value = WeightedBlend(left_values, right_values, shift);
 
       // If |upsampled| is true then extract every other value for output.

diff --git a/src/dsp/arm/loop_filter_neon.cc b/src/dsp/arm/loop_filter_neon.cc
index b70433e..8d72892 100644
--- a/src/dsp/arm/loop_filter_neon.cc
+++ b/src/dsp/arm/loop_filter_neon.cc

@@ -35,7 +35,7 @@
 // (abs(p1 - p0) > thresh) || (abs(q1 - q0) > thresh)
 inline uint8x8_t Hev(const uint8x8_t abd_p0p1_q0q1, const uint8_t thresh) {
   const uint8x8_t a = vcgt_u8(abd_p0p1_q0q1, vdup_n_u8(thresh));
-  return vorr_u8(a, RightShift<32>(a));
+  return vorr_u8(a, RightShiftVector<32>(a));
 }
 
 // abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= outer_thresh
@@ -44,7 +44,7 @@
   const uint8x8x2_t a = Interleave32(p0q0, p1q1);
   const uint8x8_t b = vabd_u8(a.val[0], a.val[1]);
   const uint8x8_t p0q0_double = vqadd_u8(b, b);
-  const uint8x8_t p1q1_half = RightShift<32>(vshr_n_u8(b, 1));
+  const uint8x8_t p1q1_half = RightShiftVector<32>(vshr_n_u8(b, 1));
   const uint8x8_t c = vqadd_u8(p0q0_double, p1q1_half);
   return vcle_u8(c, vdup_n_u8(outer_thresh));
 }
@@ -56,7 +56,7 @@
                               const uint8_t inner_thresh,
                               const uint8_t outer_thresh) {
   const uint8x8_t a = vcle_u8(abd_p0p1_q0q1, vdup_n_u8(inner_thresh));
-  const uint8x8_t inner_mask = vand_u8(a, RightShift<32>(a));
+  const uint8x8_t inner_mask = vand_u8(a, RightShiftVector<32>(a));
   const uint8x8_t outer_mask = OuterThreshold(p0q0, p1q1, outer_thresh);
   return vand_u8(inner_mask, outer_mask);
 }
@@ -121,7 +121,7 @@
       vcombine_s16(vget_low_s16(p0q1_l), vget_low_s16(q0p1_l));
   // Need to shift the second term or we end up with a2_ma2.
   const int8x8_t a2_ma1 =
-      InterleaveLow32(a2_a1, RightShift<32>(vneg_s8(a2_a1)));
+      InterleaveLow32(a2_a1, RightShiftVector<32>(vneg_s8(a2_a1)));
   const int16x8_t p0q0_a = vaddw_s8(p0q0_l, a2_ma1);
 
   *p1q1_result = vqmovun_s16(p1q1_a3);
@@ -251,7 +251,7 @@
                          const uint8x8_t abd_p0p2_q0q2) {
   const uint8x8_t a = vmax_u8(abd_p0p1_q0q1, abd_p0p2_q0q2);
   const uint8x8_t b = vcle_u8(a, vdup_n_u8(1));
-  return vand_u8(b, RightShift<32>(b));
+  return vand_u8(b, RightShiftVector<32>(b));
 }
 
 // abs(p2 - p1) <= inner_thresh && abs(p1 - p0) <= inner_thresh &&
@@ -264,7 +264,7 @@
                               const uint8_t outer_thresh) {
   const uint8x8_t a = vmax_u8(abd_p0p1_q0q1, abd_p1p2_q1q2);
   const uint8x8_t b = vcle_u8(a, vdup_n_u8(inner_thresh));
-  const uint8x8_t inner_mask = vand_u8(b, RightShift<32>(b));
+  const uint8x8_t inner_mask = vand_u8(b, RightShiftVector<32>(b));
   const uint8x8_t outer_mask = OuterThreshold(p0q0, p1q1, outer_thresh);
   return vand_u8(inner_mask, outer_mask);
 }
@@ -482,7 +482,7 @@
   const uint8x8_t a = vmax_u8(abd_p0n0_q0n0, abd_p0n1_q0n1);
   const uint8x8_t b = vmax_u8(a, abd_p0n2_q0n2);
   const uint8x8_t c = vcle_u8(b, vdup_n_u8(1));
-  return vand_u8(c, RightShift<32>(c));
+  return vand_u8(c, RightShiftVector<32>(c));
 }
 
 // abs(p3 - p2) <= inner_thresh && abs(p2 - p1) <= inner_thresh &&
@@ -498,7 +498,7 @@
   const uint8x8_t a = vmax_u8(abd_p0p1_q0q1, abd_p1p2_q1q2);
   const uint8x8_t b = vmax_u8(a, abd_p2p3_q2q3);
   const uint8x8_t c = vcle_u8(b, vdup_n_u8(inner_thresh));
-  const uint8x8_t inner_mask = vand_u8(c, RightShift<32>(c));
+  const uint8x8_t inner_mask = vand_u8(c, RightShiftVector<32>(c));
   const uint8x8_t outer_mask = OuterThreshold(p0q0, p1q1, outer_thresh);
   return vand_u8(inner_mask, outer_mask);
 }
commit	8a019f00a2c43c99672bbcb9e9dad2a14b23ba0a	[log] [tgz]
author	Alex Peterson <petersonab@google.com>	Thu Mar 04 14:05:26 2021 -0800
committer	James Zern <jzern@google.com>	Thu Mar 11 10:45:56 2021 -0800
tree	c5c916e55191818986b5fd617a294bb34d34e20c
parent	8134ec499987c5d00dbdbafc9f95134ca630aed6 [diff]