Rename NEON Right/LeftShift to Right/LeftShiftVector
This is for clarity, and it allows LeftShift from common.h to be used
in NEON code without explicit namespace qualification.
PiperOrigin-RevId: 360999725
Change-Id: I05724e2e833ba1989cc0c594233358f7efd0ee49
diff --git a/src/dsp/arm/common_neon.h b/src/dsp/arm/common_neon.h
index cf444e6..61da1f8 100644
--- a/src/dsp/arm/common_neon.h
+++ b/src/dsp/arm/common_neon.h
@@ -282,17 +282,17 @@
// vshXX_n_XX() requires an immediate.
template <int shift>
-inline uint8x8_t LeftShift(const uint8x8_t vector) {
+inline uint8x8_t LeftShiftVector(const uint8x8_t vector) {
return vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(vector), shift));
}
template <int shift>
-inline uint8x8_t RightShift(const uint8x8_t vector) {
+inline uint8x8_t RightShiftVector(const uint8x8_t vector) {
return vreinterpret_u8_u64(vshr_n_u64(vreinterpret_u64_u8(vector), shift));
}
template <int shift>
-inline int8x8_t RightShift(const int8x8_t vector) {
+inline int8x8_t RightShiftVector(const int8x8_t vector) {
return vreinterpret_s8_u64(vshr_n_u64(vreinterpret_u64_s8(vector), shift));
}
diff --git a/src/dsp/arm/convolve_neon.cc b/src/dsp/arm/convolve_neon.cc
index 6625c9b..331bfe2 100644
--- a/src/dsp/arm/convolve_neon.cc
+++ b/src/dsp/arm/convolve_neon.cc
@@ -231,12 +231,12 @@
int16x8_t sum;
v_src[0] = vld1_u8(src);
if (filter_index == 3) {
- v_src[1] = RightShift<1 * 8>(v_src[0]);
+ v_src[1] = RightShiftVector<1 * 8>(v_src[0]);
sum = SumOnePassTaps<filter_index, false>(v_src, v_tap + 3);
} else {
- v_src[1] = RightShift<1 * 8>(v_src[0]);
- v_src[2] = RightShift<2 * 8>(v_src[0]);
- v_src[3] = RightShift<3 * 8>(v_src[0]);
+ v_src[1] = RightShiftVector<1 * 8>(v_src[0]);
+ v_src[2] = RightShiftVector<2 * 8>(v_src[0]);
+ v_src[3] = RightShiftVector<3 * 8>(v_src[0]);
sum = SumOnePassTaps<filter_index, false>(v_src, v_tap + 2);
}
if (is_2d || is_compound) {
@@ -274,15 +274,15 @@
sum = vmlal_u8(sum, vext_u8(input.val[0], input.val[1], 2), v_tap[4]);
} else if (filter_index == 4) {
// tap signs : - + + -
- sum = vmull_u8(RightShift<2 * 8>(input.val[0]), v_tap[3]);
+ sum = vmull_u8(RightShiftVector<2 * 8>(input.val[0]), v_tap[3]);
sum = vmlsl_u8(sum, input.val[0], v_tap[2]);
- sum = vmlal_u8(sum, RightShift<4 * 8>(input.val[0]), v_tap[4]);
+ sum = vmlal_u8(sum, RightShiftVector<4 * 8>(input.val[0]), v_tap[4]);
sum = vmlsl_u8(sum, vext_u8(input.val[0], input.val[1], 6), v_tap[5]);
} else {
// tap signs : + + + +
sum = vmull_u8(input.val[0], v_tap[2]);
- sum = vmlal_u8(sum, RightShift<2 * 8>(input.val[0]), v_tap[3]);
- sum = vmlal_u8(sum, RightShift<4 * 8>(input.val[0]), v_tap[4]);
+ sum = vmlal_u8(sum, RightShiftVector<2 * 8>(input.val[0]), v_tap[3]);
+ sum = vmlal_u8(sum, RightShiftVector<4 * 8>(input.val[0]), v_tap[4]);
sum = vmlal_u8(sum, vext_u8(input.val[0], input.val[1], 6), v_tap[5]);
}
int16x8_t s = vreinterpretq_s16_u16(sum);
@@ -323,18 +323,18 @@
uint16x8_t sum;
if (filter_index == 3) {
sum = vmull_u8(input, v_tap[3]);
- sum = vmlal_u8(sum, RightShift<1 * 8>(input), v_tap[4]);
+ sum = vmlal_u8(sum, RightShiftVector<1 * 8>(input), v_tap[4]);
} else if (filter_index == 4) {
- sum = vmull_u8(RightShift<1 * 8>(input), v_tap[3]);
+ sum = vmull_u8(RightShiftVector<1 * 8>(input), v_tap[3]);
sum = vmlsl_u8(sum, input, v_tap[2]);
- sum = vmlal_u8(sum, RightShift<2 * 8>(input), v_tap[4]);
- sum = vmlsl_u8(sum, RightShift<3 * 8>(input), v_tap[5]);
+ sum = vmlal_u8(sum, RightShiftVector<2 * 8>(input), v_tap[4]);
+ sum = vmlsl_u8(sum, RightShiftVector<3 * 8>(input), v_tap[5]);
} else {
assert(filter_index == 5);
sum = vmull_u8(input, v_tap[2]);
- sum = vmlal_u8(sum, RightShift<1 * 8>(input), v_tap[3]);
- sum = vmlal_u8(sum, RightShift<2 * 8>(input), v_tap[4]);
- sum = vmlal_u8(sum, RightShift<3 * 8>(input), v_tap[5]);
+ sum = vmlal_u8(sum, RightShiftVector<1 * 8>(input), v_tap[3]);
+ sum = vmlal_u8(sum, RightShiftVector<2 * 8>(input), v_tap[4]);
+ sum = vmlal_u8(sum, RightShiftVector<3 * 8>(input), v_tap[5]);
}
// |sum| contains an int16_t value.
sum = vreinterpretq_u16_s16(vrshrq_n_s16(vreinterpretq_s16_u16(sum),
diff --git a/src/dsp/arm/intrapred_directional_neon.cc b/src/dsp/arm/intrapred_directional_neon.cc
index b0e764b..0011d72 100644
--- a/src/dsp/arm/intrapred_directional_neon.cc
+++ b/src/dsp/arm/intrapred_directional_neon.cc
@@ -112,7 +112,7 @@
// 4 wide subsamples the output. 8 wide subsamples the input.
if (width == 4) {
const uint8x8_t left_values = vld1_u8(top + top_base_x);
- const uint8x8_t right_values = RightShift<8>(left_values);
+ const uint8x8_t right_values = RightShiftVector<8>(left_values);
const uint8x8_t value = WeightedBlend(left_values, right_values, shift);
// If |upsampled| is true then extract every other value for output.
diff --git a/src/dsp/arm/loop_filter_neon.cc b/src/dsp/arm/loop_filter_neon.cc
index b70433e..8d72892 100644
--- a/src/dsp/arm/loop_filter_neon.cc
+++ b/src/dsp/arm/loop_filter_neon.cc
@@ -35,7 +35,7 @@
// (abs(p1 - p0) > thresh) || (abs(q1 - q0) > thresh)
inline uint8x8_t Hev(const uint8x8_t abd_p0p1_q0q1, const uint8_t thresh) {
const uint8x8_t a = vcgt_u8(abd_p0p1_q0q1, vdup_n_u8(thresh));
- return vorr_u8(a, RightShift<32>(a));
+ return vorr_u8(a, RightShiftVector<32>(a));
}
// abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= outer_thresh
@@ -44,7 +44,7 @@
const uint8x8x2_t a = Interleave32(p0q0, p1q1);
const uint8x8_t b = vabd_u8(a.val[0], a.val[1]);
const uint8x8_t p0q0_double = vqadd_u8(b, b);
- const uint8x8_t p1q1_half = RightShift<32>(vshr_n_u8(b, 1));
+ const uint8x8_t p1q1_half = RightShiftVector<32>(vshr_n_u8(b, 1));
const uint8x8_t c = vqadd_u8(p0q0_double, p1q1_half);
return vcle_u8(c, vdup_n_u8(outer_thresh));
}
@@ -56,7 +56,7 @@
const uint8_t inner_thresh,
const uint8_t outer_thresh) {
const uint8x8_t a = vcle_u8(abd_p0p1_q0q1, vdup_n_u8(inner_thresh));
- const uint8x8_t inner_mask = vand_u8(a, RightShift<32>(a));
+ const uint8x8_t inner_mask = vand_u8(a, RightShiftVector<32>(a));
const uint8x8_t outer_mask = OuterThreshold(p0q0, p1q1, outer_thresh);
return vand_u8(inner_mask, outer_mask);
}
@@ -121,7 +121,7 @@
vcombine_s16(vget_low_s16(p0q1_l), vget_low_s16(q0p1_l));
// Need to shift the second term or we end up with a2_ma2.
const int8x8_t a2_ma1 =
- InterleaveLow32(a2_a1, RightShift<32>(vneg_s8(a2_a1)));
+ InterleaveLow32(a2_a1, RightShiftVector<32>(vneg_s8(a2_a1)));
const int16x8_t p0q0_a = vaddw_s8(p0q0_l, a2_ma1);
*p1q1_result = vqmovun_s16(p1q1_a3);
@@ -251,7 +251,7 @@
const uint8x8_t abd_p0p2_q0q2) {
const uint8x8_t a = vmax_u8(abd_p0p1_q0q1, abd_p0p2_q0q2);
const uint8x8_t b = vcle_u8(a, vdup_n_u8(1));
- return vand_u8(b, RightShift<32>(b));
+ return vand_u8(b, RightShiftVector<32>(b));
}
// abs(p2 - p1) <= inner_thresh && abs(p1 - p0) <= inner_thresh &&
@@ -264,7 +264,7 @@
const uint8_t outer_thresh) {
const uint8x8_t a = vmax_u8(abd_p0p1_q0q1, abd_p1p2_q1q2);
const uint8x8_t b = vcle_u8(a, vdup_n_u8(inner_thresh));
- const uint8x8_t inner_mask = vand_u8(b, RightShift<32>(b));
+ const uint8x8_t inner_mask = vand_u8(b, RightShiftVector<32>(b));
const uint8x8_t outer_mask = OuterThreshold(p0q0, p1q1, outer_thresh);
return vand_u8(inner_mask, outer_mask);
}
@@ -482,7 +482,7 @@
const uint8x8_t a = vmax_u8(abd_p0n0_q0n0, abd_p0n1_q0n1);
const uint8x8_t b = vmax_u8(a, abd_p0n2_q0n2);
const uint8x8_t c = vcle_u8(b, vdup_n_u8(1));
- return vand_u8(c, RightShift<32>(c));
+ return vand_u8(c, RightShiftVector<32>(c));
}
// abs(p3 - p2) <= inner_thresh && abs(p2 - p1) <= inner_thresh &&
@@ -498,7 +498,7 @@
const uint8x8_t a = vmax_u8(abd_p0p1_q0q1, abd_p1p2_q1q2);
const uint8x8_t b = vmax_u8(a, abd_p2p3_q2q3);
const uint8x8_t c = vcle_u8(b, vdup_n_u8(inner_thresh));
- const uint8x8_t inner_mask = vand_u8(c, RightShift<32>(c));
+ const uint8x8_t inner_mask = vand_u8(c, RightShiftVector<32>(c));
const uint8x8_t outer_mask = OuterThreshold(p0q0, p1q1, outer_thresh);
return vand_u8(inner_mask, outer_mask);
}