Cosmetic changes to aom_int_pro_row and tests; use unaligned loads in aom_int_pro_col_sse2.
Change-Id: I30ac9c3249fecb8453c0e9f150eb8370572633c5
diff --git a/aom_dsp/x86/avg_intrin_sse2.c b/aom_dsp/x86/avg_intrin_sse2.c
index 3057a71..1c5f6eb 100644
--- a/aom_dsp/x86/avg_intrin_sse2.c
+++ b/aom_dsp/x86/avg_intrin_sse2.c
@@ -409,7 +409,7 @@
void aom_int_pro_row_sse2(int16_t *hbuf, const uint8_t *ref,
const int ref_stride, const int height) {
- int idx;
+ int idx = 1;
__m128i zero = _mm_setzero_si128();
__m128i src_line = _mm_loadu_si128((const __m128i *)ref);
__m128i s0 = _mm_unpacklo_epi8(src_line, zero);
@@ -417,8 +417,7 @@
__m128i t0, t1;
int height_1 = height - 1;
ref += ref_stride;
-
- for (idx = 1; idx < height_1; idx += 2) {
+ do {
src_line = _mm_loadu_si128((const __m128i *)ref);
t0 = _mm_unpacklo_epi8(src_line, zero);
t1 = _mm_unpackhi_epi8(src_line, zero);
@@ -432,7 +431,8 @@
s0 = _mm_adds_epu16(s0, t0);
s1 = _mm_adds_epu16(s1, t1);
ref += ref_stride;
- }
+ idx += 2;
+ } while (idx < height_1);
src_line = _mm_loadu_si128((const __m128i *)ref);
t0 = _mm_unpacklo_epi8(src_line, zero);
@@ -449,6 +449,7 @@
s0 = _mm_srai_epi16(s0, 4);
s1 = _mm_srai_epi16(s1, 4);
} else {
+ assert(height == 16);
s0 = _mm_srai_epi16(s0, 3);
s1 = _mm_srai_epi16(s1, 3);
}
@@ -460,14 +461,14 @@
int16_t aom_int_pro_col_sse2(const uint8_t *ref, const int width) {
__m128i zero = _mm_setzero_si128();
- __m128i src_line = _mm_load_si128((const __m128i *)ref);
+ __m128i src_line = _mm_loadu_si128((const __m128i *)ref);
__m128i s0 = _mm_sad_epu8(src_line, zero);
__m128i s1;
int i;
for (i = 16; i < width; i += 16) {
ref += 16;
- src_line = _mm_load_si128((const __m128i *)ref);
+ src_line = _mm_loadu_si128((const __m128i *)ref);
s1 = _mm_sad_epu8(src_line, zero);
s0 = _mm_adds_epu16(s0, s1);
}
diff --git a/test/avg_test.cc b/test/avg_test.cc
index 0dc06bd..cbd3461 100644
--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -42,7 +42,7 @@
static const int kDataBlockSize = 64 * 128;
virtual void SetUp() {
- source_data_ = reinterpret_cast<Pixel *>(
+ source_data_ = static_cast<Pixel *>(
aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
ASSERT_TRUE(source_data_ != NULL);
source_stride_ = (width_ + 31) & ~31;
@@ -138,6 +138,7 @@
typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref,
const int ref_stride, const int height);
+// Params: height, asm function, c function.
typedef std::tuple<int, IntProRowFunc, IntProRowFunc> IntProRowParam;
class IntProRowTest : public AverageTestBase<uint8_t>,
@@ -151,13 +152,13 @@
protected:
virtual void SetUp() {
- source_data_ = reinterpret_cast<uint8_t *>(
+ source_data_ = static_cast<uint8_t *>(
aom_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0])));
ASSERT_TRUE(source_data_ != NULL);
- hbuf_asm_ = reinterpret_cast<int16_t *>(
+ hbuf_asm_ = static_cast<int16_t *>(
aom_memalign(kDataAlignment, sizeof(*hbuf_asm_) * 16));
- hbuf_c_ = reinterpret_cast<int16_t *>(
+ hbuf_c_ = static_cast<int16_t *>(
aom_memalign(kDataAlignment, sizeof(*hbuf_c_) * 16));
}
@@ -186,6 +187,7 @@
typedef int16_t (*IntProColFunc)(uint8_t const *ref, const int width);
+// Params: width, asm function, c function.
typedef std::tuple<int, IntProColFunc, IntProColFunc> IntProColParam;
class IntProColTest : public AverageTestBase<uint8_t>,