Fix overflow issue in 16x16 quantization SSSE3
The 16x16 transform unit test suggested that the peak coefficient
value can reach 32639. This could cause potential overflow issue
in the SSSE3 implmentation of 16x16 block quantization. This commit
fixes this issue by replacing addition with saturated addition.
Change-Id: I6d5bb7c5faad4a927be53292324bd2728690717e
diff --git a/vp9/encoder/vp9_encodemb.c b/vp9/encoder/vp9_encodemb.c
index da9a3bd..86be85d 100644
--- a/vp9/encoder/vp9_encodemb.c
+++ b/vp9/encoder/vp9_encodemb.c
@@ -635,8 +635,8 @@
vp9_short_fht16x16(src_diff, coeff, bw * 4, tx_type);
else
x->fwd_txm16x16(src_diff, coeff, bw * 8);
- vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant,
- p->quant_shift, qcoeff, dqcoeff,
+ vp9_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round,
+ p->quant, p->quant_shift, qcoeff, dqcoeff,
pd->dequant, p->zbin_extra, eob, scan, iscan);
if (!x->skip_encode && *eob) {
if (tx_type == DCT_DCT)
diff --git a/vp9/encoder/vp9_quantize.c b/vp9/encoder/vp9_quantize.c
index 96abeff..cfaa776 100644
--- a/vp9/encoder/vp9_quantize.c
+++ b/vp9/encoder/vp9_quantize.c
@@ -69,6 +69,7 @@
if (x >= zbin) {
x += (round_ptr[rc != 0]);
+ x = clamp(x, INT16_MIN, INT16_MAX);
y = (((int)(((int)(x * quant_ptr[rc != 0]) >> 16) + x)) *
quant_shift_ptr[rc != 0]) >> 16; // quantize (x)
x = (y ^ sz) - sz; // get the sign back
diff --git a/vp9/encoder/x86/vp9_quantize_ssse3.asm b/vp9/encoder/x86/vp9_quantize_ssse3.asm
index ae0d6cd..db30660 100644
--- a/vp9/encoder/x86/vp9_quantize_ssse3.asm
+++ b/vp9/encoder/x86/vp9_quantize_ssse3.asm
@@ -70,15 +70,9 @@
pcmpgtw m7, m6, m0 ; m7 = c[i] >= zbin
punpckhqdq m0, m0
pcmpgtw m12, m11, m0 ; m12 = c[i] >= zbin
-%ifidn %1, b_32x32
- paddsw m6, m1
+ paddsw m6, m1 ; m6 += round
punpckhqdq m1, m1
- paddsw m11, m1
-%else
- paddw m6, m1 ; m6 += round
- punpckhqdq m1, m1
- paddw m11, m1 ; m11 += round
-%endif
+ paddsw m11, m1 ; m11 += round
pmulhw m8, m6, m2 ; m8 = m6*q>>16
punpckhqdq m2, m2
pmulhw m13, m11, m2 ; m13 = m11*q>>16
@@ -132,12 +126,9 @@
pmovmskb r2, m12
or r6, r2
jz .skip_iter
- paddsw m6, m1
- paddsw m11, m1
-%else
- paddw m6, m1 ; m6 += round
- paddw m11, m1 ; m11 += round
%endif
+ paddsw m6, m1 ; m6 += round
+ paddsw m11, m1 ; m11 += round
pmulhw m14, m6, m2 ; m14 = m6*q>>16
pmulhw m13, m11, m2 ; m13 = m11*q>>16
paddw m14, m6 ; m14 += m6