Fixes 32-bit wrap-around in the scalar tail loop of silk_inner_prod16_sse4_1()
Thanks Tim
diff --git a/silk/fixed/x86/vector_ops_FIX_sse4_1.c b/silk/fixed/x86/vector_ops_FIX_sse4_1.c
index 0cfb08d..a46289b 100644
--- a/silk/fixed/x86/vector_ops_FIX_sse4_1.c
+++ b/silk/fixed/x86/vector_ops_FIX_sse4_1.c
@@ -36,6 +36,7 @@
#include "SigProc_FIX.h"
#include "pitch.h"
+#include "celt/x86/x86cpu.h"
opus_int64 silk_inner_prod16_sse4_1(
const opus_int16 *inVec1, /* I input vector 1 */
@@ -78,7 +79,7 @@
_mm_storel_epi64( (__m128i *)&sum, acc1 );
for( ; i < len; i++ ) {
- sum = silk_SMLABB( sum, inVec1[ i ], inVec2[ i ] );
+ sum = silk_SMLALBB( sum, inVec1[ i ], inVec2[ i ] );
}
#ifdef OPUS_CHECK_ASM