Reland unexplained int16_t shift-count cast that improves performance.

BUG=499241
TEST=none
TBR=andrew

Review URL: https://codereview.webrtc.org/1206683002

Cr-Commit-Position: refs/heads/master@{#9492}
diff --git a/webrtc/common_audio/signal_processing/cross_correlation.c b/webrtc/common_audio/signal_processing/cross_correlation.c
index c14ce4c..898d934 100644
--- a/webrtc/common_audio/signal_processing/cross_correlation.c
+++ b/webrtc/common_audio/signal_processing/cross_correlation.c
@@ -22,9 +22,11 @@
 
   for (i = 0; i < dim_cross_correlation; i++) {
     int32_t corr = 0;
-    /* Unrolling doesn't seem to improve performance. */
+    // Linux 64-bit performance is improved by the int16_t cast below.
+    // Presumably this is some sort of compiler bug, as there's no obvious
+    // reason why that should result in better code.
     for (j = 0; j < dim_seq; j++)
-      corr += (seq1[j] * seq2[j]) >> right_shifts;
+      corr += (seq1[j] * seq2[j]) >> (int16_t)right_shifts;
     seq2 += step_seq2;
     *cross_correlation++ = corr;
   }