-Removed the state as an input to the FilterAdaptation function. -Renamed the TimeToFrequency and FrequencyToTime functions. -Moved the windowing from the TimeToFrequency function. -Simplified the EchoSubtraction function. Note that the aec state is still an input to the EchoSubtraction function, and it currently needs to be that in order to support the output of the debug file. The longer-term goal is, however, to order the state into substates. This will simplify the parameter lists to the EchoCancellation function as well as replace the aec state as a parameter BUG=webrtc:5201 Review URL: https://codereview.webrtc.org/1456123003 Cr-Commit-Position: refs/heads/master@{#10830}

commit: 7e43138c0890bd99f627fa061b122c8d5716a99d [log] [tgz]
author: peah <peah@webrtc.org> Fri Nov 27 15:24:27 2015 -0800
committer: Commit bot <commit-bot@chromium.org> Fri Nov 27 23:24:32 2015 +0000
tree: 9b4ddce83033f6f504e36b65ea32994f3895470d
parent: 19822d63c1162c0989bc60b524db8c5c27815f72 [diff]
diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c
index 91b55b6..da5c26b 100644
--- a/webrtc/modules/audio_processing/aec/aec_core.c
+++ b/webrtc/modules/audio_processing/aec/aec_core.c

@@ -151,30 +151,31 @@
   return (*da > *db) - (*da < *db);
 }
 
-static void FilterFar(int num_partitions,
-                      int x_fft_buffer_block_pos,
-                      float x_fft_buffer[2][kExtendedNumPartitions * PART_LEN1],
-                      float h_fft_buffer[2][kExtendedNumPartitions * PART_LEN1],
-                      float y_fft[2][PART_LEN1]) {
+static void FilterFar(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float y_fft[2][PART_LEN1]) {
   int i;
   for (i = 0; i < num_partitions; i++) {
     int j;
-    int x_pos = (i + x_fft_buffer_block_pos) * PART_LEN1;
+    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
     int pos = i * PART_LEN1;
-    // Check for wrapped buffer.
-    if (i + x_fft_buffer_block_pos >= num_partitions) {
-      x_pos -= num_partitions * (PART_LEN1);
+    // Check for wrap
+    if (i + x_fft_buf_block_pos >= num_partitions) {
+      xPos -= num_partitions * (PART_LEN1);
     }
 
     for (j = 0; j < PART_LEN1; j++) {
-      y_fft[0][j] += MulRe(x_fft_buffer[0][x_pos + j],
-                        x_fft_buffer[1][x_pos + j],
-                        h_fft_buffer[0][pos + j],
-                        h_fft_buffer[1][pos + j]);
-      y_fft[1][j] += MulIm(x_fft_buffer[0][x_pos + j],
-                        x_fft_buffer[1][x_pos + j],
-                        h_fft_buffer[0][pos + j],
-                        h_fft_buffer[1][pos + j]);
+      y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j],
+                           x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j],
+                           h_fft_buf[1][pos + j]);
+      y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j],
+                           x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j],
+                           h_fft_buf[1][pos + j]);
     }
   }
 }
@@ -182,7 +183,7 @@
 static void ScaleErrorSignal(int extended_filter_enabled,
                              float normal_mu,
                              float normal_error_threshold,
-                             float *x_pow,
+                             float x_pow[PART_LEN1],
                              float ef[2][PART_LEN1]) {
   const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
   const float error_threshold = extended_filter_enabled
@@ -207,59 +208,40 @@
   }
 }
 
-// Time-unconstrined filter adaptation.
-// TODO(andrew): consider for a low-complexity mode.
-// static void FilterAdaptationUnconstrained(AecCore* aec, float *fft,
-//                                          float ef[2][PART_LEN1]) {
-//  int i, j;
-//  for (i = 0; i < aec->num_partitions; i++) {
-//    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
-//    int pos;
-//    // Check for wrap
-//    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
-//      xPos -= aec->num_partitions * PART_LEN1;
-//    }
-//
-//    pos = i * PART_LEN1;
-//
-//    for (j = 0; j < PART_LEN1; j++) {
-//      aec->wfBuf[0][pos + j] += MulRe(aec->xfBuf[0][xPos + j],
-//                                      -aec->xfBuf[1][xPos + j],
-//                                      ef[0][j], ef[1][j]);
-//      aec->wfBuf[1][pos + j] += MulIm(aec->xfBuf[0][xPos + j],
-//                                      -aec->xfBuf[1][xPos + j],
-//                                      ef[0][j], ef[1][j]);
-//    }
-//  }
-//}
 
-static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) {
+static void FilterAdaptation(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float e_fft[2][PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
   int i, j;
-  for (i = 0; i < aec->num_partitions; i++) {
-    int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
+  float fft[PART_LEN2];
+  for (i = 0; i < num_partitions; i++) {
+    int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1);
     int pos;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
-      xPos -= aec->num_partitions * PART_LEN1;
+    if (i + x_fft_buf_block_pos >= num_partitions) {
+      xPos -= num_partitions * PART_LEN1;
     }
 
     pos = i * PART_LEN1;
 
     for (j = 0; j < PART_LEN; j++) {
 
-      fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
-                         -aec->xfBuf[1][xPos + j],
-                         ef[0][j],
-                         ef[1][j]);
-      fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j],
-                             -aec->xfBuf[1][xPos + j],
-                             ef[0][j],
-                             ef[1][j]);
+      fft[2 * j] = MulRe(x_fft_buf[0][xPos + j],
+                         -x_fft_buf[1][xPos + j],
+                         e_fft[0][j],
+                         e_fft[1][j]);
+      fft[2 * j + 1] = MulIm(x_fft_buf[0][xPos + j],
+                             -x_fft_buf[1][xPos + j],
+                             e_fft[0][j],
+                             e_fft[1][j]);
     }
-    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
-                   -aec->xfBuf[1][xPos + PART_LEN],
-                   ef[0][PART_LEN],
-                   ef[1][PART_LEN]);
+    fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN],
+                   -x_fft_buf[1][xPos + PART_LEN],
+                   e_fft[0][PART_LEN],
+                   e_fft[1][PART_LEN]);
 
     aec_rdft_inverse_128(fft);
     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
@@ -273,12 +255,12 @@
     }
     aec_rdft_forward_128(fft);
 
-    aec->wfBuf[0][pos] += fft[0];
-    aec->wfBuf[0][pos + PART_LEN] += fft[1];
+    h_fft_buf[0][pos] += fft[0];
+    h_fft_buf[0][pos + PART_LEN] += fft[1];
 
     for (j = 1; j < PART_LEN; j++) {
-      aec->wfBuf[0][pos + j] += fft[2 * j];
-      aec->wfBuf[1][pos + j] += fft[2 * j + 1];
+      h_fft_buf[0][pos + j] += fft[2 * j];
+      h_fft_buf[1][pos + j] += fft[2 * j + 1];
     }
   }
 }
@@ -845,34 +827,26 @@
   return;
 }
 
-static void FrequencyToTime(float freq_data[2][PART_LEN1],
-                            float time_data[PART_LEN2]) {
+static void InverseFft(float freq_data[2][PART_LEN1],
+                       float time_data[PART_LEN2]) {
   int i;
-  time_data[0] = freq_data[0][0];
-  time_data[1] = freq_data[0][PART_LEN];
+  const float scale = 1.0f / PART_LEN2;
+  time_data[0] = freq_data[0][0] * scale;
+  time_data[1] = freq_data[0][PART_LEN] * scale;
   for (i = 1; i < PART_LEN; i++) {
-    time_data[2 * i] = freq_data[0][i];
-    time_data[2 * i + 1] = freq_data[1][i];
+    time_data[2 * i] = freq_data[0][i] * scale;
+    time_data[2 * i + 1] = freq_data[1][i] * scale;
   }
   aec_rdft_inverse_128(time_data);
 }
 
 
-static void TimeToFrequency(float time_data[PART_LEN2],
-                            float freq_data[2][PART_LEN1],
-                            int window) {
-  int i = 0;
-
-  // TODO(bjornv): Should we have a different function/wrapper for windowed FFT?
-  if (window) {
-    for (i = 0; i < PART_LEN; i++) {
-      time_data[i] *= WebRtcAec_sqrtHanning[i];
-      time_data[PART_LEN + i] *= WebRtcAec_sqrtHanning[PART_LEN - i];
-    }
-  }
-
+static void Fft(float time_data[PART_LEN2],
+                float freq_data[2][PART_LEN1]) {
+  int i;
   aec_rdft_forward_128(time_data);
-  // Reorder.
+
+  // Reorder fft output data.
   freq_data[1][0] = 0;
   freq_data[1][PART_LEN] = 0;
   freq_data[0][0] = time_data[0];
@@ -963,61 +937,76 @@
   return delay_correction;
 }
 
-static void EchoSubtraction(AecCore* aec,
-                            float* nearend_ptr) {
-  float yf[2][PART_LEN1];
-  float fft[PART_LEN2];
-  float y[PART_LEN];
+static void EchoSubtraction(
+    AecCore* aec,
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    int metrics_mode,
+    int extended_filter_enabled,
+    float normal_mu,
+    float normal_error_threshold,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float* const y,
+    float x_pow[PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    PowerLevel* linout_level,
+    float echo_subtractor_output[PART_LEN]) {
+  float s_fft[2][PART_LEN1];
+  float e_extended[PART_LEN2];
+  float s_extended[PART_LEN2];
+  float *s;
   float e[PART_LEN];
-  float ef[2][PART_LEN1];
-  float scale;
+  float e_fft[2][PART_LEN1];
   int i;
-  memset(yf, 0, sizeof(yf));
+  memset(s_fft, 0, sizeof(s_fft));
 
-  // Produce frequency domain echo estimate.
-  WebRtcAec_FilterFar(aec->num_partitions,
-                      aec->xfBufBlockPos,
-                      aec->xfBuf,
-                      aec->wfBuf,
-                      yf);
+  // Produce echo estimate s_fft.
+  WebRtcAec_FilterFar(num_partitions,
+                      x_fft_buf_block_pos,
+                      x_fft_buf,
+                      h_fft_buf,
+                      s_fft);
 
-  // Inverse fft to obtain echo estimate and error.
-  FrequencyToTime(yf, fft);
-
-  // Extract the output signal and compute the time-domain error.
-  scale = 2.0f / PART_LEN2;
+  // Compute the time-domain echo estimate s.
+  InverseFft(s_fft, s_extended);
+  s = &s_extended[PART_LEN];
   for (i = 0; i < PART_LEN; ++i) {
-    y[i] = fft[PART_LEN + i] * scale;  // fft scaling.
-    e[i] = nearend_ptr[i] - y[i];
+    s[i] *= 2.0f;
   }
 
-  // Error fft
-  memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
-  memset(fft, 0, sizeof(float) * PART_LEN);
-  memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
-  TimeToFrequency(fft, ef, 0);
+  // Compute the time-domain echo prediction error.
+  for (i = 0; i < PART_LEN; ++i) {
+    e[i] = y[i] - s[i];
+  }
+
+  // Compute the frequency domain echo prediction error.
+  memset(e_extended, 0, sizeof(float) * PART_LEN);
+  memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN);
+  Fft(e_extended, e_fft);
 
   RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file,
-                          &ef[0][0],
-                          sizeof(ef[0][0]) * PART_LEN1 * 2);
+                          &e_fft[0][0],
+                          sizeof(e_fft[0][0]) * PART_LEN1 * 2);
 
-  if (aec->metricsMode == 1) {
+  if (metrics_mode == 1) {
     // Note that the first PART_LEN samples in fft (before transformation) are
     // zero. Hence, the scaling by two in UpdateLevel() should not be
     // performed. That scaling is taken care of in UpdateMetrics() instead.
-    UpdateLevel(&aec->linoutlevel, ef);
+    UpdateLevel(linout_level, e_fft);
   }
 
   // Scale error signal inversely with far power.
-  WebRtcAec_ScaleErrorSignal(aec->extended_filter_enabled,
-                             aec->normal_mu,
-                             aec->normal_error_threshold,
-                             aec->xPow,
-                             ef);
-  WebRtcAec_FilterAdaptation(aec, fft, ef);
-
-
-  RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, e, PART_LEN);
+  WebRtcAec_ScaleErrorSignal(extended_filter_enabled,
+                             normal_mu,
+                             normal_error_threshold,
+                             x_pow,
+                             e_fft);
+  WebRtcAec_FilterAdaptation(num_partitions,
+                             x_fft_buf_block_pos,
+                             x_fft_buf,
+                             e_fft,
+                             h_fft_buf);
+  memcpy(echo_subtractor_output, e, sizeof(float) * PART_LEN);
 }
 
 
@@ -1274,6 +1263,7 @@
   const float gInitNoise[2] = {0.999f, 0.001f};
 
   float nearend[PART_LEN];
+  float echo_subtractor_output[PART_LEN];
   float* nearend_ptr = NULL;
   float output[PART_LEN];
   float outputH[NUM_HIGH_BANDS_MAX][PART_LEN];
@@ -1313,7 +1303,7 @@
 
   // Near fft
   memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
-  TimeToFrequency(fft, df, 0);
+  Fft(fft, df);
 
   // Power smoothing
   for (i = 0; i < PART_LEN1; i++) {
@@ -1392,9 +1382,26 @@
          sizeof(float) * PART_LEN1);
 
   // Perform echo subtraction.
-  EchoSubtraction(aec, nearend_ptr);
+  EchoSubtraction(aec,
+                  aec->num_partitions,
+                  aec->xfBufBlockPos,
+                  aec->metricsMode,
+                  aec->extended_filter_enabled,
+                  aec->normal_mu,
+                  aec->normal_error_threshold,
+                  aec->xfBuf,
+                  nearend_ptr,
+                  aec->xPow,
+                  aec->wfBuf,
+                  &aec->linoutlevel,
+                  echo_subtractor_output);
+
+  RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN);
 
   // Perform echo suppression.
+  memcpy(aec->eBuf + PART_LEN,
+         echo_subtractor_output,
+         sizeof(float) * PART_LEN);
   EchoSuppression(aec, output, outputH_ptr);
 
   if (aec->metricsMode == 1) {
@@ -1737,12 +1744,12 @@
   }
   // Convert far-end partition to the frequency domain without windowing.
   memcpy(fft, farend, sizeof(float) * PART_LEN2);
-  TimeToFrequency(fft, xf, 0);
+  Fft(fft, xf);
   WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1);
 
   // Convert far-end partition to the frequency domain with windowing.
-  memcpy(fft, farend, sizeof(float) * PART_LEN2);
-  TimeToFrequency(fft, xf, 1);
+  WindowData(fft, farend);
+  Fft(fft, xf);
   WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1);
 }
 

diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h
index 5d660de..881cac6 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_internal.h
+++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h

@@ -172,20 +172,23 @@
 
 typedef void (*WebRtcAecFilterFar)(
     int num_partitions,
-    int x_fft_buffer_block_pos,
-    float x_fft_buffer[2][kExtendedNumPartitions * PART_LEN1],
-    float h_fft_buffer[2][kExtendedNumPartitions * PART_LEN1],
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
     float y_fft[2][PART_LEN1]);
 extern WebRtcAecFilterFar WebRtcAec_FilterFar;
 typedef void (*WebRtcAecScaleErrorSignal)(int extended_filter_enabled,
                                           float normal_mu,
                                           float normal_error_threshold,
-                                          float* xPow,
+                                          float x_pow[PART_LEN1],
                                           float ef[2][PART_LEN1]);
 extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
-typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec,
-                                          float* fft,
-                                          float ef[2][PART_LEN1]);
+typedef void (*WebRtcAecFilterAdaptation)(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float e_fft[2][PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]);
 extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
 typedef void (*WebRtcAecOverdriveAndSuppress)(AecCore* aec,
                                               float hNl[PART_LEN1],

diff --git a/webrtc/modules/audio_processing/aec/aec_core_mips.c b/webrtc/modules/audio_processing/aec/aec_core_mips.c
index 58e471f..dfd4dc6 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_mips.c
+++ b/webrtc/modules/audio_processing/aec/aec_core_mips.c

@@ -322,24 +322,24 @@
 
 void WebRtcAec_FilterFar_mips(
     int num_partitions,
-    int xfBufBlockPos,
-    float xfBuf[2][kExtendedNumPartitions * PART_LEN1],
-    float wfBuf[2][kExtendedNumPartitions * PART_LEN1],
-    float yf[2][PART_LEN1]) {
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float y_fft[2][PART_LEN1]) {
   int i;
   for (i = 0; i < num_partitions; i++) {
-    int xPos = (i + xfBufBlockPos) * PART_LEN1;
+    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
     int pos = i * PART_LEN1;
     // Check for wrap
-    if (i + xfBufBlockPos >=  num_partitions) {
+    if (i + x_fft_buf_block_pos >=  num_partitions) {
       xPos -=  num_partitions * (PART_LEN1);
     }
-    float* yf0 = yf[0];
-    float* yf1 = yf[1];
-    float* aRe = xfBuf[0] + xPos;
-    float* aIm = xfBuf[1] + xPos;
-    float* bRe = wfBuf[0] + pos;
-    float* bIm = wfBuf[1] + pos;
+    float* yf0 = y_fft[0];
+    float* yf1 = y_fft[1];
+    float* aRe = x_fft_buf[0] + xPos;
+    float* aIm = x_fft_buf[1] + xPos;
+    float* bRe = h_fft_buf[0] + pos;
+    float* bIm = h_fft_buf[1] + pos;
     float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
     int len = PART_LEN1 >> 1;
 
@@ -437,23 +437,27 @@
   }
 }
 
-void WebRtcAec_FilterAdaptation_mips(AecCore* aec,
-                                     float* fft,
-                                     float ef[2][PART_LEN1]) {
+void WebRtcAec_FilterAdaptation_mips(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float e_fft[2][PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
+  float fft[PART_LEN2];
   int i;
-  for (i = 0; i < aec->num_partitions; i++) {
-    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
+  for (i = 0; i < num_partitions; i++) {
+    int xPos = (i + x_fft_buf_block_pos)*(PART_LEN1);
     int pos;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
-      xPos -= aec->num_partitions * PART_LEN1;
+    if (i + x_fft_buf_block_pos >= num_partitions) {
+      xPos -= num_partitions * PART_LEN1;
     }
 
     pos = i * PART_LEN1;
-    float* aRe = aec->xfBuf[0] + xPos;
-    float* aIm = aec->xfBuf[1] + xPos;
-    float* bRe = ef[0];
-    float* bIm = ef[1];
+    float* aRe = x_fft_buf[0] + xPos;
+    float* aIm = x_fft_buf[1] + xPos;
+    float* bRe = e_fft[0];
+    float* bIm = e_fft[1];
     float* fft_tmp;
 
     float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12;
@@ -578,8 +582,8 @@
       );
     }
     aec_rdft_forward_128(fft);
-    aRe = aec->wfBuf[0] + pos;
-    aIm = aec->wfBuf[1] + pos;
+    aRe = h_fft_buf[0] + pos;
+    aIm = h_fft_buf[1] + pos;
     __asm __volatile (
       ".set     push                                    \n\t"
       ".set     noreorder                               \n\t"
@@ -707,7 +711,7 @@
 void WebRtcAec_ScaleErrorSignal_mips(int extended_filter_enabled,
                                      float normal_mu,
                                      float normal_error_threshold,
-                                     float *x_pow,
+                                     float x_pow[PART_LEN1],
                                      float ef[2][PART_LEN1]) {
   const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
   const float error_threshold = extended_filter_enabled

diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.c b/webrtc/modules/audio_processing/aec/aec_core_neon.c
index ba74ebe..6c94a2e 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_neon.c
+++ b/webrtc/modules/audio_processing/aec/aec_core_neon.c

@@ -34,48 +34,49 @@
   return aRe * bIm + aIm * bRe;
 }
 
-static void FilterFarNEON(int num_partitions,
-                          int xfBufBlockPos,
-                          float xfBuf[2][kExtendedNumPartitions * PART_LEN1],
-                          float wfBuf[2][kExtendedNumPartitions * PART_LEN1],
-                          float yf[2][PART_LEN1]) {
+static void FilterFarNEON(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float y_fft[2][PART_LEN1]) {
   int i;
   for (i = 0; i < num_partitions; i++) {
     int j;
-    int xPos = (i + xfBufBlockPos) * PART_LEN1;
+    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
     int pos = i * PART_LEN1;
     // Check for wrap
-    if (i + xfBufBlockPos >= num_partitions) {
+    if (i + x_fft_buf_block_pos >= num_partitions) {
       xPos -= num_partitions * PART_LEN1;
     }
 
     // vectorized code (four at once)
     for (j = 0; j + 3 < PART_LEN1; j += 4) {
-      const float32x4_t xfBuf_re = vld1q_f32(&xfBuf[0][xPos + j]);
-      const float32x4_t xfBuf_im = vld1q_f32(&xfBuf[1][xPos + j]);
-      const float32x4_t wfBuf_re = vld1q_f32(&wfBuf[0][pos + j]);
-      const float32x4_t wfBuf_im = vld1q_f32(&wfBuf[1][pos + j]);
-      const float32x4_t yf_re = vld1q_f32(&yf[0][j]);
-      const float32x4_t yf_im = vld1q_f32(&yf[1][j]);
-      const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re);
-      const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im);
-      const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im);
-      const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re);
-      const float32x4_t g = vaddq_f32(yf_re, e);
-      const float32x4_t h = vaddq_f32(yf_im, f);
-      vst1q_f32(&yf[0][j], g);
-      vst1q_f32(&yf[1][j], h);
+      const float32x4_t x_fft_buf_re = vld1q_f32(&x_fft_buf[0][xPos + j]);
+      const float32x4_t x_fft_buf_im = vld1q_f32(&x_fft_buf[1][xPos + j]);
+      const float32x4_t h_fft_buf_re = vld1q_f32(&h_fft_buf[0][pos + j]);
+      const float32x4_t h_fft_buf_im = vld1q_f32(&h_fft_buf[1][pos + j]);
+      const float32x4_t y_fft_re = vld1q_f32(&y_fft[0][j]);
+      const float32x4_t y_fft_im = vld1q_f32(&y_fft[1][j]);
+      const float32x4_t a = vmulq_f32(x_fft_buf_re, h_fft_buf_re);
+      const float32x4_t e = vmlsq_f32(a, x_fft_buf_im, h_fft_buf_im);
+      const float32x4_t c = vmulq_f32(x_fft_buf_re, h_fft_buf_im);
+      const float32x4_t f = vmlaq_f32(c, x_fft_buf_im, h_fft_buf_re);
+      const float32x4_t g = vaddq_f32(y_fft_re, e);
+      const float32x4_t h = vaddq_f32(y_fft_im, f);
+      vst1q_f32(&y_fft[0][j], g);
+      vst1q_f32(&y_fft[1][j], h);
     }
     // scalar code for the remaining items.
     for (; j < PART_LEN1; j++) {
-      yf[0][j] += MulRe(xfBuf[0][xPos + j],
-                        xfBuf[1][xPos + j],
-                        wfBuf[0][pos + j],
-                        wfBuf[1][pos + j]);
-      yf[1][j] += MulIm(xfBuf[0][xPos + j],
-                        xfBuf[1][xPos + j],
-                        wfBuf[0][pos + j],
-                        wfBuf[1][pos + j]);
+      y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j],
+                           x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j],
+                           h_fft_buf[1][pos + j]);
+      y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j],
+                           x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j],
+                           h_fft_buf[1][pos + j]);
     }
   }
 }
@@ -128,7 +129,7 @@
 static void ScaleErrorSignalNEON(int extended_filter_enabled,
                                  float normal_mu,
                                  float normal_error_threshold,
-                                 float *x_pow,
+                                 float x_pow[PART_LEN1],
                                  float ef[2][PART_LEN1]) {
   const float mu = extended_filter_enabled ? kExtendedMu : normal_mu;
   const float error_threshold = extended_filter_enabled ?
@@ -185,34 +186,37 @@
   }
 }
 
-static void FilterAdaptationNEON(AecCore* aec,
-                                 float* fft,
-                                 float ef[2][PART_LEN1]) {
+static void FilterAdaptationNEON(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float e_fft[2][PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
+  float fft[PART_LEN2];
   int i;
-  const int num_partitions = aec->num_partitions;
   for (i = 0; i < num_partitions; i++) {
-    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
     int pos = i * PART_LEN1;
     int j;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= num_partitions) {
+    if (i + x_fft_buf_block_pos >= num_partitions) {
       xPos -= num_partitions * PART_LEN1;
     }
 
     // Process the whole array...
     for (j = 0; j < PART_LEN; j += 4) {
-      // Load xfBuf and ef.
-      const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
-      const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
-      const float32x4_t ef_re = vld1q_f32(&ef[0][j]);
-      const float32x4_t ef_im = vld1q_f32(&ef[1][j]);
-      // Calculate the product of conjugate(xfBuf) by ef.
+      // Load x_fft_buf and e_fft.
+      const float32x4_t x_fft_buf_re = vld1q_f32(&x_fft_buf[0][xPos + j]);
+      const float32x4_t x_fft_buf_im = vld1q_f32(&x_fft_buf[1][xPos + j]);
+      const float32x4_t e_fft_re = vld1q_f32(&e_fft[0][j]);
+      const float32x4_t e_fft_im = vld1q_f32(&e_fft[1][j]);
+      // Calculate the product of conjugate(x_fft_buf) by e_fft.
       //   re(conjugate(a) * b) = aRe * bRe + aIm * bIm
       //   im(conjugate(a) * b)=  aRe * bIm - aIm * bRe
-      const float32x4_t a = vmulq_f32(xfBuf_re, ef_re);
-      const float32x4_t e = vmlaq_f32(a, xfBuf_im, ef_im);
-      const float32x4_t c = vmulq_f32(xfBuf_re, ef_im);
-      const float32x4_t f = vmlsq_f32(c, xfBuf_im, ef_re);
+      const float32x4_t a = vmulq_f32(x_fft_buf_re, e_fft_re);
+      const float32x4_t e = vmlaq_f32(a, x_fft_buf_im, e_fft_im);
+      const float32x4_t c = vmulq_f32(x_fft_buf_re, e_fft_im);
+      const float32x4_t f = vmlsq_f32(c, x_fft_buf_im, e_fft_re);
       // Interleave real and imaginary parts.
       const float32x4x2_t g_n_h = vzipq_f32(e, f);
       // Store
@@ -220,10 +224,10 @@
       vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]);
     }
     // ... and fixup the first imaginary entry.
-    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
-                   -aec->xfBuf[1][xPos + PART_LEN],
-                   ef[0][PART_LEN],
-                   ef[1][PART_LEN]);
+    fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN],
+                   -x_fft_buf[1][xPos + PART_LEN],
+                   e_fft[0][PART_LEN],
+                   e_fft[1][PART_LEN]);
 
     aec_rdft_inverse_128(fft);
     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
@@ -241,21 +245,21 @@
     aec_rdft_forward_128(fft);
 
     {
-      const float wt1 = aec->wfBuf[1][pos];
-      aec->wfBuf[0][pos + PART_LEN] += fft[1];
+      const float wt1 = h_fft_buf[1][pos];
+      h_fft_buf[0][pos + PART_LEN] += fft[1];
       for (j = 0; j < PART_LEN; j += 4) {
-        float32x4_t wtBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
-        float32x4_t wtBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
+        float32x4_t wtBuf_re = vld1q_f32(&h_fft_buf[0][pos + j]);
+        float32x4_t wtBuf_im = vld1q_f32(&h_fft_buf[1][pos + j]);
         const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]);
         const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]);
         const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4);
         wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]);
         wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]);
 
-        vst1q_f32(&aec->wfBuf[0][pos + j], wtBuf_re);
-        vst1q_f32(&aec->wfBuf[1][pos + j], wtBuf_im);
+        vst1q_f32(&h_fft_buf[0][pos + j], wtBuf_re);
+        vst1q_f32(&h_fft_buf[1][pos + j], wtBuf_im);
       }
-      aec->wfBuf[1][pos] = wt1;
+      h_fft_buf[1][pos] = wt1;
     }
   }
 }

diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.c b/webrtc/modules/audio_processing/aec/aec_core_sse2.c
index b874c98..5b950ad 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_sse2.c
+++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.c

@@ -29,51 +29,52 @@
   return aRe * bIm + aIm * bRe;
 }
 
-static void FilterFarSSE2(int num_partitions,
-                          int xfBufBlockPos,
-                          float xfBuf[2][kExtendedNumPartitions * PART_LEN1],
-                          float wfBuf[2][kExtendedNumPartitions * PART_LEN1],
-                          float yf[2][PART_LEN1]) {
+static void FilterFarSSE2(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float y_fft[2][PART_LEN1]) {
 
   int i;
   for (i = 0; i < num_partitions; i++) {
     int j;
-    int xPos = (i + xfBufBlockPos) * PART_LEN1;
+    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
     int pos = i * PART_LEN1;
     // Check for wrap
-    if (i + xfBufBlockPos >= num_partitions) {
+    if (i + x_fft_buf_block_pos >= num_partitions) {
       xPos -= num_partitions * (PART_LEN1);
     }
 
     // vectorized code (four at once)
     for (j = 0; j + 3 < PART_LEN1; j += 4) {
-      const __m128 xfBuf_re = _mm_loadu_ps(&xfBuf[0][xPos + j]);
-      const __m128 xfBuf_im = _mm_loadu_ps(&xfBuf[1][xPos + j]);
-      const __m128 wfBuf_re = _mm_loadu_ps(&wfBuf[0][pos + j]);
-      const __m128 wfBuf_im = _mm_loadu_ps(&wfBuf[1][pos + j]);
-      const __m128 yf_re = _mm_loadu_ps(&yf[0][j]);
-      const __m128 yf_im = _mm_loadu_ps(&yf[1][j]);
-      const __m128 a = _mm_mul_ps(xfBuf_re, wfBuf_re);
-      const __m128 b = _mm_mul_ps(xfBuf_im, wfBuf_im);
-      const __m128 c = _mm_mul_ps(xfBuf_re, wfBuf_im);
-      const __m128 d = _mm_mul_ps(xfBuf_im, wfBuf_re);
+      const __m128 x_fft_buf_re = _mm_loadu_ps(&x_fft_buf[0][xPos + j]);
+      const __m128 x_fft_buf_im = _mm_loadu_ps(&x_fft_buf[1][xPos + j]);
+      const __m128 h_fft_buf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]);
+      const __m128 h_fft_buf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]);
+      const __m128 y_fft_re = _mm_loadu_ps(&y_fft[0][j]);
+      const __m128 y_fft_im = _mm_loadu_ps(&y_fft[1][j]);
+      const __m128 a = _mm_mul_ps(x_fft_buf_re, h_fft_buf_re);
+      const __m128 b = _mm_mul_ps(x_fft_buf_im, h_fft_buf_im);
+      const __m128 c = _mm_mul_ps(x_fft_buf_re, h_fft_buf_im);
+      const __m128 d = _mm_mul_ps(x_fft_buf_im, h_fft_buf_re);
       const __m128 e = _mm_sub_ps(a, b);
       const __m128 f = _mm_add_ps(c, d);
-      const __m128 g = _mm_add_ps(yf_re, e);
-      const __m128 h = _mm_add_ps(yf_im, f);
-      _mm_storeu_ps(&yf[0][j], g);
-      _mm_storeu_ps(&yf[1][j], h);
+      const __m128 g = _mm_add_ps(y_fft_re, e);
+      const __m128 h = _mm_add_ps(y_fft_im, f);
+      _mm_storeu_ps(&y_fft[0][j], g);
+      _mm_storeu_ps(&y_fft[1][j], h);
     }
     // scalar code for the remaining items.
     for (; j < PART_LEN1; j++) {
-      yf[0][j] += MulRe(xfBuf[0][xPos + j],
-                        xfBuf[1][xPos + j],
-                        wfBuf[0][pos + j],
-                        wfBuf[1][pos + j]);
-      yf[1][j] += MulIm(xfBuf[0][xPos + j],
-                        xfBuf[1][xPos + j],
-                        wfBuf[0][pos + j],
-                        wfBuf[1][pos + j]);
+      y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j],
+                           x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j],
+                           h_fft_buf[1][pos + j]);
+      y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j],
+                           x_fft_buf[1][xPos + j],
+                           h_fft_buf[0][pos + j],
+                           h_fft_buf[1][pos + j]);
     }
   }
 }
@@ -81,7 +82,7 @@
 static void ScaleErrorSignalSSE2(int extended_filter_enabled,
                                  float normal_mu,
                                  float normal_error_threshold,
-                                 float *x_pow,
+                                 float x_pow[PART_LEN1],
                                  float ef[2][PART_LEN1]) {
   const __m128 k1e_10f = _mm_set1_ps(1e-10f);
   const __m128 kMu = extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
@@ -147,33 +148,36 @@
   }
 }
 
-static void FilterAdaptationSSE2(AecCore* aec,
-                                 float* fft,
-                                 float ef[2][PART_LEN1]) {
+static void FilterAdaptationSSE2(
+    int num_partitions,
+    int x_fft_buf_block_pos,
+    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
+    float e_fft[2][PART_LEN1],
+    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
+  float fft[PART_LEN2];
   int i, j;
-  const int num_partitions = aec->num_partitions;
   for (i = 0; i < num_partitions; i++) {
-    int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
+    int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1);
     int pos = i * PART_LEN1;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= num_partitions) {
+    if (i + x_fft_buf_block_pos >= num_partitions) {
       xPos -= num_partitions * PART_LEN1;
     }
 
     // Process the whole array...
     for (j = 0; j < PART_LEN; j += 4) {
-      // Load xfBuf and ef.
-      const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);
-      const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);
-      const __m128 ef_re = _mm_loadu_ps(&ef[0][j]);
-      const __m128 ef_im = _mm_loadu_ps(&ef[1][j]);
-      // Calculate the product of conjugate(xfBuf) by ef.
+      // Load x_fft_buf and e_fft.
+      const __m128 x_fft_buf_re = _mm_loadu_ps(&x_fft_buf[0][xPos + j]);
+      const __m128 x_fft_buf_im = _mm_loadu_ps(&x_fft_buf[1][xPos + j]);
+      const __m128 e_fft_re = _mm_loadu_ps(&e_fft[0][j]);
+      const __m128 e_fft_im = _mm_loadu_ps(&e_fft[1][j]);
+      // Calculate the product of conjugate(x_fft_buf) by e_fft.
       //   re(conjugate(a) * b) = aRe * bRe + aIm * bIm
       //   im(conjugate(a) * b)=  aRe * bIm - aIm * bRe
-      const __m128 a = _mm_mul_ps(xfBuf_re, ef_re);
-      const __m128 b = _mm_mul_ps(xfBuf_im, ef_im);
-      const __m128 c = _mm_mul_ps(xfBuf_re, ef_im);
-      const __m128 d = _mm_mul_ps(xfBuf_im, ef_re);
+      const __m128 a = _mm_mul_ps(x_fft_buf_re, e_fft_re);
+      const __m128 b = _mm_mul_ps(x_fft_buf_im, e_fft_im);
+      const __m128 c = _mm_mul_ps(x_fft_buf_re, e_fft_im);
+      const __m128 d = _mm_mul_ps(x_fft_buf_im, e_fft_re);
       const __m128 e = _mm_add_ps(a, b);
       const __m128 f = _mm_sub_ps(c, d);
       // Interleave real and imaginary parts.
@@ -184,10 +188,10 @@
       _mm_storeu_ps(&fft[2 * j + 4], h);
     }
     // ... and fixup the first imaginary entry.
-    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
-                   -aec->xfBuf[1][xPos + PART_LEN],
-                   ef[0][PART_LEN],
-                   ef[1][PART_LEN]);
+    fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN],
+                   -x_fft_buf[1][xPos + PART_LEN],
+                   e_fft[0][PART_LEN],
+                   e_fft[1][PART_LEN]);
 
     aec_rdft_inverse_128(fft);
     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
@@ -205,11 +209,11 @@
     aec_rdft_forward_128(fft);
 
     {
-      float wt1 = aec->wfBuf[1][pos];
-      aec->wfBuf[0][pos + PART_LEN] += fft[1];
+      float wt1 = h_fft_buf[1][pos];
+      h_fft_buf[0][pos + PART_LEN] += fft[1];
       for (j = 0; j < PART_LEN; j += 4) {
-        __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
-        __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
+        __m128 wtBuf_re = _mm_loadu_ps(&h_fft_buf[0][pos + j]);
+        __m128 wtBuf_im = _mm_loadu_ps(&h_fft_buf[1][pos + j]);
         const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);
         const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);
         const __m128 fft_re =
@@ -218,10 +222,10 @@
             _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1));
         wtBuf_re = _mm_add_ps(wtBuf_re, fft_re);
         wtBuf_im = _mm_add_ps(wtBuf_im, fft_im);
-        _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re);
-        _mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im);
+        _mm_storeu_ps(&h_fft_buf[0][pos + j], wtBuf_re);
+        _mm_storeu_ps(&h_fft_buf[1][pos + j], wtBuf_im);
       }
-      aec->wfBuf[1][pos] = wt1;
+      h_fft_buf[1][pos] = wt1;
     }
   }
 }
commit	7e43138c0890bd99f627fa061b122c8d5716a99d	[log] [tgz]
author	peah <peah@webrtc.org>	Fri Nov 27 15:24:27 2015 -0800
committer	Commit bot <commit-bot@chromium.org>	Fri Nov 27 23:24:32 2015 +0000
tree	9b4ddce83033f6f504e36b65ea32994f3895470d
parent	19822d63c1162c0989bc60b524db8c5c27815f72 [diff]